# Regression of skull length on body weight for the Fitch mammal data
# (Stat2Data package): diagnostics, one outlier removal, a cube-root
# transformation of weight, and a primates-only fit.

# Install the data package only when it is missing, so that re-running the
# script does not re-download it every time.
if (!requireNamespace("Stat2Data", quietly = TRUE)) {
  install.packages("Stat2Data")
}
library(Stat2Data)

data(Fitch)
print(Fitch)
#             Species     Order      Wt Skull Palate
# 1            Coyote Carnivora  14.500 19.70   9.47
# 2      Grizzly bear Carnivora 306.000 41.10  20.30
# 3         Wolverine Carnivora  15.000 16.70   8.24
# 4              Lion Carnivora 175.000 37.40  18.10
# 5           Leopard Carnivora  50.300 22.60   9.41
# 6         Gray Wolf Carnivora  33.000 27.80  13.60
# 7           Kit fox Carnivora   1.820 11.39   5.78
# 8        Fennec Fox Carnivora   1.250  8.86   4.25
# 9             Tiger Carnivora 113.000 32.70  15.20
# 10           Jaguar Carnivora  89.000 26.20  10.98
# 11           Cougar Carnivora  70.900 21.30   8.40
# 12       Maned wolf Carnivora  23.800 22.70  10.78
# 13           Fisher Carnivora   6.800 13.06   6.29
# 14     Leopard seal Carnivora 270.000 28.60  12.60
# 15          Gorilla  Primates 170.000 30.80  11.82
# 16           Baboon  Primates  32.500 16.70   7.31
# 17      Mouse lemur  Primates   0.070  3.19   1.33
# 18        Bush-baby  Primates   0.210  4.45   1.59
# 19         Capuhcin  Primates   2.200  9.80   3.55
# 20     Black howler  Primates   6.700 12.88   4.89
# 21          Macaque  Primates   3.820 10.90   4.53
# 22 Proboscis monkey  Primates  14.000 12.16   4.54
# 23        Orangutan  Primates  54.400 23.95  10.30
# 24      Dwarf lemur  Primates   0.389  5.68   2.44
# 25            Avahi  Primates   0.900  5.44   1.50
# 26   Sportive lemur  Primates   0.700  4.94   1.73
# 27            Potto  Primates   1.200  6.36   2.25
# 28        Monk saki  Primates   1.200  8.95   3.30

# Draw the standard lm diagnostic plots on a 2 x 2 grid, restoring the
# previous graphics layout afterwards so later plots are not affected.
plot_diagnostics <- function(model) {
  old_par <- par(mfrow = c(2, 2))
  on.exit(par(old_par), add = TRUE)
  plot(model)
  invisible(model)
}

# Model 1: raw weight, all species ------------------------------------------
# Columns are supplied through `data =` rather than attach(), so it is always
# explicit which data frame a model was fitted to.
model1 <- lm(Skull ~ Wt, data = Fitch)
plot_diagnostics(model1)
### There are several problems:
### (1) the residuals vs fitted plot (top left) is strongly curved
### (2) the QQ plot is largely OK, but there are three marked points
### (3) point 14 is flagged as having high leverage in the Cook's distance
###     plot
### Check point 14 and see if you can think why it might be an outlier.
print(Fitch[14, ])
###         Species     Order  Wt Skull Palate
### 14 Leopard seal Carnivora 270  28.6   12.6
### It is the only aquatic species, and mass is less important in water,
### so we will remove it.
newdata <- Fitch[-14, ]

# Model 2: raw weight, leopard seal removed ---------------------------------
model2 <- lm(Skull ~ Wt, data = newdata)
plot_diagnostics(model2)
### The outlier problem has gone away, but the top-left plot is still bad.
### Think about the data: Wt is a function of volume, whereas Skull is a
### linear length, so we might expect that taking the cube root of Wt will
### overcome the problem.

# Model 3: cube root of weight ----------------------------------------------
# Use the exact exponent 1/3; 0.33 is only an approximation of a cube root.
newdata$cuberootWt <- newdata$Wt^(1 / 3)
model3 <- lm(Skull ~ cuberootWt, data = newdata)
plot_diagnostics(model3)
### This has nicely flattened our residuals plot, but we now have a somewhat
### concave QQ plot.
### Playing around with the data, we can find no better solution, so we now
### have to think about the outliers (6, 11, 12), all Carnivora.
### The way to do this is to see whether removing each in turn affects our
### results.
print(summary(model3))
# NOTE: the output below was recorded from an earlier run that used the
# exponent 0.33 and attach()ed data; with the exact cube root the numbers
# (and the Call line) will differ slightly. Re-run to refresh.
#Call:
#lm(formula = Skull ~ cuberootWt)
#Residuals:
#    Min      1Q  Median      3Q     Max
#-4.7740 -1.6727 -0.0696  0.9425  7.2000
#
#Coefficients:
#            Estimate Std. Error t value Pr(>|t|)
#(Intercept)   1.5317     0.9483   1.615    0.119
#cuberootWt    6.0145     0.3092  19.451   <2e-16 ***
#---
#Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
#Residual standard error: 2.704 on 25 degrees of freedom
#Multiple R-squared:  0.938, Adjusted R-squared:  0.9355
#F-statistic: 378.3 on 1 and 25 DF,  p-value: < 2.2e-16

# Model 4: primates only ----------------------------------------------------
# Select rows by Order instead of by position. Skull and cuberootWt are plain
# vectors, so two-dimensional indexing such as Skull[14:27, ] fails with
# "incorrect number of dimensions".
primates <- newdata[newdata$Order == "Primates", ]
model4 <- lm(Skull ~ cuberootWt, data = primates)
plot_diagnostics(model4)
print(summary(model4))
# NOTE: the summary previously pasted here was a verbatim copy of the model3
# output (25 residual degrees of freedom; the 14 primates leave only 12), so
# it did not describe this model. Re-run the line above and paste the
# primates-only output here.