## Initialization

> library(NCStats)
> library(plotrix)  #histStack()
> library(car)      #bootCase()

# Bat Morphology Example

## Background

Researchers measured tthe canine tooth height (among other things) from two subspecies of bats () found in Hawaii. Their primary question was to determine if canine tooth height differed between the subspecies and, more importantly to them, could the canine tooth height be used to predict the subspecies of bat.

> setwd("C:/aaaWork/Web/GitHub/NCMTH207/modules/LogisticRegression")
> str(bat)
'data.frame':   118 obs. of  7 variables:
$subsp : Factor w/ 2 levels "cinereus","semotus": 2 2 2 2 2 2 2 2 2 2 ...$ bodymass   : num  19.5 16.2 17 16.5 14.3 ...
$skulllength: num 1.6 1.55 1.56 1.56 1.53 ...$ canine     : num  0.326 0.308 0.291 0.287 0.301 0.305 0.277 0.313 0.289 0.293 ...
$coronoid : num 0.303 0.282 0.292 0.303 0.279 0.284 0.286 0.281 0.278 0.28 ...$ wingspan   : num  0.358 0.358 0.359 0.353 0.351 0.361 0.351 0.363 0.34 0.365 ...
$hab : Factor w/ 3 levels "A","B","C": 1 1 1 1 1 1 1 1 2 2 ... > bat$canine10 <- bat$canine*10 > xlbl <- "Canine Tooth Height (x10,mm)" > ylbl <- "Subspecies Code" ## Explorations > hist(canine10~subsp,data=bat,breaks=seq(2.6,3.8,0.1),xlim=c(2.6,3.8), xlab=xlbl,nrow=2,ncol=1) > histStack(canine10~subsp,data=bat,breaks=seq(2.6,3.8,0.1),xlim=c(2.6,3.8), col="gray.colors",xlab=xlbl,right=FALSE) > plotBinResp(subsp~canine10,data=bat,breaks=seq(2.6,3.8,0.1),xlim=c(2.6,3.8), xlab=xlbl,ylab=ylbl) ## Model Fitting and Examination > glm1 <- glm(subsp~canine10,data=bat,family=binomial) > fitPlot(glm1,breaks=seq(2.6,3.8,0.1),xlab=xlbl,ylab=ylbl,main="") > summary(glm1)  Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) 35.516 6.428 5.525 3.29e-08 canine10 -11.112 2.005 -5.543 2.97e-08 (Dispersion parameter for binomial family taken to be 1) Null deviance: 163.040 on 117 degrees of freedom Residual deviance: 97.178 on 116 degrees of freedom AIC: 101.18 Number of Fisher Scoring iterations: 5 > confint(glm1) Waiting for profiling to be done...  2.5 % 97.5 % (Intercept) 24.21685 49.66132 canine10 -15.52430 -7.58941 ## Lecture Support – Interpretation of Slope > x1 <- c(3,4) # purposely pick two canine10 values 1 unit apart > ( p1 <- predict(glm1,data.frame(canine10=x1)) )  1 2 2.179940 -8.931994  > p1[2]-p1[1]  2 -11.11193  > exp(-11.112) # back-transformed 'slope' from summary() above [1] 1.493206e-05 > ( bp1 <- exp(p1) )  1 2 8.8457728416 0.0001320944  > bp1[2]/bp1[1]  2 1.493306e-05  ## Predicting Probabilities > ( p2 <- predict(glm1,data.frame(canine10=c(3,3.4))) )  1 2 2.179940 -2.264834  > exp(p2)/(1+exp(p2))  1 2 0.89843357 0.09407761  > predict(glm1,data.frame(canine10=c(3,3.4)),type="response")  1 2 0.89843357 0.09407761  ## X for a Certain Proportion > ( cfs <- coef(glm1) ) (Intercept) canine10 35.51574 -11.11193  > p <- 0.5 # canine tooth height where subspecies ratio is 50/50 > ( x <- (log(p/(1-p))-cfs[1])/cfs[2] ) (Intercept) 3.19618  > predict(glm1,data.frame(canine10=x),type="response") # test the answer (Intercept) 0.5  > p <- 0.9 # length where 90% are semotus, 10% are cinereus > (log(p/(1-p))-cfs[1])/cfs[2] (Intercept) 2.998444  ## Bootstrapping > bc1 <- bootCase(glm1) # bootstrapping, be patient! > head(bc1)  (Intercept) canine10 [1,] 47.63420 -14.748328 [2,] 42.07082 -13.054643 [3,] 27.77390 -8.869351 [4,] 31.33596 -9.864708 [5,] 39.55912 -12.234736 [6,] 41.28284 -12.840528 > confint(bc1)  95% LCI 95% UCI (Intercept) 26.88953 52.688428 canine10 -16.50715 -8.470158 > #hist(bc1,breaks=15) > predProb <- function(x,alpha,beta1) exp(alpha+beta1*x)/(1+exp(alpha+beta1*x)) > predProb(3,coef(glm1)[1],coef(glm1)[2]) (Intercept) 0.8984336  > p3 <- predProb(3,bc1[,1],bc1[,2]) > head(p3) [1] 0.9673657 0.9481860 0.7623938 0.8509199 0.9455720 0.9405459 > quantile(p3,c(0.025,0.975))  2.5% 97.5% 0.8129323 0.9688198  > predX <- function(p,alpha,beta1) (log(p/(1-p))-alpha)/beta1 > x50 <- predX(0.5,bc1[,1],bc1[,2]) > head(x50) [1] 3.229803 3.222671 3.131447 3.176572 3.233345 3.215042 > quantile(x50,c(0.025,0.975))  2.5% 97.5% 3.150347 3.243366  # Solar Panel Offer Data ## Background Households were asked if they would accept an offer to put solar panels on the roof of their house if they would receive a 50% subsidy from the state. Demographic variables for each household such as income, size, monthly mortgage payment, and age of the head of household were also recorded. The researchers had three primary questions: • At what income will 25% of households accept? • What is the probability of acceptance for a household with an income of$80000?
• How much does odds of acceptance change for each $1000 increase in household income? > sp <- read.csv("SolarOffer.csv") > str(sp) 'data.frame': 30 obs. of 5 variables:$ income   : int  80 60 35 45 29 43 34 104 102 59 ...
$age : int 30 34 25 27 23 28 24 43 46 36 ...$ takeoffer: Factor w/ 2 levels "decline","take": 2 2 1 1 1 1 1 2 2 1 ...
$mortgage : int 2000 2100 1500 1800 1900 1600 1500 2400 2700 2600 ...$ famsize  : int  4 3 2 4 2 3 1 5 3 2 ...
> xlbl <- "Family Income (1000s)"
> ylbl <- "Response to Offer"

## Analysis

> plotBinResp(takeoffer~income,data=sp,xlab=xlbl,ylab=ylbl,breaks=seq(25,135,5))

> glm2 <- glm(takeoffer~income,data=sp,family=binomial)
> summary(glm2)

Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -12.84503    6.16398  -2.084   0.0372
income        0.19934    0.09774   2.039   0.0414

(Dispersion parameter for binomial family taken to be 1)

Null deviance: 41.455  on 29  degrees of freedom
Residual deviance: 13.035  on 28  degrees of freedom
AIC: 17.035

Number of Fisher Scoring iterations: 8
> fitPlot(glm2,xlab=xlbl,ylab=ylbl,breaks=seq(25,135,5),main="")

> p <- 0.25
> (log(p/(1-p))-coef(glm2)[1])/coef(glm2)[2]
(Intercept)
58.92646 
> predict(glm2,data.frame(income=80),type="response")
        1
0.9569831