Packages and Data

setwd("~/Downloads/MTH 250")
library(NCStats)
library(FSAdata) # for data
library(FSA)     # for vbFuns(), vbStarts(), confint.bootCase()
library(car)     # for Boot()
library(tidyverse)

angcpe <- read.csv("clean.csv",stringsAsFactors = FALSE) %>% 
  mutate(success=factor(success,levels = c("failure","success")))
xtabs(~state,data = angcpe)
xtabs(~county,data = angcpe)
df <- filterD(angcpe, county %in% c("Vilas","Iron","Oneida","Price"))

Logistic Regression for Probability of Success

For this graph I was interested in looking at the catch per effort of anglers that fished Escanaba Lake in Vilas county. To do this I used creel data from 2014-2019 and divided the total catch by the total effort (how many hours spent on the water). This gives me my catch per effort (CPE) which I then categorized into success and failure, with a CPE of zero as a failure and a CPE of greater than zero as success. From this point I used the zip codes provided by the anglers to determine the county they are from, and then I filtered my original data to just anglers from Iron, Price, Oneida, and Vilas counties (the surrounding counties of Escanaba Lake). I fitted a logistic regression and plotted the results. Then I facetted the regression to each of the counties. From the graphs I would make the assumptions that the longer you fish the greater the probability of success, catching at least one fish. I would also like to point out that for Iron and Price counties the sample size was less than 30 which is why those two graphs do not support the trend of Oneida and Vilas counties. I chose to add a point on the graph in which the total effort equaled 50% probability of success, this is to illustrate just how hard it is to catch a fish and to compare just one value across the counties. The color schemes and title sizing is all for the intentions of a scientific audience, as well as the blue best fit line.

glm2 <- glm(success~effort*county,data = df,
            family = binomial(link="logit"))

#R> Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

(cf2<-coef(glm2))

#R>         (Intercept)              effort        countyOneida 
#R>           2.3222937          -0.5410616          -4.0280635 
#R>         countyPrice         countyVilas effort:countyOneida 
#R>         -59.3627432          -3.7236879           0.8178397 
#R>  effort:countyPrice  effort:countyVilas 
#R>          13.4608532           0.8124338

sum_50 <- data.frame(
  county=c("Iron","Oneida","Price","Vilas"),
  prop=rep(0.5,4),
  effort=c(
    -cf2[[1]]/cf2[[2]],
    -(cf2[[1]]+cf2[[3]])/(cf2[[2]]+cf2[[6]]),
    -(cf2[[1]]+cf2[[4]])/(cf2[[2]]+cf2[[7]]),
    -(cf2[[1]]+cf2[[5]])/(cf2[[2]]+cf2[[8]])
  )
)
point_lab <- data.frame(
  label=c("4.29 hr","6.16 hr","4.41 hr","5.16 hr"),
  county=c("Iron","Oneida","Price","Vilas"),
  x=c(5,3.3,6.5,12),
  y=c(0.75,0.75,0.34,0.34),
  prop=rep(0.5,4),
  effort=c(
    -cf2[[1]]/cf2[[2]],
    -(cf2[[1]]+cf2[[3]])/(cf2[[2]]+cf2[[6]]),
    -(cf2[[1]]+cf2[[4]])/(cf2[[2]]+cf2[[7]]),
    -(cf2[[1]]+cf2[[5]])/(cf2[[2]]+cf2[[8]])
))

theme_MJL <- theme_bw()+
  theme(
    plot.title = element_text(face = "bold",size = rel(1.5)),
    axis.title = element_text(face = "bold",size = rel(1.25)),
    panel.grid.major = element_line(color="gray90",linetype = "dashed",size=rel(.25)),
    panel.grid.minor = element_blank(),
    legend.margin = margin(t=1,r=1,b=1,l=1,unit="mm"),
    legend.key.size = unit(2,units="mm"),
    panel.spacing = unit(1,units="mm"),
    strip.background = element_rect(fill="black",color="black"),
    strip.text = element_text(color="white",face="bold",size=rel(1.15),
                              margin=margin(t=1,r=1,b=1,l=1,unit="mm"))
  )

ggplot(data = df,mapping = aes(x=effort,y=as.numeric(success)-1))+
  geom_point(alpha=0.25,size=2)+
  geom_smooth(method = "glm",method.args=list(family="binomial"))+
  facet_wrap(vars(county),scales="free_x")+
  geom_segment(data = sum_50,mapping = aes(x=-Inf,y=prop,xend=effort,yend=prop),
               linetype = "dashed")+
  geom_segment(data = sum_50,mapping = aes(x=effort,y=prop,xend=effort,yend=-Inf),
               linetype = "dashed")+
  scale_y_continuous(name="Probability of a Successful Fishing Trip")+
  scale_x_continuous(name="Total Effort (hr)")+
  geom_segment(data=point_lab,mapping = aes(x=x,y=y,xend=effort,yend=prop),
               arrow=arrow(length=unit(3,"mm"),angle=15,type="closed"))+
  geom_label(data=point_lab,mapping = aes(x=x,y=y,label=label,hjust="left"))+
  labs(title="Angler Fishing Success on Escanaba Lake",
       subtitle="By Angler’s Home County")+
  theme_MJL

#R> `geom_smooth()` using formula 'y ~ x'

#R> Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Von Bertalanffy Growth Formula for BLC

For this graph I was interested in trying to predict the mean length in milimeters at any age for Black Crappie. I used data from the LTER in a variety of surveys. I started this graph by loading, data wrangling, and filtering my data to Waupaca county. From there I fitted a von Bertalanffy growth model which is used to predict the mean length of a fish at any age. Then I plotted the von Bertalanffy growth model using predicted values. I chose to use a gray scale because the graph is meant for a scientific audience.

library(FSA)     
library(car)     
library(tidyverse)
setwd("~/Downloads/MTH 250")

blc <- read.csv("fish_raw_data_BLC_Fyke.csv",na.strings = c("","-"),
                stringsAsFactors = FALSE) %>%
  rename(species=`Species`,tl=`Length.or.Lower.Length.MM`,
         age=`Age.observed.annuli`) %>%
  mutate(tl=as.numeric(tl)) %>%
  filterD(!is.na(age),!is.na(tl),tl>0) %>%
  select(County,Survey.Year,species,tl,age)

#R> Warning: NAs introduced by coercion

str(blc)

#R> 'data.frame':   5393 obs. of  5 variables:
#R>  $ County     : chr  "PRICE" "PRICE" "PRICE" "PRICE" ...
#R>  $ Survey.Year: int  1992 1992 1992 1992 1992 1992 1992 1992 1992 1992 ...
#R>  $ species    : chr  "BLACK CRAPPIE" "BLACK CRAPPIE" "BLACK CRAPPIE" "BLACK CRAPPIE" ...
#R>  $ tl         : num  83.8 198.1 203.2 210.8 215.9 ...
#R>  $ age        : int  1 5 4 5 5 4 4 4 5 4 ...

xtabs(~County,data=blc)

#R> County
#R>      CLARK   COLUMBIA       DANE      DODGE    DOUGLAS   FLORENCE 
#R>          5        224        197         64        119         58 
#R> GREEN LAKE       IOWA  JEFFERSON  MANITOWOC  MARINETTE  MARQUETTE 
#R>         89         71         79        691        790         98 
#R>     OCONTO    PORTAGE      PRICE       RUSK       SAUK     SAWYER 
#R>        900          9         57          6        169         48 
#R>    SHAWANO    WAUPACA   WAUSHARA 
#R>        979        478        262

blc <- filterD(blc,County=="WAUPACA")

vb <- vbFuns(param="Typical")
f.starts <- vbStarts(tl~age,data=blc)
f.fit <- nls(tl~vb(age,Linf,K,t0),data=blc,start=f.starts)
coef(f.fit)

#R>        Linf           K          t0 
#R> 314.0180431   0.3166279   0.1712702

f.boot1 <- car::Boot(f.fit)

#R> Loading required namespace: boot

#R> 
#R>  Number of bootstraps was 998 out of 999 attempted

confint(f.boot1)

#R> Bootstrap bca confidence intervals
#R> 
#R>            2.5 %      97.5 %
#R> Linf 293.6330495 346.5004542
#R> K      0.2431984   0.3914448
#R> t0    -0.1069832   0.4076209

predict2 <- function(x) predict(x,data.frame(age=seq(-1,15,by=0.2)))
predict2(f.fit)  # just a check

#R>  [1] -140.986465 -113.066394  -86.859560  -62.260834  -39.171540
#R>  [6]  -17.499055    2.843559   21.937905   39.860581   56.683482
#R> [11]   72.474093   87.295757  101.207932  114.266426  126.523622
#R> [16]  138.028689  148.827780  158.964216  168.478658  177.409272
#R> [21]  185.791885  193.660122  201.045547  207.977787  214.484649
#R> [26]  220.592235  226.325047  231.706081  236.756923  241.497834
#R> [31]  245.947832  250.124769  254.045400  257.725453  261.179689
#R> [36]  264.421966  267.465290  270.321868  273.003161  275.519924
#R> [41]  277.882253  280.099624  282.180932  284.134527  285.968245
#R> [46]  287.689442  289.305022  290.821467  292.244860  293.580910
#R> [51]  294.834977  296.012092  297.116976  298.154063  299.127511
#R> [56]  300.041227  300.898875  301.703896  302.459519  303.168775
#R> [61]  303.834510  304.459394  305.045933  305.596482  306.113247
#R> [66]  306.598303  307.053594  307.480948  307.882078  308.258595
#R> [71]  308.612007  308.943733  309.255104  309.547368  309.821699
#R> [76]  310.079195  310.320892  310.547757  310.760701  310.960579
#R> [81]  311.148192

f.boot2 <- car::Boot(f.fit,f=predict2)

#R> 
#R>  Number of bootstraps was 998 out of 999 attempted

preds1 <- data.frame(seq(-1,15,by=0.2),
                     predict2(f.fit),
                     confint(f.boot2))
names(preds1) <- c("age","fit","LCI","UCI")
headtail(preds1)

#R>      age        fit       LCI       UCI
#R> V1  -1.0 -140.98647 -226.6449 -87.66924
#R> V2  -0.8 -113.06639 -185.7089 -66.38734
#R> V3  -0.6  -86.85956 -149.4648 -46.22109
#R> V79 14.6  310.76070  290.6448 334.18438
#R> V80 14.8  310.96058  290.7267 334.57603
#R> V81 15.0  311.14819  290.8025 334.95349

makeVBEqnLabel <- function(fit) {
  cfs <- coef(fit)
  Linf <- formatC(cfs[["Linf"]],format="f",digits=1)
  K <- formatC(cfs[["K"]],format="f",digits=3)
  t0 <- cfs[["t0"]]
  t0 <- paste0(ifelse(t0<0,"+","-"),formatC(abs(t0),format="f",digits=3))
  paste0("TL==",Linf,"~bgroup('(',1-e^{-",K,"~(Age",t0,")},')')")
}

my_title <- expression(paste("Black Crappie", italic("(Pomoxis nigromaculatus)"), 
       "Growth in Waupaca Co."))

ggplot() + 
  geom_ribbon(data=preds1,aes(x=age,ymin=LCI,ymax=UCI),fill="gray70")+
  geom_point(data=blc,aes(y=tl,x=age),size=2,alpha=0.25) +
  geom_line(data=preds1,aes(y=fit,x=age),size=0.55) +
  geom_jitter(data=blc,aes(y=tl,x=age),
              width=0.15,height=0,alpha=.55)+
  scale_y_continuous(name="Total Length (mm)",expand=c(0,0),limit=c(0,400)) +
  scale_x_continuous(name="Age (years)",expand=c(0,0),
                     limits=c(0,11),breaks=seq(0,12,2))+
  labs(title=my_title,
       subtitle="Von Bertalanffy Growth Model")+
  annotate(geom="text",label=makeVBEqnLabel(f.fit),parse=TRUE,
           size=4,x=Inf,y=-Inf,hjust=1.1,vjust=-0.5)+
  theme_MJL

#R> Warning: Removed 26 row(s) containing missing values (geom_path).

Presentation Graphs

Michael Lant

21 May, 2020

Packages and Data

Logistic Regression for Probability of Success

Von Bertalanffy Growth Formula for BLC