setwd("~/Downloads/MTH 250")
library(NCStats)
library(FSAdata) # for data
library(FSA) # for vbFuns(), vbStarts(), confint.bootCase()
library(car) # for Boot()
library(tidyverse)
angcpe <- read.csv("clean.csv",stringsAsFactors = FALSE) %>%
mutate(success=factor(success,levels = c("failure","success")))
xtabs(~state,data = angcpe)
xtabs(~county,data = angcpe)
df <- filterD(angcpe, county %in% c("Vilas","Iron","Oneida","Price"))
For this graph I was interested in looking at the catch per effort of anglers that fished Escanaba Lake in Vilas county. To do this I used creel data from 2014-2019 and divided the total catch by the total effort (how many hours spent on the water). This gives me my catch per effort (CPE) which I then categorized into success and failure, with a CPE of zero as a failure and a CPE of greater than zero as success. From this point I used the zip codes provided by the anglers to determine the county they are from, and then I filtered my original data to just anglers from Iron, Price, Oneida, and Vilas counties (the surrounding counties of Escanaba Lake). I fitted a logistic regression and plotted the results. Then I facetted the regression to each of the counties. From the graphs I would make the assumptions that the longer you fish the greater the probability of success, catching at least one fish. I would also like to point out that for Iron and Price counties the sample size was less than 30 which is why those two graphs do not support the trend of Oneida and Vilas counties. I chose to add a point on the graph in which the total effort equaled 50% probability of success, this is to illustrate just how hard it is to catch a fish and to compare just one value across the counties. The color schemes and title sizing is all for the intentions of a scientific audience, as well as the blue best fit line.
glm2 <- glm(success~effort*county,data = df,
family = binomial(link="logit"))
#R> Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
(cf2<-coef(glm2))
#R> (Intercept) effort countyOneida
#R> 2.3222937 -0.5410616 -4.0280635
#R> countyPrice countyVilas effort:countyOneida
#R> -59.3627432 -3.7236879 0.8178397
#R> effort:countyPrice effort:countyVilas
#R> 13.4608532 0.8124338
sum_50 <- data.frame(
county=c("Iron","Oneida","Price","Vilas"),
prop=rep(0.5,4),
effort=c(
-cf2[[1]]/cf2[[2]],
-(cf2[[1]]+cf2[[3]])/(cf2[[2]]+cf2[[6]]),
-(cf2[[1]]+cf2[[4]])/(cf2[[2]]+cf2[[7]]),
-(cf2[[1]]+cf2[[5]])/(cf2[[2]]+cf2[[8]])
)
)
point_lab <- data.frame(
label=c("4.29 hr","6.16 hr","4.41 hr","5.16 hr"),
county=c("Iron","Oneida","Price","Vilas"),
x=c(5,3.3,6.5,12),
y=c(0.75,0.75,0.34,0.34),
prop=rep(0.5,4),
effort=c(
-cf2[[1]]/cf2[[2]],
-(cf2[[1]]+cf2[[3]])/(cf2[[2]]+cf2[[6]]),
-(cf2[[1]]+cf2[[4]])/(cf2[[2]]+cf2[[7]]),
-(cf2[[1]]+cf2[[5]])/(cf2[[2]]+cf2[[8]])
))
theme_MJL <- theme_bw()+
theme(
plot.title = element_text(face = "bold",size = rel(1.5)),
axis.title = element_text(face = "bold",size = rel(1.25)),
panel.grid.major = element_line(color="gray90",linetype = "dashed",size=rel(.25)),
panel.grid.minor = element_blank(),
legend.margin = margin(t=1,r=1,b=1,l=1,unit="mm"),
legend.key.size = unit(2,units="mm"),
panel.spacing = unit(1,units="mm"),
strip.background = element_rect(fill="black",color="black"),
strip.text = element_text(color="white",face="bold",size=rel(1.15),
margin=margin(t=1,r=1,b=1,l=1,unit="mm"))
)
ggplot(data = df,mapping = aes(x=effort,y=as.numeric(success)-1))+
geom_point(alpha=0.25,size=2)+
geom_smooth(method = "glm",method.args=list(family="binomial"))+
facet_wrap(vars(county),scales="free_x")+
geom_segment(data = sum_50,mapping = aes(x=-Inf,y=prop,xend=effort,yend=prop),
linetype = "dashed")+
geom_segment(data = sum_50,mapping = aes(x=effort,y=prop,xend=effort,yend=-Inf),
linetype = "dashed")+
scale_y_continuous(name="Probability of a Successful Fishing Trip")+
scale_x_continuous(name="Total Effort (hr)")+
geom_segment(data=point_lab,mapping = aes(x=x,y=y,xend=effort,yend=prop),
arrow=arrow(length=unit(3,"mm"),angle=15,type="closed"))+
geom_label(data=point_lab,mapping = aes(x=x,y=y,label=label,hjust="left"))+
labs(title="Angler Fishing Success on Escanaba Lake",
subtitle="By Angler’s Home County")+
theme_MJL
#R> `geom_smooth()` using formula 'y ~ x'
#R> Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
For this graph I was interested in trying to predict the mean length in milimeters at any age for Black Crappie. I used data from the LTER in a variety of surveys. I started this graph by loading, data wrangling, and filtering my data to Waupaca county. From there I fitted a von Bertalanffy growth model which is used to predict the mean length of a fish at any age. Then I plotted the von Bertalanffy growth model using predicted values. I chose to use a gray scale because the graph is meant for a scientific audience.
library(FSA)
library(car)
library(tidyverse)
setwd("~/Downloads/MTH 250")
blc <- read.csv("fish_raw_data_BLC_Fyke.csv",na.strings = c("","-"),
stringsAsFactors = FALSE) %>%
rename(species=`Species`,tl=`Length.or.Lower.Length.MM`,
age=`Age.observed.annuli`) %>%
mutate(tl=as.numeric(tl)) %>%
filterD(!is.na(age),!is.na(tl),tl>0) %>%
select(County,Survey.Year,species,tl,age)
#R> Warning: NAs introduced by coercion
str(blc)
#R> 'data.frame': 5393 obs. of 5 variables:
#R> $ County : chr "PRICE" "PRICE" "PRICE" "PRICE" ...
#R> $ Survey.Year: int 1992 1992 1992 1992 1992 1992 1992 1992 1992 1992 ...
#R> $ species : chr "BLACK CRAPPIE" "BLACK CRAPPIE" "BLACK CRAPPIE" "BLACK CRAPPIE" ...
#R> $ tl : num 83.8 198.1 203.2 210.8 215.9 ...
#R> $ age : int 1 5 4 5 5 4 4 4 5 4 ...
xtabs(~County,data=blc)
#R> County
#R> CLARK COLUMBIA DANE DODGE DOUGLAS FLORENCE
#R> 5 224 197 64 119 58
#R> GREEN LAKE IOWA JEFFERSON MANITOWOC MARINETTE MARQUETTE
#R> 89 71 79 691 790 98
#R> OCONTO PORTAGE PRICE RUSK SAUK SAWYER
#R> 900 9 57 6 169 48
#R> SHAWANO WAUPACA WAUSHARA
#R> 979 478 262
blc <- filterD(blc,County=="WAUPACA")
vb <- vbFuns(param="Typical")
f.starts <- vbStarts(tl~age,data=blc)
f.fit <- nls(tl~vb(age,Linf,K,t0),data=blc,start=f.starts)
coef(f.fit)
#R> Linf K t0
#R> 314.0180431 0.3166279 0.1712702
f.boot1 <- car::Boot(f.fit)
#R> Loading required namespace: boot
#R>
#R> Number of bootstraps was 998 out of 999 attempted
confint(f.boot1)
#R> Bootstrap bca confidence intervals
#R>
#R> 2.5 % 97.5 %
#R> Linf 293.6330495 346.5004542
#R> K 0.2431984 0.3914448
#R> t0 -0.1069832 0.4076209
predict2 <- function(x) predict(x,data.frame(age=seq(-1,15,by=0.2)))
predict2(f.fit) # just a check
#R> [1] -140.986465 -113.066394 -86.859560 -62.260834 -39.171540
#R> [6] -17.499055 2.843559 21.937905 39.860581 56.683482
#R> [11] 72.474093 87.295757 101.207932 114.266426 126.523622
#R> [16] 138.028689 148.827780 158.964216 168.478658 177.409272
#R> [21] 185.791885 193.660122 201.045547 207.977787 214.484649
#R> [26] 220.592235 226.325047 231.706081 236.756923 241.497834
#R> [31] 245.947832 250.124769 254.045400 257.725453 261.179689
#R> [36] 264.421966 267.465290 270.321868 273.003161 275.519924
#R> [41] 277.882253 280.099624 282.180932 284.134527 285.968245
#R> [46] 287.689442 289.305022 290.821467 292.244860 293.580910
#R> [51] 294.834977 296.012092 297.116976 298.154063 299.127511
#R> [56] 300.041227 300.898875 301.703896 302.459519 303.168775
#R> [61] 303.834510 304.459394 305.045933 305.596482 306.113247
#R> [66] 306.598303 307.053594 307.480948 307.882078 308.258595
#R> [71] 308.612007 308.943733 309.255104 309.547368 309.821699
#R> [76] 310.079195 310.320892 310.547757 310.760701 310.960579
#R> [81] 311.148192
f.boot2 <- car::Boot(f.fit,f=predict2)
#R>
#R> Number of bootstraps was 998 out of 999 attempted
preds1 <- data.frame(seq(-1,15,by=0.2),
predict2(f.fit),
confint(f.boot2))
names(preds1) <- c("age","fit","LCI","UCI")
headtail(preds1)
#R> age fit LCI UCI
#R> V1 -1.0 -140.98647 -226.6449 -87.66924
#R> V2 -0.8 -113.06639 -185.7089 -66.38734
#R> V3 -0.6 -86.85956 -149.4648 -46.22109
#R> V79 14.6 310.76070 290.6448 334.18438
#R> V80 14.8 310.96058 290.7267 334.57603
#R> V81 15.0 311.14819 290.8025 334.95349
makeVBEqnLabel <- function(fit) {
cfs <- coef(fit)
Linf <- formatC(cfs[["Linf"]],format="f",digits=1)
K <- formatC(cfs[["K"]],format="f",digits=3)
t0 <- cfs[["t0"]]
t0 <- paste0(ifelse(t0<0,"+","-"),formatC(abs(t0),format="f",digits=3))
paste0("TL==",Linf,"~bgroup('(',1-e^{-",K,"~(Age",t0,")},')')")
}
my_title <- expression(paste("Black Crappie", italic("(Pomoxis nigromaculatus)"),
"Growth in Waupaca Co."))
ggplot() +
geom_ribbon(data=preds1,aes(x=age,ymin=LCI,ymax=UCI),fill="gray70")+
geom_point(data=blc,aes(y=tl,x=age),size=2,alpha=0.25) +
geom_line(data=preds1,aes(y=fit,x=age),size=0.55) +
geom_jitter(data=blc,aes(y=tl,x=age),
width=0.15,height=0,alpha=.55)+
scale_y_continuous(name="Total Length (mm)",expand=c(0,0),limit=c(0,400)) +
scale_x_continuous(name="Age (years)",expand=c(0,0),
limits=c(0,11),breaks=seq(0,12,2))+
labs(title=my_title,
subtitle="Von Bertalanffy Growth Model")+
annotate(geom="text",label=makeVBEqnLabel(f.fit),parse=TRUE,
size=4,x=Inf,y=-Inf,hjust=1.1,vjust=-0.5)+
theme_MJL
#R> Warning: Removed 26 row(s) containing missing values (geom_path).