Packages and Data

# Work from the course directory so the relative CSV path used when reading
# the data resolves.
# NOTE(review): setwd() ties the script to one machine's folder layout;
# consider an RStudio project or a path built with file.path() instead.
setwd("~/mth250")
# Attach the tidyverse packages; the pipelines in this file rely on
# dplyr verbs, tidyr reshaping and the %>% pipe.
library(tidyverse)

# Load the raw spreadsheet export and tidy it into one row per state with
# numeric Grade.* columns.
#   - `skip = 1` skips the first line of the file before the header is read.
#   - Empty cells and the "‡" marker are read as NA.
df <- read.csv("Dropout Final Project Data.csv", skip = 1,
               stringsAsFactors = FALSE, na.strings = c("", "‡")) %>%
  # Drop the first three data rows.
  slice(-c(1:3)) %>%
  # Keep at most the first 60 remaining rows. head() replaces
  # slice(-(61:nrow(.))): 61:nrow(.) counts backwards when the table has
  # 60 rows or fewer and would then delete rows that should be kept.
  head(60) %>%
  # Discard every column whose name starts with "X" (the prefix read.csv
  # assigns to columns without a usable header).
  select(-starts_with("X")) %>%
  rename(state = State.or.jurisdiction) %>%
  filter(!is.na(state)) %>%
  # Remove the "2" character from state labels
  # (presumably a footnote marker -- confirm against the raw file).
  mutate(state = gsub("2", "", state)) %>%
  # Strip thousands separators and convert every Grade column to numeric.
  # Values that are still non-numeric become NA with a coercion warning.
  mutate(across(starts_with("Grade"),
                function(x) as.numeric(gsub(",", "", x))))

#R> Warning in (function (x) : NAs introduced by coercion

#R> Warning in (function (x) : NAs introduced by coercion

#R> Warning in (function (x) : NAs introduced by coercion

#R> Warning in (function (x) : NAs introduced by coercion

#R> Warning in (function (x) : NAs introduced by coercion

#R> Warning in (function (x) : NAs introduced by coercion

# Preview the first six rows of the cleaned wide table.
head(df, n = 6L)

#R>        state Grade.7 Grade.8 Grade.9 Grade.10 Grade.11 Grade.12
#R> 1    Alabama      NA      NA   64569    53604    47538    42672
#R> 2     Alaska   10886   10857   11934    10664     9625     8766
#R> 3    Arizona   80011   78747   92028    82686    75384    71161
#R> 4   Arkansas      NA      NA   38359    35869    31995    28698
#R> 5 California  496757  502927  554935   501661   462940   412605
#R> 6   Colorado   59548   59397   64446    57678    52770    50387

# Reshape to long format: one row per state/grade pair.
# `names_prefix` strips the literal "Grade." prefix from the start of each
# column name (the dot is escaped so it no longer matches any character),
# leaving only the grade number, which is then converted to numeric.
df2 <- df %>%
  pivot_longer(-state, names_to = "Grade", names_prefix = "Grade\\.",
               values_to = "Dropouts") %>%
  mutate(Grade = as.numeric(Grade))
# Preview the first six rows of the long table.
head(df2)

#R> # A tibble: 6 x 3
#R>   state   Grade Dropouts
#R>   <chr>   <dbl>    <dbl>
#R> 1 Alabama     7       NA
#R> 2 Alabama     8       NA
#R> 3 Alabama     9    64569
#R> 4 Alabama    10    53604
#R> 5 Alabama    11    47538
#R> 6 Alabama    12    42672

Module 2

Shelby Winchell

22 May, 2020

Packages and Data

Dropouts per State in the 2004–2005 School Year