Data from: ECOTOX Knowledgebase (https://cfpub.epa.gov/ecotox/explore.cfm?cgid=36)
Aquatic and Terrestrial data downloaded on April 8, 2020
Data were summarized as the number of records and the number of references for each chemical CASRN (Chemical Abstracts Service Registry Number) and species group
# Display order of the species groups along the figure axis
sp_grp_order <- c(
  "All", "Mammals", "Fish", "Amphibians", "Reptiles", "Birds",
  "Crustaceans", "Molluscs", "Worms", "Insects/Spiders", "Other Invertebrates",
  "Flowers, Trees, Shrubs, Ferns", "Algae", "Fungi", "Miscellaneous"
)

# Read the summary table and convert it to a data.table by reference
all_refsbychem <- read.csv("PFAS_allrefsbychem.csv", header = TRUE)
setDT(all_refsbychem)

# Recode Group as a factor whose levels follow the display order above
all_refsbychem$Group <- factor(all_refsbychem$Group, levels = sp_grp_order)

# Preview the first rows
head(all_refsbychem)
#R> CASRN Chemical.Name Name
#R> 1: 76-05-1 2,2,2-Trifluoroacetic acid TFAA
#R> 2: 76-05-1 2,2,2-Trifluoroacetic acid TFAA
#R> 3: 76-05-1 2,2,2-Trifluoroacetic acid TFAA
#R> 4: 76-05-1 2,2,2-Trifluoroacetic acid TFAA
#R> 5: 307-24-4 2,2,3,3,4,4,5,5,6,6,6-Undecafluorohexanoic acid PFHxA
#R> 6: 307-24-4 2,2,3,3,4,4,5,5,6,6,6-Undecafluorohexanoic acid PFHxA
#R> Group n.refs n.records
#R> 1: Fish 2 6
#R> 2: Other Invertebrates 1 3
#R> 3: Flowers, Trees, Shrubs, Ferns 4 157
#R> 4: All 7 166
#R> 5: Fish 4 94
#R> 6: Amphibians 1 1
# Extract category 'All', which is collapsed across species groups
all_refs <- all_refsbychem[Group == "All"]

# Use # of references in 'All' to order the chemicals: rows are sorted by
# ascending n.refs and CASRN becomes a factor whose levels follow that order.
# The pipe already supplies the data as setDT()'s first argument, so setDT()
# is called with no further arguments; passing `all_refs` again would land in
# `keep.rownames` and add a spurious `rn` column of row names.
all_refs <- all_refs %>%
  arrange(n.refs) %>%
  mutate(CASRN = factor(CASRN, levels = unique(CASRN))) %>%
  select(-Chemical.Name) %>%
  setDT()

# The most-referenced chemicals are at the bottom of the table
tail(all_refs)
#R> CASRN Name Group n.refs n.records
#R> 1: 272451-65-7 Flubendiamide All 43 520
#R> 2: 45298-90-6 PFOS ion All 61 1776
#R> 3: 86479-06-3 Hexaflumuron All 74 864
#R> 4: 103055-07-8 Lufenuron All 76 1596
#R> 5: 335-67-1 PFOA All 128 2690
#R> 6: 2795-39-3 K-PFOS All 141 3307
# Heatmap - all chemicals
# One tile per (species group, chemical) pair, shaded from white to dark red
# by the number of references. The y-axis levels follow the CASRN order held
# in `all_refs`.
p <- all_refsbychem %>%
  arrange(n.refs) %>%
  mutate(CASRN = factor(CASRN, levels = unique(all_refs$CASRN))) %>%
  ggplot(mapping = aes(x = Group, y = CASRN)) +
  geom_tile(mapping = aes(fill = n.refs)) +
  scale_fill_gradient(low = "white", high = "darkred") +
  theme_dark() +
  labs(
    title = "# of Refs by Chemical CASRN & Species Group",
    x = "",
    y = "CASRN",
    fill = "# Refs"
  )

# Full-size version: species groups labelled along the top edge with slanted
# bold text, and small y-axis text so every CASRN fits
p1 <- p +
  scale_x_discrete(position = "top") +
  theme(
    axis.text.x = element_text(face = "bold", angle = 45, hjust = 0),
    axis.text.y = element_text(size = 6)
  )
p1
# Heatmap - Top 20
# Use short names for chemicals ('Name') on the heat map of the Top 20
my_labs <- as.character(all_refs[, Name])

# The y-axis levels are ordered by ascending number of references, so the
# most-referenced chemicals occupy the last positions on the axis. The window
# is derived from the data rather than hard-coded (previously 93-112), so the
# zoom stays correct if the number of chemicals changes.
# NOTE(review): assumes `all_refs` has one row per CASRN, so that
# length(my_labs) equals the number of y-axis levels - confirm.
n_chem <- length(my_labs)
n_top <- min(20L, n_chem)
first_top <- n_chem - n_top + 1L

p2 <- p +
  # Zoom (without dropping data) to the last n_top positions of the y-axis
  coord_cartesian(ylim = c(first_top, n_chem)) +
  theme(
    axis.text.x = element_text(face = "bold", angle = 45, hjust = 0),
    axis.ticks.y = element_blank()
  ) +
  # Blank labels for chemicals outside the window, short names inside it
  scale_y_discrete(labels = c(rep("", n_chem - n_top), tail(my_labs, n_top))) +
  scale_x_discrete(position = "top") +
  # Print the reference count on each tile
  geom_text(aes(label = n.refs), size = 4) +
  labs(
    x = "", y = "",
    title = paste("Top", n_top, "Chemicals")
  )
p2