knitr::opts_chunk$set(echo = TRUE, warning = F, message = F, fig.path = 'figs/', dev.args = list(family = 'serif'))
library(tidyverse)
library(sf)
library(mapview)
library(patchwork)
library(lubridate)
library(ggrepel)
library(condprob2)
library(here)
library(gridExtra)
library(grid)
library(FactoMineR)
library(missMDA)
library(vegan)
library(ggord)
library(scales)
library(margins)
prj <- 4326 # wgs84
source('R/funcs.R')
pbase <- theme_bw(base_family = 'serif', base_size = 14) +
theme(
strip.background = element_blank(),
legend.position = 'top',
legend.title = element_blank()
)
data(wqdat)
data(tsdat)
data(upwell)
# scripps and santa cruz only
stat_pair <- tsdat %>%
filter(location %in% c('Scripps Pier', 'Santa Cruz Wharf')) %>%
filter(year %in% c(2012:2015))
# all wq data as sf
tsdatgeo <- tsdat %>%
select(location, Long, Lat) %>%
unique %>%
st_as_sf(., coords = c('Long', 'Lat'), crs = prj)
knitr::include_graphics(here('figs', 'model.png'))
Locations with lat/lon, summarized by numbers of years sampled:
wqdatgeosum <- wqdat %>%
filter(!is.na(Lat)) %>%
group_by(`Station ID`) %>%
nest %>%
mutate(
nyrs = purrr::map(data, function(x) length(unique(x$Year))),
Lat = purrr::map(data, function(x) mean(x$Lat, na.rm = T)),
Long = purrr::map(data, function(x) mean(x$Long, na.rm = T))
) %>%
dplyr::select(-data) %>%
unnest %>%
st_as_sf(., coords = c('Long', 'Lat'), crs = prj)
mapview(wqdatgeosum, zcol = 'nyrs', layer.name = "Years sampled")
Locations:
mapview(tsdatgeo, homebutton = F, layer.name = F)
Time series (from north to south):
toplo <- tsdat %>%
dplyr::select(date, location, Lat, da, pntot, chla) %>%
mutate(
da = 1000*da, # from ng/mL to mg/L
da.edit = ifelse(da > 400, 401, da),
text.da = ifelse(da > 400, 'Y', 'N'),
zero.da = ifelse(da > 0, 'N', 'Y'),
pn = pntot / 1000, # cell/L to cells/ml
pn.edit = ifelse(pn > 400, 401, pn),
text.pn = ifelse(pn > 400, 'Y', 'N'),
zero.pn = ifelse(pn > 0, 'N', 'Y'),
chla.edit = ifelse(chla > 20, 21, chla),
text.chla = ifelse(chla > 20, 'Y', 'N'),
zero.chla = ifelse(chla > 0, 'N', 'Y')
)
dalabs <- filter(toplo, text.da == 'Y')
pnlabs <- filter(toplo, text.pn == 'Y')
chlalabs <- filter(toplo, text.chla == 'Y')
# point scales
scls <- c(1, 8)
# color ramp
cols <- c('lightgreen', 'tomato1') #c('gray50', 'grey15')
leglab <- expression(paste('conc. (mg ', L^-1, ')'))
brks <- c(0, 50, 100, 150, 200, 400)
labs <- c('>0-50', '50-100', '100-150', '150-200', '200-400', '>400')
p1 <- ggplot(toplo, aes(x = date, y = location)) +
geom_point(aes(size = da.edit, shape = zero.da, colour = da.edit), alpha = 0.8) +
# geom_label_repel(data = dalabs, aes(label = floor(da)),
# box.padding = 1,
# point.padding = NA,
# size = 4, alpha = 0.7,
# min.segment.length = 0,
# colour = 'tomato1'
# ) +
theme_bw(base_family = 'serif') +
theme(axis.title = element_blank()) +
scale_size_continuous(leglab,
breaks = brks,
range = scls,
labels = labs
) +
scale_colour_gradientn(leglab,
element_text('test'),
colours = cols,
breaks = brks,
labels = labs
) +
scale_shape_manual(values = c(16, 1), guide = F) +
guides(colour = guide_legend(), size = guide_legend()) +
ggtitle('(a) Domoic Acid (DA)')
leglab <- expression(paste('cells (', mL^-1, ')'))
brks <- c(0, 50, 100, 150, 200, 400)
labs <- c('>0-50', '50-100', '100-150', '150-200', '200-400', '>400')
p2 <- ggplot(toplo, aes(x = date, y = location)) +
geom_point(aes(size = pn.edit, shape = zero.pn, colour = pn.edit), alpha = 0.8) +
# geom_label_repel(data = pnlabs, aes(label = floor(pn)),
# box.padding = 1,
# point.padding = NA,
# size = 4, alpha = 0.7,
# min.segment.length = 0,
# colour = 'tomato1'
# ) +
theme_bw(base_family = 'serif') +
theme(axis.title = element_blank()) +
scale_size_continuous(leglab,
breaks = brks,
range = scls,
labels = labs
) +
scale_colour_gradientn(leglab,
element_text('test'),
colours = cols,
breaks = brks,
labels = labs
) +
scale_shape_manual(values = c(16, 1), guide = F) +
guides(colour = guide_legend(), size = guide_legend()) +
ggtitle('(b) Pseudo-nitzschia')
leglab <- expression(paste('conc. (ug ', L^-1, ')'))
brks <- c(0, 1, 5, 10, 15, 20)
labs <- c('>0-1', '1-5', '5-10', '10-15', '15-20', '>20')
p3 <- ggplot(toplo, aes(x = date, y = location)) +
geom_point(aes(size = chla.edit, shape = zero.chla, colour = chla.edit), alpha = 0.8) +
# geom_label_repel(data = chlalabs, aes(label = floor(chla)),
# box.padding = 1,
# point.padding = NA,
# size = 4, alpha = 0.7,
# min.segment.length = 0,
# colour = 'tomato1'
# ) +
theme_bw(base_family = 'serif') +
theme(axis.title = element_blank()) +
scale_size_continuous(leglab,
breaks = brks,
range = scls,
labels = labs
) +
scale_colour_gradientn(leglab,
element_text('test'),
colours = cols,
breaks = brks,
labels = labs
) +
scale_shape_manual(values = c(16, 1), guide = F) +
guides(colour = guide_legend(), size = guide_legend()) +
ggtitle('(c) Chlorophyll-a')
p1 + p2 + p3 + plot_layout(ncol = 1)
Temperature anomalies by station:
pthm <- theme_bw(base_family = 'serif') +
theme(
strip.background = element_blank(),
axis.title.x = element_blank(),
legend.position = 'none'
)
p1 <- ggplot(tsdat, aes(x = date)) +
geom_line(aes(y = temp)) +
geom_line(aes(y = tempseas), colour = 'red') +
facet_wrap(~location, ncol = 1) +
labs(y = 'Temperature (C)') +
pthm
p2 <- ggplot(tsdat, aes(x = date)) +
geom_segment(aes(y = 0, yend = tempanom, xend = date, colour = tempanom)) +
geom_hline(yintercept = 0, color = 'red') +
facet_wrap(~location, ncol = 1) +
labs(y = 'Anomalies (C)') +
scale_colour_gradient2(low = 'tomato1', mid = 'lightgreen', high = 'tomato1', midpoint= 0) +
pthm
p1 + p2 + plot_layout(ncol = 2)
Upwelling:
toplo <- unnest(upwell) %>%
filter(year(date) >= 2008) %>%
mutate(dec_time = decimal_date(date)) %>%
group_by(stat) %>%
rename(upwell = ind) %>%
mutate(
upwellseas = predict(lm(upwell ~ sin(2*pi*dec_time) + cos(2*pi*dec_time), na.action = na.exclude)),
upwellanom = upwell - upwellseas
) %>%
ungroup
pthm <- theme_bw(base_family = 'serif') +
theme(
strip.background = element_blank(),
axis.title.x = element_blank(),
legend.position = 'none'
)
p1 <- ggplot(toplo, aes(x = date)) +
geom_line(aes(y = upwell)) +
geom_line(aes(y = upwellseas), colour = 'red') +
facet_wrap(~stat, ncol = 1) +
labs(y = 'Upwelling (m3/s/100m)') +
pthm
p2 <- ggplot(toplo, aes(x = date)) +
geom_segment(aes(y = 0, yend = upwellanom, xend = date, colour = upwellanom)) +
geom_hline(yintercept = 0, color = 'red') +
facet_wrap(~stat, ncol = 1) +
labs(y = 'Anomalies') +
scale_colour_gradient2(low = 'tomato1', mid = 'lightgreen', high = 'tomato1', midpoint= 0) +
pthm
p1 + p2 + plot_layout(ncol = 2)
Scatterplots of chl vs DA, psuedo nitzschia concentration:
toplo1 <- tsdat %>%
filter(!is.na(chla) & !is.na(da))
ylb <- expression(paste('Domoic acid conc. (ng ', L^-1, ')'))
p1 <- ggplot(toplo1, aes(x = chla, y = da)) +
geom_point(alpha = 0.7) +
theme_bw(base_family = 'serif') +
theme(strip.background = element_blank()) +
facet_wrap(~reorder(location, -Lat), ncol = 1) +
geom_smooth(method = 'lm', colour = 'lightblue') +
scale_x_log10(expression(paste('Chl-a (ug ', L^-1, ')'))) +
scale_y_log10(ylb)
toplo2 <- tsdat %>%
filter(!is.na(chla) & !is.na(pntot))
ylb <- expression(paste('Pseudonitzchia cells ( ', L^-1, ')'))
p2 <- ggplot(toplo2, aes(x = chla, y = pntot)) +
geom_point(alpha = 0.7) +
theme_bw(base_family = 'serif') +
theme(strip.background = element_blank()) +
facet_wrap(~reorder(location, -Lat), ncol = 1) +
geom_smooth(method = 'lm', colour = 'lightblue') +
scale_x_log10(expression(paste('Chl-a (ug ', L^-1, ')'))) +
scale_y_log10(ylb)
p1 + p2 + plot_layout(ncol = 2)
Logistic regression of likelihood of seeing DA with chl, all Bight stations:
toplo1 <- tsdat %>%
mutate(
dapa = ifelse(da > 0, 1, 0)
) %>%
filter(!is.na(chla) & !is.na(da))
leglab <- expression(paste('Domoic acid\nconc. (ng ', L^-1, ')'))
p1 <- ggplot(toplo1, aes(x = chla, y = dapa)) +
geom_point(aes(size = da), alpha = 0.7) +
theme_bw(base_family = 'serif') +
theme(
strip.background = element_blank(),
legend.position = 'top'
) +
facet_wrap(~reorder(location, -Lat), ncol = 1) +
geom_smooth(method = "glm", method.args = list(family = "binomial"), colour = 'lightblue') +
scale_x_log10(expression(paste('Chl-a (ug ', L^-1, ')'))) +
ylab('Likelihood of domoic acid') +
scale_size_continuous(leglab, range = c(1, 8))
toplo2 <- tsdat %>%
filter(!is.na(chla) & !is.na(pntot)) %>%
mutate(
pnhilo = ifelse(pntot > 100, 1, 0)
)
leglab <- expression(paste('Cells (', L^-1, ')'))
p2 <- ggplot(toplo2, aes(x = chla, y = pnhilo)) +
geom_point(aes(size = pntot), alpha = 0.7) +
theme_bw(base_family = 'serif') +
theme(
strip.background = element_blank(),
legend.position = 'top'
) +
facet_wrap(~reorder(location, -Lat), ncol = 1) +
geom_smooth(method = "glm", method.args = list(family = "binomial"), colour = 'lightblue') +
scale_x_log10(expression(paste('Chl-a (ug ', L^-1, ')'))) +
scale_y_continuous(expression(paste('Likelihood of high cell concentration (> 100 ', L^-1, ')'))) +
scale_size_continuous(leglab, range = c(1, 8))
p1 + p2 + plot_layout(ncol = 2)
Conditional probability plots, same response as above but using non-linear method. The first set shows the likelihood of observing domoic acid or cell concentration greater than 100 L\(^{-1}\) at concentrations of chlorophyll less than or equal to those on the x-axis.
locs <- reorder(tsdat$location, -tsdat$Lat) %>% levels
ends <- c('da', 'pntot')
thrs <- c(0, 100)
labs <- c('Likelihood of Domoic acid', expression(paste('Likelihood of high cell concentration (> 100 ', L^-1, ')')))
names(thrs) <- ends
names(labs) <- ends
plos <- crossing(locs, ends) %>%
group_by(locs, ends) %>%
mutate(
plo = pmap(list(locs, ends), function(locs, ends){
# inputs
xval <- tsdat$chla[tsdat$location %in% locs]
yval <- tsdat[tsdat$location %in% locs, ] %>% pull({ends})
thrs <- thrs[ends]
ylbs <- labs[ends]
# cond. prob results
res <- condprob(xval, yval, thrs, 'gt', 'lte', T, R = 100)
# plots
p <- plot(res) +
scale_x_log10() +
theme_bw(base_family = 'serif') +
theme(
axis.title = element_blank(),
plot.title = element_text(hjust = 0.5, size= 8)
) +
ggtitle(locs)
return(p)
})
) %>%
pull(plo)
# text par
gpval <- gpar(fontfamily = 'serif')
# unholy grid arrange
grid.arrange(
arrangeGrob(
left = textGrob(labs[1], rot = 90, gp = gpval),
plos[[1]], plos[[3]], plos[[5]], plos[[7]], plos[[9]], plos[[11]], plos[[13]], ncol = 1
),
arrangeGrob(
left = textGrob(labs[2], rot = 90, gp = gpval),
plos[[2]], plos[[4]], plos[[6]], plos[[8]], plos[[10]], plos[[12]], plos[[14]], ncol = 1
),
bottom = textGrob(expression(paste('Chl-a (ug ', L^-1, '), less than or equal to')), gp = gpval),
ncol = 2
)
The second set shows the likelihood of observing domoic acid or cell concentration greater than 10 L\(^{-1}\) at concentrations of chlorophyll greater than or equal to those on the x-axis.
locs <- reorder(tsdat$location, -tsdat$Lat) %>% levels
ends <- c('da', 'pntot')
thrs <- c(0, 100)
labs <- c('Likelihood of Domoic acid', expression(paste('Likelihood of high cell concentration (> 100 ', L^-1, ')')))
names(thrs) <- ends
names(labs) <- ends
plos <- crossing(locs, ends) %>%
group_by(locs, ends) %>%
mutate(
plo = pmap(list(locs, ends), function(locs, ends){
# inputs
xval <- tsdat$chla[tsdat$location %in% locs]
yval <- tsdat[tsdat$location %in% locs, ] %>% pull({ends})
thrs <- thrs[ends]
ylbs <- labs[ends]
# cond. prob results
res <- condprob(xval, yval, thrs, 'gt', 'gte', T, R = 100)
# plots
p <- plot(res) +
scale_x_log10() +
theme_bw(base_family = 'serif') +
theme(
axis.title = element_blank(),
plot.title = element_text(hjust = 0.5, size= 10)
) +
ggtitle(locs)
return(p)
})
) %>%
pull(plo)
# text par
gpval <- gpar(fontfamily = 'serif')
# unholy grid arrange
grid.arrange(
arrangeGrob(
left = textGrob(labs[1], rot = 90, gp = gpval),
plos[[1]], plos[[3]], plos[[5]], plos[[7]], plos[[9]], plos[[11]], plos[[13]], ncol = 1
),
arrangeGrob(
left = textGrob(labs[2], rot = 90, gp = gpval),
plos[[2]], plos[[4]], plos[[6]], plos[[8]], plos[[10]], plos[[12]], plos[[14]], ncol = 1
),
bottom = textGrob(expression(paste('Chl-a (ug ', L^-1, '), greater than or equal to')), gp = gpval),
ncol = 2
)
Histograms of predictors:
tomod <- tsdat %>%
select(tempanom, upwell, din, phosphorus, ntop, siton) %>%
rename(
`N:P` = ntop,
`Si:N` = siton,
`T,anom` = tempanom
) %>%
gather('var', 'val', everything())
mythm2 <- theme_bw(base_family = 'serif') +
theme(
strip.background = element_blank(),
text = element_text(family = 'serif', size = 12),
strip.placement = 'outside'
)
ggplot(tomod, aes(x = val)) +
geom_histogram() +
facet_wrap(~var, scales = 'free', strip.position = 'bottom') +
labs(x = NULL) +
mythm2
Marginal effects plots for logistic models describing HAB response variables:
# prep
prep <- tsdat %>%
mutate(
Pn = ifelse(pntot > 100, 1, 0),
DA = ifelse(da > 0, 1, 0)
) %>%
rename(
`NP` = ntop,
`SiN` = siton,
`Tanom` = tempanom
) %>%
select(DA, Pn, chla, `Tanom`, upwell, din, phosphorus, `NP`, `SiN`, qrt) %>%
gather('resp', 'rsval', DA, Pn, chla)
# all
mrgsall <- prep %>%
group_by(resp) %>%
nest %>%
mutate(
frm = purrr::pmap(list(resp, data), function(resp, data){
yval <- 'rsval'
if(resp == 'chla')
yval <- 'log10(rsval)'
frm <- paste0(yval, ' ~ `Tanom` + upwell + `NP` + `SiN`')
if(resp %in% c('DA', 'Pn'))
mod <- try({glm(frm, data = data, family = binomial(link = "logit"))})
if(resp %in% 'chla')
mod <- try({glm(frm, data = data, family = gaussian(link = "identity"))})
if(inherits(mod, 'try-error'))
return(NA)
marg <- margins(mod) %>% summary
p <- ggplot(marg, aes(x = factor, y = AME)) +
geom_hline(yintercept = 0, color = 'grey', size = 1) +
geom_point(size = 2) +
geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) +
mythm2 +
scale_y_continuous(limits = c(-0.12, 0.06)) +
labs(x = NULL, y = 'Margin effect', title = paste(resp, 'all dates', sep = ', ')) +
coord_flip()
return(p)
})
)
# by quarter
mrgs <- prep %>%
group_by(qrt, resp) %>%
nest %>%
mutate(
frm = purrr::pmap(list(resp, qrt, data), function(resp, qrt, data){
yval <- 'rsval'
if(resp == 'chla')
yval <- 'log10(rsval)'
frm <- paste0(yval, ' ~ `Tanom` + upwell + `NP` + `SiN`')
if(resp %in% c('DA', 'Pn'))
mod <- try({glm(frm, data = data, family = binomial(link = "logit"))})
if(resp %in% 'chla')
mod <- try({glm(frm, data = data, family = gaussian(link = "identity"))})
if(inherits(mod, 'try-error'))
return(NA)
marg <- margins(mod) %>% summary
p <- ggplot(marg, aes(x = factor, y = AME)) +
geom_hline(yintercept = 0, color = 'grey', size = 1) +
geom_point(size = 2) +
geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) +
mythm2 +
scale_y_continuous(limits = c(-0.12, 0.06)) +
labs(x = NULL, y = 'Margin effect', title = paste(resp, qrt, sep = ', ')) +
coord_flip()
return(p)
})
)
plosall <- mrgsall$frm
plos <- mrgs$frm
grid.arrange(
arrangeGrob(plosall[[1]], plos[[3]], plos[[4]], plos[[1]], plos[[2]], ncol = 1),
arrangeGrob(plosall[[2]], plos[[7]], plos[[8]], plos[[5]], plos[[6]], ncol = 1),
arrangeGrob(plosall[[3]], plos[[11]], plos[[12]], plos[[9]], plos[[10]], ncol = 1),
ncol = 3
)
Histograms of predictors:
tomod <- stat_pair %>%
filter(location %in% 'Santa Cruz Wharf') %>%
select(tempanom, upwell, din, phosphorus, ntop, siton) %>%
rename(
`N:P` = ntop,
`Si:N` = siton,
`T,anom` = tempanom
) %>%
gather('var', 'val', everything())
mythm2 <- theme_bw(base_family = 'serif') +
theme(
strip.background = element_blank(),
text = element_text(family = 'serif', size = 12),
strip.placement = 'outside'
)
ggplot(tomod, aes(x = val)) +
geom_histogram() +
facet_wrap(~var, scales = 'free', strip.position = 'bottom') +
labs(x = NULL) +
mythm2
Marginal effects plots for logistic models describing HAB response variables:
# prep data
prep <- stat_pair %>%
filter(location %in% 'Santa Cruz Wharf') %>%
mutate(
Pn = ifelse(pntot > 100, 1, 0),
DA = ifelse(da > 0, 1, 0)
) %>%
rename(
`NP` = ntop,
`SiN` = siton,
`Tanom` = tempanom
) %>%
select(DA, Pn, chla, `Tanom`, upwell, `NP`, `SiN`, qrt) %>%
gather('resp', 'rsval', DA, Pn, chla)
# all
mrgsall <- prep %>%
group_by(resp) %>%
nest %>%
mutate(
frm = purrr::pmap(list(resp, data), function(resp, data){
yval <- 'rsval'
if(resp == 'chla')
yval <- 'log10(rsval)'
frm <- paste0(yval, ' ~ `Tanom` + upwell + `NP` + `SiN`')
if(resp %in% c('DA', 'Pn'))
mod <- try({glm(frm, data = data, family = binomial(link = "logit"))})
if(resp %in% 'chla')
mod <- try({glm(frm, data = data, family = gaussian(link = "identity"))})
if(inherits(mod, 'try-error'))
return(NA)
marg <- margins(mod) %>% summary
p <- ggplot(marg, aes(x = factor, y = AME)) +
geom_hline(yintercept = 0, color = 'grey', size = 1) +
geom_point(size = 2) +
geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) +
mythm2 +
scale_y_continuous(limits = c(-0.16, 0.15)) +
labs(x = NULL, y = 'Margin effect', title = paste(resp, 'all dates', sep = ', ')) +
coord_flip()
return(p)
})
)
# by quarter
mrgs <- prep %>%
group_by(qrt, resp) %>%
nest %>%
mutate(
frm = purrr::pmap(list(resp, qrt, data), function(resp, qrt, data){
yval <- 'rsval'
if(resp == 'chla')
yval <- 'log10(rsval)'
frm <- paste0(yval, ' ~ `Tanom` + upwell + `NP` + `SiN`')
if(resp %in% c('DA', 'Pn'))
mod <- try({glm(frm, data = data, family = binomial(link = "logit"))})
if(resp %in% 'chla')
mod <- try({glm(frm, data = data, family = gaussian(link = "identity"))})
if(inherits(mod, 'try-error'))
return(NA)
marg <- margins(mod) %>% summary
p <- ggplot(marg, aes(x = factor, y = AME)) +
geom_hline(yintercept = 0, color = 'grey', size = 1) +
geom_point(size = 2) +
geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) +
mythm2 +
scale_y_continuous(limits = c(-0.16, 0.15)) +
labs(x = NULL, y = 'Margin effect', title = paste(resp, qrt, sep = ', ')) +
coord_flip()
return(p)
})
)
plosall <- mrgsall$frm
plos <- mrgs$frm
grid.arrange(
arrangeGrob(plosall[[1]], plos[[3]], plos[[4]], plos[[1]], plos[[2]], ncol = 1),
arrangeGrob(plosall[[2]], plos[[7]], plos[[8]], plos[[5]], plos[[6]], ncol = 1),
arrangeGrob(plosall[[3]], plos[[11]],plos[[12]], plos[[9]], plos[[10]], ncol = 1),
ncol = 3
)
Histograms of predictors:
tomod <- stat_pair %>%
filter(location %in% 'Scripps Pier') %>%
select(tempanom, upwell, din, phosphorus, ntop, siton) %>%
rename(
`N:P` = ntop,
`Si:N` = siton,
`T,anom` = tempanom
) %>%
gather('var', 'val', everything())
mythm2 <- theme_bw(base_family = 'serif') +
theme(
strip.background = element_blank(),
text = element_text(family = 'serif', size = 12),
strip.placement = 'outside'
)
ggplot(tomod, aes(x = val)) +
geom_histogram() +
facet_wrap(~var, scales = 'free', strip.position = 'bottom') +
labs(x = NULL) +
mythm2
Marginal effects plots for logistic models describing HAB response variables:
# prep data
prep <- stat_pair %>%
filter(location %in% 'Scripps Pier') %>%
mutate(
Pn = ifelse(pntot > 100, 1, 0),
DA = ifelse(da > 0, 1, 0)
) %>%
rename(
`NP` = ntop,
`SiN` = siton,
`Tanom` = tempanom
) %>%
select(DA, Pn, chla, `Tanom`, upwell, `NP`, `SiN`, qrt) %>%
gather('resp', 'rsval', DA, Pn, chla)
# all
mrgsall <- prep %>%
group_by(resp) %>%
nest %>%
mutate(
frm = purrr::pmap(list(resp, data), function(resp, data){
yval <- 'rsval'
if(resp == 'chla')
yval <- 'log10(rsval)'
frm <- paste0(yval, ' ~ `Tanom` + upwell + `NP` + `SiN`')
if(resp %in% c('DA', 'Pn'))
mod <- try({glm(frm, data = data, family = binomial(link = "logit"))})
if(resp %in% 'chla')
mod <- try({glm(frm, data = data, family = gaussian(link = "identity"))})
if(inherits(mod, 'try-error'))
return(NA)
marg <- margins(mod) %>% summary
p <- ggplot(marg, aes(x = factor, y = AME)) +
geom_hline(yintercept = 0, color = 'grey', size = 1) +
geom_point(size = 2) +
geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) +
mythm2 +
scale_y_continuous(limits = c(-0.16, 0.08)) +
labs(x = NULL, y = 'Margin effect', title = paste(resp, 'all dates', sep = ', ')) +
coord_flip()
return(p)
})
)
# by quarter
mrgs <- prep %>%
group_by(qrt, resp) %>%
nest %>%
mutate(
frm = purrr::pmap(list(resp, qrt, data), function(resp, qrt, data){
yval <- 'rsval'
if(resp == 'chla')
yval <- 'log10(rsval)'
frm <- paste0(yval, ' ~ `Tanom` + upwell + `NP` + `SiN`')
if(resp %in% c('DA', 'Pn'))
mod <- try({glm(frm, data = data, family = binomial(link = "logit"))})
if(resp %in% 'chla')
mod <- try({glm(frm, data = data, family = gaussian(link = "identity"))})
if(inherits(mod, 'try-error'))
return(NA)
marg <- margins(mod) %>% summary
p <- ggplot(marg, aes(x = factor, y = AME)) +
geom_hline(yintercept = 0, color = 'grey', size = 1) +
geom_point(size = 2) +
geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) +
mythm2 +
scale_y_continuous(limits = c(-0.16, 0.08)) +
labs(x = NULL, y = 'Margin effect', title = paste(resp, qrt, sep = ', ')) +
coord_flip()
return(p)
})
)
plosall <- mrgsall$frm
plos <- mrgs$frm
grid.arrange(
arrangeGrob(plosall[[1]], plos[[3]], plos[[4]], plos[[1]], plos[[2]], ncol = 1),
arrangeGrob(plosall[[2]], plos[[7]], plos[[8]], plos[[5]], plos[[6]], ncol = 1),
arrangeGrob(plosall[[3]], plos[[11]], plos[[12]], plos[[9]], plos[[10]], ncol = 1),
ncol = 3
)
dimymax <- 45
mythm2 <- theme_bw(base_family = 'serif') +
theme(
strip.background = element_blank(),
text = element_text(family = 'serif', size = 12)
)
# all dates
tomodall <- tsdat %>%
mutate(
temp = ifelse(temp == 0, NA, temp)
) %>%
select(chla, da, din, phosphorus, pntot, silicate, temp) %>%
rename(`T` = temp) %>%
mutate_at(vars(-`T`), decostand, method = 'log', MARGIN = 2 , logbase = 10, na.rm = T) %>%
as.data.frame %>%
imputePCA %>%
.$completeObs
pcamod <- PCA(tomodall, scale.unit = T, graph = F)
# all time series
p1 <- ggord(pcamod, size = tsdat$Lat, vec_ext = 6, alpha = 0.5, txt = 4, arrow = 0.2, repel = F, coord_fix = F, labcol = 'tomato1', veccol = 'tomato1', sizelab = 'Latitude') +
mythm2 +
ggtitle('All dates') +
scale_size(range = c(0.3, 1.5))
# explained variance by dimensions
p1exp <- pcamod %>%
.$eig %>%
data.frame %>%
rownames_to_column() %>%
rename(vrs = percentage.of.variance) %>%
mutate(rowname = gsub('comp\\s', '', rowname)) %>%
# .[1:3, ] %>%
ggplot(aes(x = rowname, y = vrs, fill = vrs)) +
geom_bar(stat = 'identity') +
labs(x = 'Dimension', y = '% variance') +
scale_y_continuous(limits = c(0, dimymax)) +
scale_fill_gradient(limits = c(0, dimymax), low = 'tomato1', high = 'lightgreen', guide = F) +
ggtitle('All dates') +
mythm2
# by season
pcamodgrp <- tsdat %>%
group_by(qrt) %>%
nest %>%
mutate(
plo = purrr::pmap(list(qrt, data), function(qrt, data){
tomod <- data %>%
mutate(
temp = ifelse(temp == 0, NA, temp)
) %>%
select(chla, da, din, phosphorus, pntot, silicate, temp) %>%
rename(`T` = temp) %>%
mutate_at(vars(-`T`), decostand, method = 'log', MARGIN = 2 , logbase = 10, na.rm = T) %>%
as.data.frame %>%
imputePCA %>%
.$completeObs
pcamod <- PCA(tomod, scale.unit = T, graph = F)
# ggord
p1 <- ggord(pcamod, vec_ext = 6, alpha = 0.5, size = data$Lat, txt = 4, arrow = 0.2, repel = F, coord_fix = F, labcol = 'tomato1', veccol = 'tomato1', sizelab = 'Latitude') +
mythm2 +
ggtitle(qrt)+
scale_size(range = c(0.3, 1.5))
# explained variance by dimensions
p2 <- pcamod %>%
.$eig %>%
data.frame %>%
rownames_to_column() %>%
rename(vrs = percentage.of.variance) %>%
mutate(rowname = gsub('comp\\s', '', rowname)) %>%
# .[1:3, ] %>%
ggplot(aes(x = rowname, y = vrs, fill = vrs)) +
geom_bar(stat = 'identity') +
labs(x = 'Dimension', y = '% variance') +
scale_y_continuous(limits = c(0, dimymax)) +
scale_fill_gradient(limits = c(0, dimymax), low = 'tomato1', high = 'lightgreen', guide = F) +
ggtitle(qrt) +
mythm2
out <- list(p1, p2)
return(out)
})
) %>%
pull(plo)
grid.arrange(
arrangeGrob(
p1, pcamodgrp[[3]][[1]], pcamodgrp[[4]][[1]], pcamodgrp[[1]][[1]], pcamodgrp[[2]][[1]],
ncol = 1
),
arrangeGrob(
p1exp, pcamodgrp[[3]][[2]], pcamodgrp[[4]][[2]], pcamodgrp[[1]][[2]], pcamodgrp[[2]][[2]],
ncol = 1
) ,
ncol = 2, widths = c(1, 0.8)
)
dimymax <- 45
mythm2 <- theme_bw(base_family = 'serif') +
theme(
strip.background = element_blank(),
text = element_text(family = 'serif', size = 12)
)
# all dates
tomodall <- tsdat %>%
select(chla, da, pntot, ntop, siton, upwell, tempanom) %>%
rename(
`N:P` = ntop,
`Si:N` = siton,
`T,anom` = tempanom
) %>%
mutate_at(vars(-`T,anom`, -`Si:N`, -`N:P`), decostand, method = 'log', MARGIN = 2 , logbase = 10, na.rm = T) %>%
as.data.frame %>%
imputePCA %>%
.$completeObs
pcamod <- PCA(tomodall, scale.unit = T, graph = F)
# all time series
p1 <- ggord(pcamod, size = tsdat$Lat, vec_ext = 6, alpha = 0.5, txt = 4, arrow = 0.2, repel = F, coord_fix = F, labcol = 'tomato1', veccol = 'tomato1', sizelab = 'Latitude') +
mythm2 +
ggtitle('All dates') +
scale_size(range = c(0.3, 1.5))
# explained variance by dimensions
p1exp <- pcamod %>%
.$eig %>%
data.frame %>%
rownames_to_column() %>%
rename(vrs = percentage.of.variance) %>%
mutate(rowname = gsub('comp\\s', '', rowname)) %>%
# .[1:3, ] %>%
ggplot(aes(x = rowname, y = vrs, fill = vrs)) +
geom_bar(stat = 'identity') +
labs(x = 'Dimension', y = '% variance') +
scale_y_continuous(limits = c(0, dimymax)) +
scale_fill_gradient(limits = c(0, dimymax), low = 'tomato1', high = 'lightgreen', guide = F) +
ggtitle('All dates') +
mythm2
# by season
pcamodgrp <- tsdat %>%
group_by(qrt) %>%
nest %>%
mutate(
plo = purrr::pmap(list(qrt, data), function(qrt, data){
tomod <- data %>%
select(chla, da, pntot, ntop, siton, tempanom, upwell) %>%
rename(
`N:P` = ntop,
`Si:N` = siton,
`T,anom` = tempanom
) %>%
mutate_at(vars(-`T,anom`, -`Si:N`, -`N:P`), decostand, method = 'log', MARGIN = 2 , logbase = 10, na.rm = T) %>%
as.data.frame %>%
imputePCA %>%
.$completeObs
pcamod <- PCA(tomod, scale.unit = T, graph = F)
# ggord
p1 <- ggord(pcamod, vec_ext = 6, alpha = 0.5, size = data$Lat, txt = 4, arrow = 0.2, repel = F, coord_fix = F, labcol = 'tomato1', veccol = 'tomato1', sizelab = 'Latitude') +
mythm2 +
ggtitle(qrt)+
scale_size(range = c(0.3, 1.5))
# explained variance by dimensions
p2 <- pcamod %>%
.$eig %>%
data.frame %>%
rownames_to_column() %>%
rename(vrs = percentage.of.variance) %>%
mutate(rowname = gsub('comp\\s', '', rowname)) %>%
# .[1:3, ] %>%
ggplot(aes(x = rowname, y = vrs, fill = vrs)) +
geom_bar(stat = 'identity') +
labs(x = 'Dimension', y = '% variance') +
scale_y_continuous(limits = c(0, dimymax)) +
scale_fill_gradient(limits = c(0, dimymax), low = 'tomato1', high = 'lightgreen', guide = F) +
ggtitle(qrt) +
mythm2
out <- list(p1, p2)
return(out)
})
) %>%
pull(plo)
grid.arrange(
arrangeGrob(
p1, pcamodgrp[[3]][[1]], pcamodgrp[[4]][[1]], pcamodgrp[[1]][[1]], pcamodgrp[[2]][[1]],
ncol = 1
),
arrangeGrob(
p1exp, pcamodgrp[[3]][[2]], pcamodgrp[[4]][[2]], pcamodgrp[[1]][[2]], pcamodgrp[[2]][[2]],
ncol = 1
) ,
ncol = 2, widths = c(1, 0.8)
)
dimymax <- 45
mythm2 <- theme_bw(base_family = 'serif') +
theme(
strip.background = element_blank(),
text = element_text(family = 'serif', size = 12)
)
# all dates
tomodall <- stat_pair %>%
select(chla, da, pntot, ntop, siton, upwell, tempanom) %>%
rename(
`N:P` = ntop,
`Si:N` = siton,
`T,anom` = tempanom
) %>%
mutate_at(vars(-`T,anom`, -`Si:N`, -`N:P`), decostand, method = 'log', MARGIN = 2 , logbase = 10, na.rm = T) %>%
as.data.frame %>%
imputePCA %>%
.$completeObs
pcamod <- PCA(tomodall, scale.unit = T, graph = F)
# all time series
p1 <- ggord(pcamod, grp_in = stat_pair$location, size = stat_pair$Lat, vec_ext = 6, alpha = 0.5, txt = 4, arrow = 0.2, repel = F, coord_fix = F, labcol = 'tomato1', veccol = 'tomato1', sizelab = 'Latitude') +
mythm2 +
ggtitle('All dates') +
scale_size(range = c(0.3, 1.5))
# explained variance by dimensions
p1exp <- pcamod %>%
.$eig %>%
data.frame %>%
rownames_to_column() %>%
rename(vrs = percentage.of.variance) %>%
mutate(rowname = gsub('comp\\s', '', rowname)) %>%
# .[1:3, ] %>%
ggplot(aes(x = rowname, y = vrs, fill = vrs)) +
geom_bar(stat = 'identity') +
labs(x = 'Dimension', y = '% variance') +
scale_y_continuous(limits = c(0, dimymax)) +
scale_fill_gradient(limits = c(0, dimymax), low = 'tomato1', high = 'lightgreen', guide = F) +
ggtitle('All dates') +
mythm2
# by season
pcamodgrp <- stat_pair %>%
group_by(qrt) %>%
nest %>%
mutate(
plo = purrr::pmap(list(qrt, data), function(qrt, data){
tomod <- data %>%
select(chla, da, pntot, ntop, siton, tempanom, upwell) %>%
rename(
`N:P` = ntop,
`Si:N` = siton,
`T,anom` = tempanom
) %>%
mutate_at(vars(-`T,anom`, -`Si:N`, -`N:P`), decostand, method = 'log', MARGIN = 2 , logbase = 10, na.rm = T) %>%
as.data.frame %>%
imputePCA %>%
.$completeObs
pcamod <- PCA(tomod, scale.unit = T, graph = F)
# ggord
p1 <- ggord(pcamod, grp_in = data$location, vec_ext = 6, alpha = 0.5, size = data$Lat, txt = 4, arrow = 0.2, repel = F, coord_fix = F, labcol = 'tomato1', veccol = 'tomato1', sizelab = 'Latitude') +
mythm2 +
ggtitle(qrt)+
scale_size(range = c(0.3, 1.5))
# explained variance by dimensions
p2 <- pcamod %>%
.$eig %>%
data.frame %>%
rownames_to_column() %>%
rename(vrs = percentage.of.variance) %>%
mutate(rowname = gsub('comp\\s', '', rowname)) %>%
# .[1:3, ] %>%
ggplot(aes(x = rowname, y = vrs, fill = vrs)) +
geom_bar(stat = 'identity') +
labs(x = 'Dimension', y = '% variance') +
scale_y_continuous(limits = c(0, dimymax)) +
scale_fill_gradient(limits = c(0, dimymax), low = 'tomato1', high = 'lightgreen', guide = F) +
ggtitle(qrt) +
mythm2
out <- list(p1, p2)
return(out)
})
) %>%
pull(plo)
grid.arrange(
arrangeGrob(
p1, pcamodgrp[[1]][[1]], pcamodgrp[[2]][[1]], pcamodgrp[[3]][[1]], pcamodgrp[[4]][[1]],
ncol = 1
),
arrangeGrob(
p1exp, pcamodgrp[[1]][[2]], pcamodgrp[[2]][[2]], pcamodgrp[[3]][[2]], pcamodgrp[[4]][[2]],
ncol = 1
) ,
ncol = 2, widths = c(1, 0.6)
)