1 Exploration of DA Bight dataset

knitr::opts_chunk$set(echo = TRUE, warning = F, message = F, fig.path = 'figs/', dev.args = list(family = 'serif'))

library(tidyverse)
library(sf)
library(mapview)
library(patchwork)
library(lubridate)
library(ggrepel)
library(condprob2)
library(here)
library(gridExtra)
library(grid)
library(FactoMineR)
library(missMDA)
library(vegan)
library(ggord)
library(scales)
library(margins)

prj <- 4326 # wgs84

source('R/funcs.R')

pbase <- theme_bw(base_family = 'serif', base_size = 14) +
  theme(
    strip.background = element_blank(), 
    legend.position = 'top', 
    legend.title = element_blank()
  )

data(wqdat)
data(tsdat)
data(upwell)

# scripps and santa cruz only
stat_pair <- tsdat %>% 
  filter(location %in% c('Scripps Pier', 'Santa Cruz Wharf')) %>% 
  filter(year %in% c(2012:2015))

# all wq data as sf
tsdatgeo <- tsdat %>% 
  select(location, Long, Lat) %>% 
  unique %>% 
  st_as_sf(., coords = c('Long', 'Lat'), crs = prj)

1.1 Conceptual model

knitr::include_graphics(here('figs', 'model.png'))

1.2 Locations and years sampled

Locations with lat/lon, summarized by numbers of years sampled:

wqdatgeosum <- wqdat %>%    
  filter(!is.na(Lat)) %>%   
  group_by(`Station ID`) %>%    
  nest %>%  
  mutate(   
    nyrs = purrr::map(data, function(x) length(unique(x$Year))),    
    Lat = purrr::map(data, function(x) mean(x$Lat, na.rm = T)),     
    Long = purrr::map(data, function(x) mean(x$Long, na.rm = T))    
  ) %>%     
  dplyr::select(-data) %>%  
  unnest %>%    
  st_as_sf(., coords = c('Long', 'Lat'), crs = prj) 
 mapview(wqdatgeosum, zcol = 'nyrs', layer.name = "Years sampled")

1.3 LTM time series

Locations:

mapview(tsdatgeo, homebutton = F, layer.name = F)

Time series (from north to south):

toplo <- tsdat %>% 
  dplyr::select(date, location, Lat, da, pntot, chla) %>%
  mutate(
    da = 1000*da, # from ng/mL to mg/L
    da.edit = ifelse(da > 400, 401, da),
    text.da = ifelse(da > 400, 'Y', 'N'),
    zero.da = ifelse(da > 0, 'N', 'Y'), 
    pn = pntot / 1000, # cell/L to cells/ml
    pn.edit = ifelse(pn > 400, 401, pn),
    text.pn = ifelse(pn > 400, 'Y', 'N'),
    zero.pn = ifelse(pn > 0, 'N', 'Y'),
    chla.edit = ifelse(chla > 20, 21, chla),
    text.chla = ifelse(chla > 20, 'Y', 'N'),
    zero.chla = ifelse(chla > 0, 'N', 'Y')
  )
dalabs <- filter(toplo, text.da == 'Y')
pnlabs <- filter(toplo, text.pn == 'Y')
chlalabs <- filter(toplo, text.chla == 'Y')

# point scales
scls <- c(1, 8)

# color ramp
cols <- c('lightgreen', 'tomato1') #c('gray50', 'grey15')

leglab <- expression(paste('conc. (mg ', L^-1, ')'))
brks <- c(0, 50, 100, 150, 200, 400)
labs <- c('>0-50', '50-100', '100-150', '150-200', '200-400', '>400')
p1 <- ggplot(toplo, aes(x = date, y = location)) + 
  geom_point(aes(size = da.edit, shape = zero.da, colour = da.edit), alpha = 0.8) + 
  # geom_label_repel(data = dalabs, aes(label = floor(da)),
  #   box.padding = 1,
  #   point.padding = NA,
  #   size = 4, alpha = 0.7,
  #   min.segment.length = 0,
  #   colour = 'tomato1'
  #   ) +
  theme_bw(base_family = 'serif') + 
  theme(axis.title = element_blank()) +
  scale_size_continuous(leglab,
    breaks = brks,
    range = scls, 
    labels = labs
    ) + 
  scale_colour_gradientn(leglab, 
    element_text('test'),
    colours = cols,
    breaks = brks,
    labels = labs
    ) + 
  scale_shape_manual(values = c(16, 1), guide = F) +
  guides(colour = guide_legend(), size = guide_legend()) +
  ggtitle('(a) Domoic Acid (DA)') 

leglab <- expression(paste('cells (', mL^-1, ')'))
brks <- c(0, 50, 100, 150, 200, 400)
labs <- c('>0-50', '50-100', '100-150', '150-200', '200-400', '>400')
p2 <- ggplot(toplo, aes(x = date, y = location)) + 
  geom_point(aes(size = pn.edit, shape = zero.pn, colour = pn.edit), alpha = 0.8) + 
  # geom_label_repel(data = pnlabs, aes(label = floor(pn)), 
  #   box.padding = 1,
  #   point.padding = NA, 
  #   size = 4, alpha = 0.7, 
  #   min.segment.length = 0,
  #   colour = 'tomato1'
  #   ) +
  theme_bw(base_family = 'serif') + 
  theme(axis.title = element_blank()) +
    scale_size_continuous(leglab,
    breaks = brks,
    range = scls, 
    labels = labs
    ) + 
  scale_colour_gradientn(leglab, 
    element_text('test'),
    colours = cols,
    breaks = brks,
    labels = labs
    ) + 
  scale_shape_manual(values = c(16, 1), guide = F) +
  guides(colour = guide_legend(), size = guide_legend()) +
  ggtitle('(b) Pseudo-nitzschia') 

leglab <- expression(paste('conc. (ug ', L^-1, ')'))
brks <- c(0, 1, 5, 10, 15, 20)
labs <- c('>0-1', '1-5', '5-10', '10-15', '15-20', '>20')
p3 <- ggplot(toplo, aes(x = date, y = location)) + 
  geom_point(aes(size = chla.edit, shape = zero.chla, colour = chla.edit), alpha = 0.8) + 
  # geom_label_repel(data = chlalabs, aes(label = floor(chla)), 
  #   box.padding = 1,
  #   point.padding = NA, 
  #   size = 4, alpha = 0.7, 
  #   min.segment.length = 0,
  #   colour = 'tomato1'
  #   ) +
  theme_bw(base_family = 'serif') + 
  theme(axis.title = element_blank()) +
    scale_size_continuous(leglab,
    breaks = brks,
    range = scls, 
    labels = labs
    ) + 
  scale_colour_gradientn(leglab, 
    element_text('test'),
    colours = cols,
    breaks = brks,
    labels = labs
    ) + 
  scale_shape_manual(values = c(16, 1), guide = F) +
  guides(colour = guide_legend(), size = guide_legend()) +
  ggtitle('(c) Chlorophyll-a') 

p1 + p2 + p3 + plot_layout(ncol = 1)

Temperature anomalies by station:

pthm <- theme_bw(base_family = 'serif') + 
  theme(
    strip.background = element_blank(), 
    axis.title.x = element_blank(),
    legend.position = 'none'
  )

p1 <- ggplot(tsdat, aes(x = date)) + 
  geom_line(aes(y = temp)) +
  geom_line(aes(y = tempseas), colour = 'red') + 
  facet_wrap(~location, ncol = 1) + 
  labs(y = 'Temperature (C)') +
  pthm

p2 <- ggplot(tsdat, aes(x = date)) + 
  geom_segment(aes(y = 0, yend = tempanom, xend = date, colour = tempanom)) +  
  geom_hline(yintercept = 0, color = 'red') +
  facet_wrap(~location, ncol = 1) +
  labs(y = 'Anomalies (C)') + 
  scale_colour_gradient2(low = 'tomato1', mid = 'lightgreen', high = 'tomato1', midpoint= 0) + 
  pthm

p1 + p2 + plot_layout(ncol = 2)

Upwelling:

toplo <- unnest(upwell) %>% 
  filter(year(date) >= 2008) %>% 
  mutate(dec_time = decimal_date(date)) %>% 
  group_by(stat) %>% 
  rename(upwell = ind) %>% 
  mutate(
    upwellseas = predict(lm(upwell ~ sin(2*pi*dec_time) + cos(2*pi*dec_time), na.action = na.exclude)), 
    upwellanom = upwell - upwellseas
  ) %>% 
  ungroup

pthm <- theme_bw(base_family = 'serif') + 
  theme(
    strip.background = element_blank(), 
    axis.title.x = element_blank(),
    legend.position = 'none'
  )

p1 <- ggplot(toplo, aes(x = date)) + 
  geom_line(aes(y = upwell)) +
  geom_line(aes(y = upwellseas), colour = 'red') + 
  facet_wrap(~stat, ncol = 1) + 
  labs(y = 'Upwelling (m3/s/100m)') +
  pthm

p2 <- ggplot(toplo, aes(x = date)) + 
  geom_segment(aes(y = 0, yend = upwellanom, xend = date, colour = upwellanom)) +  
  geom_hline(yintercept = 0, color = 'red') +
  facet_wrap(~stat, ncol = 1) +
  labs(y = 'Anomalies') + 
  scale_colour_gradient2(low = 'tomato1', mid = 'lightgreen', high = 'tomato1', midpoint= 0) + 
  pthm

p1 + p2 + plot_layout(ncol = 2)

1.4 Conditional probability analyses

Scatterplots of chl vs DA, psuedo nitzschia concentration:

toplo1 <- tsdat %>%
  filter(!is.na(chla) & !is.na(da))

ylb <- expression(paste('Domoic acid conc. (ng  ', L^-1, ')'))

p1 <- ggplot(toplo1, aes(x = chla, y = da)) +
  geom_point(alpha = 0.7) +
  theme_bw(base_family = 'serif') +
  theme(strip.background = element_blank()) +
  facet_wrap(~reorder(location, -Lat), ncol = 1) +
  geom_smooth(method = 'lm', colour = 'lightblue') +
  scale_x_log10(expression(paste('Chl-a (ug ', L^-1, ')'))) +
  scale_y_log10(ylb)

toplo2 <- tsdat %>%
  filter(!is.na(chla) & !is.na(pntot))

ylb <- expression(paste('Pseudonitzchia cells ( ', L^-1, ')'))

p2 <- ggplot(toplo2, aes(x = chla, y = pntot)) +
  geom_point(alpha = 0.7) +
  theme_bw(base_family = 'serif') +
  theme(strip.background = element_blank()) +
  facet_wrap(~reorder(location, -Lat), ncol = 1) +
  geom_smooth(method = 'lm', colour = 'lightblue') +
  scale_x_log10(expression(paste('Chl-a (ug ', L^-1, ')'))) +
  scale_y_log10(ylb)

p1 + p2 + plot_layout(ncol = 2)

Logistic regression of likelihood of seeing DA with chl, all Bight stations:

toplo1 <- tsdat %>%
  mutate(
    dapa = ifelse(da > 0, 1, 0)
  ) %>%
  filter(!is.na(chla) & !is.na(da))

leglab <- expression(paste('Domoic acid\nconc. (ng ', L^-1, ')'))

p1 <- ggplot(toplo1, aes(x = chla, y = dapa)) +
  geom_point(aes(size = da), alpha = 0.7) +
  theme_bw(base_family = 'serif') +
  theme(
    strip.background = element_blank(),
    legend.position = 'top'
    ) +
  facet_wrap(~reorder(location, -Lat), ncol = 1) +
  geom_smooth(method = "glm", method.args = list(family = "binomial"), colour = 'lightblue') +
  scale_x_log10(expression(paste('Chl-a (ug ', L^-1, ')'))) +
  ylab('Likelihood of domoic acid') +
  scale_size_continuous(leglab, range = c(1, 8))

toplo2 <- tsdat %>%
  filter(!is.na(chla) & !is.na(pntot)) %>%
  mutate(
    pnhilo = ifelse(pntot > 100, 1, 0)
  )

leglab <- expression(paste('Cells (', L^-1, ')'))

p2 <- ggplot(toplo2, aes(x = chla, y = pnhilo)) +
  geom_point(aes(size = pntot), alpha = 0.7) +
  theme_bw(base_family = 'serif') +
  theme(
    strip.background = element_blank(),
    legend.position = 'top'
    ) +
  facet_wrap(~reorder(location, -Lat), ncol = 1) +
  geom_smooth(method = "glm", method.args = list(family = "binomial"), colour = 'lightblue') +
  scale_x_log10(expression(paste('Chl-a (ug ', L^-1, ')'))) +
  scale_y_continuous(expression(paste('Likelihood of high cell concentration (> 100    ', L^-1, ')'))) +
  scale_size_continuous(leglab, range = c(1, 8))

p1 + p2 + plot_layout(ncol = 2)

Conditional probability plots, same response as above but using non-linear method. The first set shows the likelihood of observing domoic acid or cell concentration greater than 100 L\(^{-1}\) at concentrations of chlorophyll less than or equal to those on the x-axis.

locs <- reorder(tsdat$location, -tsdat$Lat) %>% levels
ends <- c('da', 'pntot')
thrs <- c(0, 100)
labs <- c('Likelihood of Domoic acid', expression(paste('Likelihood of high cell concentration (> 100    ', L^-1, ')')))
names(thrs) <- ends
names(labs) <- ends
                                                
plos <- crossing(locs, ends) %>% 
  group_by(locs, ends) %>% 
  mutate(
    plo = pmap(list(locs, ends), function(locs, ends){

      # inputs
      xval <- tsdat$chla[tsdat$location %in% locs]
      yval <- tsdat[tsdat$location %in% locs, ] %>% pull({ends})
      thrs <- thrs[ends]
      ylbs <- labs[ends]
      
      # cond. prob results
      res <- condprob(xval, yval, thrs, 'gt', 'lte', T, R = 100)
      
      # plots
      p <- plot(res) +
        scale_x_log10() +
        theme_bw(base_family = 'serif') +
        theme(
          axis.title = element_blank(), 
          plot.title = element_text(hjust = 0.5, size= 8)
          ) +
        ggtitle(locs)

      return(p)
      
    })
  ) %>% 
  pull(plo)

# text par
gpval <- gpar(fontfamily = 'serif')

# unholy grid arrange
grid.arrange(
  arrangeGrob(
    left = textGrob(labs[1], rot = 90, gp = gpval), 
    plos[[1]], plos[[3]], plos[[5]], plos[[7]], plos[[9]], plos[[11]], plos[[13]], ncol = 1
    ),
  arrangeGrob(
    left = textGrob(labs[2], rot = 90, gp = gpval),
    plos[[2]], plos[[4]], plos[[6]], plos[[8]], plos[[10]], plos[[12]], plos[[14]], ncol = 1
  ),
  bottom = textGrob(expression(paste('Chl-a (ug ', L^-1, '), less than or equal to')), gp = gpval), 
  ncol = 2
)

The second set shows the likelihood of observing domoic acid or cell concentration greater than 10 L\(^{-1}\) at concentrations of chlorophyll greater than or equal to those on the x-axis.

locs <- reorder(tsdat$location, -tsdat$Lat) %>% levels
ends <- c('da', 'pntot')
thrs <- c(0, 100)
labs <- c('Likelihood of Domoic acid', expression(paste('Likelihood of high cell concentration (> 100    ', L^-1, ')')))
names(thrs) <- ends
names(labs) <- ends
                                                
plos <- crossing(locs, ends) %>% 
  group_by(locs, ends) %>% 
  mutate(
    plo = pmap(list(locs, ends), function(locs, ends){

      # inputs
      xval <- tsdat$chla[tsdat$location %in% locs]
      yval <- tsdat[tsdat$location %in% locs, ] %>% pull({ends})
      thrs <- thrs[ends]
      ylbs <- labs[ends]
      
      # cond. prob results
      res <- condprob(xval, yval, thrs, 'gt', 'gte', T, R = 100)
      
      # plots
      p <- plot(res) +
        scale_x_log10() +
        theme_bw(base_family = 'serif') +
        theme(
          axis.title = element_blank(), 
          plot.title = element_text(hjust = 0.5, size= 10)
          ) +
        ggtitle(locs)

      return(p)
      
    })
  ) %>% 
  pull(plo)

# text par
gpval <- gpar(fontfamily = 'serif')

# unholy grid arrange
grid.arrange(
  arrangeGrob(
    left = textGrob(labs[1], rot = 90, gp = gpval), 
    plos[[1]], plos[[3]], plos[[5]], plos[[7]], plos[[9]], plos[[11]], plos[[13]], ncol = 1
    ),
  arrangeGrob(
    left = textGrob(labs[2], rot = 90, gp = gpval),
    plos[[2]], plos[[4]], plos[[6]], plos[[8]], plos[[10]], plos[[12]], plos[[14]], ncol = 1
  ),
  bottom = textGrob(expression(paste('Chl-a (ug ', L^-1, '), greater than or equal to')), gp = gpval), 
  ncol = 2
)

1.5 Linear models

1.5.1 All stations

Histograms of predictors:

tomod <- tsdat %>%
  select(tempanom, upwell, din, phosphorus, ntop, siton) %>%
  rename(
    `N:P` = ntop,
    `Si:N` = siton,
    `T,anom` = tempanom
    ) %>%
  gather('var', 'val', everything())

mythm2 <- theme_bw(base_family = 'serif') +
  theme(
    strip.background = element_blank(),
    text = element_text(family = 'serif', size = 12),
    strip.placement = 'outside'
  )

ggplot(tomod, aes(x = val)) +
  geom_histogram() +
  facet_wrap(~var, scales = 'free', strip.position = 'bottom') +
  labs(x = NULL) +
  mythm2

Marginal effects plots for logistic models describing HAB response variables:

# prep
prep <- tsdat %>%
  mutate(
    Pn = ifelse(pntot > 100, 1, 0),
    DA = ifelse(da > 0, 1, 0)
  ) %>%
  rename(
    `NP` = ntop,
    `SiN` = siton,
    `Tanom` = tempanom
  ) %>% 
  select(DA, Pn, chla, `Tanom`, upwell, din, phosphorus, `NP`, `SiN`, qrt) %>%
  gather('resp', 'rsval', DA, Pn, chla)

# all
mrgsall <- prep %>% 
  group_by(resp) %>%
  nest %>%
  mutate(
    frm = purrr::pmap(list(resp, data), function(resp, data){

      yval <- 'rsval'
      if(resp == 'chla')
        yval <- 'log10(rsval)'

      frm <- paste0(yval, ' ~ `Tanom` + upwell + `NP` + `SiN`')

      if(resp %in% c('DA', 'Pn'))
        mod <- try({glm(frm, data = data, family = binomial(link = "logit"))})

      if(resp %in% 'chla')
        mod <- try({glm(frm, data = data, family = gaussian(link = "identity"))})

      if(inherits(mod, 'try-error'))
        return(NA)

      marg <- margins(mod) %>% summary

      p <- ggplot(marg, aes(x = factor, y = AME)) +
        geom_hline(yintercept = 0, color = 'grey', size = 1) +
        geom_point(size = 2) + 
        geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) + 
        mythm2 + 
        scale_y_continuous(limits = c(-0.12, 0.06)) +
        labs(x = NULL, y = 'Margin effect', title = paste(resp, 'all dates', sep = ', ')) + 
        coord_flip() 
      
      return(p)

    })
  )

# by quarter
mrgs <- prep %>% 
  group_by(qrt, resp) %>%
  nest %>%
  mutate(
    frm = purrr::pmap(list(resp, qrt, data), function(resp, qrt, data){

      yval <- 'rsval'
      if(resp == 'chla')
        yval <- 'log10(rsval)'

      frm <- paste0(yval, ' ~ `Tanom` + upwell + `NP` + `SiN`')

      if(resp %in% c('DA', 'Pn'))
        mod <- try({glm(frm, data = data, family = binomial(link = "logit"))})

      if(resp %in% 'chla')
        mod <- try({glm(frm, data = data, family = gaussian(link = "identity"))})

      if(inherits(mod, 'try-error'))
        return(NA)

      marg <- margins(mod) %>% summary

      p <- ggplot(marg, aes(x = factor, y = AME)) +
        geom_hline(yintercept = 0, color = 'grey', size = 1) +
        geom_point(size = 2) + 
        geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) + 
        mythm2 + 
        scale_y_continuous(limits = c(-0.12, 0.06)) +
        labs(x = NULL, y = 'Margin effect', title = paste(resp, qrt, sep = ', ')) + 
        coord_flip() 
      
      return(p)

    })
  )

plosall <- mrgsall$frm
plos <- mrgs$frm
grid.arrange(
  arrangeGrob(plosall[[1]], plos[[3]], plos[[4]], plos[[1]], plos[[2]], ncol = 1),
  arrangeGrob(plosall[[2]], plos[[7]], plos[[8]], plos[[5]], plos[[6]], ncol = 1),
  arrangeGrob(plosall[[3]], plos[[11]], plos[[12]], plos[[9]], plos[[10]], ncol = 1),
  ncol = 3
)

1.5.2 Santa Cruz

Histograms of predictors:

tomod <- stat_pair %>%
  filter(location %in% 'Santa Cruz Wharf') %>% 
  select(tempanom, upwell, din, phosphorus, ntop, siton) %>%
  rename(
    `N:P` = ntop,
    `Si:N` = siton,
    `T,anom` = tempanom
    ) %>%
  gather('var', 'val', everything())

mythm2 <- theme_bw(base_family = 'serif') +
  theme(
    strip.background = element_blank(),
    text = element_text(family = 'serif', size = 12),
    strip.placement = 'outside'
  )

ggplot(tomod, aes(x = val)) +
  geom_histogram() +
  facet_wrap(~var, scales = 'free', strip.position = 'bottom') +
  labs(x = NULL) +
  mythm2

Marginal effects plots for logistic models describing HAB response variables:

# prep data
prep <- stat_pair %>%
  filter(location %in% 'Santa Cruz Wharf') %>%  
  mutate(
    Pn = ifelse(pntot > 100, 1, 0),
    DA = ifelse(da > 0, 1, 0)
  ) %>%
  rename(
    `NP` = ntop,
    `SiN` = siton,
    `Tanom` = tempanom
  ) %>% 
  select(DA, Pn, chla, `Tanom`, upwell, `NP`, `SiN`, qrt) %>%
  gather('resp', 'rsval', DA, Pn, chla) 

# all
mrgsall <- prep %>% 
  group_by(resp) %>%
  nest %>%
  mutate(
    frm = purrr::pmap(list(resp, data), function(resp, data){

      yval <- 'rsval'
      if(resp == 'chla')
        yval <- 'log10(rsval)'

      frm <- paste0(yval, ' ~ `Tanom` + upwell + `NP` + `SiN`')

      if(resp %in% c('DA', 'Pn'))
        mod <- try({glm(frm, data = data, family = binomial(link = "logit"))})

      if(resp %in% 'chla')
        mod <- try({glm(frm, data = data, family = gaussian(link = "identity"))})

      if(inherits(mod, 'try-error'))
        return(NA)

      marg <- margins(mod) %>% summary

      p <- ggplot(marg, aes(x = factor, y = AME)) +
        geom_hline(yintercept = 0, color = 'grey', size = 1) +
        geom_point(size = 2) + 
        geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) + 
        mythm2 + 
        scale_y_continuous(limits = c(-0.16, 0.15)) +
        labs(x = NULL, y = 'Margin effect', title = paste(resp, 'all dates', sep = ', ')) + 
        coord_flip() 
      
      return(p)

    })
  )

# by quarter
mrgs <- prep %>% 
  group_by(qrt, resp) %>%
  nest %>%
  mutate(
    frm = purrr::pmap(list(resp, qrt, data), function(resp, qrt, data){

      yval <- 'rsval'
      if(resp == 'chla')
        yval <- 'log10(rsval)'

      frm <- paste0(yval, ' ~ `Tanom` + upwell + `NP` + `SiN`')

      if(resp %in% c('DA', 'Pn'))
        mod <- try({glm(frm, data = data, family = binomial(link = "logit"))})

      if(resp %in% 'chla')
        mod <- try({glm(frm, data = data, family = gaussian(link = "identity"))})

      if(inherits(mod, 'try-error'))
        return(NA)

      marg <- margins(mod) %>% summary

      p <- ggplot(marg, aes(x = factor, y = AME)) +
        geom_hline(yintercept = 0, color = 'grey', size = 1) +
        geom_point(size = 2) + 
        geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) + 
        mythm2 + 
        scale_y_continuous(limits = c(-0.16, 0.15)) +
        labs(x = NULL, y = 'Margin effect', title = paste(resp, qrt, sep = ', ')) + 
        coord_flip() 
      
      return(p)

    })
  )

plosall <- mrgsall$frm
plos <- mrgs$frm
grid.arrange(
  arrangeGrob(plosall[[1]], plos[[3]], plos[[4]], plos[[1]], plos[[2]], ncol = 1),
  arrangeGrob(plosall[[2]], plos[[7]], plos[[8]], plos[[5]], plos[[6]], ncol = 1),
  arrangeGrob(plosall[[3]], plos[[11]],plos[[12]], plos[[9]], plos[[10]], ncol = 1),
  ncol = 3
)

1.5.3 Scripps

Histograms of predictors:

tomod <- stat_pair %>%
  filter(location %in% 'Scripps Pier') %>% 
  select(tempanom, upwell, din, phosphorus, ntop, siton) %>%
  rename(
    `N:P` = ntop,
    `Si:N` = siton,
    `T,anom` = tempanom
    ) %>%
  gather('var', 'val', everything())

mythm2 <- theme_bw(base_family = 'serif') +
  theme(
    strip.background = element_blank(),
    text = element_text(family = 'serif', size = 12),
    strip.placement = 'outside'
  )

ggplot(tomod, aes(x = val)) +
  geom_histogram() +
  facet_wrap(~var, scales = 'free', strip.position = 'bottom') +
  labs(x = NULL) +
  mythm2

Marginal effects plots for logistic models describing HAB response variables:

# prep data
prep <- stat_pair %>%
  filter(location %in% 'Scripps Pier') %>%  
  mutate(
    Pn = ifelse(pntot > 100, 1, 0),
    DA = ifelse(da > 0, 1, 0)
  ) %>%
  rename(
    `NP` = ntop,
    `SiN` = siton,
    `Tanom` = tempanom
  ) %>% 
  select(DA, Pn, chla, `Tanom`, upwell, `NP`, `SiN`, qrt) %>%
  gather('resp', 'rsval', DA, Pn, chla) 

# all
mrgsall <- prep %>% 
  group_by(resp) %>%
  nest %>%
  mutate(
    frm = purrr::pmap(list(resp, data), function(resp, data){

      yval <- 'rsval'
      if(resp == 'chla')
        yval <- 'log10(rsval)'

      frm <- paste0(yval, ' ~ `Tanom` + upwell + `NP` + `SiN`')

      if(resp %in% c('DA', 'Pn'))
        mod <- try({glm(frm, data = data, family = binomial(link = "logit"))})

      if(resp %in% 'chla')
        mod <- try({glm(frm, data = data, family = gaussian(link = "identity"))})

      if(inherits(mod, 'try-error'))
        return(NA)

      marg <- margins(mod) %>% summary

      p <- ggplot(marg, aes(x = factor, y = AME)) +
        geom_hline(yintercept = 0, color = 'grey', size = 1) +
        geom_point(size = 2) + 
        geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) + 
        mythm2 + 
        scale_y_continuous(limits = c(-0.16, 0.08)) +
        labs(x = NULL, y = 'Margin effect', title = paste(resp, 'all dates', sep = ', ')) + 
        coord_flip() 
      
      return(p)

    })
  )

# by quarter
mrgs <- prep %>% 
  group_by(qrt, resp) %>%
  nest %>%
  mutate(
    frm = purrr::pmap(list(resp, qrt, data), function(resp, qrt, data){

      yval <- 'rsval'
      if(resp == 'chla')
        yval <- 'log10(rsval)'

      frm <- paste0(yval, ' ~ `Tanom` + upwell + `NP` + `SiN`')

      if(resp %in% c('DA', 'Pn'))
        mod <- try({glm(frm, data = data, family = binomial(link = "logit"))})

      if(resp %in% 'chla')
        mod <- try({glm(frm, data = data, family = gaussian(link = "identity"))})

      if(inherits(mod, 'try-error'))
        return(NA)

      marg <- margins(mod) %>% summary

      p <- ggplot(marg, aes(x = factor, y = AME)) +
        geom_hline(yintercept = 0, color = 'grey', size = 1) +
        geom_point(size = 2) + 
        geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) + 
        mythm2 + 
        scale_y_continuous(limits = c(-0.16, 0.08)) +
        labs(x = NULL, y = 'Margin effect', title = paste(resp, qrt, sep = ', ')) + 
        coord_flip() 
      
      return(p)

    })
  )

plosall <- mrgsall$frm
plos <- mrgs$frm
grid.arrange(
  arrangeGrob(plosall[[1]], plos[[3]], plos[[4]], plos[[1]], plos[[2]], ncol = 1),
  arrangeGrob(plosall[[2]], plos[[7]], plos[[8]], plos[[5]], plos[[6]], ncol = 1),
  arrangeGrob(plosall[[3]], plos[[11]], plos[[12]], plos[[9]], plos[[10]], ncol = 1),
  ncol = 3
)

1.6 Multivariate analyses

1.6.1 PCA round 1

dimymax <- 45
mythm2 <- theme_bw(base_family = 'serif') +
  theme(
    strip.background = element_blank(), 
    text = element_text(family = 'serif', size = 12)
  ) 

# all dates
tomodall <- tsdat %>% 
  mutate(
    temp = ifelse(temp == 0, NA, temp)
    ) %>% 
  select(chla, da, din, phosphorus, pntot, silicate, temp) %>% 
  rename(`T` = temp) %>% 
  mutate_at(vars(-`T`), decostand, method = 'log', MARGIN = 2 , logbase = 10, na.rm = T) %>%
  as.data.frame %>% 
  imputePCA %>% 
  .$completeObs
  
pcamod <- PCA(tomodall, scale.unit = T, graph = F)

# all time series
p1 <- ggord(pcamod, size = tsdat$Lat, vec_ext = 6, alpha = 0.5, txt = 4, arrow = 0.2, repel = F, coord_fix = F, labcol = 'tomato1', veccol = 'tomato1', sizelab = 'Latitude') + 
  mythm2 +
  ggtitle('All dates') + 
  scale_size(range = c(0.3, 1.5))

# explained variance by dimensions
p1exp <- pcamod %>% 
  .$eig %>% 
  data.frame %>% 
  rownames_to_column() %>% 
  rename(vrs = percentage.of.variance) %>% 
  mutate(rowname = gsub('comp\\s', '', rowname)) %>% 
  # .[1:3, ] %>% 
  ggplot(aes(x = rowname, y = vrs, fill = vrs)) + 
  geom_bar(stat = 'identity') + 
  labs(x = 'Dimension', y = '% variance') + 
  scale_y_continuous(limits = c(0, dimymax)) + 
  scale_fill_gradient(limits = c(0, dimymax), low = 'tomato1', high = 'lightgreen', guide = F) +
  ggtitle('All dates') + 
  mythm2

# by season
pcamodgrp <- tsdat %>% 
  group_by(qrt) %>% 
  nest %>% 
  mutate(
    plo = purrr::pmap(list(qrt, data), function(qrt, data){
      
      tomod <- data %>% 
        mutate(
          temp = ifelse(temp == 0, NA, temp)
          ) %>% 
        select(chla, da, din, phosphorus, pntot, silicate, temp) %>% 
        rename(`T` = temp) %>% 
        mutate_at(vars(-`T`), decostand, method = 'log', MARGIN = 2 , logbase = 10, na.rm = T) %>% 
        as.data.frame %>% 
        imputePCA %>% 
        .$completeObs
  
      pcamod <- PCA(tomod, scale.unit = T, graph = F)

      # ggord
      p1 <- ggord(pcamod, vec_ext = 6, alpha = 0.5, size = data$Lat, txt = 4, arrow = 0.2, repel = F, coord_fix = F, labcol = 'tomato1', veccol = 'tomato1', sizelab = 'Latitude') + 
        mythm2 +
        ggtitle(qrt)+ 
        scale_size(range = c(0.3, 1.5))
  
      # explained variance by dimensions
      p2 <- pcamod %>% 
        .$eig %>% 
        data.frame %>% 
        rownames_to_column() %>%
        rename(vrs = percentage.of.variance) %>% 
        mutate(rowname = gsub('comp\\s', '', rowname)) %>% 
        # .[1:3, ] %>% 
        ggplot(aes(x = rowname, y = vrs, fill = vrs)) + 
        geom_bar(stat = 'identity') + 
        labs(x = 'Dimension', y = '% variance') + 
        scale_y_continuous(limits = c(0, dimymax)) + 
        scale_fill_gradient(limits = c(0, dimymax), low = 'tomato1', high = 'lightgreen', guide = F) + 
        ggtitle(qrt) + 
        mythm2
    
      out <- list(p1, p2)
      
      return(out)
      
    })
  ) %>% 
  pull(plo)

grid.arrange(
  arrangeGrob(
    p1, pcamodgrp[[3]][[1]], pcamodgrp[[4]][[1]], pcamodgrp[[1]][[1]], pcamodgrp[[2]][[1]],
    ncol = 1
  ),
  arrangeGrob(
    p1exp, pcamodgrp[[3]][[2]], pcamodgrp[[4]][[2]], pcamodgrp[[1]][[2]], pcamodgrp[[2]][[2]],
    ncol = 1
  ) ,
  ncol = 2, widths = c(1, 0.8)
)

1.6.2 PCA round 2

dimymax <- 45
mythm2 <- theme_bw(base_family = 'serif') +
  theme(
    strip.background = element_blank(), 
    text = element_text(family = 'serif', size = 12)
  ) 

# all dates
tomodall <- tsdat %>% 
  select(chla, da, pntot, ntop, siton, upwell, tempanom) %>% 
  rename(
    `N:P` = ntop, 
    `Si:N` = siton, 
    `T,anom` = tempanom
    ) %>% 
  mutate_at(vars(-`T,anom`, -`Si:N`, -`N:P`), decostand, method = 'log', MARGIN = 2 , logbase = 10, na.rm = T) %>%
  as.data.frame %>% 
  imputePCA %>% 
  .$completeObs
  
pcamod <- PCA(tomodall, scale.unit = T, graph = F)

# all time series
p1 <- ggord(pcamod, size = tsdat$Lat, vec_ext = 6, alpha = 0.5, txt = 4, arrow = 0.2, repel = F, coord_fix = F, labcol = 'tomato1', veccol = 'tomato1', sizelab = 'Latitude') + 
  mythm2 +
  ggtitle('All dates') + 
  scale_size(range = c(0.3, 1.5))

# explained variance by dimensions
p1exp <- pcamod %>% 
  .$eig %>% 
  data.frame %>% 
  rownames_to_column() %>% 
  rename(vrs = percentage.of.variance) %>% 
  mutate(rowname = gsub('comp\\s', '', rowname)) %>% 
  # .[1:3, ] %>% 
  ggplot(aes(x = rowname, y = vrs, fill = vrs)) + 
  geom_bar(stat = 'identity') + 
  labs(x = 'Dimension', y = '% variance') + 
  scale_y_continuous(limits = c(0, dimymax)) + 
  scale_fill_gradient(limits = c(0, dimymax), low = 'tomato1', high = 'lightgreen', guide = F) +
  ggtitle('All dates') + 
  mythm2

# by season
pcamodgrp <- tsdat %>% 
  group_by(qrt) %>% 
  nest %>% 
  mutate(
    plo = purrr::pmap(list(qrt, data), function(qrt, data){
      
      tomod <- data %>% 
        select(chla, da, pntot, ntop, siton, tempanom, upwell) %>% 
        rename(
          `N:P` = ntop, 
          `Si:N` = siton, 
          `T,anom` = tempanom
          ) %>% 
        mutate_at(vars(-`T,anom`, -`Si:N`, -`N:P`), decostand, method = 'log', MARGIN = 2 , logbase = 10, na.rm = T) %>% 
        as.data.frame %>% 
        imputePCA %>% 
        .$completeObs
  
      pcamod <- PCA(tomod, scale.unit = T, graph = F)

      # ggord
      p1 <- ggord(pcamod, vec_ext = 6, alpha = 0.5, size = data$Lat, txt = 4, arrow = 0.2, repel = F, coord_fix = F, labcol = 'tomato1', veccol = 'tomato1', sizelab = 'Latitude') + 
        mythm2 +
        ggtitle(qrt)+ 
        scale_size(range = c(0.3, 1.5))
  
      # explained variance by dimensions
      p2 <- pcamod %>% 
        .$eig %>% 
        data.frame %>% 
        rownames_to_column() %>%
        rename(vrs = percentage.of.variance) %>% 
        mutate(rowname = gsub('comp\\s', '', rowname)) %>% 
        # .[1:3, ] %>% 
        ggplot(aes(x = rowname, y = vrs, fill = vrs)) + 
        geom_bar(stat = 'identity') + 
        labs(x = 'Dimension', y = '% variance') + 
        scale_y_continuous(limits = c(0, dimymax)) + 
        scale_fill_gradient(limits = c(0, dimymax), low = 'tomato1', high = 'lightgreen', guide = F) + 
        ggtitle(qrt) + 
        mythm2
    
      out <- list(p1, p2)
      
      return(out)
      
    })
  ) %>% 
  pull(plo)

grid.arrange(
  arrangeGrob(
    p1, pcamodgrp[[3]][[1]], pcamodgrp[[4]][[1]], pcamodgrp[[1]][[1]], pcamodgrp[[2]][[1]],
    ncol = 1
  ),
  arrangeGrob(
    p1exp, pcamodgrp[[3]][[2]], pcamodgrp[[4]][[2]], pcamodgrp[[1]][[2]], pcamodgrp[[2]][[2]],
    ncol = 1
  ) ,
  ncol = 2, widths = c(1, 0.8)
)

1.6.3 PCA Santa Cruz and Scripps

dimymax <- 45
mythm2 <- theme_bw(base_family = 'serif') +
  theme(
    strip.background = element_blank(), 
    text = element_text(family = 'serif', size = 12)
  ) 

# all dates
tomodall <- stat_pair %>% 
  select(chla, da, pntot, ntop, siton, upwell, tempanom) %>% 
  rename(
    `N:P` = ntop, 
    `Si:N` = siton, 
    `T,anom` = tempanom
    ) %>% 
  mutate_at(vars(-`T,anom`, -`Si:N`, -`N:P`), decostand, method = 'log', MARGIN = 2 , logbase = 10, na.rm = T) %>%
  as.data.frame %>% 
  imputePCA %>% 
  .$completeObs
  
pcamod <- PCA(tomodall, scale.unit = T, graph = F)

# all time series
p1 <- ggord(pcamod, grp_in = stat_pair$location, size = stat_pair$Lat, vec_ext = 6, alpha = 0.5, txt = 4, arrow = 0.2, repel = F, coord_fix = F, labcol = 'tomato1', veccol = 'tomato1', sizelab = 'Latitude') + 
  mythm2 +
  ggtitle('All dates') + 
  scale_size(range = c(0.3, 1.5))

# explained variance by dimensions
p1exp <- pcamod %>% 
  .$eig %>% 
  data.frame %>% 
  rownames_to_column() %>% 
  rename(vrs = percentage.of.variance) %>% 
  mutate(rowname = gsub('comp\\s', '', rowname)) %>% 
  # .[1:3, ] %>% 
  ggplot(aes(x = rowname, y = vrs, fill = vrs)) + 
  geom_bar(stat = 'identity') + 
  labs(x = 'Dimension', y = '% variance') + 
  scale_y_continuous(limits = c(0, dimymax)) + 
  scale_fill_gradient(limits = c(0, dimymax), low = 'tomato1', high = 'lightgreen', guide = F) +
  ggtitle('All dates') + 
  mythm2

# by season
pcamodgrp <- stat_pair %>% 
  group_by(qrt) %>% 
  nest %>% 
  mutate(
    plo = purrr::pmap(list(qrt, data), function(qrt, data){
      
      tomod <- data %>% 
        select(chla, da, pntot, ntop, siton, tempanom, upwell) %>% 
        rename(
          `N:P` = ntop, 
          `Si:N` = siton, 
          `T,anom` = tempanom
          ) %>% 
        mutate_at(vars(-`T,anom`, -`Si:N`, -`N:P`), decostand, method = 'log', MARGIN = 2 , logbase = 10, na.rm = T) %>% 
        as.data.frame %>% 
        imputePCA %>% 
        .$completeObs
  
      pcamod <- PCA(tomod, scale.unit = T, graph = F)

      # ggord
      p1 <- ggord(pcamod, grp_in = data$location, vec_ext = 6, alpha = 0.5, size = data$Lat, txt = 4, arrow = 0.2, repel = F, coord_fix = F, labcol = 'tomato1', veccol = 'tomato1', sizelab = 'Latitude') + 
        mythm2 +
        ggtitle(qrt)+ 
        scale_size(range = c(0.3, 1.5))
  
      # explained variance by dimensions
      p2 <- pcamod %>% 
        .$eig %>% 
        data.frame %>% 
        rownames_to_column() %>%
        rename(vrs = percentage.of.variance) %>% 
        mutate(rowname = gsub('comp\\s', '', rowname)) %>% 
        # .[1:3, ] %>% 
        ggplot(aes(x = rowname, y = vrs, fill = vrs)) + 
        geom_bar(stat = 'identity') + 
        labs(x = 'Dimension', y = '% variance') + 
        scale_y_continuous(limits = c(0, dimymax)) + 
        scale_fill_gradient(limits = c(0, dimymax), low = 'tomato1', high = 'lightgreen', guide = F) + 
        ggtitle(qrt) + 
        mythm2
    
      out <- list(p1, p2)
      
      return(out)
      
    })
  ) %>% 
  pull(plo)

grid.arrange(
  arrangeGrob(
    p1, pcamodgrp[[1]][[1]], pcamodgrp[[2]][[1]], pcamodgrp[[3]][[1]], pcamodgrp[[4]][[1]],
    ncol = 1
  ),
  arrangeGrob(
    p1exp, pcamodgrp[[1]][[2]], pcamodgrp[[2]][[2]], pcamodgrp[[3]][[2]], pcamodgrp[[4]][[2]],
    ncol = 1
  ) ,
  ncol = 2, widths = c(1, 0.6)
)