## ======================================
## Figure 5: Salmonella time series for hospitalization rates in LA, Southern CA, and Northern CA
## ======================================

  # Start from an empty workspace so objects left over from an earlier run
  # cannot leak into this figure.
  rm(list = ls())

  # TRUE only for a single, non-missing, non-empty character string.
  # Every other result (zero-length vector, NA, "", a condition object) is
  # treated as "directory not found".
  is.usable.dir <- function(path) {
    is.character(path) && length(path) == 1L && !is.na(path) && nzchar(path)
  }

  # First attempt (base R): directory recorded in the source reference of a
  # function defined right here. The result can be a zero-length vector or ""
  # when no usable source reference is available, so it is validated below
  # instead of being compared with "" directly (a zero-length comparison
  # inside if() is an error).
  my.wd <- tryCatch(getSrcDirectory(function(x) {x}),
                    error = function(e) "")

  # Second attempt: ask the RStudio API for the folder of the active document.
  # Any failure (rstudioapi not installed, not running in RStudio, ...) is
  # mapped to "" so that my.wd always stays a plain character value.
  if (!is.usable.dir(my.wd)) {
    my.wd <- tryCatch(dirname(rstudioapi::getActiveDocumentContext()$path),
                      error = function(e) "")
  }

  # Set working directory; the data files below are read with paths relative
  # to it. If the script location could not be determined, keep the current
  # working directory and say so rather than failing inside setwd().
  if (is.usable.dir(my.wd)) {
    setwd(my.wd)
  } else {
    my.wd <- ""
    warning("Could not determine the script directory; keeping the current ",
            "working directory: ", getwd(), call. = FALSE)
  }

## ==========================
## Read in data
## ==========================  

  #Input files, given relative to the working directory
  county.pop.file <- "../Data/county_population_1964_2015.csv"
  hosp.file <- "../Data/hosp_county_year_1983_2009.csv"

  #County population table (later code uses its Name, year and pop columns)
  county.pop.1964.2015 <- read.csv(file = county.pop.file,
                                   stringsAsFactors = FALSE)

  #Hospitalization table (later code uses county.name, year and salm.0030)
  hosp.cty.yr.1983.2009 <- read.csv(file = hosp.file,
                                    stringsAsFactors = FALSE)

## ==========================
## Aggregate population
## ==========================  

  # Counties grouped as Southern California; Los Angeles is kept separate
  socal <- c("Imperial", "Kern", "Orange", "San Bernardino", "San Diego",
             "San Luis Obispo", "Santa Barbara", "Riverside", "Ventura")

  # Row masks over the population table, one per region. %in% never yields
  # NA, so rows with a missing county name fall into no named region.
  pop.county <- county.pop.1964.2015[["Name"]]
  pop.is.la <- pop.county %in% "Los Angeles"
  pop.is.socal <- pop.county %in% socal

  #Yearly population, LA only
  la.pop <- aggregate(pop ~ year,
                      data = county.pop.1964.2015[pop.is.la, ],
                      FUN = sum)

  #Yearly population, SoCal only
  socal.pop <- aggregate(pop ~ year,
                         data = county.pop.1964.2015[pop.is.socal, ],
                         FUN = sum)

  #Yearly population, every remaining county (non-LA, non-SoCal)
  norcal.pop <- aggregate(pop ~ year,
                          data = county.pop.1964.2015[!(pop.is.la | pop.is.socal), ],
                          FUN = sum)

## ==========================
## Aggregate hospitalizations
## ==========================  
  
  ## Flag every hospitalization row with its county grouping (1 = member)
  hosp.county <- hosp.cty.yr.1983.2009$county.name
  hosp.cty.yr.1983.2009$la <- as.numeric(hosp.county == "Los Angeles")
  hosp.cty.yr.1983.2009$socal <- as.numeric(hosp.county %in% socal)

  #Illness column to aggregate
  fbi.name <- c("salm.0030")

  #Formula "cbind(<illness columns>)~year", built once and shared by all regions
  count.formula <- formula(paste0("cbind(",
                                  paste0(fbi.name, collapse = ","),
                                  ")~year"))

  #For one region: sum the counts per year, attach that year's population,
  #and add the rate per 100,000 population.
  #  keep    - logical vector over the hospitalization rows (NA counts as
  #            "not selected")
  #  pop.tab - data frame with columns year and pop for the same region
  yearly.rate <- function(keep, pop.tab) {
    region.rows <- hosp.cty.yr.1983.2009[which(keep), ]
    out <- aggregate(count.formula, data = region.rows, FUN = sum)
    #Add time varying population; years missing from either table drop out
    out <- merge(out, pop.tab, by = "year")
    out$rate <- 100000 * out[, fbi.name] / out$pop
    out
  }

  #Yearly counts, population and rate per region
  year.norcal.agg <- yearly.rate(hosp.cty.yr.1983.2009$la == 0 &
                                   hosp.cty.yr.1983.2009$socal == 0,
                                 norcal.pop)
  year.la.agg <- yearly.rate(hosp.cty.yr.1983.2009$la == 1, la.pop)
  year.socal.agg <- yearly.rate(hosp.cty.yr.1983.2009$socal == 1, socal.pop)

## ==========================
## Output: Plot
## ==========================  

  #Line colors: LA in red, Northern CA in blue, both at 80% opacity
  la.color <- rgb(1, 0, 0, 0.8)
  ca.color <- rgb(0, 0, 1, 0.8)

  #Two panels side by side with tightened margins, axis labels and ticks
  par(mfrow = c(1, 2), mar = c(3, 3, 2, 1), mgp = c(1.5, 0.5, 0), tcl = -0.3)

  #Draw one panel comparing the LA rate with another region's rate.
  #  panel.title - text for the panel's main title
  #  other       - data frame with year and rate columns for the comparison
  #  line.col    - color of the comparison region's dashed line
  #  other.label - text label placed for the comparison region
  #  label.col   - color of that text label
  draw.trend.panel <- function(panel.title, other, line.col,
                               other.label, label.col) {
    #Empty frame first (type = "n") so the shaded band ends up behind the lines
    plot(year.la.agg$year, year.la.agg$rate, type = "n", col = ca.color,
         ylab = "Rate", xlab = "Year", main = panel.title, ylim = c(0, 3))
    #Light grey band over 1995-1999, split by a white line at 1997.5
    rect(xleft = 1995, xright = 1999, ytop = 8, ybottom = -1,
         border = NA, col = rgb(0, 0, 0, 0.1))
    abline(v = 1997.5, col = "white")
    #LA as a solid line, comparison region as a thinner dashed line
    lines(year.la.agg$year, year.la.agg$rate, col = la.color, lwd = 1.5)
    lines(other$year, other$rate, lty = 5, col = line.col, lwd = 1)
    #Text labels at fixed positions inside the plotting region
    text(x = 1986, y = 1.6, labels = other.label, col = label.col)
    text(x = 1984, y = 2.8, labels = "LA", col = la.color)
    invisible(NULL)
  }

  #LA vs Norcal trends
  draw.trend.panel("Salmonella Hospitalizations", year.norcal.agg,
                   ca.color, "Northern\nCA", ca.color)

  #LA vs SoCal trends
  #NOTE(review): the label color "purple" is not the same color as the line
  #(rgb(1,0,1,1)); kept as in the original - confirm whether they should match.
  draw.trend.panel("Southern CA", year.socal.agg,
                   rgb(1, 0, 1, 1), "Southern\nCA", "purple")


