On Text Editors

Dates in R

Epidemic Curves From Reporting Data

## Load the 2017 Yemen case reports. The first two lines of the file are
## skipped and the following line supplies the column names.
yemen <- read.csv(
  file = "https://web.stanford.edu/class/earthsys214/data/yemen-cases-2017.txt",
  header = TRUE,
  skip = 2
)
## cumulative cases: running total of reported cases against the report date
## (dates are stored as day-month-year text and parsed on the fly)
plot(
  x = as.Date(yemen$Date, format = "%d-%m-%Y"),
  y = yemen$Cases,
  type = "l", lwd = 3, col = "#660066",
  xlab = "Date", ylab = "Total Reported Cases"
)

# incidence: average number of new cases per day between consecutive reports
dates <- as.Date(yemen$Date, format = "%d-%m-%Y")
# change in the cumulative count divided by the gap (in days) between reports
inc <- diff(yemen$Cases) / as.numeric(diff(dates), units = "days")
# the first report has no predecessor, so it is dropped from the x axis
plot(
  x = dates[-1],
  y = inc,
  type = "h", lwd = 5, col = "#660066",
  xlab = "Date", ylab = "New Cases"
)

Curve Fitting

library(splines)
## Smooth the daily incidence with a natural cubic spline (5 df) in time.
##
## The model is fitted from a data frame whose predictor is a plain column, so
## predict() can be handed matching `newdata`. The earlier form used the term
## `dates[-1]` together with `newdata = data.frame(dates=dates)[-1,]`; that
## single-column subset collapses to a bare Date vector, so predict() ignored
## it and silently picked up the global `dates` instead.
##
## `day` is the report date as a number of days, which is what ns() works on.
yemen_inc <- data.frame(day = as.numeric(dates[-1]), inc = inc)
sinc <- lm(inc ~ ns(day, df = 5), data = yemen_inc)
## redraw the incidence spikes, then overlay the fitted curve
plot(dates[-1], inc, type = "h", lwd = 5, col = "#660066",
     ylab = "New Cases", xlab = "Date")
lines(dates[-1], predict(sinc, newdata = yemen_inc), lwd = 3, col = "black")

Download Data from Web

library(htmltab)
## Page holding the table to scrape.
url <- "https://www.cdc.gov/std/stats16/tables/1.htm"
## Column names to impose on the scraped table: the year, then a cases/rate
## pair for each syphilis category, chlamydia, gonorrhea and chancroid.
cnames <- c(
  "Year",
  "Syphilis_All_Cases", "Syphilis_All_Rate",
  "Syphilis_Primary_Secondary_Cases", "Syphilis_Primary_Secondary_Rate",
  "Syphilis_Early_Latent_Cases", "Syphilis_Early_Latent_Rate",
  "Syphilis_Late_Latent_Cases", "Syphilis_Late_Latent_Rate",
  "Syphilis_Congenital_Cases", "Syphilis_Congenital_Rate",
  "Chlamydia_Cases", "Chlamydia_Rate",
  "Gonorrhea_Cases", "Gonorrhea_Rate",
  "Chancroid_Cases", "Chancroid_Rate"
)
## The XPath picks the table that contains a header cell whose text is exactly
## 'Year*'; rows and columns without data are kept rather than dropped.
stitable <- htmltab(
  doc = url,
  which = "//th[text() = 'Year*']/ancestor::table",
  colNames = cnames,
  rm_nodata_rows = FALSE,
  rm_nodata_cols = FALSE
)
## remove commas and convert from text to numeric
## note that gsub() works on vectors, so the conversion is applied column by
## column; assigning into stitable[] keeps the result a data frame and covers
## every column without hard-coding how many there are
## this will generate a warning but you actually want NAs introduced!
stitable[] <- lapply(stitable, function(column) {
  as.numeric(gsub(",", "", column))
})
## an alternative is apply(), which will convert to matrix -- need to change
## back to data.frame (author's note: for some reason, R Markdown won't compile
## with that approach...)
# dropcomma <- function(x) gsub(",", "", x)
# stitable <- as.data.frame(apply(stitable, 2, dropcomma))
## Gonorrhea case counts by year. Counts are divided by 1e05, so the y axis is
## in units of 100,000 cases (the label previously said 10,000).
with(stitable, plot(Year, Gonorrhea_Cases / 1e05, type = "l", lwd = 3,
                    col = "yellow4",
                    xlab = "Year", ylab = "Gonorrhea Cases (100,000s)"))
title("The Disco Surge")