library(RCurl)
library(reshape)
library(htmltab)
library(ggplot2)
library(scales)
# Download the raw HTML of the Wikipedia template page that holds the table.
# NOTE(review): peer certificate verification is switched off for this
# request, so the server is not authenticated -- confirm this workaround is
# still required.
theurl <- getURL(
  "https://en.wikipedia.org/wiki/Template:2016USDem",
  ssl.verifyPeer = FALSE
)
# Parse the downloaded HTML into a table object. No table selector is passed,
# so which table gets picked is left to htmltab's default.
table <- htmltab(doc = theurl)
# Keep the four columns used downstream (picked by their position in the
# scraped table) and give them descriptive names.
df <- table[, c(1, 2, 9, 12)]
names(df) <- c("Date", "State", "Clinton", "Sanders")
# Characters 9 to 18 of the first column are parsed as the date.
# NOTE(review): presumably each cell starts with a fixed-width prefix before
# the ISO date -- confirm against the current page markup.
df$Date <- as.Date(substr(df$Date, 9, 18))
# Convert both delegate-count columns to numbers in one pass.
df[3:4] <- lapply(df[3:4], as.numeric)
# Discard every row that has an NA in any column.
df <- na.omit(df)
# Build two per-date summary tables from the per-contest rows in df:
#   df2 holds the delegates won by each candidate on each date
#   df3 holds the cumulative delegate count of each candidate up to each date
# Dates are sorted so the running totals accumulate chronologically even when
# the scraped rows are not in date order.
contest_dates <- sort(unique(df$Date))
df2 <- data.frame(Date = contest_dates)
df3 <- data.frame(Date = contest_dates)
# Every column after Date and State is treated as a candidate column.
for (candidate in names(df)[-(1:2)]) {
  # seq_along() makes this a no-op (instead of an error) when df has no rows.
  won_per_date <- vapply(
    seq_along(contest_dates),
    function(k) {
      sum(df[[candidate]][df$Date == contest_dates[k]], na.rm = TRUE)
    },
    numeric(1)
  )
  df2[[candidate]] <- won_per_date
  # One linear pass replaces re-summing the prefix 1:j for every date.
  df3[[candidate]] <- cumsum(won_per_date)
}
# Turn both summary tables from wide (one column per candidate) into long
# form (one row per date and candidate).
mdata <- melt(df2, id.vars = "Date")
mdata2 <- melt(df3, id.vars = "Date")
names(mdata) <- c("Date", "Candidate", "Delegates")
# The third column of the molten cumulative table is attached by position,
# which relies on mdata and mdata2 having their rows in the same order.
mdata$Cumulative <- mdata2[[3]]
# Drop rows that contain NA.
results <- na.omit(mdata)
# Rows of results that share the date found in the final row of results;
# their cumulative values are drawn as numeric labels on the plot.
# which() discards NA comparisons, exactly as subset() does.
lastresult <- results[which(results$Date == utils::tail(results$Date, 1)), ]
# Horizontal reference lines: one row per threshold, holding the delegate
# count where the line is drawn and the label shown in the legend.
thresholds <- c(Nomination = 2383, Majority = 2026)
nomination <- data.frame(
  yintercept = unname(thresholds),
  name = names(thresholds)
)
# Line colours in candidate order: gold for Clinton, green for Sanders.
colors <- c("#D4AA00", "#228b22")
# Generate the plot: one cumulative line per candidate, horizontal threshold
# lines, and a numeric label at the last data point of each candidate.
# The x axis runs over a fixed date window.
plot_start <- as.Date("2016/2/1")
plot_end <- as.Date("2016/7/28")
# Tick marks on the 1st and 15th of every month, plus the final day.
axis_breaks <- sort(c(
  seq(plot_start, plot_end, by = "month"),
  seq(as.Date("2016/2/15"), plot_end, by = "month"),
  plot_end
))
d <- ggplot(
  results,
  aes(x = Date, y = Cumulative, group = Candidate, colour = Candidate)
) +
  # Threshold lines carry their own data and get a linetype legend.
  geom_hline(
    aes(yintercept = yintercept, linetype = name),
    data = nomination,
    show.legend = TRUE
  ) +
  geom_path(size = 1) +
  scale_color_manual(values = colors) +
  scale_y_continuous(
    breaks = seq(0, 2500, 250),
    minor_breaks = seq(0, 2500, 50)
  ) +
  scale_x_date(
    limits = c(plot_start, plot_end),
    breaks = axis_breaks,
    labels = date_format("%b %d")
  ) +
  scale_linetype_manual(values = c("dotted", "dashed")) +
  labs(
    x = "Date",
    y = "Pledged delegates",
    title = "Pledged delegate count",
    linetype = "Thresholds"
  ) +
  # Latest cumulative value of each candidate, nudged right of the line end.
  # FALSE is spelled out because F is an ordinary, reassignable variable.
  geom_text(
    data = lastresult,
    show.legend = FALSE,
    aes(x = Date, y = Cumulative, label = Cumulative),
    size = 4,
    hjust = -0.1
  ) +
  # Transparent backgrounds so the image blends into the embedding page.
  theme(
    plot.background = element_rect(fill = "transparent", colour = NA),
    legend.background = element_rect(fill = "transparent", colour = NA)
  )
# Render the plot into count.svg (8 x 5 inches, transparent background).
svg(
  filename = "count.svg",
  width = 8,
  height = 5,
  pointsize = 12,
  bg = "transparent"
)
# print() is called explicitly because a bare `d` is only drawn through
# top-level auto-printing, which does not happen when the script is run via
# source(); the file would then be written without the plot.
# The device is closed in `finally` so a rendering error cannot leave it open.
invisible(tryCatch(print(d), finally = dev.off()))