#' # Poststratification of 2016 American National Election Study data
#' The following data file was created in an earlier script / notebook.
load("anes-2016-prevote.RData")
#' The stratifying variables must be completely observed, so every row that is
#' coded "Inap" on `vote16` or on `recall12` is dropped.
anes_2016_vprevote <- subset(
  anes_2016_prevote,
  !(vote16 == "Inap" | recall12 == "Inap")
)
#' In order to make poststratification possible, we need to make sure that the
#' levels of the stratification variables match the population
#' information. Therefore we relabel the variables "recall12" and "vote16".
#' The following makes use of the *memisc* package. You may need to install it from
#' [CRAN](https://cran.r-project.org/package=memisc) using the code
#' `install.packages("memisc")` if you want to run this on your computer. (The
#' package is already installed on the notebook container, however.)
library(memisc)
anes_2016_vprevote <- within(anes_2016_vprevote, {
  # For each variable: apply `[, drop = TRUE]` (presumably to discard levels
  # that are no longer used after filtering -- confirm), then rename the
  # non-voter category to the common label "No vote".
  vote16 <- vote16[, drop = TRUE]
  vote16 <- relabel(vote16, "Will not vote/Not registered" = "No vote")
  recall12 <- recall12[, drop = TRUE]
  recall12 <- relabel(recall12, "Did not vote" = "No vote")
})
# Keep the cleaned data on disk for later scripts.
save(anes_2016_vprevote, file = "anes-2016-vprevote.RData")
#' Finally, we set up a survey design object. The following makes use of the *survey* package. You may need to install it from [CRAN](https://cran.r-project.org/package=survey) using the code
#' `install.packages("survey")` if you want to run this on your computer. (The
#' package is already installed on the notebook container, however.)
library(survey)
# Build the survey design object from the cleaned pre-election data.
# `ids` is spelled out in full: the shorter `id` only works through R's
# partial argument matching of svydesign()'s `ids` parameter.
anes_2016_vprevote_desgn <- svydesign(
  ids = ~psu_f2f,        # cluster (PSU) identifiers
  strata = ~strat_f2f,   # sampling strata
  weights = ~pre_w_f2f,  # sampling weights
  data = anes_2016_vprevote,
  nest = TRUE            # treat PSU ids as nested within strata
)
# Store the design object so later scripts can reuse it.
save(anes_2016_vprevote_desgn, file = "anes-2016-vprevote-design.RData")
#' We collect the electoral results of 2012 to prepare poststratification.
result.2012 <- c(
  Obama = 65915795,
  Romney = 60933504,
  # Other candidates are combined into a single category; the inner names
  # only document the individual counts and are discarded by sum().
  Other = sum(c(
    Johnson = 1275971,
    Stein = 469627,
    Others = 490510
  ))
)
#' The number of non-voters is computed as the voting-age population (taken
#' from census data) minus the total number of votes cast.
result.2012 <- c(
  result.2012,
  "No vote" = 235248000 - sum(result.2012)
)
#' Here we collect the results for 2016.
result.2016 <- local({
  # Votes for the two major-party candidates
  major <- c(Clinton = 65853514, Trump = 62984828)
  # Votes for all remaining candidates, reported as one combined category
  minor <- c(
    Johnson = 4489341,
    Stein = 1457218,
    McMullin = 731991,
    Others = 1154084
  )
  votes <- c(major, Other = sum(minor))
  # Non-voters: the population total 250056000 minus all votes cast
  c(votes, "No vote" = 250056000 - sum(votes))
})
#' The poststratification function expects population data to be in the form of
#' data frames:
# Population counts by 2016 vote, one row per category.
pop.vote16 <- data.frame(vote16 = names(result.2016), Freq = result.2016)
# Population counts by 2012 vote: the 2012 shares are rescaled so that they
# add up to the 2016 population total.
pop.recall12 <- data.frame(
  recall12 = names(result.2012),
  Freq = sum(result.2016) * prop.table(result.2012)
)
# Store both population tables for later use.
save(pop.recall12, pop.vote16, file = "popl-results.RData")
#' We poststratify the sample design object by recalled vote in 2012
# Poststratify the design using `recall12` as the stratifying variable and
# the rescaled 2012 results as the population table.
anes_2016_prevote_desgn_post <- postStratify(
  design = anes_2016_vprevote_desgn,
  strata = ~recall12,
  population = pop.recall12
)
#' We compare the estimated percentages of 2012 votes:
# Recalled 2012 vote in percent: original design first, then poststratified.
# NOTE(review): multiplying the svymean() result by 100 may rescale only the
# estimates and leave the reported standard errors on the proportion scale --
# confirm against the printed output.
svymean(~recall12, design = anes_2016_vprevote_desgn) * 100
svymean(~recall12, design = anes_2016_prevote_desgn_post) * 100
#' As should be expected, poststratification eliminates the uncertainty about
#' 2012 votes. It also corrects for turnout overreporting.
#' We now compare the estimated percentages of 2016 votes
# Intended 2016 vote in percent: original design first, then poststratified.
# NOTE(review): multiplying the svymean() result by 100 may rescale only the
# estimates and leave the reported standard errors on the proportion scale --
# confirm against the printed output.
svymean(~vote16, design = anes_2016_vprevote_desgn) * 100
svymean(~vote16, design = anes_2016_prevote_desgn_post) * 100
#' The percentages of Clinton voters and Trump voters are closer after
#' poststratification.
#' We save the poststratified data for later use.
# Write the poststratified design object to disk under its own name.
save(
  list = "anes_2016_prevote_desgn_post",
  file = "anes-2016-prevote-desgn-post.RData"
)