Generate a Codebook of a Data Set

Description

The function `codebook` collects documentation about an item, or about the items in a data set or external data file. It returns an object that, when displayed by its `show` method, prints this documentation in a nicely formatted way.

Usage

codebook(x)
## S4 method for signature 'item'
codebook(x)
## S4 method for signature 'atomic'
codebook(x)
## S4 method for signature 'factor'
codebook(x)
## S4 method for signature 'data.set'
codebook(x)
## S4 method for signature 'data.frame'
codebook(x)
## S4 method for signature 'importer'
codebook(x)

Arguments

x

the object to be documented by codebook(): an item, a numeric or character vector, a factor, a data.set, a data.frame, or an importer object.

Value

An object of class “codebook”, for which a show method exists that produces nicely formatted output.

Examples

Data <- data.set(
         vote = sample(c(1,2,3,8,9,97,99),size=300,replace=TRUE),
         region = sample(c(rep(1,3),rep(2,2),3,99),size=300,replace=TRUE),
         income = exp(rnorm(300,sd=.7))*2000
         )
Data <- within(Data,{
 description(vote) <- "Vote intention"
 description(region) <- "Region of residence"
 description(income) <- "Household income"
 wording(vote) <- "If a general election would take place next tuesday,
                   the candidate of which party would you vote for?"
 wording(income) <- "All things taken into account, how much do all
                   household members earn in sum?"
 foreach(x=c(vote,region),{
   measurement(x) <- "nominal"
   })
 measurement(income) <- "ratio"
 labels(vote) <- c(
                   Conservatives         =  1,
                   Labour                =  2,
                   "Liberal Democrats"   =  3,
                   "Don't know"          =  8,
                   "Answer refused"      =  9,
                   "Not applicable"      = 97,
                   "Not asked in survey" = 99)
 labels(region) <- c(
                   England               =  1,
                   Scotland              =  2,
                   Wales                 =  3,
                   "Not applicable"      = 97,
                   "Not asked in survey" = 99)
 foreach(x=c(vote,region,income),{
   annotation(x)["Remark"] <- "This is not a real survey item, of course ..."
   })
 missing.values(vote) <- c(8,9,97,99)
 missing.values(region) <- c(97,99)
})
description(Data)
vote   'Vote intention'
region 'Region of residence'
income 'Household income'
codebook(Data)
================================================================================

   vote 'Vote intention'

   "If a general election would take place next tuesday, the candidate of which
   party would you vote for?"

--------------------------------------------------------------------------------

   Storage mode: double
   Measurement: nominal
   Missing values: 8, 9, 97, 99

            Values and labels    N    Percent

    1   'Conservatives'         48   16.0
    2   'Labour'                38   12.7
    3   'Liberal Democrats'     47   15.7
    8 M 'Don't know'            47   15.7
    9 M 'Answer refused'        47   15.7
   97 M 'Not applicable'        31   10.3
   99 M 'Not asked in survey'   42   14.0

   Remark:
       This is not a real survey item, of course ...

================================================================================

   region 'Region of residence'

--------------------------------------------------------------------------------

   Storage mode: double
   Measurement: nominal
   Missing values: 97, 99

            Values and labels    N    Percent

    1   'England'              123   41.0
    2   'Scotland'              92   30.7
    3   'Wales'                 39   13.0
   99 M 'Not asked in survey'   46   15.3

   Remark:
       This is not a real survey item, of course ...

================================================================================

   income 'Household income'

   "All things taken into account, how much do all household members earn in
   sum?"

--------------------------------------------------------------------------------

   Storage mode: double
   Measurement: ratio

            Min:    345.805
            Max:  11832.528
           Mean:   2538.772
       Std.Dev.:   1811.529
       Skewness:      1.944
       Kurtosis:      5.266

   Remark:
       This is not a real survey item, of course ...
## Not run:
##
## Write(description(Data),
##            file="Data-desc.txt")
## Write(codebook(Data),
##            file="Data-cdbk.txt")
##
## End(Not run)