Groups memisc 0.99.26.3

Operate on grouped data in data frames and data sets

Description

Groups creates a grouped variant of an object of class “data.frame” or of class “data.set”, for which methods for with() and within() are defined, so that these well-known functions can be applied “groupwise”.

Usage

# Create an object of class "grouped.data" from a
# data frame or a data set.
Groups(data,by,...)
## S4 method for signature 'data.frame'
Groups(data,by,...)
## S4 method for signature 'data.set'
Groups(data,by,...)
## S4 method for signature 'grouped.data'
Groups(data,by,...)

# Recombine grouped data into a data frame or a data set
recombine(x,...)
## S4 method for signature 'grouped.data.frame'
recombine(x,...)
## S4 method for signature 'grouped.data.set'
recombine(x,...)

# Recombine grouped data and coerce the result appropriately:
## S4 method for signature 'grouped.data'
as.data.frame(x,...)
## S4 method for signature 'grouped.data.frame'
as.data.set(x,row.names=NULL,...)
## S4 method for signature 'grouped.data.set'
as.data.set(x,row.names=NULL,...)

# Methods of the generics "with" and "within" for grouped data
## S4 method for signature 'grouped.data'
with(data,expr,...)
## S4 method for signature 'grouped.data'
within(data,expr,recombine=FALSE,...)

# This is equivalent to with(Groups(data,by),expr,...)
withGroups(data,by,expr,...)
# This is equivalent to within(Groups(data,by),expr,recombine,...)
withinGroups(data,by,expr,recombine=TRUE,...)

Arguments

data

an object of class “data.frame” or “data.set” if an argument to Groups, withGroups, or withinGroups; an object of class “grouped.data” if an argument to with or within.

by

a formula with the factors the levels of which define the groups.

expr

an expression, or several expressions enclosed in curly braces.

recombine

a logical vector; should the resulting grouped data be recombined?

x

an object of class “grouped.data”.

row.names

an optional character vector with row names.

...

other arguments, ignored.

Details

When applied to a data frame, Groups returns an object with class attributes “grouped.data.frame”, “grouped.data”, and “data.frame”. When applied to an object with class “data.set”, it returns an object with class attributes “grouped.data.set”, “grouped.data”, and “data.set”.

When applied to objects with class attribute “grouped.data”, both the functions with() and within() evaluate expr separately for each group defined by Groups. with() returns an array composed of the results of expr, while within() returns a modified copy of its data argument, which will be a “grouped.data” object (“grouped.data.frame” or “grouped.data.set”), unless the argument recombine=TRUE is set.

The expression expr may contain references to the variables n_, N_, and i_. n_ is equal to the size of the respective group (the number of rows belonging to it), while N_ is equal to the total number of observations in all groups. The variable i_ contains the indices of the rows belonging to the respective group of observations.

Examples

some.data <- data.frame(x=rnorm(n=100))
some.data <- within(some.data,{
   f <- factor(rep(1:4,each=25),labels=letters[1:4])
   g <- factor(rep(1:5,each=4,5),labels=LETTERS[1:5])
   y <- x + rep(1:4,each=25) +  0.75*rep(1:5,each=4,5)
})
# For demonstration purposes, we create an
# 'empty' group:
some.data <- subset(some.data,
                      f!="a" | g!="C")
some.grouped.data <- Groups(some.data,
                          ~f+g)

# Computing the means of y for each combination of f and g
group.means <- with(some.grouped.data,
                   mean(y))
group.means
   g
f          A        B        C        D        E
  a 1.967418 3.052784       NA 4.486448 4.435203
  b 3.459411 3.911713 4.400992 4.978906 6.433718
  c 3.701949 4.022867 4.259162 5.696182 6.509524
  d 4.573041 5.050737 5.769607 6.776469 7.496301
# Obtaining a groupwise centered variant of y
some.grouped.data <- within(some.grouped.data,{
   y.cent <- y - mean(y)
},recombine=FALSE)

# The groupwise centered variable should have zero mean
# within each group
group.means <- with(some.grouped.data,
                   round(mean(y.cent),15))
group.means
   g
f   A B  C D E
  a 0 0 NA 0 0
  b 0 0  0 0 0
  c 0 0  0 0 0
  d 0 0  0 0 0
# The following demonstrates the use of n_, N_, and i_
# An external copy of y
y1 <- some.data$y
group.means.n <- with(some.grouped.data,
                     c(mean(y),  # Group means for y
                       n_,       # Group sizes
                       sum(y)/n_,# Group means for y
                       n_/N_,    # Relative group sizes
                       sum(y1)/N_,# NOT the grand mean
                       sum(y1[i_])/n_)) # Group mean for y1
group.means.n
, , g = A

                f
                          a          b          c          d
  mean(y)        1.96741840 3.45941056 3.70194898 4.57304119
  n_             8.00000000 4.00000000 4.00000000 4.00000000
  sum(y)/n_      1.96741840 3.45941056 3.70194898 4.57304119
  n_/N_          0.08333333 0.04166667 0.04166667 0.04166667
  sum(y1)/N_     4.76831099 4.76831099 4.76831099 4.76831099
  sum(y1[i_])/n_ 1.96741840 3.45941056 3.70194898 4.57304119

, , g = B

                f
                          a          b          c          d
  mean(y)        3.05278434 3.91171267 4.02286727 5.05073728
  n_             5.00000000 7.00000000 4.00000000 4.00000000
  sum(y)/n_      3.05278434 3.91171267 4.02286727 5.05073728
  n_/N_          0.05208333 0.07291667 0.04166667 0.04166667
  sum(y1)/N_     4.76831099 4.76831099 4.76831099 4.76831099
  sum(y1[i_])/n_ 3.05278434 3.91171267 4.02286727 5.05073728

, , g = C

                f
                  a        b        c          d
  mean(y)        NA 4.400992 4.259162 5.76960684
  n_             NA 6.000000 6.000000 4.00000000
  sum(y)/n_      NA 4.400992 4.259162 5.76960684
  n_/N_          NA 0.062500 0.062500 0.04166667
  sum(y1)/N_     NA 4.768311 4.768311 4.76831099
  sum(y1[i_])/n_ NA 4.400992 4.259162 5.76960684

, , g = D

                f
                          a          b          c          d
  mean(y)        4.48644766 4.97890616 5.69618230 6.77646924
  n_             4.00000000 4.00000000 7.00000000 5.00000000
  sum(y)/n_      4.48644766 4.97890616 5.69618230 6.77646924
  n_/N_          0.04166667 0.04166667 0.07291667 0.05208333
  sum(y1)/N_     4.76831099 4.76831099 4.76831099 4.76831099
  sum(y1[i_])/n_ 4.48644766 4.97890616 5.69618230 6.77646924

, , g = E

                f
                          a          b          c          d
  mean(y)        4.43520348 6.43371812 6.50952368 7.49630102
  n_             4.00000000 4.00000000 4.00000000 8.00000000
  sum(y)/n_      4.43520348 6.43371812 6.50952368 7.49630102
  n_/N_          0.04166667 0.04166667 0.04166667 0.08333333
  sum(y1)/N_     4.76831099 4.76831099 4.76831099 4.76831099
  sum(y1[i_])/n_ 4.43520348 6.43371812 6.50952368 7.49630102
# Names can be attached to the groupwise results
with(some.grouped.data,
    c(Centered=round(mean(y.cent),15),
      Uncentered=mean(y)))
, , g = A

            f
                    a        b        c        d
  Centered   0.000000 0.000000 0.000000 0.000000
  Uncentered 1.967418 3.459411 3.701949 4.573041

, , g = B

            f
                    a        b        c        d
  Centered   0.000000 0.000000 0.000000 0.000000
  Uncentered 3.052784 3.911713 4.022867 5.050737

, , g = C

            f
              a        b        c        d
  Centered   NA 0.000000 0.000000 0.000000
  Uncentered NA 4.400992 4.259162 5.769607

, , g = D

            f
                    a        b        c        d
  Centered   0.000000 0.000000 0.000000 0.000000
  Uncentered 4.486448 4.978906 5.696182 6.776469

, , g = E

            f
                    a        b        c        d
  Centered   0.000000 0.000000 0.000000 0.000000
  Uncentered 4.435203 6.433718 6.509524 7.496301
some.data.ungrouped <- recombine(some.grouped.data)
str(some.data.ungrouped)
'data.frame':        96 obs. of  5 variables:
 $ x     : num  0.5168 -0.0481 -1.2802 -0.4897 1.2877 ...
 $ y     : num  2.27 1.7 0.47 1.26 3.79 ...
 $ g     : Factor w/ 5 levels "A","B","C","D",..: 1 1 1 1 2 2 2 2 4 4 ...
 $ f     : Factor w/ 4 levels "a","b","c","d": 1 1 1 1 1 1 1 1 1 1 ...
 $ y.cent: num  0.299 -0.266 -1.498 -0.707 0.735 ...
# It all works with "data.set" objects
some.dataset <- as.data.set(some.data)
some.grouped.dataset <- Groups(some.dataset,~f+g)
with(some.grouped.dataset,
    c(Mean=mean(y),
      Variance=var(y)))
, , g = A

          f
                   a         b        c         d
  Mean     1.9674184 3.4594106 3.701949 4.5730412
  Variance 0.8078139 0.4090148 3.456480 0.6756695

, , g = B

          f
                   a         b         c        d
  Mean     3.0527843 3.9117127 4.0228673 5.050737
  Variance 0.3902132 0.4758577 0.9795298 3.503273

, , g = C

          f
            a         b         c         d
  Mean     NA 4.4009918 4.2591618 5.7696068
  Variance NA 0.7874412 0.4159708 0.6207613

, , g = D

          f
                  a         b        c        d
  Mean     4.486448 4.9789062 5.696182 6.776469
  Variance 1.253727 0.2293157 2.132682 1.992310

, , g = E

          f
                  a         b         c         d
  Mean     4.435203 6.4337181 6.5095237 7.4963010
  Variance 1.053002 0.6742578 0.5767678 0.8263415
# The following two expressions are equivalent:
with(Groups(some.data,~f+g),mean(y))
   g
f          A        B        C        D        E
  a 1.967418 3.052784       NA 4.486448 4.435203
  b 3.459411 3.911713 4.400992 4.978906 6.433718
  c 3.701949 4.022867 4.259162 5.696182 6.509524
  d 4.573041 5.050737 5.769607 6.776469 7.496301
withGroups(some.data,~f+g,mean(y))
   g
f          A        B        C        D        E
  a 1.967418 3.052784       NA 4.486448 4.435203
  b 3.459411 3.911713 4.400992 4.978906 6.433718
  c 3.701949 4.022867 4.259162 5.696182 6.509524
  d 4.573041 5.050737 5.769607 6.776469 7.496301
# The following two expressions are equivalent:
some.data <- within(Groups(some.data,~f+g),{
   y.cent <- y - mean(y)
   y.cent.1 <- y - sum(y)/n_
})
some.data <- withinGroups(some.data,~f+g,{
   y.cent <- y - mean(y)
   y.cent.1 <- y - sum(y)/n_
})

# Both variants of groupwise centred variables should
# have zero groupwise means:
withGroups(some.data,~f+g,{
   c(round(mean(y.cent),15),
     round(mean(y.cent.1),15))
})
, , g = A

   f
    a b c d
  1 0 0 0 0
  2 0 0 0 0

, , g = B

   f
    a b c d
  1 0 0 0 0
  2 0 0 0 0

, , g = C

   f
     a b c d
  1 NA 0 0 0
  2 NA 0 0 0

, , g = D

   f
    a b c d
  1 0 0 0 0
  2 0 0 0 0

, , g = E

   f
    a b c d
  1 0 0 0 0
  2 0 0 0 0