Groups memisc 0.99.20.1

Operate on grouped data in data frames and data sets

Description

Groups creates a grouped variant of an object of class “data.frame” or of class “data.set”, for which methods for with and within are defined, so that these well-known functions can be applied “groupwise”.

Usage

# Create an object of class "grouped.data" from a
# data frame or a data set.
Groups(data,by,...)
## S4 method for signature 'data.frame,formula'
Groups(data,by,...)
## S4 method for signature 'data.set,formula'
Groups(data,by,...)

# Recombine grouped data into a data frame or a data set
recombine(x,...)
## S4 method for signature 'grouped.data'
recombine(x,...)

# Methods of the generics "with" and "within" for grouped data
## S4 method for signature 'grouped.data'
with(data,expr,...)
## S4 method for signature 'grouped.data'
within(data,expr,recombine=FALSE,...)
## S4 method for signature 'grouped.data'
names(x)

# This is equivalent to with(Groups(data,by),expr,...)
withGroups(data,by,expr,...)
# This is equivalent to within(Groups(data,by),expr,recombine,...)
withinGroups(data,by,expr,recombine=TRUE,...)

Arguments

data

an object of class “data.frame” or “data.set” if an argument to Groups, withGroups, or withinGroups; an object of class “grouped.data” if an argument to with or within.

by

a formula naming the factors whose levels define the groups.

expr

an expression, or several expressions enclosed in curly braces.

recombine

a logical value; should the resulting grouped data be recombined into a data frame or data set? The default is FALSE for within and TRUE for withinGroups.

x

an object of class “grouped.data”.

...

other arguments, ignored.

Examples

some.data <- data.frame(x=rnorm(n=100))
some.data <- within(some.data,{
   f <- factor(rep(1:4,each=25),labels=letters[1:4])
   g <- factor(rep(1:5,each=4,5),labels=LETTERS[1:5])
   y <- x + rep(1:4,each=25) +  0.75*rep(1:5,each=4,5)
})
some.grouped.data <- Groups(some.data,
                          ~f+g)
group.means <- with(some.grouped.data,
                   mean(y))
group.means
   g
f          A        B        C        D        E
  a 2.007375 2.180146 3.111583 3.827678 4.808839
  b 1.842287 3.321262 4.644148 5.190574 5.799566
  c 3.979328 4.459071 5.910097 6.329215 6.504937
  d 4.520250 5.343331 5.793697 6.198287 7.672812
some.grouped.data <- within(some.grouped.data,{
   y.cent <- y - mean(y)
},recombine=FALSE)
group.means <- with(some.grouped.data,
                   round(mean(y.cent),15))
group.means
   g
f   A B C D E
  a 0 0 0 0 0
  b 0 0 0 0 0
  c 0 0 0 0 0
  d 0 0 0 0 0
str(group.means)
'grouped.result' num [1:4, 1:5] 0 0 0 0 0 0 0 0 0 0 ...
- attr(*, "dimnames")=List of 2
 ..$ f: chr [1:4] "a" "b" "c" "d"
 ..$ g: chr [1:5] "A" "B" "C" "D" ...
with(some.grouped.data,
    c(Centered=round(mean(y.cent),15),
      Uncentered=mean(y)))
, , g = A

            f
                    a        b        c       d
  Centered   0.000000 0.000000 0.000000 0.00000
  Uncentered 2.007375 1.842287 3.979328 4.52025

, , g = B

            f
                    a        b        c        d
  Centered   0.000000 0.000000 0.000000 0.000000
  Uncentered 2.180146 3.321262 4.459071 5.343331

, , g = C

            f
                    a        b        c        d
  Centered   0.000000 0.000000 0.000000 0.000000
  Uncentered 3.111583 4.644148 5.910097 5.793697

, , g = D

            f
                    a        b        c        d
  Centered   0.000000 0.000000 0.000000 0.000000
  Uncentered 3.827678 5.190574 6.329215 6.198287

, , g = E

            f
                    a        b        c        d
  Centered   0.000000 0.000000 0.000000 0.000000
  Uncentered 4.808839 5.799566 6.504937 7.672812
some.data.ungrouped <- recombine(some.grouped.data)
str(some.data.ungrouped)
'data.frame':        100 obs. of  5 variables:
 $ x     : num  1.123 1.09 1.501 -0.866 -1.45 ...
 $ y     : num  2.873 2.84 3.251 0.884 1.05 ...
 $ g     : Factor w/ 5 levels "A","B","C","D",..: 1 1 1 1 2 2 2 2 3 3 ...
 $ f     : Factor w/ 4 levels "a","b","c","d": 1 1 1 1 1 1 1 1 1 1 ...
 $ y.cent: num  0.866 0.833 1.244 -1.124 -1.13 ...
some.dataset <- as.data.set(some.data)
some.grouped.dataset <- Groups(some.dataset,~f+g)
with(some.grouped.dataset,
    c(Mean=mean(y),
      Variance=var(y)))
, , g = A

          f
                   a         b         c        d
  Mean     2.0073749 1.8422873 3.9793284 4.520250
  Variance 0.7429366 0.3442165 0.8261017 1.075985

, , g = B

          f
                  a        b         c        d
  Mean     2.180146 3.321262 4.4590706 5.343331
  Variance 1.587153 1.616642 0.4371489 1.191778

, , g = C

          f
                   a         b        c         d
  Mean     3.1115828 4.6441479 5.910097 5.7936973
  Variance 0.5269201 0.5720588 0.269450 0.8624274

, , g = D

          f
                   a        b        c         d
  Mean     3.8276780 5.190574 6.329215 6.1982866
  Variance 0.2159753 1.653119 1.351447 0.2229146

, , g = E

          f
                   a        b         c        d
  Mean     4.8088385 5.799566 6.5049367 7.672812
  Variance 0.1382788 1.826630 0.2188331 1.652064
with(Groups(some.data,~f+g),mean(y))
   g
f          A        B        C        D        E
  a 2.007375 2.180146 3.111583 3.827678 4.808839
  b 1.842287 3.321262 4.644148 5.190574 5.799566
  c 3.979328 4.459071 5.910097 6.329215 6.504937
  d 4.520250 5.343331 5.793697 6.198287 7.672812
some.data <- within(Groups(some.data,~f+g),{
   y.cent <- y - mean(y)
},recombine=TRUE)
round(with(some.data,
          tapply(y.cent,list(g,f),
                 mean,na.rm=TRUE)),15)
  a b c d
A 0 0 0 0
B 0 0 0 0
C 0 0 0 0
D 0 0 0 0
E 0 0 0 0
some.data <- withinGroups(some.data,~f+g,{
   y.cent <- y - mean(y)
})
round(with(some.data,
          tapply(y.cent,list(g,f),
                 mean)),15)
  a b c d
A 0 0 0 0
B 0 0 0 0
C 0 0 0 0
D 0 0 0 0
E 0 0 0 0
withGroups(some.data,~f+g,{
   round(mean(y.cent),15)
})
   g
f   A B C D E
  a 0 0 0 0 0
  b 0 0 0 0 0
  c 0 0 0 0 0
  d 0 0 0 0 0