Skip to content
Prev 345920 / 398502 Next

dplyr/summarize does not create a true data frame

Thanks to John Kane for an off-list consultation. As the following annotated transcript shows, it is the group_by() function that transforms a data frame into something else: a "grouped_df" object that *looks* identical to the original data frame (e.g. the rows stay in their original order -- they are *not* sorted by group, as arrange() would do) but does not always *act* like a data frame.
+ "P03", "P04", "P05", "P06", "P07", "P08", "P09", "P10"), class = "factor"), 
+     Sex = structure(c(2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L), .Label = c("Female", 
+     "Male"), class = "factor"), Height = structure(c(1L, 1L, 
+     3L, 2L, 1L, 3L, 1L, 2L, 1L, 1L), .Label = c("Short", "Medium", 
+     "Tall"), class = "factor"), Value = c(69.47, 64.61, 74.77, 
+     73.31, 64.76, 72.78, 64.64, 55.96, 60.45, 51.11)), .Names = c("Id", 
+ "Sex", "Height", "Value"), row.names = c(NA, -10L), class = "data.frame")
'data.frame':	10 obs. of  4 variables:
 $ Id    : Factor w/ 10 levels "P01","P02","P03",..: 1 2 3 4 5 6 7 8 9 10
 $ Sex   : Factor w/ 2 levels "Female","Male": 2 1 1 2 2 2 1 2 2 1
 $ Height: Factor w/ 3 levels "Short","Medium",..: 1 1 3 2 1 3 1 2 1 1
 $ Value : num  69.5 64.6 74.8 73.3 64.8 ...
Classes 'grouped_df', 'tbl_df', 'tbl' and 'data.frame':	10 obs. of  4 variables:
 $ Id    : Factor w/ 10 levels "P01","P02","P03",..: 1 2 3 4 5 6 7 8 9 10
 $ Sex   : Factor w/ 2 levels "Female","Male": 2 1 1 2 2 2 1 2 2 1
 $ Height: Factor w/ 3 levels "Short","Medium",..: 1 1 3 2 1 3 1 2 1 1
 $ Value : num  69.5 64.6 74.8 73.3 64.8 ...
 - attr(*, "vars")=List of 2
  ..$ : symbol Sex
  ..$ : symbol Height
 - attr(*, "drop")= logi TRUE
 - attr(*, "indices")=List of 5
  ..$ : int  1 6 9
  ..$ : int 2
  ..$ : int  0 4 8
  ..$ : int  3 7
  ..$ : int 5
 - attr(*, "group_sizes")= int  3 1 3 2 1
 - attr(*, "biggest_group_size")= int 3
 - attr(*, "labels")='data.frame':	5 obs. of  2 variables:
  ..$ Sex   : Factor w/ 2 levels "Female","Male": 1 1 2 2 2
  ..$ Height: Factor w/ 3 levels "Short","Medium",..: 1 3 1 2 3
  ..- attr(*, "vars")=List of 2
  .. ..$ : symbol Sex
  .. ..$ : symbol Height
        Id  Sex Height Value
 [1,] TRUE TRUE   TRUE  TRUE
 [2,] TRUE TRUE   TRUE  TRUE
 [3,] TRUE TRUE   TRUE  TRUE
   ...etc.
   Value
1  69.47
2  64.61
   ...etc.
Error in eval(expr, envir, enclos) : index out of bounds
   Value
1  69.47
2  64.61
   ...etc.

################################## dput() code below

# Example data in dput() form: a 10-row data.frame with three factor columns --
# Id (levels "P01".."P10"), Sex ("Female"/"Male"), Height ("Short"/"Medium"/
# "Tall") -- and one numeric column, Value.
# The expression below is not assigned to anything; assign its result to a
# name (e.g. with `<-`) to reuse the object.
structure(list(Id = structure(1:10, .Label = c("P01", "P02", 
"P03", "P04", "P05", "P06", "P07", "P08", "P09", "P10"), class = "factor"), 
    Sex = structure(c(2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L), .Label = c("Female", 
    "Male"), class = "factor"), Height = structure(c(1L, 1L, 
    3L, 2L, 1L, 3L, 1L, 2L, 1L, 1L), .Label = c("Short", "Medium", 
    "Tall"), class = "factor"), Value = c(69.47, 64.61, 74.77, 
    73.31, 64.76, 72.78, 64.64, 55.96, 60.45, 51.11)), .Names = c("Id", 
"Sex", "Height", "Value"), row.names = c(NA, -10L), class = "data.frame")