Skip to content
Prev 366440 / 398502 Next

How might I work with a data.frame where each physical row represents several logical rows?

Below is my interpretation of one way to achieve your stated goal. I don't 
know what analysis you plan to perform, but this seems unlikely to be be 
my preferred analysis format (I think I would probably analyze subsets of 
the records related to specific parts of the transactions).

library(dplyr)
library(tidyr)

fifteenminutes <- 15 * 60

# for education, show intermediate results
# strip out square brackets
View(   test_data
     %>% mutate( values = sub( "^\\[(.*)\\]$", "\\1", values ) )
     )

# split the single column into multiple columns
View(   test_data
     %>% mutate( values = sub( "^\\[(.*)\\]$", "\\1", values ) )
     %>% separate( values, paste0( "value", 0:3 ), ", *" )
     )

# pull separate value columns into one column called value, with a new
# column vcol to hold the name of the original column
View(   test_data
     %>% mutate( values = sub( "^\\[(.*)\\]$", "\\1", values ) )
     %>% separate( values, paste0( "value", 0:3 ), ", *" )
     %>% gather( vcol, value, c( value0, value1, value2, value3 ) )
     )

# create a timestamp column for the individual values
View(   test_data
     %>% mutate( values = sub( "^\\[(.*)\\]$", "\\1", values ) )
     %>% separate( values, paste0( "value", 0:3 ), ", *" )
     %>% gather( vcol, value, c( value0, value1, value2, value3 ) )
     %>% mutate( timestamp = start
                           + fifteenminutes
                             * as.numeric( sub( "value", "", vcol ) ) )
     )

# remove the old vcol column now that timestamp column is created
View(   test_data
     %>% mutate( values = sub( "^\\[(.*)\\]$", "\\1", values ) )
     %>% separate( values, paste0( "value", 0:3 ), ", *" )
     %>% gather( vcol, value, c( value0, value1, value2, value3 ) )
     %>% mutate( timestamp = start
                           + fifteenminutes
                             * as.numeric( sub( "value", "", vcol ) ) )
     %>% select( -vcol )
     )

# unite several columns that currently distinguish various rows
View(   test_data
     %>% mutate( values = sub( "^\\[(.*)\\]$", "\\1", values ) )
     %>% separate( values, paste0( "value", 0:3 ), ", *" )
     %>% gather( vcol, value, c( value0, value1, value2, value3 ) )
     %>% mutate( timestamp = start
                           + fifteenminutes
                             * as.numeric( sub( "value", "", vcol ) ) )
     %>% select( -vcol )
     %>% unite( mname, mtype, nic, tcp_state, limit_type, value_type, name )
     )

# spread values out into separate columns
test_data2 <- (   test_data
               %>% mutate( values = sub( "^\\[(.*)\\]$", "\\1", values ) )
               %>% separate( values, paste0( "value", 0:3 ), ", *" )
               %>% gather( vcol, value, c( value0, value1, value2, value3 ) 
)
               %>% mutate( timestamp = start + fifteenminutes * as.numeric( 
sub( "value", "", vcol ) ) )
               %>% select( -vcol )
               %>% unite( mname, mtype, nic, tcp_state, limit_type, 
value_type, name )
               %>% spread( mname, value )
               )

View( test_data2 )
On Wed, 4 Jan 2017, David Wolfskill wrote:

            
---------------------------------------------------------------------------
Jeff Newmiller                        The     .....       .....  Go Live...
DCN:<jdnewmil at dcn.davis.ca.us>        Basics: ##.#.       ##.#.  Live Go...
                                       Live:   OO#.. Dead: OO#..  Playing
Research Engineer (Solar/Batteries            O.O#.       #.O#.  with
/Software/Embedded Controllers)               .OO#.       .OO#.  rocks...1k