Skip to content
Prev 369597 / 398503 Next

New var

# Example data, parsed from inline text. read.table() comes with base R
# (package utils), so the data.table package is not needed for this.
# library(data.table)
DFM <- read.table(
  text = "obs start end
1 2/1/2015   1/1/2017
2 4/11/2010  1/1/2011
3 1/4/2006   5/3/2007
4 10/1/2007  1/1/2008
5 6/1/2011   1/1/2012
6 10/5/2004 12/1/2004
",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Parse both month/day/year text columns into Date objects in one step.
DFM[c("start", "end")] <- lapply(
  DFM[c("start", "end")],
  as.Date,
  format = "%m/%d/%Y"
)

# D: elapsed time from start to end, as a plain number of days.
DFM$D <- as.numeric(difftime(DFM$end, DFM$start, units = "days"))

# Group D into 100-day-wide intervals that are closed on the left and open
# on the right, with a final open-ended interval; keep the result ordered.
DFM$bin <- cut(
  DFM$D,
  breaks = c(0, 100, 200, 300, 400, 500, Inf),
  right = FALSE,
  ordered_result = TRUE
)

# Approach 1: compare D directly against the numeric thresholds.
# t1 is a plain 1/0 flag for D below 100. For k = 2..5, tk is
#    1 when D lies in [100 * (k - 1), 100 * k),
#   -1 when D lies below that interval,
#    0 when D lies above it.
DFM$t1 <- ifelse(DFM$D < 100, 1, 0)
DFM[paste0("t", 2:5)] <- lapply(2:5, function(k) {
  lower <- 100 * (k - 1)
  upper <- 100 * k
  ifelse(DFM$D < lower, -1, ifelse(DFM$D < upper, 1, 0))
})

# Approach 2: the same coding, driven by comparisons on the ordered factor.
# An ordered factor can be compared against one of its own level labels.
DFM$tf1 <- ifelse(DFM$bin == "[0,100)", 1, 0)
DFM[paste0("tf", 2:5)] <- lapply(
  c("[100,200)", "[200,300)", "[300,400)", "[400,500)"),
  function(label) {
    ifelse(DFM$bin == label, 1, ifelse(DFM$bin > label, 0, -1))
  }
)

# Approach 3: work on the integer codes behind the factor. outer() pairs
# every row's bin code with every column index 1..5, and sign() tells
# whether the column index is above, equal to, or below the row's code.
DFM[, paste0("tm", 1:5)] <- outer(
  as.integer(DFM$bin),
  1:5,
  FUN = function(code, column) {
    s <- sign(column - code)
    # column above the row's bin -> -1; equal -> 1; below -> 0
    ifelse(s > 0, -1L, s + 1L)
  }
)

# Expected result of the script above, written out as an explicit
# structure() literal (dput-style) so that every column's type and value
# is stated exactly. Laid out one column per line; `levels` is used for the
# factor labels and the column names come from the list() itself.
DFMresult <- structure(
  list(
    obs = 1:6,
    start = structure(
      c(16467, 14710, 13152, 13787, 15126, 12696),
      class = "Date"
    ),
    end = structure(
      c(17167, 14975, 13636, 13879, 15340, 12753),
      class = "Date"
    ),
    D = c(700, 265, 484, 92, 214, 57),
    bin = structure(
      c(6L, 3L, 5L, 1L, 3L, 1L),
      levels = c("[0,100)", "[100,200)", "[200,300)", "[300,400)",
                 "[400,500)", "[500,Inf)"),
      class = c("ordered", "factor")
    ),
    t1 = c(0, 0, 0, 1, 0, 1),
    t2 = c(0, 0, 0, -1, 0, -1),
    t3 = c(0, 1, 0, -1, 1, -1),
    t4 = c(0, -1, 0, -1, -1, -1),
    t5 = c(0, -1, 1, -1, -1, -1),
    tf1 = c(0, 0, 0, 1, 0, 1),
    tf2 = c(0, 0, 0, -1, 0, -1),
    tf3 = c(0, 1, 0, -1, 1, -1),
    tf4 = c(0, -1, 0, -1, -1, -1),
    tf5 = c(0, -1, 1, -1, -1, -1),
    tm1 = c(0, 0, 0, 1, 0, 1),
    tm2 = c(0, 0, 0, -1, 0, -1),
    tm3 = c(0, 1, 0, -1, 1, -1),
    tm4 = c(0, -1, 0, -1, -1, -1),
    tm5 = c(0, -1, 1, -1, -1, -1)
  ),
  row.names = c(NA, -6L),
  class = "data.frame"
)

You did not address Bert's request for some context, but I am curious how 
he or Peter would have approached this problem, so I encourage you to 
provide some insight on the list as to why you are doing this.
On Sat, 3 Jun 2017, Val wrote:

            
---------------------------------------------------------------------------
Jeff Newmiller                        The     .....       .....  Go Live...
DCN:<jdnewmil at dcn.davis.ca.us>        Basics: ##.#.       ##.#.  Live Go...
                                       Live:   OO#.. Dead: OO#..  Playing
Research Engineer (Solar/Batteries            O.O#.       #.O#.  with
/Software/Embedded Controllers)               .OO#.       .OO#.  rocks...1k