Skip to content
Prev 308486 / 398506 Next

Creating a new by variable in a dataframe

Hi,
Here is a solution without using "ifelse()", on the same example dataset.
## Example data: ten transactions spread over three dates.
d <- data.frame(
  stringsAsFactors = FALSE,
  transaction = c("T01", "T02", "T03", "T04", "T05",
                  "T06", "T07", "T08", "T09", "T10"),
  date = c("2012-10-19", "2012-10-19", "2012-10-19", "2012-10-19",
           "2012-10-22", "2012-10-23", "2012-10-23", "2012-10-23",
           "2012-10-23", "2012-10-23"),
  time = c("08:00", "09:00", "10:00", "11:00", "12:00",
           "13:00", "14:00", "15:00", "16:00", "17:00")
)

d$date <- as.Date(d$date, format = "%Y-%m-%d")
## Keep only the hour of the day (an integer); the minutes are discarded.
d$time <- strptime(d$time, format = "%H:%M")$hour
## Flag the row(s) holding the latest hour within each date. ave() returns
## the per-date maximum aligned with the original rows, so the flag is
## correct even when the rows are not sorted by date (split() + unlist()
## would return the values in group order instead of row order).
d$flag <- d$time == ave(d$time, d$date, FUN = max)
## Rebuild a date-time from the date and the hour; paste() already
## separates its arguments with a single space.
d$datetime <- as.POSIXct(paste(d$date, d$time), format = "%Y-%m-%d %H")
d1 <- d[, c("transaction", "datetime", "flag")]
d1
#   transaction            datetime  flag
#1          T01 2012-10-19 08:00:00 FALSE
#2          T02 2012-10-19 09:00:00 FALSE
#3          T03 2012-10-19 10:00:00 FALSE
#4          T04 2012-10-19 11:00:00  TRUE
#5          T05 2012-10-22 12:00:00  TRUE
#6          T06 2012-10-23 13:00:00 FALSE
#7          T07 2012-10-23 14:00:00 FALSE
#8          T08 2012-10-23 15:00:00 FALSE
#9          T09 2012-10-23 16:00:00 FALSE
#10         T10 2012-10-23 17:00:00  TRUE

str(d1)
#'data.frame':    10 obs. of  3 variables:
# $ transaction: chr  "T01" "T02" "T03" "T04" ...
# $ datetime   : POSIXct, format: "2012-10-19 08:00:00" "2012-10-19 09:00:00" ...
# $ flag       : logi  FALSE FALSE FALSE TRUE TRUE FALSE ...

A.K.


----- Original Message -----
From: Flavio Barros <flaviomargarito at gmail.com>
To: William Dunlap <wdunlap at tibco.com>
Cc: "r-help at r-project.org" <r-help at r-project.org>; ramoss <ramine.mossadegh at finra.org>
Sent: Friday, October 19, 2012 4:24 PM
Subject: Re: [R] Creating a new by variable in a dataframe

I think I have a better solution:

## Example data.frame
d <- data.frame(
  stringsAsFactors = FALSE,
  transaction = c("T01", "T02", "T03", "T04", "T05",
                  "T06", "T07", "T08", "T09", "T10"),
  date = c("2012-10-19", "2012-10-19", "2012-10-19", "2012-10-19",
           "2012-10-22", "2012-10-23", "2012-10-23", "2012-10-23",
           "2012-10-23", "2012-10-23"),
  time = c("08:00", "09:00", "10:00", "11:00", "12:00",
           "13:00", "14:00", "15:00", "16:00", "17:00")
)

## Date transformation
d$date <- as.Date(d$date)
## Parse hours AND minutes (a bare "%H" would drop the minutes, so two
## transactions within the same hour would tie), and store the result as
## POSIXct: a POSIXlt column is list-based and awkward inside a data frame.
## strptime() fills in the current date, which is harmless here because
## the times are only compared with each other.
d$time <- as.POSIXct(strptime(d$time, format = "%H:%M"))

## Create a factor to group the data by date. The ISO format keeps the
## levels in chronological order ("%D" gives mm/dd/yy, which sorts by
## month first and breaks across years).
fdate <- factor(format(d$date, "%Y-%m-%d"))

## Logical vector that is TRUE for the last transaction of each date.
## ave() returns the per-date maximum aligned with the original rows, so
## no split()/unlist() round trip is needed and the row order is irrelevant.
time_num <- as.numeric(d$time)
flag <- time_num == ave(time_num, fdate, FUN = max)

## Add the flag column; transform() is part of base R, so no extra
## package has to be loaded.
d <- transform(d, flag = flag)
On Fri, Oct 19, 2012 at 3:51 PM, William Dunlap <wdunlap at tibco.com> wrote: