matched samples, dataframe, panel data - R-help

arun
Fri, Jun 14, 2013 3:05 PM #
Hi,
I changed fun1(). Now it should be possible to get all the possible combinations within each group.


# Load the two input data sets used below: a tab-separated text file and a
# CSV file. Both are expected to sit in the current working directory.
final3New <- read.table(
  file = "real_data_cecilia.txt",
  sep = "\t",
  header = TRUE  # spelled out: `T` is an ordinary variable and can be reassigned
)
final3New1 <- read.csv("real_data_cecilia_new.csv")
# Build every matched pair of rows within each (year, industry) group.
#
# Arguments:
#   dat     data frame with columns `year`, `industry`, `dummy`, `dimension`.
#   percent relative tolerance: the first row's `dimension` must lie strictly
#           between dimension2 * (1 - percent) and dimension2 * (1 + percent),
#           where dimension2 is the `dimension` of the second row of the pair.
#   number  absolute tolerance: |dimension1 - dimension2| must be < number.
#
# A pair is kept only when exactly one of its two rows has dummy == 1
# (sum(dummy) == 1) and both tolerances hold. Pairs with an NA in `dummy` or
# `dimension` are treated as non-matching instead of producing all-NA rows.
#
# Returns a data frame with two consecutive rows per kept pair (in the order
# the rows appear inside the group), row names reset to 1..n. When no pair
# qualifies, a zero-row data frame with the columns of `dat` is returned
# (the previous version failed on `1:nrow(NULL)` in that case).
fun1New <- function(dat, percent, number) {
  groups <- split(dat, list(dat$year, dat$industry))
  # A pair needs at least two rows in the group.
  groups <- groups[vapply(groups, nrow, integer(1)) > 1L]

  matched <- lapply(groups, function(grp) {
    # All unordered row pairs of the group, as a list of index vectors.
    pairs <- combn(seq_len(nrow(grp)), 2L, simplify = FALSE)

    keep <- vapply(pairs, function(idx) {
      dummy <- grp$dummy[idx]
      size <- grp$dimension[idx]
      upper <- size[2L] + size[2L] * percent
      lower <- size[2L] - size[2L] * percent
      # isTRUE() turns NA comparisons into FALSE.
      isTRUE(sum(dummy) == 1) &&
        isTRUE(abs(diff(size)) < number) &&
        isTRUE(size[1L] < upper) &&
        isTRUE(size[1L] > lower)
    }, logical(1))

    # Stack the kept pairs; yields zero rows when nothing was kept.
    grp[unlist(pairs[keep]), , drop = FALSE]
  })

  matched <- matched[vapply(matched, nrow, integer(1)) > 0L]
  if (length(matched) == 0L) {
    return(dat[0L, , drop = FALSE])
  }

  res <- do.call(rbind, unname(matched))
  row.names(res) <- seq_len(nrow(res))
  res
}
## Applying fun1New
# Each call is followed by the dimensions that were printed for the original
# data; the trailing comments record that output.
res5Percent <- fun1New(final3New, 0.05, 50)
dim(res5Percent)
#[1] 718   5
res5PercentHigh <- fun1New(final3New, 0.05, 500000)
dim(res5PercentHigh)
#[1] 2788    5

res5Percent1 <- fun1New(final3New1, 0.05, 50)
dim(res5Percent1)
#[1] 870   5
res5Percent1High <- fun1New(final3New1, 0.05, 500000)
dim(res5Percent1High)
#[1] 2902    5

res10Percent <- fun1New(final3New, 0.10, 200)
dim(res10Percent)
#[1] 2928    5
res10Percent1 <- fun1New(final3New1, 0.10, 200)
dim(res10Percent1)
#[1] 3092    5

# Drop every pair that contains a row already seen earlier in `dat`.
#
# Arguments:
#   dat  data frame with a `dummy` column, laid out as consecutive two-row
#        pairs. NOTE(review): the +1 / -1 partner logic below presumes that
#        within each pair the dummy == 1 row comes first and the dummy == 0
#        row second -- confirm against the data that feeds this function.
#
# A row flagged by duplicated() is removed together with its partner: the
# following row when the duplicate has dummy == 1, the preceding row when it
# has dummy == 0.
#
# Returns `dat` without those rows; the remaining rows keep their row names.
#
# Rows are addressed by position. The previous version converted row names to
# numbers and used them as positions, which is only equivalent when the row
# names are 1..n. It also returned zero rows when nothing was duplicated,
# because a negative zero-length index (`dat[-numeric(0), ]`) selects nothing.
fun3 <- function(dat) {
  is_dup <- duplicated(dat)

  # which() silently drops NA comparisons, like subset() did before.
  dup_treated <- which(is_dup & dat$dummy == 1)
  dup_control <- which(is_dup & dat$dummy == 0)

  drop_rows <- c(
    dup_treated, dup_treated + 1L,
    dup_control, dup_control - 1L
  )

  # setdiff() copes with an empty `drop_rows` and with out-of-range partners.
  keep_rows <- setdiff(seq_len(nrow(dat)), drop_rows)
  dat[keep_rows, , drop = FALSE]
}




# Applying fun3()
# Each call is followed by the output that was printed for the original data;
# the trailing comments record that output.
res5F3 <- fun3(res5Percent)
dim(res5F3)
#[1] 278   5

res5F3High <- fun3(res5PercentHigh)
dim(res5F3High)
#[1] 546   5

res5F3_1 <- fun3(res5Percent1)
dim(res5F3_1)
#[1] 302   5
res5F3High_1 <- fun3(res5Percent1High)
dim(res5F3High_1)
#[1] 570   5

res10F3 <- fun3(res10Percent)
dim(res10F3)
#[1] 462   5
res10F3_1 <- fun3(res10Percent1)
dim(res10F3_1)
#[1] 474   5

# Check that both dummy groups have the same number of rows in each result.
nrow(subset(res5F3, dummy == 0))
#[1] 139
nrow(subset(res5F3, dummy == 1))
#[1] 139


nrow(subset(res5F3High, dummy == 1))
#[1] 273
nrow(subset(res5F3High, dummy == 0))
#[1] 273


nrow(subset(res10F3, dummy == 0))
#[1] 231
nrow(subset(res10F3, dummy == 1))
#[1] 231
nrow(subset(res10F3_1, dummy == 1))
#[1] 237
nrow(subset(res10F3_1, dummy == 0))
#[1] 237

# Check that no duplicated rows remain: unique() must not shrink the results.
dim(unique(res5F3))
#[1] 278   5
dim(unique(res5F3High))
#[1] 546   5

dim(unique(res10F3_1))
#[1] 474   5
dim(unique(res10F3))
#[1] 462   5
A.K.