Skip to content
Prev 325108 / 398503 Next

please check this

Hi,
Try this:
# Build the working data set. fun1() and final3New are defined outside this
# snippet.
res10Percent <- fun1(final3New, 0.1, 200)

# Rows of res10Percent that occur more than once (every copy is flagged, not
# only the later ones), restricted to dummy == 1.
res10PercentSub1 <- subset(
  res10Percent[duplicated(res10Percent) | duplicated(res10Percent, fromLast = TRUE), ],
  dummy == 1
)
# NOTE(review): as.numeric(row.names(.)) assumes res10Percent carries the
# default 1..n row names, so a row name doubles as a row position -- confirm.
indx1 <- as.numeric(row.names(res10PercentSub1))

# Sort the flagged rows by dimension and label consecutive rows 1, 1, 2, 2, ...
# NOTE(review): this presumes each duplicated row occurs exactly twice, so the
# two copies end up adjacent after sorting -- confirm against the data.
res10PercentSub2 <- res10PercentSub1[order(res10PercentSub1$dimension), ]
indx11 <- as.numeric(row.names(res10PercentSub2))
names(indx11) <- (seq_along(indx11) - 1) %/% 2 + 1

# Each flagged row together with the row directly below it (indx11 + 1); the
# arithmetic keeps the names, so both rows inherit the same group id.
res10PercentSub3 <- res10Percent[c(indx11, indx11 + 1), ]
res10PercentSub3$id <- names(c(indx11, indx11 + 1))

# Per id group, pick two rows and stack them.
# (The original line began with a stray "?", which R parses as the help
# operator wrapped around the whole assignment; it has been removed.)
res10PercentSub4 <- do.call(rbind, lapply(
  split(res10PercentSub3, res10PercentSub3$id),
  function(x) {
    # Everything except the first row of the group.
    x1 <- x[-1, ]
    # Row of x1 (other than its first) whose dimension is farthest from the
    # dimension of x1's first row; "+ 1" undoes the offset from dropping [-1].
    x2 <- x1[which.max(abs(x1$dimension[1] - x1$dimension[-1])) + 1, ]
    # Among the dummy == 1 rows, the one whose row name is numerically closest
    # to that of x2.
    x3 <- x[x$dummy == 1, ][
      which.min(abs(
        as.numeric(row.names(x[x$dummy == 1, ])) - as.numeric(row.names(x2))
      )),
    ]
    rbind(x3, x2)
  }
))
################################################
# Same procedure for the dummy == 0 side: start from the rows of res10Percent
# that occur more than once and keep those with dummy == 0.
res10PercentSub0 <- subset(
  res10Percent[duplicated(res10Percent) | duplicated(res10Percent, fromLast = TRUE), ],
  dummy == 0
)
indx0 <- as.numeric(row.names(res10PercentSub0))

# Sort by dimension and label consecutive rows 1, 1, 2, 2, ...
res10PercentSub20 <- res10PercentSub0[order(res10PercentSub0$dimension), ]
indx00 <- as.numeric(row.names(res10PercentSub20))
names(indx00) <- (seq_along(indx00) - 1) %/% 2 + 1

# Each flagged row together with the row directly above it (indx00 - 1); both
# share the group id carried in the names.
res10PercentSub30 <- res10Percent[c(indx00 - 1, indx00), ]
res10PercentSub30$id <- names(c(indx00 - 1, indx00))

res10PercentSub40 <- do.call(rbind, lapply(
  split(res10PercentSub30, res10PercentSub30$id),
  function(grp) {
    ones <- subset(grp, dummy == 1)
    zeros <- subset(grp, dummy == 0)
    # dummy == 1 row whose dimension is farthest from the dummy == 0 dimension.
    dimGap <- abs(ones$dimension - unique(zeros$dimension))
    farOne <- ones[which.max(dimGap), ]
    # dummy == 0 row whose row name is numerically closest to that row.
    rowGap <- abs(as.numeric(row.names(farOne)) - as.numeric(row.names(zeros)))
    nearZero <- zeros[which.min(rowGap), ]
    rbind(farOne, nearZero)
  }
))

# res10PercentSub5 is read below but the original message only defined it
# after the signature; define it here so the script runs top to bottom.
# gsub(".*\\.", "", ...) removes everything up to the last "." in each row
# name (the group prefix added when the split pieces were rbind-ed).
row.names(res10PercentSub4) <- gsub(".*\\.", "", row.names(res10PercentSub4))
res10PercentSub5 <- res10PercentSub4[order(as.numeric(res10PercentSub4$id)), ]

row.names(res10PercentSub40) <- gsub(".*\\.", "", row.names(res10PercentSub40))

# Row positions selected on either side, to be dropped from res10Percent.
indxNew <- sort(as.numeric(c(
  row.names(res10PercentSub5),
  row.names(res10PercentSub40)
)))

# x[-integer(0), ] selects zero rows rather than all rows, so only apply the
# negative index when there is something to drop.
res10PercentFinal <- if (length(indxNew) > 0) {
  res10Percent[-indxNew, ]
} else {
  res10Percent
}
# Checks on the result. Lines starting with "#" below each command record the
# output shown in the original message. Stray "?" characters (mangled
# non-breaking spaces) were removed from the commands, where R would have read
# them as the help operator, and turned back into spaces in the output.
dim(res10PercentFinal)
#[1] 454   5
nrow(subset(res10PercentFinal,dummy==0))
#[1] 227
nrow(subset(res10PercentFinal,dummy==1))
#[1] 227

nrow(unique(res10PercentFinal))
#[1] 454
which(duplicated(res10Percent)|duplicated(res10Percent,fromLast=TRUE))
# [1] 113 117 123 125 153 157 187 189 207 213 223 235 265 267 269 275 276 278 279
#[20] 283 293 301 303 305 309 317 327 331 335 339 341 343 347 351 367 369 371 379
#[39] 385 399 407 413 415 417 429 437 441 453 459 461 471 473 477 479 501 505
res10Percent[c(113:114,117:118),]
#          firm year industry dummy dimension
#113 500221723 2005       26     1      3147
#114 500601429 2005       26     0      3076
#117 500221723 2005       26     1      3147
#118 502668920 2005       26     0      3249

res10PercentFinal[c(113:114,117:118),]  #deleted the duplicated row and the accompanying pair with the maximum difference
#          firm year industry dummy dimension
#113 500221723 2005       26     1      3147
#114 500601429 2005       26     0      3076
#119 500115362 2006       26     1      6239
#120 500060223 2006       26     0      6208

A.K.

# Strip everything up to the last "." from the row names of res10PercentSub4,
# then order its rows by numeric id.
# NOTE: res10PercentSub5 is read when indxNew is built, so these two
# statements must have been evaluated before that point.
row.names(res10PercentSub4) <- gsub(
  ".*\\.", "",
  row.names(res10PercentSub4)
)
res10PercentSub5 <- res10PercentSub4[
  order(as.numeric(res10PercentSub4$id)),
]

----- Original Message -----
From: Cecilia Carmo <cecilia.carmo at ua.pt>
To: arun <smartpink111 at yahoo.com>
Cc: 
Sent: Monday, June 10, 2013 1:41 PM
Subject: RE: please check this

I think it could be better to eliminate that one.
If you could do it, I would appreciate it.

Cecília
Message-ID: <1370898338.4657.YahooMailNeo@web142603.mail.bf1.yahoo.com>
In-Reply-To: <104083AE5AAA634C993249DFCCE4C2030643B92F@CIPRESTE.ua.pt>