dates and time series management
Hi,
Try this:
# Collect the station files in the working directory.
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored to the end of the name; otherwise any name containing
# "<any char>txt" (e.g. "mytxtfile.csv") would be picked up as well.
lstf1 <- list.files(pattern = "\\.txt$")
length(lstf1)
#[1] 119
# Read monthly-row station files and assemble one column of values per file.
#
# For every path in `lstf`:
#   1. the file is read as text and every -9999.99 that is glued to the
#      preceding number (e.g. "0.00-9999.99") is separated by a space;
#   2. the text is parsed with read.table(header = TRUE, fill = TRUE);
#   3. rows whose year (column V1) lies in 1961-2005 are kept;
#   4. all-NA rows are added before/after the record so that it spans
#      1961-2005 (12 rows per year);
#   5. columns 3 onward are stacked into a single column named after the file.
#
# Args:
#   lstf: character vector of file paths.
#
# Returns:
#   A data frame with one column per element of `lstf` (column names are the
#   paths as given); NULL when `lstf` is empty.
#
# NOTE(review): the padding assumes every year present in a file is complete
# and that gaps occur only at the start or end of the record; files with
# missing months inside the window will yield a different row count and
# cbind() will fail -- confirm against the data.
fun2 <- function(lstf) {
  first_year <- 1961
  last_year <- 2005

  # Data frame of `n` all-NA rows carrying the same column names as
  # `template`, so rbind() can match columns whatever the file header was.
  na_rows <- function(n, template) {
    pad <- as.data.frame(matrix(NA, nrow = n, ncol = ncol(template)))
    names(pad) <- names(template)
    pad
  }

  read_one <- function(path) {
    raw <- readLines(path)

    # The lookahead does not consume "-9999.99", so runs such as
    # "0.00-9999.99-9999.99" are fully separated in a single pass.
    fixed <- gsub("(\\d)(?=-9999\\.99)", "\\1 ", raw, perl = TRUE)

    dat <- read.table(
      text = fixed, header = TRUE, stringsAsFactors = FALSE,
      sep = "", fill = TRUE
    )

    in_range <- !is.na(dat$V1) & dat$V1 >= first_year & dat$V1 <= last_year
    dat <- dat[in_range, , drop = FALSE]

    if (nrow(dat) == 0L) {
      # Nothing inside the window: the whole period is missing.
      n_before <- (last_year - first_year + 1) * 12
      n_after <- 0
    } else {
      n_before <- (min(dat$V1) - first_year) * 12
      n_after <- (last_year - max(dat$V1)) * 12
    }
    padded <- rbind(na_rows(n_before, dat), dat, na_rows(n_after, dat))

    # NOTE(review): unlist() stacks column by column (all rows of the 3rd
    # column, then all rows of the 4th, ...), not row by row -- confirm this
    # is the intended ordering of the resulting series.
    out <- data.frame(
      col1 = unlist(padded[, -(1:2), drop = FALSE], use.names = FALSE)
    )
    # Name the column after the file that was actually passed in, not after a
    # global variable.
    colnames(out) <- path
    out
  }

  do.call(cbind, lapply(lstf, read_one))
}
# Build the combined table (one column per station file) and inspect it.
res <- fun2(lstf1)
dim(res)
#[1] 16740   119
res[1:5, 1:3]
#  dt3011120.txt dt3011240.txt dt3011887.txt
#1          1.67            NA          0.17
#2          0.00            NA          0.28
#3          0.00            NA          0.00
#4          0.00            NA          0.30
#5          0.00            NA          0.00
########################################
There are some formatting issues in your files:
For example, if I run the function line by line:
# Step 1: read every file as raw lines and flag the files in which a
# -9999.99 is glued to the preceding number.
lst1 <- lapply(lstf1, function(x) readLines(x))
sapply(lst1, function(x) any(grepl("\\d+-9999.99", x)))
#  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [37]  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE
# [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [73] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE
# [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
# [97] FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE
#[109] FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE
### This means some rows in a few of the files look like:
#-9999.99 0 0 0 0.00-9999.99 0 0.00-9999.99 0 0 0 0.00-9999.99 (no space before -9999.99)
# Step 2: first pass inserting a space before each glued -9999.99, then
# re-check which files are still affected.
lst2 <- lapply(lst1, function(x) {
  gsub("(\\d+)(-9999.99)", "\\1 \\2", x)
})
sapply(lst2, function(x) any(grepl("\\d+-9999.99", x))) # still a few files had the problem
#  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE
# [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
# [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
#[109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Step 3: second substitution pass over the already-fixed lines, followed by
# a check that no glued -9999.99 is left in any file.
lst3 <- lapply(
  lst2,
  function(lines) gsub("(\\d+)(-9999.99)", "\\1 \\2", lines)
)
any(sapply(lst3, function(lines) any(grepl("\\d+-9999.99", lines))))
#[1] FALSE
# Step 4: parse the cleaned text and check that no column was read as
# character.
lst4 <- lapply(lst3, function(lines) {
  read.table(
    text = lines, header = TRUE, stringsAsFactors = FALSE,
    sep = "", fill = TRUE
  )
})
any(sapply(lst4, function(tbl) any(sapply(tbl, is.character))))
#[1] FALSE
# Step 5: keep the rows whose year (V1) lies in 1961-2005, drop the all-NA
# rows produced by NA years, and count the remaining rows per file.
lst5 <- lapply(lst4, function(x) x[x$V1 >= 1961 & x$V1 <= 2005, ])
lst6 <- lapply(lst5, function(x) x[!is.na(x$V1), ])
sapply(lst6, nrow)
#  [1] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [19] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [37] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [55] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [73] 540 540 540 540 528 492 528 540 348 540 540 480 540 540 540 540 540 540
# [91] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 528 540 540 540
#[109] 540 540 540 540 540 540 540 540 540 468 540
# Step 6: pad records that start after 1961 or end before 2005 with all-NA
# rows (12 per missing year) so that every file has the same number of rows.
lst7 <- lapply(lst6, function(x) {
  # `||` because both sides are single values
  if ((min(x$V1) > 1961) || (max(x$V1) < 2005)) {
    n1 <- (min(x$V1) - 1961) * 12
    x1 <- as.data.frame(matrix(NA, ncol = ncol(x), nrow = n1))
    n2 <- (2005 - max(x$V1)) * 12
    x2 <- as.data.frame(matrix(NA, ncol = ncol(x), nrow = n2))
    rbind(x1, x, x2)
  } else {
    x
  }
})
sapply(lst7, nrow)
#  [1] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [19] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [37] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [55] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [73] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
# [91] 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540 540
#[109] 540 540 540 540 540 540 540 540 540 540 540
Hope this helps.
A.K.
From: Zilefac Elvis <zilefacelvis at yahoo.com>
To: arun <smartpink111 at yahoo.com>
Sent: Wednesday, June 5, 2013 2:05 AM
Subject: Re: dates and time series management
To: arun <smartpink111 at yahoo.com>
Sent: Wednesday, June 5, 2013 2:05 AM
Subject: Re: dates and time series management
Hi A.K,
Sorry, my internet connection was so bad last evening. I have attached all the files as .zip. Below is the output you requested. As I explained, the start date in 'res' should be 1961 and the end date should be 2005 in all 119 files.
Thanks A.K
> lapply(lst1,head,3)
[[1]]
  V1.V2.V3.V4.V5.V6.V7.V8.V9.V10.V11.V12.V13.V14.V15.V16.V17.V18.V19.V20.V21.V22.V23.V24.V25.V26.V27.V28.V29.V30.V31.V32.V33
1                         1915 1 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
2                         1915 2 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
3                         1915 3 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[