Skip to content
Prev 324810 / 398503 Next

dates and time series management

Hi,
Try this:
# List the station files in the working directory. `pattern` is a regular
# expression, so the dot is escaped and the match is anchored to the end of
# the file name; a bare ".txt" would also match names such as "mytxt.csv".
lstf1 <- list.files(pattern = "\\.txt$")
length(lstf1)
#[1] 119
#I changed the function a little bit to unlist by rows to match the dates column I created.

# Read a set of text files and combine them into one data frame with one
# column per file.
#
# Each file is read with a header line; column V1 is compared against the
# period 1961-2005 and rows outside it are dropped. Files that start after
# 1961 or end before 2005 are padded with 12 all-NA rows per missing year.
# The first two columns are then removed and the remaining values are
# unrolled row by row into a single column.
# (assumes V1 is the year, V2 the month and the other columns the daily
# values, one row per month -- confirm against the input files)
#
# lstf:    character vector of file paths.
# Returns: a data frame whose column names are the elements of `lstf`.
fun2 <- function(lstf) {
  first_year <- 1961
  last_year <- 2005

  # Insert a space where the -9999.99 code is glued to the preceding number.
  split_glued <- function(x) gsub("(\\d+)(-9999.99)", "\\1 \\2", x)

  lst1 <- lapply(lstf, readLines)

  # Two passes are needed: gsub() matches cannot overlap, so in a run of
  # consecutive glued codes a single pass leaves some of them unseparated.
  lst3 <- lapply(lst1, function(x) split_glued(split_glued(x)))

  lst4 <- lapply(lst3, function(x) {
    read.table(
      text = x, header = TRUE, stringsAsFactors = FALSE, sep = "", fill = TRUE
    )
  })

  # Keep the rows inside the period; rows whose V1 is NA are dropped as well.
  in_period <- lapply(lst4, function(x) {
    x <- x[x$V1 >= first_year & x$V1 <= last_year, ]
    x[!is.na(x$V1), ]
  })

  # Pad files that do not span the whole period with all-NA rows.
  padded <- lapply(in_period, function(x) {
    na_rows <- function(n) {
      pad <- as.data.frame(matrix(NA, ncol = ncol(x), nrow = n))
      # rbind() matches data frame columns by name.
      names(pad) <- names(x)
      pad
    }
    if (min(x$V1) > first_year || max(x$V1) < last_year) {
      n_before <- (min(x$V1) - first_year) * 12
      n_after <- (last_year - max(x$V1)) * 12
      rbind(na_rows(n_before), x, na_rows(n_after))
    } else {
      x
    }
  })

  # Drop the first two columns and unroll the rest row by row.
  lst8 <- lapply(padded, function(x) {
    data.frame(col1 = unlist(data.frame(t(x)[-c(1:2), ]), use.names = FALSE))
  })

  # Name each column after its own input file. The argument `lstf` is used
  # here, not a global variable, so the function works for any file vector.
  lst9 <- lapply(seq_along(lst8), function(i) {
    x <- lst8[[i]]
    colnames(x) <- lstf[i]
    row.names(x) <- seq_len(nrow(x))
    x
  })

  do.call(cbind, lst9)
}
# Combine all station files into one data frame (one column per file).
res <- fun2(lstf1)
dim(res)
#[1] 16740   119
# Recode the -9999.99 sentinel as missing, then confirm none are left.
res[res == -9999.99] <- NA
which(res == -9999.99)
#integer(0)

# Every calendar day in the period. ISO date strings are used so that parsing
# does not depend on the locale's month abbreviations (as "%b" does).
dates1 <- seq(as.Date("1961-01-01"), as.Date("2005-12-31"), by = "day")
dates2 <- as.character(dates1)

# Group the "YYYY-MM-DD" strings by year.
sldat <- split(dates2, substr(dates2, 1, 4))

# Pad one month of date strings to 31 entries by appending the missing day
# numbers (e.g. "1961-02-29", "1961-02-30", "1961-02-31"), so that every month
# occupies 31 slots.
pad_to_31 <- function(y) {
  last_day <- max(as.numeric(substr(y, 9, 10)))
  if (last_day < 31) {
    c(y, paste0(substr(y[1], 1, 8), seq(last_day + 1, 31)))
  } else {
    y
  }
}
lst11 <- lapply(sldat, function(x) {
  lapply(split(x, substr(x, 6, 7)), pad_to_31)
})

# Check that every month now has exactly 31 entries.
any(vapply(lst11, function(x) any(lengths(x) != 31), logical(1)))
#[1] FALSE
lst22 <- lapply(lst11, unlist, use.names = FALSE)
lengths(lst22)
#1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976
# 372  372  372  372  372  372  372  372  372  372  372  372  372  372  372  372
#1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992
# 372  372  372  372  372  372  372  372  372  372  372  372  372  372  372  372
#1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005
# 372  372  372  372  372  372  372  372  372  372  372  372  372

dates3 <- unlist(lst22, use.names = FALSE)
length(dates3)
#[1] 16740
# Attach the padded date strings as the first column.
res1 <- data.frame(dates = dates3, res, stringsAsFactors = FALSE)
str(res1)
#'data.frame':    16740 obs. of  120 variables:
# $ dates        : chr  "1961-01-01" "1961-01-02" "1961-01-03" "1961-01-04" ...
# $ dt3011120.txt: num  1.67 0 0 0 0 0 4.17 0 0 0 ...
# $ dt3011240.txt: num  NA NA NA NA NA NA NA NA NA NA ...
# $ dt3011887.txt: num  0.17 0.28 0 0.3 0 0 1.78 0 0.3 0 ...
# $ dt3012205.txt: num  0.34 0.21 0 0.51 0 0 2.82 0 0.3 0 ...
#-----------------------------------------------------------
# Convert to Date and keep only the rows whose date is not NA; per the
# dimensions shown below this removes the padded, non-existent days.
res1$dates <- as.Date(res1$dates)
res2 <- res1[!is.na(res1$dates), ]
res2[1:3, 1:3]
#       dates dt3011120.txt dt3011240.txt
#1 1961-01-01          1.67            NA
#2 1961-01-02          0.00            NA
#3 1961-01-03          0.00            NA
dim(res2)
#[1] 16436   120

Now you can try reshape() and zoo() on the result.
Hope it helps.
A.K.
Message-ID: <1370474203.51290.YahooMailNeo@web142603.mail.bf1.yahoo.com>
In-Reply-To: <1370467035.61051.YahooMailNeo@web160603.mail.bf1.yahoo.com>