Skip to content

data formatting

1 message · arun

#
Hi Eliza,

Suppose you have 147 data files in the same working directory. Here, I am using "Eliza1.txt" and a modified "Eliza2.txt" (attached).
# Confirm which data files are present in the working directory.
list.files()
#[1] "Eliza1.txt" "Eliza2.txt"

# Count the spaces on every line of every file. gsub() strips a trailing
# space first, because the sample files had one at the end of each line
# (possibly a formatting error). If your files have no trailing spaces,
# the gsub() step is not needed.
lapply(list.files(), function(path) {
  trimmed <- gsub(" $", "", readLines(path))
  str_count(trimmed, " ")
})
#[[1]]
# [1] 7 7 7 7 6 7 7 7 7 7 6 6 7 7 7 7 6 7 7 7 7 7 6 6
#
#[[2]]
# [1] 7 7 7 7 6 7 7 7 7 7 6 6 7 7 7 7 6 7 7 7 7 7 6 6


#' Normalise the spacing of fixed-width "YYYY.MM.NN<spaces>value" lines.
#'
#' Uses base R only, so it does not depend on stringr being attached.
#'
#' @param lines Character vector of raw lines, e.g. from readLines().
#' @return Character vector of the same length as `lines` in which
#'   * one trailing space (if any) has been removed,
#'   * the first whitespace run is replaced by 5 spaces on lines holding
#'     7 spaces and by 4 spaces on lines holding 6 spaces,
#'   * a "0" in column 6 and/or column 9 is replaced by a space.
reformat_lines <- function(lines) {
  count_spaces <- function(x) {
    lengths(regmatches(x, gregexpr(" ", x, fixed = TRUE)))
  }

  # Drop one trailing space so it does not distort the space counts.
  lines <- gsub(" $", "", lines)

  # The replacement strings are plain spaces; the "?" characters seen in
  # archived copies of this code are mangled non-breaking spaces.
  has_seven <- count_spaces(lines) == 7
  lines[has_seven] <- sub("\\s+", "     ", lines[has_seven])

  # Counts are taken again after the first replacement, as in the original
  # statement order.
  has_six <- count_spaces(lines) == 6
  lines[has_six] <- sub("\\s+", "    ", lines[has_six])

  # Blank a leading zero in column 6 and column 9 independently. Using a
  # combined "either column is zero" mask would also overwrite a non-zero
  # digit in column 6 (e.g. "1911.10.01" would turn into "1911. 0. 1").
  zero_at_6 <- substr(lines, 6, 6) == "0"
  zero_at_9 <- substr(lines, 9, 9) == "0"
  substr(lines[zero_at_6], 6, 6) <- " "
  substr(lines[zero_at_9], 9, 9) <- " "

  lines
}

# List the working directory once so names and contents cannot get out of
# step, and skip sub-directories, which readLines() cannot open.
data_files <- list.files()
data_files <- data_files[!dir.exists(data_files)]

res <- lapply(data_files, function(path) reformat_lines(readLines(path)))

# Name each element after its file, dropping everything from the first dot.
names(res) <- gsub("\\..*", "", data_files)
res
#$Eliza1
# [1] "1911. 1. 1     7.87" "1911. 1. 2     9.26" "1911. 1. 3     8.06"
# [4] "1911. 1. 4     8.13" "1911. 1. 5    12.90" "1911. 2. 6     5.45"
# [7] "1911. 2. 7     3.26" "1911. 3. 8     5.70" "1911. 3. 9     9.24"
#[10] "1911. 4.10     7.60" "1911. 5.11    14.82" "1911. 5.12    14.10"
#[13] "1911. 6.13     7.87" "1911. 6.14     9.26" "1911. 7.15     8.06"
#[16] "1911. 7.16     8.13" "1911. 8.17    12.90" "1911. 8.18     5.45"
#[19] "1911. 9.19     3.26" "1911. 9.20     5.70" "1911.10.21     9.24"
#[22] "1911.10.22     7.60" "1911.11.23    14.82" "1911.12.24    14.10"

#$Eliza2
# [1] "1911. 1. 1     4.87"  "1911. 1. 2     11.26" "1911. 1. 3     6.06"
# [4] "1911. 1. 4     8.13"  "1911. 1. 5    11.90"  "1911. 2. 6     5.55"
# [7] "1911. 2. 7     3.16"  "1911. 3. 8     5.10"  "1911. 3. 9     9.34"
#[10] "1911. 4.10     7.10"  "1911. 5.11    14.92"  "1911. 5.12    14.20"
#[13] "1911. 6.13     7.77"  "1911. 6.14     9.36"  "1911. 7.15     8.66"
#[16] "1911. 7.16     8.23"  "1911. 8.17    11.90"  "1911. 8.18     15.45"
#[19] "1911. 9.19     13.26" "1911. 9.20     15.77" "1911.10.21     19.34"
#[22] "1911.10.22     7.66"  "1911.11.23    14.84"  "1911.12.24    14.11"
# Check the result: count the spaces left on each reformatted line.
# (No leading "?" here: with it, R treats the line as a help request
# instead of evaluating the lapply() call.)
lapply(res, function(x) str_count(x, " "))
#$Eliza1
# [1] 7 7 7 7 6 7 7 7 7 6 5 5 6 6 6 6 5 6 6 6 5 5 4 4

#$Eliza2
# [1] 7 7 7 7 6 7 7 7 7 6 5 5 6 6 6 6 5 6 6 6 5 5 4 4
Hope this helps.
A.K.