# Build a term-document matrix comparing two inspection-plan PDFs.
#
# Steps: convert both PDFs to text with xpdf's pdftotext, read the text,
# build a two-document tm corpus, normalise it (lower case, whitespace,
# punctuation, stop words) and produce a term-by-document count matrix `m`
# with one column per PDF.

pdf1 <- "./PLAN de INSPECCIONES/05_seguridad_ciudadana.pdf"
pdf2 <- "./PLAN de INSPECCIONES/2013_21SeguridadCiudadana.pdf"
exe <- "./xpdfbin-win-3.04/xpdfbin-win-3.04/bin32/pdftotext.exe"

# Convert each PDF. wait = TRUE blocks until pdftotext has finished, so the
# .txt files are complete before they are read below. The executable and PDF
# paths are double-quoted because they contain spaces.
for (pdf in c(pdf1, pdf2)) {
  status <- system(paste0("\"", exe, "\" \"", pdf, "\""), wait = TRUE)
  if (status != 0) {
    stop("pdftotext failed for ", pdf, " (exit status ", status, ")",
         call. = FALSE)
  }
}

# The text files read below sit next to the PDFs with the extension swapped.
# The pattern is anchored and the dot escaped so only a trailing ".pdf"
# is replaced.
txt1 <- sub("\\.pdf$", ".txt", pdf1)
txt2 <- sub("\\.pdf$", ".txt", pdf2)

# Read the extracted text; sub = "byte" makes iconv() replace any
# non-convertible bytes with their hex codes instead of returning NA.
d1 <- readLines(txt1, encoding = "UTF-8")
d1 <- iconv(enc2utf8(d1), sub = "byte")
d2 <- readLines(txt2, encoding = "UTF-8")
d2 <- iconv(enc2utf8(d2), sub = "byte")

# VectorSource() treats every vector element as a separate document, so each
# file's lines are collapsed into a single string. This yields exactly two
# documents, matching the two column names assigned to `m` below.
df <- c(paste(d1, collapse = " "), paste(d2, collapse = " "))
corpus <- Corpus(VectorSource(df))

# Normalise the text before counting terms.
d <- tm_map(corpus, content_transformer(tolower))
d <- tm_map(d, stripWhitespace)
d <- tm_map(d, removePunctuation)

# Remove the project-specific stop words first, then tm's Spanish list.
sw <- readLines("./StopWords.txt", encoding = "UTF-8")
sw <- iconv(enc2utf8(sw), sub = "byte")
d <- tm_map(d, removeWords, sw)
d <- tm_map(d, removeWords, stopwords("spanish"))

# Term counts: rows are terms, columns are the two documents in input order.
tdm <- TermDocumentMatrix(d)
m <- as.matrix(tdm)
colnames(m) <- c("P05", "P13")