On Sun, Jun 16, 2013 at 9:00 PM, Nick Matzke <matzke at berkeley.edu> wrote:
Thanks *VERY* much, this is great!
I realized there were a few more cases to handle; I think I've now got
something that covers all the possibilities:
# Extract every number embedded in free text: integers, decimals and
# scientific notation, each optionally negative.
library(stringr)

# Sample text. The pieces are joined with paste() (single spaces) so that no
# literal line break ends up inside the string.
tmpstr <- paste(
  "The first number is: 32. Another one is: 32.1. Here's a number in",
  "scientific format, 0.3523e10, and another, 0.3523e-10, and a negative,",
  "-313.1"
)

# A single greedy pattern replaces the former alternation of twelve
# sub-patterns. It accepts exactly the same twelve shapes
# (sign x {integer, float} x {no exponent, e<digits>, e-<digits>}), but it
# does not depend on how the regex engine resolves alternation. Engines that
# take the first matching alternative would otherwise stop at the bare
# integer and split "32.1" into "32" and "1".
pattern <- paste0(
  "-?",          # optional leading minus sign
  "\\d+",        # integer part (always required)
  "(\\.\\d+)?",  # optional fractional part
  "(e-?\\d+)?"   # optional exponent, itself optionally negative
)
pattern

# Pull out all matches from the single input string and convert to numeric.
as.numeric(str_extract_all(tmpstr, pattern)[[1]])
# A more complex string: several numbers in a row, including integers written
# in scientific notation. The pieces are joined with paste() (single spaces)
# so that no literal line break ends up inside the string.
tmpstr <- paste(
  "The first number is: 32. 342 342.1 -3234e-10 3234e-1 Another",
  "one is: 32.1. Here's a number in scientific format, 0.3523e10, and another,",
  "0.3523e-10, and a negative, -313.1"
)

# Earlier hand-written alternation, kept for reference only (disabled):
#pattern = "(\\d)+|(-\\d)+|(\\d+\\.\\d+)|(-\\d+\\.\\d+)|(\\d+.\\d+e\\d+)|(\\d+\\.\\d+e-\\d+)|(-\\d+.\\d+e\\d+)|(-\\d+\\.\\d+e-\\d+)"

# Reuses the `pattern` built earlier in this script.
as.numeric(str_extract_all(tmpstr, pattern)[[1]])
This much simpler single pattern may be good enough:
library(gsubfn)

# One permissive pattern: any run of sign, dot, "e" and digit characters that
# ends in a digit. It does not validate the shape of the number, so a token
# such as "1-2" is also matched as a single piece and would become NA under
# as.numeric().
pat <- "[-+.e0-9]*\\d"

# Matches returned as character strings.
strapplyc(tmpstr, pat)[[1]]
#> [1] "32" "342" "342.1" "-3234e-10" "3234e-1"
#> [6] "32.1" "0.3523e10" "0.3523e-10" "-313.1"

# Same matches, each passed through as.numeric.
strapply(tmpstr, pat, as.numeric)[[1]]
#> [1] 3.200e+01 3.420e+02 3.421e+02 -3.234e-07 3.234e+02 3.210e+01 3.523e+09
#> [8] 3.523e-11 -3.131e+02
--
Statistics & Software Consulting
GKX Group, GKX Associates Inc.
tel: 1-877-GKX-GROUP
email: ggrothendieck at gmail.com