Skip to content
Prev 355329 / 398500 Next

Regex: Combining sub/grepl with ifelse

On Oct 9, 2015, at 4:21 PM, Boris Steipe wrote:

            
That logic actually simplifies the regex strategy as well:

 sub("(.*[ \n])([-A-Z0-9]{6,12})(.*)", "\\2",
 ripley.tv$producto,
 ignore.case = T)

 
This almost succeeds, but it also picks up a few all-letter words. If you additionally require at least one digit in the middle of the match, you get the full set of results:

 sub("(.*[ \n])([-A-Z0-9]{3,6}[0-9][-A-Z0-9]{2,6})(.*)", "\\2",
 ripley.tv$producto,
 ignore.case = T)

 [1] "48J6400"     "40J5300"     "TC-40CS600L" "LE28F6600"   "LE40K5000N" 
 [6] "LE32B7000"   "LE32K5000N"  "LE55B8000"   "LE40B8000"   "LE24B8000"  
[11] "TC-42AS610"  "LE50K5000N"  "40JU6500"    "48JU6500"    "50JU6500"   
[16] "55JS9000"    "55JU6500"    "55JU6700"    "55JU7500"    "65JS9000"   
[21] "65JU6500"    "65JU7500"    "75JU6500"    "40LF6350"    "42LF6400"   
[26] "42LF6450"    "49LF6450"    "LF6400"      "43UF6750"    "49UF6750"   
[31] "UF6900"      "49UF7700"    "49UF8500"    "55UF7700"    "65UF7700"   
[36] "55UF8500"    "TC-55CX640W" "TC-50CX640W" "70UF7700"    "UG8700"     
[41] "LF6350"      "KDL-50FA95C" "KDL50W805C"  "KDL-40R354B" "40J5500"    
[46] "50J5500"     "32JH4005"    "50J5300"     "48J5300"     "40J6400"    
[51] "KDL-32R505C" "KDL-40R555C" "55J6400"     "40JH5005"    "43LF5410"   
[56] "32LF585B"    "49LF5900"    "KDL-65W855C" "UN48J6500"   "LE40F1551"  
[61] "TC-32AS600L" "KDL-32R304B" "55EC9300"    "LE32W454F"   "58UF8300"   
[66] "KDL-55W805C" "XBR-49X835C" "XBR-55X855C" "XBR-65X905C" "XBR-75X945C"
[71] "XBR-55X905C" "LC60UE30U"   "LC70UE30U"   "LC80UE30U"   "48J5500"    
[76] "79UG8800"    "65UF9500"    "65UF8500"    "55UF9500"    "32J4300"    
[81] "KDL-48R555C" "55UG8700"    "60UF8500"    "55LF6500"    "32LF550B"   
[86] "47LB5610"    "TC-50AS600L" "XBR-55X855B" "LC70SQ17U"   "XBR-79X905B"
[91] "TC-40A400L"  "XBR-70X855B" "55HU8700"    "LE40D3142"   "TC-42AS650L"
[96] "LC70LE660"   "LE58D3140"
David Winsemius
Alameda, CA, USA