Text to columns - Strings as factors?

Hi,
I have this simple file:

data.frame(stringsAsFactors=FALSE,
         URN = c("A", "B", "C", "D", "E", "F", "G", "H"),
    QUESTION = c("Timeliness", "Recommendation", "Timeliness", "Staff",
                 "Recommendation", "Recommendation", "Choice Dealer",
                 "Fixed Right First Time"),
    VERBATIM = c("aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg", "hhh"),
    CONCEPTS = c("-[5|70|81]\t-[9|0|70]",
                 "+[23|427|99]\t-[32|0|103]\t-[32|294|21]\t-[32|962|92]\t-[35|0|103]\t-[35|294|21]\t-[35|669|156]\t-[38|555|114]\t-[5|1159|103]",
                 "-[10|0|216]\t-[32|0|216]\t-[32|216|215]\t-[48|431|84]\t-[5|431|84]", "-[38|0|63]\t-[38|176|38]\t-[5|95|81]",
                 "-[5|0|240]\t-[9|0|240]", "-[5|128|50]", NA, NA)
)

I have looked at some solutions such as this: Split text column into two parts or that: Split data: text to columns

but my challenge is slightly different:
All I need is splitting CONCEPTS into separate columns using following dividers:

  1. |
  2. [
  3. ]
  4. space

Basically I need +,- or a number

I'm sure the tack is easy but I have different length of each record...

Can you help?

Separating this into columns doesn't make sense to me because they have different lengths. What do the column names would represent? As an alternative you could separate it into rows and continue reshaping from there.

df <- data.frame(stringsAsFactors=FALSE,
           URN = c("A", "B", "C", "D", "E", "F", "G", "H"),
           QUESTION = c("Timeliness", "Recommendation", "Timeliness", "Staff",
                        "Recommendation", "Recommendation", "Choice Dealer",
                        "Fixed Right First Time"),
           VERBATIM = c("aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg", "hhh"),
           CONCEPTS = c("-[5|70|81]\t-[9|0|70]",
                        "+[23|427|99]\t-[32|0|103]\t-[32|294|21]\t-[32|962|92]\t-[35|0|103]\t-[35|294|21]\t-[35|669|156]\t-[38|555|114]\t-[5|1159|103]",
                        "-[10|0|216]\t-[32|0|216]\t-[32|216|215]\t-[48|431|84]\t-[5|431|84]", "-[38|0|63]\t-[38|176|38]\t-[5|95|81]",
                        "-[5|0|240]\t-[9|0|240]", "-[5|128|50]", NA, NA)
)

library(tidyverse)
df %>%
    separate_rows(CONCEPTS, sep = "[\\[\\|\\]]") %>% 
    mutate(CONCEPTS = str_remove(CONCEPTS, "\t")) %>% 
    filter(!is.na(CONCEPTS), CONCEPTS != "")
#>    URN       QUESTION VERBATIM CONCEPTS
#> 1    A     Timeliness      aaa        -
#> 2    A     Timeliness      aaa        5
#> 3    A     Timeliness      aaa       70
#> 4    A     Timeliness      aaa       81
#> 5    A     Timeliness      aaa        -
#> 6    A     Timeliness      aaa        9
#> 7    A     Timeliness      aaa        0
#> 8    A     Timeliness      aaa       70
#> 9    B Recommendation      bbb        +
#> 10   B Recommendation      bbb       23
#> 11   B Recommendation      bbb      427
#> 12   B Recommendation      bbb       99
#> 13   B Recommendation      bbb        -
#> 14   B Recommendation      bbb       32
#> 15   B Recommendation      bbb        0
#> 16   B Recommendation      bbb      103
#> 17   B Recommendation      bbb        -
#> 18   B Recommendation      bbb       32
#> 19   B Recommendation      bbb      294
#> 20   B Recommendation      bbb       21
#> 21   B Recommendation      bbb        -
#> 22   B Recommendation      bbb       32
#> 23   B Recommendation      bbb      962
#> 24   B Recommendation      bbb       92
#> 25   B Recommendation      bbb        -
#> 26   B Recommendation      bbb       35
#> 27   B Recommendation      bbb        0
#> 28   B Recommendation      bbb      103
#> 29   B Recommendation      bbb        -
#> 30   B Recommendation      bbb       35
#> 31   B Recommendation      bbb      294
#> 32   B Recommendation      bbb       21
#> 33   B Recommendation      bbb        -
#> 34   B Recommendation      bbb       35
#> 35   B Recommendation      bbb      669
#> 36   B Recommendation      bbb      156
#> 37   B Recommendation      bbb        -
#> 38   B Recommendation      bbb       38
#> 39   B Recommendation      bbb      555
#> 40   B Recommendation      bbb      114
#> 41   B Recommendation      bbb        -
#> 42   B Recommendation      bbb        5
#> 43   B Recommendation      bbb     1159
#> 44   B Recommendation      bbb      103
#> 45   C     Timeliness      ccc        -
#> 46   C     Timeliness      ccc       10
#> 47   C     Timeliness      ccc        0
#> 48   C     Timeliness      ccc      216
#> 49   C     Timeliness      ccc        -
#> 50   C     Timeliness      ccc       32
#> 51   C     Timeliness      ccc        0
#> 52   C     Timeliness      ccc      216
#> 53   C     Timeliness      ccc        -
#> 54   C     Timeliness      ccc       32
#> 55   C     Timeliness      ccc      216
#> 56   C     Timeliness      ccc      215
#> 57   C     Timeliness      ccc        -
#> 58   C     Timeliness      ccc       48
#> 59   C     Timeliness      ccc      431
#> 60   C     Timeliness      ccc       84
#> 61   C     Timeliness      ccc        -
#> 62   C     Timeliness      ccc        5
#> 63   C     Timeliness      ccc      431
#> 64   C     Timeliness      ccc       84
#> 65   D          Staff      ddd        -
#> 66   D          Staff      ddd       38
#> 67   D          Staff      ddd        0
#> 68   D          Staff      ddd       63
#> 69   D          Staff      ddd        -
#> 70   D          Staff      ddd       38
#> 71   D          Staff      ddd      176
#> 72   D          Staff      ddd       38
#> 73   D          Staff      ddd        -
#> 74   D          Staff      ddd        5
#> 75   D          Staff      ddd       95
#> 76   D          Staff      ddd       81
#> 77   E Recommendation      eee        -
#> 78   E Recommendation      eee        5
#> 79   E Recommendation      eee        0
#> 80   E Recommendation      eee      240
#> 81   E Recommendation      eee        -
#> 82   E Recommendation      eee        9
#> 83   E Recommendation      eee        0
#> 84   E Recommendation      eee      240
#> 85   F Recommendation      fff        -
#> 86   F Recommendation      fff        5
#> 87   F Recommendation      fff      128
#> 88   F Recommendation      fff       50

Created on 2019-09-18 by the reprex package (v0.3.0.9000)

1 Like

Maybe this makes more sense, although a little bit of context would help to understand the goal.

library(tidyverse)
df %>%
    separate_rows(CONCEPTS, sep = "\t") %>% 
    separate(CONCEPTS, into = c("Sign", "N1", "N2", "N3"), sep = "\\[|\\||\\]", extra = "drop")
#>    URN               QUESTION VERBATIM Sign   N1   N2   N3
#> 1    A             Timeliness      aaa    -    5   70   81
#> 2    A             Timeliness      aaa    -    9    0   70
#> 3    B         Recommendation      bbb    +   23  427   99
#> 4    B         Recommendation      bbb    -   32    0  103
#> 5    B         Recommendation      bbb    -   32  294   21
#> 6    B         Recommendation      bbb    -   32  962   92
#> 7    B         Recommendation      bbb    -   35    0  103
#> 8    B         Recommendation      bbb    -   35  294   21
#> 9    B         Recommendation      bbb    -   35  669  156
#> 10   B         Recommendation      bbb    -   38  555  114
#> 11   B         Recommendation      bbb    -    5 1159  103
#> 12   C             Timeliness      ccc    -   10    0  216
#> 13   C             Timeliness      ccc    -   32    0  216
#> 14   C             Timeliness      ccc    -   32  216  215
#> 15   C             Timeliness      ccc    -   48  431   84
#> 16   C             Timeliness      ccc    -    5  431   84
#> 17   D                  Staff      ddd    -   38    0   63
#> 18   D                  Staff      ddd    -   38  176   38
#> 19   D                  Staff      ddd    -    5   95   81
#> 20   E         Recommendation      eee    -    5    0  240
#> 21   E         Recommendation      eee    -    9    0  240
#> 22   F         Recommendation      fff    -    5  128   50
#> 23   G          Choice Dealer      ggg <NA> <NA> <NA> <NA>
#> 24   H Fixed Right First Time      hhh <NA> <NA> <NA> <NA>
1 Like

Absolutely amazing!!! I love R more and more!!!

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.