Regular expression with csplit function for splitting cells in Excel

This will take care of the splitting; I will leave the intra-cell line breaks up to you :slight_smile:

# Run this script in a fresh R session rather than calling rm(list = ls()):
# that call only removes objects from the global environment (attached
# packages and changed options survive it), and it deletes the caller's own
# objects when the script is source()d.

# Load libraries
library(tidyverse) # dplyr, tidyr, stringr, tibble and the %>% pipe
library(readxl)    # read_excel()
library(openxlsx)  # write.xlsx()

# Set functions
# Split numbered-list text such as "1. foo 2. bar" into one string per item.
#
# Arguments:
#   x        Character vector of cell contents, or a tibble whose last column
#            (the default selection of `pull()`) holds them.
#   pattern  Regular expression matching an item marker. The default matches
#            one or more digits, a period and exactly one whitespace character.
#
# Returns a list with one character vector per element of `x`, so the result
# can be stored as a list-column by `mutate()` and expanded with `unnest()`.
# Every item keeps its own marker. Text in front of the first marker is kept
# as a separate leading item, a cell without any marker is returned unchanged,
# and NA stays NA.
do_split <- function(x, pattern = "\\d+\\.\\s{1}") {
  if (is_tibble(x)) {
    x <- pull(x)
  }

  # Splits a single cell. Cutting at the match positions of `pattern` (instead
  # of splitting and re-attaching separately extracted markers) guarantees that
  # each marker stays with its own text, even when an item is empty.
  split_cell <- function(cell) {
    if (is.na(cell)) {
      return(NA_character_)
    }

    # unname(): a one-row match matrix would otherwise yield a named scalar.
    starts <- unname(str_locate_all(cell, pattern)[[1]][, "start"])
    if (length(starts) == 0) {
      return(cell)
    }

    # Keep any text that precedes the first marker as its own piece.
    if (starts[1] > 1) {
      starts <- c(1L, starts)
    }
    ends <- c(starts[-1] - 1L, str_length(cell))

    pieces <- str_sub(cell, starts, ends)
    # Drop whitespace-only pieces (e.g. blanks in front of the first marker).
    pieces[str_trim(pieces) != ""]
  }

  # One result per cell, so rows are never mixed with each other.
  lapply(as.character(x), split_cell)
}

# Read data
# The wrangling step below refers to columns named `Result` and `Steps`, so
# the workbook must provide both.
d <- read_excel(path = "~/Desktop/master.xlsx")

# Wrangle data
# Turn both columns into list-columns of individual list items, then expand
# them together so that the n-th result stays next to the n-th step.
# `cols = c(...)` is the tidyr >= 1.0.0 interface; passing the columns as
# separate bare arguments is deprecated.
o <- d %>%
  mutate(
    Result = do_split(Result),
    Steps = do_split(Steps)
  ) %>%
  unnest(cols = c(Result, Steps))

# Write data
write.xlsx(x = o, file = "~/Desktop/out.xlsx")
3 Likes