In {data.table}, I'm trying to paste multiple columns together but ignore NAs. A bit like how tidyr::unite(..., na.rm=TRUE)
works. See example below. Any thoughts? I'd really like a data.table solution for speed reasons as the real implementation is a big dataset.
library(tidyverse)
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:lubridate':
#>
#> hour, isoweek, mday, minute, month, quarter, second, wday, week,
#> yday, year
#> The following objects are masked from 'package:dplyr':
#>
#> between, first, last
#> The following object is masked from 'package:purrr':
#>
#> transpose
library(dtplyr)
set.seed(12345)
n = 10
d=sample(c("foo","bar","baz","qux","quux"),n,replace=TRUE)
e=sample(c("foo","bar","baz","qux","quux"),n,replace=TRUE)
f=sample(c("foo","bar",NA),n,replace=TRUE)
DT <- data.table(d=d, e=e, f=f)[, def:=do.call(paste, c(.SD, sep=",")), .SD=d:f]
dfunite <- tibble(d=d, e=e, f=f) %>%
unite("def", d:f, sep=",", remove=FALSE, na.rm=TRUE) %>%
relocate(names(DT))
print(DT)
#> d e f def
#> <char> <char> <char> <char>
#> 1: baz foo <NA> baz,foo,NA
#> 2: bar qux bar bar,qux,bar
#> 3: qux qux bar qux,qux,bar
#> 4: bar bar <NA> bar,bar,NA
#> 5: quux qux foo quux,qux,foo
#> 6: baz baz foo baz,baz,foo
#> 7: bar foo <NA> bar,foo,NA
#> 8: baz quux bar baz,quux,bar
#> 9: bar qux foo bar,qux,foo
#> 10: foo bar foo foo,bar,foo
dfunite
#> # A tibble: 10 × 4
#> d e f def
#> <chr> <chr> <chr> <chr>
#> 1 baz foo <NA> baz,foo
#> 2 bar qux bar bar,qux,bar
#> 3 qux qux bar qux,qux,bar
#> 4 bar bar <NA> bar,bar
#> 5 quux qux foo quux,qux,foo
#> 6 baz baz foo baz,baz,foo
#> 7 bar foo <NA> bar,foo
#> 8 baz quux bar baz,quux,bar
#> 9 bar qux foo bar,qux,foo
#> 10 foo bar foo foo,bar,foo
Created on 2024-12-13 with reprex v2.1.0
Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.4.1 (2024-06-14 ucrt)
#> os Windows 10 x64 (build 19045)
#> system x86_64, mingw32
#> ui RTerm
#> language (EN)
#> collate English_United States.utf8
#> ctype English_United States.utf8
#> tz America/New_York
#> date 2024-12-13
#> pandoc 3.2 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> cli 3.6.3 2024-06-21 [1] CRAN (R 4.4.1)
#> colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.4.1)
#> data.table * 1.15.4 2024-03-30 [1] CRAN (R 4.4.1)
#> digest 0.6.36 2024-06-23 [1] CRAN (R 4.4.1)
#> dplyr * 1.1.4 2023-11-17 [1] CRAN (R 4.4.1)
#> dtplyr * 1.3.1 2023-03-22 [1] CRAN (R 4.4.1)
#> evaluate 0.24.0 2024-06-10 [1] CRAN (R 4.4.1)
#> fansi 1.0.6 2023-12-08 [1] CRAN (R 4.4.1)
#> fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.4.1)
#> forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.4.1)
#> fs 1.6.4 2024-04-25 [1] CRAN (R 4.4.1)
#> generics 0.1.3 2022-07-05 [1] CRAN (R 4.4.1)
#> ggplot2 * 3.5.1 2024-04-23 [1] CRAN (R 4.4.1)
#> glue 1.7.0 2024-01-09 [1] CRAN (R 4.4.1)
#> gtable 0.3.5 2024-04-22 [1] CRAN (R 4.4.1)
#> hms 1.1.3 2023-03-21 [1] CRAN (R 4.4.1)
#> htmltools 0.5.8.1 2024-04-04 [1] CRAN (R 4.4.1)
#> knitr 1.47 2024-05-29 [1] CRAN (R 4.4.1)
#> lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.4.1)
#> lubridate * 1.9.3 2023-09-27 [1] CRAN (R 4.4.1)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.4.1)
#> munsell 0.5.1 2024-04-01 [1] CRAN (R 4.4.1)
#> pillar 1.9.0 2023-03-22 [1] CRAN (R 4.4.1)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.4.1)
#> purrr * 1.0.2 2023-08-10 [1] CRAN (R 4.4.1)
#> R.cache 0.16.0 2022-07-21 [1] CRAN (R 4.4.1)
#> R.methodsS3 1.8.2 2022-06-13 [1] CRAN (R 4.4.0)
#> R.oo 1.26.0 2024-01-24 [1] CRAN (R 4.4.0)
#> R.utils 2.12.3 2023-11-18 [1] CRAN (R 4.4.1)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.4.1)
#> readr * 2.1.5 2024-01-10 [1] CRAN (R 4.4.1)
#> reprex 2.1.0 2024-01-11 [1] CRAN (R 4.4.1)
#> rlang 1.1.4 2024-06-04 [1] CRAN (R 4.4.1)
#> rmarkdown 2.27 2024-05-17 [1] CRAN (R 4.4.1)
#> rstudioapi 0.16.0 2024-03-24 [1] CRAN (R 4.4.1)
#> scales 1.3.0 2023-11-28 [1] CRAN (R 4.4.1)
#> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.4.1)
#> stringi 1.8.4 2024-05-06 [1] CRAN (R 4.4.0)
#> stringr * 1.5.1 2023-11-14 [1] CRAN (R 4.4.1)
#> styler 1.10.3 2024-04-07 [1] CRAN (R 4.4.1)
#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.4.1)
#> tidyr * 1.3.1 2024-01-24 [1] CRAN (R 4.4.1)
#> tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.4.1)
#> tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.4.1)
#> timechange 0.3.0 2024-01-18 [1] CRAN (R 4.4.1)
#> tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.4.1)
#> utf8 1.2.4 2023-10-22 [1] CRAN (R 4.4.1)
#> vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.4.1)
#> withr 3.0.0 2024-01-16 [1] CRAN (R 4.4.1)
#> xfun 0.45 2024-06-16 [1] CRAN (R 4.4.1)
#> yaml 2.3.8 2023-12-11 [1] CRAN (R 4.4.0)
#>
#> [1] C:/Program Files/R/R-4.4.1/library
#>
#> ──────────────────────────────────────────────────────────────────────────────