# I've written a function which prints a list of columns in a data set which I can then paste into a select statement.
# This saves time if I want to specify which columns to print and in what order.
library(tidyverse)
#' Comma-separated list of a data set's column names
#'
#' Handy for pasting into `select()` when the columns and their order are
#' spelled out by hand.
#'
#' @param my_data A data frame (or any object with `names()`).
#' @return A length-one character vector: the names joined by ", ".
names_pastef <- function(my_data) {
  # toString() is base R's `paste(x, collapse = ", ")`, so no pipe or `.`
  # placeholder is needed for this one-liner.
  toString(names(my_data))
}
names_pastef(mtcars)
#> [1] "mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb"
# I want to write a similar function for the cases where each column name has to be enclosed in quotes, e.g. in by = c(...):
# One-row lookup table: the key columns mpg and cyl plus an extra value `a`.
my_data2 <- tibble(mpg = 50, cyl = 4, a = 1)
# Left-join the lookup onto mtcars, naming the key columns as quoted strings.
mtcars2 <- left_join(mtcars, my_data2, by = c("mpg", "cyl"))
# I've tried using dQuote
# Attempt to wrap each column name in double quotes before joining them with
# ", ".
# NOTE: `dQuote` is handed to paste() as a function object rather than being
# called on the names, so paste() cannot coerce it to character and the call
# fails (see the error recorded after the example call below).
# The intent was presumably to call it on the names, e.g. dQuote(.).
names_paste_quotesf <- function(my_data) {
names(my_data) %>%
paste(., dQuote) %>%
paste(., collapse = ", ")
}
names_paste_quotesf(mtcars2)
#> Error in paste(., dQuote): cannot coerce type 'closure' to vector of type 'character'
this almost does it
# Print the column names joined by `", "`.
# The separator supplies the quotes BETWEEN names only, so the printed text
# has no opening quote before the first name and no closing quote after the
# last one.
names_paste_quotesf <- function(my_data) {
  joined <- paste0(names(my_data), collapse = '", "')
  cat(joined)
}
names_paste_quotesf(mtcars2)
mpg", "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am", "gear", "carb", "a
The `select` function can take strings, so you could do the following instead of your first function:
# A character vector of names: columns 6, 4 and 2 of mtcars, in that order.
select(mtcars, names(mtcars)[c(6, 4, 2)])
# Column positions.
select(mtcars, 3, 1)
# Selection helpers: match on a name prefix, or on a pattern.
select(iris, starts_with("Petal"))
select(iris, matches("Wid|Spec"))
# Or if you want to shorten it with a function
# Thin wrapper: forwards whatever selection is passed via `...` to select().
f <- function(d, ...) {
  select(d, ...)
}
f(mtcars, names(mtcars)[c(6, 4, 2)])
f(mtcars, 3, 1)
f(iris, starts_with("Petal"))
f(iris, matches("Wid|Spec"))
For the joining variables, you can also just return a string vector with the desired join columns. The `intersect` function will return a vector containing the column names that two data frames have in common (for example, `intersect(names(mtcars), names(my_data2))`), but `left_join` already joins by common column names automatically. For example, all of these do the same thing:
# No `by`: the join falls back to the column names the two tables share.
mtcars %>% left_join(my_data2)
# The same join with the key columns spelled out.
mtcars %>% left_join(my_data2, by = c("mpg", "cyl"))
# Column names that appear in both `x` and `y`.
join_cols <- function(x, y) {
  names_x <- names(x)
  names_y <- names(y)
  intersect(names_x, names_y)
}
# Same join again, with the key columns worked out by join_cols().
left_join(mtcars, my_data2, by = join_cols(mtcars, my_data2))
Do you have a particular use case in mind? We can help you construct a function to do the job if you can say more about what you're trying to accomplish.
Here's an option (EDIT: this is not what you want) that lets you choose which members of the names you want to keep. By default it keeps all of them.
library(tidyverse)
#' Quote and join selected column names into one string
#'
#' @param x A data frame (or other named object).
#' @param n Index of the names to keep; `NULL` (the default) keeps them all.
#' @return A length-one character vector such as `"'mpg', 'cyl'"`.
paste_names <- function(x, n = NULL) {
  # seq_len() is empty when `x` has no columns; `1:length(x)` would be
  # c(1, 0) there and turn into a bogus 'NA' entry.
  if (is.null(n)) n <- seq_len(length(x))
  kept <- names(x)[n]
  # The separator closes one quote and opens the next; the outer paste0()
  # adds the very first and very last quote.
  paste0("'", paste(kept, collapse = "', '"), "'")
}
paste_names(mtcars, 1:5)
#> [1] "'mpg', 'cyl', 'disp', 'hp', 'drat'"
Created on 2020-10-07 by the reprex package (v0.3.0)
EDIT: ignore this, it doesn't actually work, because of course the `collapse` argument makes it into a single string, not a vector.
library(tidyverse)
#' Quote and join selected column names into ONE string
#'
#' Note that the result is a single collapsed string, not a vector of names.
#'
#' @param x A data frame (or other named object).
#' @param n Index of the names to keep; `NULL` (the default) keeps them all.
#' @return A length-one character vector such as `"'mpg', 'cyl'"`.
paste_names <- function(x, n = NULL) {
  # Safe default index: empty for zero-column input, where `1:length(x)`
  # would wrongly give c(1, 0).
  if (is.null(n)) n <- seq_len(length(x))
  chosen <- names(x)[n]
  inner <- paste(chosen, collapse = "', '")
  paste0("'", inner, "'")
}
# paste_names() collapses the names into ONE string, so `sel` has length one.
# all_of() therefore looks for a single column whose name is that whole
# string, which does not exist -- hence the error recorded below.
sel <- paste_names(mtcars, 1:5)
select(mtcars, all_of(sel))
#> Error: Can't subset columns that don't exist.
#> x Column `'mpg', 'cyl', 'disp', 'hp', 'drat'` doesn't exist.
Created on 2020-10-07 by the reprex package (v0.3.0)
this works:
library(tidyverse)
#' Pick column names by index
#'
#' @param x A data frame (or other named object).
#' @param n Index of the names to keep; `NULL` (the default) keeps them all.
#' @return A character vector of the selected names.
paste_names <- function(x, n = NULL) {
  # seq_len() yields an empty index for zero-column input, whereas
  # `1:length(x)` would be c(1, 0) and return NA.
  if (is.null(n)) n <- seq_len(length(x))
  names(x)[n]
}
# Keep the first five column names as a character vector.
sel <- paste_names(mtcars, 1:5)
print(sel)
#> [1] "mpg" "cyl" "disp" "hp" "drat"
# Select the columns named in `sel` from the first rows of mtcars.
head(mtcars) %>% select(all_of(sel))
#> mpg cyl disp hp drat
#> Mazda RX4 21.0 6 160 110 3.90
#> Mazda RX4 Wag 21.0 6 160 110 3.90
#> Datsun 710 22.8 4 108 93 3.85
#> Hornet 4 Drive 21.4 6 258 110 3.08
#> Hornet Sportabout 18.7 8 360 175 3.15
#> Valiant 18.1 6 225 105 2.76
but then you might as well just go back to
select(mtcars, 1:5)
it’s no different. It's just circular.
Created on 2020-10-07 by the reprex package (v0.3.0)
This is a function that will produce a named vector that can be used as the `by` argument to the `dplyr::*_join` functions. Is that what you were after?
library(dplyr, warn.conflicts = FALSE)
#' Build a `by` vector for the dplyr `*_join()` functions
#'
#' The values of the result are column names of the right-hand data frame
#' (`df2`); its names, when present, must come from the left-hand data frame
#' (`df1`).
#'
#' @param df1 Left-hand data frame.
#' @param df2 Right-hand data frame.
#' @param right_vars Index into `names(df2)` picking the join columns.
#' @param left_vars Optional index into `names(df1)`, paired position by
#'   position with `right_vars`. When `NULL` the result is unnamed.
#' @return A character vector of `df2` column names, named by the matching
#'   `df1` column names when `left_vars` is supplied.
make_bylist <- function(df1, df2, right_vars, left_vars = NULL) {
  bylist <- names(df2)[right_vars]
  if (is.null(left_vars)) {
    # No left-hand columns requested: the right-hand names go back unnamed.
    return(bylist)
  }
  bylistnames <- names(df1)[left_vars]
  # Compare the SELECTED names rather than the raw indices, so logical or
  # negative indices are handled too. Without this check a shorter
  # `left_vars` silently pads the names with NA.
  if (length(bylistnames) != length(bylist)) {
    stop(
      "`left_vars` selects ", length(bylistnames), " column(s) but ",
      "`right_vars` selects ", length(bylist), "; they must pair up.",
      call. = FALSE
    )
  }
  names(bylist) <- bylistnames
  bylist
}
# Left table.
a <- tibble(
  a = 1:5,
  b = letters[1:5],
  c = c("Sleepy", "Grumpy", "Dopey", "Sneezy", "Bashful")
)
# Right table.
b <- tibble(
  d = letters[3:9],
  e = c("Dopey", "Sneezy", "Grumpy", "Bashful", "Sleepy", "Happy", "Doc"),
  f = month.abb[3:9]
)
# Pair columns 2:3 of `a` with columns 1:2 of `b`.
bylist <- make_bylist(a, b, right_vars = 1:2, left_vars = 2:3)
bylist
#> b c
#> "d" "e"
a %>% left_join(b, by = bylist)
#> # A tibble: 5 x 4
#> a b c f
#> <int> <chr> <chr> <chr>
#> 1 1 a Sleepy <NA>
#> 2 2 b Grumpy <NA>
#> 3 3 c Dopey Mar
#> 4 4 d Sneezy Apr
#> 5 5 e Bashful <NA>
Created on 2020-10-07 by the reprex package (v0.3.0)
Thanks very much. These tips will certainly save me time when I'm writing code.
I really like iris %>% select(matches("Wid|Spec")) and iris %>% select(starts_with("Petal"))
I hadn't realised that left_join joins by common column names automatically and says what it's done on the log.
There are other occasions when I need to enclose column names in quotes as I type, e.g. in the id_cols argument of pivot_wider. A nonsensical example is:
library(tidyverse)
# Turn the row names into a regular "model" column.
mtcars2 <- mtcars %>% rownames_to_column("model")
# id_cols is given as quoted names; new column names come from gear and
# their values from carb.
chkme <- pivot_wider(
  mtcars2,
  id_cols = c(
    "model", "mpg", "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am"
  ),
  names_from = gear,
  values_from = carb
)
I often find myself having to change something like:
mpg cyl disp hp drat wt qsec vs am gear carb
to
"model", "mpg", "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am"
when I'm writing a program.
I want to make my programs as robust and explicit as possible.
Thanks very much.
Sorry my question wasn't clear
What I'm really after is a function which prints the names of the columns in data set like this
"mpg" , "cyl" , "disp", "hp", "drat"
so that I can copy it (eg using Ctrl C) and paste it directly into eg c() in my program eg
c("mpg" , "cyl" , "disp", "hp", "drat")
almost - thank you
I just need a , between each column name
ie
"mpg", "cyl", "disp", "hp", "drat"
#' Print a data set's column names, each one quoted, ready to copy and paste
#'
#' @param my_data A data frame (or any object with `names()`).
#' @param quote_char Quote character put around every name. Defaults to a
#'   single quote; pass `"\""` to get double quotes instead.
#' @return Called for its side effect of printing via `cat()`.
names_paste_quotesf <- function(my_data, quote_char = "'") {
  # The separator closes one quote and opens the next; the outer paste0()
  # supplies the very first and very last quote.
  separator <- paste0(quote_char, ", ", quote_char)
  quoted <- paste0(
    quote_char,
    paste0(names(my_data), collapse = separator),
    quote_char
  )
  cat(quoted)
}
names_paste_quotesf(mtcars2)
#'model', 'mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb'
Thanks very much
Helen
Ah, so my first response was right after all! (Except I didn't pipe it to `cat()` like @nirgrahamuk did.)
I didn't realise you wanted to just copy and paste the text.
There are occasions where you might want to put quotation marks around bare column names, but I don't think it's necessary in the use cases you've described. `pivot_wider` and `pivot_longer` don't require (but will accept) quotation marks around column names. They use the same tidyselect syntax as other tidyverse functions. For example, you can do:
# The id_cols argument is unnecessary for most uses of pivot_wider
# Row names become a "model" column, rows are sorted by gear, then carb is
# spread into one column per gear value.
mtcars2 <- rownames_to_column(mtcars, var = "model") %>%
  arrange(gear) %>%
  pivot_wider(names_from = gear, values_from = carb)
# Gather columns `3` through `5` back into gear/carb pairs, dropping rows
# whose value is NA.
pivot_longer(
  mtcars2,
  cols = `3`:`5`,
  names_to = "gear",
  values_to = "carb",
  values_drop_na = TRUE
)
You can also use tidyselect syntax to reference the columns you don't want to include, which can sometimes be easier. For example:
# Negative selection: pivot every column except model through am.
pivot_longer(
  mtcars2,
  cols = -(model:am),
  names_to = "gear",
  values_to = "carb",
  values_drop_na = TRUE
)
# Drop mpg, hp and vs; keep everything else.
select(mtcars, -c(mpg, hp, vs))
Sorry I wasn't clearer. It's the first time I've asked a question and the response has been amazing
Thanks very much for all your help. I wasn't aware of tidyselect syntax.
Not a problem! Enjoy R
This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.
If you have a query related to it or one of the replies, start a new topic and refer back with a link.