You can assign multiple columns at once in base R. Just grab the column
and data
columns,
set.seed(87)
my_data <- data.frame(
column1 = rnorm(10),
column2 = rnorm(10),
column3 = rnorm(10),
data1 = rnorm(10),
data2 = rnorm(10)
)
my_names <- names(my_data)
column_columns <- my_names[startsWith(my_names, "column")]
data_columns <- gsub("column", "data", column_columns)
is_paired <- data_columns %in% my_names
data_columns <- data_columns[is_paired]
column_columns <- column_columns[is_paired]
By building the data
column names using the column
column names, you're sure to match them up correctly, no matter the physical order. Also, it lets you omit any pairs where the data
column doesn't exist.
Now we can make the names of the results
columns, and assign them the results of multiplying each pair. We'll "loop" over the pairs using mapply
.
results_columns <- gsub("column", "results", column_columns)
my_data[, results_columns] <- mapply(
FUN = "*",
my_data[column_columns],
my_data[data_columns]
)
print(my_data, digits = 2)
# column1 column2 column3 data1 data2 results1 results2
# 1 -2.14 -0.6652 0.32 0.044 0.840 -0.094 -0.55875
# 2 -1.72 0.0011 -0.81 1.414 -1.133 -2.437 -0.00128
# 3 -1.87 -0.1985 -0.72 -0.804 -0.479 1.504 0.09501
# 4 -0.92 0.1079 -0.85 -1.000 -0.008 0.915 -0.00087
# 5 1.84 1.5710 0.32 -0.444 1.468 -0.815 2.30646
# 6 -0.10 0.5353 -0.55 -0.250 0.182 0.026 0.09750
# 7 -0.91 0.0737 1.85 0.626 1.383 -0.570 0.10195
# 8 -0.68 -0.6938 -2.13 -0.633 -2.412 0.428 1.67352
# 9 1.01 1.3855 0.55 -0.956 -1.247 -0.964 -1.72745
# 10 -0.55 -0.4814 -0.57 0.710 -0.809 -0.390 0.38933
Edit
I feel silly for missing something I often look for in these type of problems: there's useful data buried in the column names. We can use tidyr::spread
to make "normalized" versions of the column
and data
values.
library(dplyr)
library(tidyr)
set.seed(87)
my_data <- data.frame(
column1 = rnorm(10),
column2 = rnorm(10),
column3 = rnorm(10),
data1 = rnorm(10),
data2 = rnorm(10)
)
# This will help match values later
my_data[["id"]] <- seq_len(nrow(my_data))
column_data <- my_data %>%
select(id, starts_with("column")) %>%
gather(key = "name", value = "column_value", -id) %>%
mutate(number = gsub("column", "", name)) %>%
select(-name)
data_data <- my_data %>%
select(id, starts_with("data")) %>%
gather(key = "name", value = "data_value", -id) %>%
mutate(number = gsub("data", "", name)) %>%
select(-name)
head(column_data)
# id column_value number
# 1 1 -2.1426914 1
# 2 2 -1.7228117 1
# 3 3 -1.8718209 1
# 4 4 -0.9150332 1
# 5 5 1.8356132 1
# 6 6 -0.1044144 1
head(data_data)
# id data_value number
# 1 1 0.04398221 1
# 2 2 1.41446386 1
# 3 3 -0.80356469 1
# 4 4 -0.99960185 1
# 5 5 -0.44377045 1
# 6 6 -0.24976348 1
With these, it's simple to just join and multiply. The result's in a handy format, as well.
results <- column_data %>%
inner_join(data_data, by = c("id", "number")) %>%
mutate(results = column_value * data_value)
head(results)
# id column_value number data_value results
# 1 1 -2.1426914 1 0.04398221 -0.09424031
# 2 2 -1.7228117 1 1.41446386 -2.43685491
# 3 3 -1.8718209 1 -0.80356469 1.50412916
# 4 4 -0.9150332 1 -0.99960185 0.91466888
# 5 5 1.8356132 1 -0.44377045 -0.81459089
# 6 6 -0.1044144 1 -0.24976348 0.02607890
Unless you absolutely need the result to be in the same form as the original data (wide, as opposed to long), I suggest keeping it this way. It's just easier to use down the line.