Hi there!
I am trying to create a function that essentially mimics the results I get here, but with different site names. The below dput() is a subset of a much larger dataframe that contains many other sites.
glns2508 <- structure(list(PUBLICATION_DATE = c(42933, 42933, 42933, 42933,
42933, 42933, 42933, 42933, 42933, 42933, 42933, 42933, 42933,
42933, 42933), UID = c(175411, 175411, 175411, 175411, 175411,
175411, 175411, 175411, 175411, 175411, 175411, 175411, 175411,
175411, 175411), SITE_ID = c("GLNS15-2508", "GLNS15-2508", "GLNS15-2508",
"GLNS15-2508", "GLNS15-2508", "GLNS15-2508", "GLNS15-2508", "GLNS15-2508",
"GLNS15-2508", "GLNS15-2508", "GLNS15-2508", "GLNS15-2508", "GLNS15-2508",
"GLNS15-2508", "GLNS15-2508"), DATE_COL = c(42220, 42220, 42220,
42220, 42220, 42220, 42220, 42220, 42220, 42220, 42220, 42220,
42220, 42220, 42220), VISIT_NO = c(1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1), STUDY = c("LK ERIE ENHANCE", "LK ERIE ENHANCE",
"LK ERIE ENHANCE", "LK ERIE ENHANCE", "LK ERIE ENHANCE", "LK ERIE ENHANCE",
"LK ERIE ENHANCE", "LK ERIE ENHANCE", "LK ERIE ENHANCE", "LK ERIE ENHANCE",
"LK ERIE ENHANCE", "LK ERIE ENHANCE", "LK ERIE ENHANCE", "LK ERIE ENHANCE",
"LK ERIE ENHANCE"), PSTL_CODE = c("MI", "MI", "MI", "MI", "MI",
"MI", "MI", "MI", "MI", "MI", "MI", "MI", "MI", "MI", "MI"),
SAMPLE_TYPE = c("HYDRO", "HYDRO", "HYDRO", "HYDRO", "HYDRO",
"HYDRO", "HYDRO", "HYDRO", "HYDRO", "HYDRO", "HYDRO", "HYDRO",
"HYDRO", "HYDRO", "HYDRO"), LINE = c(1, 2, 3, 4, 5, 6, 7,
23, 22, 21, 20, 19, 18, 17, 0), CAST = c("DOWNCAST", "DOWNCAST",
"DOWNCAST", "DOWNCAST", "DOWNCAST", "DOWNCAST", "DOWNCAST",
"UPCAST", "UPCAST", "UPCAST", "UPCAST", "UPCAST", "UPCAST",
"UPCAST", NA), COL_LOC = c("Surface", "Mid-water", "Mid-water",
"Mid-water", "Mid-water", "Mid-water", "Bottom", "Surface",
"Mid-water", "Mid-water", "Mid-water", "Mid-water", "Mid-water",
"Bottom", NA), DEPTH = c(0.1, 0.5, 1, 2, 3, 4, 4.5, 0.1,
0.5, 1, 2, 3, 4, 4.5, NA), CONDUCTIVITY = c(305.4, 305.4,
305.5, 305.6, 305.7, 305.6, 305.7, 305.4, 305.4, 305.4, 305.4,
305.6, 305.6, 305.7, NA), DO = c(10.53, 10.71, 10.65, 10.47,
10.36, 10.34, 10.28, 10.72, 10.77, 10.74, 10.75, 10.61, 10.56,
10.3, NA), LIGHT_AMB = c(1172, 1064, 1059, 953.2, 1178, 1249,
1204, 1180, 1160, 1146, 1154, 1129, 1203, 1169, NA), LIGHT_UW = c(321.5,
167.2, 93.3, 11.74, 1.5, 0.3, 0.1, 195.4, 113.8, 156.5, 19.6,
2.6, 0.3, 0.07, NA), NCCA_FLAG = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), PH = c(9.1, 9.1, 9.1, 9, 9, 9, 9, 9.1, 9.1,
9.1, 9.1, 9, 9, 9, NA), SALINITY = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), STATION_DEPTH = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5), TEMPERATURE = c(24.4,
24.4, 24.4, 24.4, 24.4, 24.4, 24.4, 24.4, 24.4, 24.4, 24.4,
24.4, 24.4, 24.4, NA), NCCA_COMMENT = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), WATERBODY = c("Lake_Erie", "Lake_Erie", "Lake_Erie",
"Lake_Erie", "Lake_Erie", "Lake_Erie", "Lake_Erie", "Lake_Erie",
"Lake_Erie", "Lake_Erie", "Lake_Erie", "Lake_Erie", "Lake_Erie",
"Lake_Erie", "Lake_Erie")), row.names = c(NA, -15L), class = c("tbl_df",
"tbl", "data.frame"))
depth.2508 <- glns2508$DEPTH
do.2508 <- glns2508$DO
cond.2508 <- glns2508$CONDUCTIVITY
ph.2508 <- glns2508$PH
temp.2508 <- glns2508$TEMPERATURE
cast.2508 <- glns2508$CAST
glns2508 <- data.frame(cast.2508, depth.2508, do.2508, cond.2508, ph.2508, temp.2508)
colnames(glns2508) <- c("CAST", "DEPTH_METERS", "DO_MG.L", "COND_US", "PH", "TEMP_CELSIUS")
na.omit(glns2508)
Here is the function I am writing to try and get R to repeat this process in my larger dataframe, but with different site names. I have succesfully created a function to subset the sites themselves:
filt <- function(x) {
filter(wq, SITE_ID == x)
}
But when I try to create a function to reorganize the data frame, it returns a character vector of the column names:
org <- function(x,y) {
depth.x <- y["DEPTH"]
do.x <- y["DO"]
cond.x <- y["CONDUCTIVITY"]
ph.x <- y["PH"]
temp.x <- y["TEMPERATURE"]
cast.x <- y["CAST"]
y <- data.frame(cast.x, depth.x, do.x, cond.x, ph.x, temp.x)
colnames(y) <- c('CAST', "DEPTH_METERS", "DO_MG.L", "COND_US", "PH", "TEMP_CELSIUS")
na.omit(y)
}
glns3235 <- org(x="3235", y="glns3235")
Can anyone help me figure out how to do this? I initially tried using the $ operator for the columns, but I received an error message telling me that that operator is invalid for atomic vectors."
Thank you so much!