I have a large data set that has the following columns: Month, Day, Hour, customer names(6 customers). I need to get the second largest value to each customer for the day. In Excel this would be done using a combination of the LARGE and IF function however my dataset is massive so I am having to do this in RStudio but have no idea where to start. Below is my example starting data set:
structure(list(Month = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
), Day = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4), Hour = c(1,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 1, 2, 3, 4, 5, 6,
7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
23, 24, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24), 4654128
= c(4.6, 6.79, 4.58,
7.65, 2.36, 6.88, 4.99, 5.15, 4.91, 3.14, 3.83, 8.43, 9.37, 4.8,
4.9, 0.01, 3.77, 7.07, 2.27, 4.12, 9.41, 4.75, 5.46, 2.38, 0.87,
0.92, 2.04, 2.35, 1.64, 4.73, 7.66, 8.29, 3.97, 9.47, 0.1, 1.17,
6.08, 8.34, 6.71, 7.13, 0.86, 7.53, 9.98, 7.06, 5.54, 2.7, 8.89,
3.15, 9.1, 0.69, 4.15, 1.06, 5.73, 5.67, 5.42, 8.98, 4.4, 8.49,
2.34, 7.53, 7.03, 0.46, 2.53, 9.03, 6.89, 5.99, 8.55, 8.45, 9.77,
9.16, 5.54, 2.64, 4.89, 9.48, 6.35, 3.42, 2.86, 9.43, 8.75, 7.24,
2.06, 4.75, 7.48, 4.1, 7.33, 1.52, 7.74, 2.66, 0.33, 7.09, 5.1,
5.77, 0.42, 6.09, 2.9, 7.31), 28598546541
= c(9.77, 7.21, 2.57,
0.04, 3.86, 7.68, 2.71, 6.8, 7.88, 2.97, 7.52, 2.59, 5.66, 2.8,
0.6, 9.86, 1.34, 7.05, 6.84, 1.7, 5.65, 8.21, 5.27, 9.57, 2.03,
9.84, 5, 7.19, 3.29, 4.46, 1.03, 2.11, 9.94, 4.26, 1.26, 9.18,
4.13, 8.31, 9.3, 7.41, 3.77, 1.8, 5.31, 5.29, 8.09, 1.56, 9.09,
7.43, 1.82, 8, 7.41, 2.86, 6.9, 8.1, 6.7, 2.82, 9.69, 7.49, 7.08,
0.45, 2.46, 6.06, 8.3, 4.2, 3.68, 1.44, 7.27, 5.55, 6.87, 6.6,
8.52, 8.66, 0.56, 0.84, 9.5, 4.02, 0.98, 7.63, 2.77, 9.55, 7.23,
6.57, 9.23, 6.41, 5.86, 9.22, 3.45, 0.33, 3.26, 0.6, 9.72, 7.3,
1.28, 9.06, 7.99, 7.32), 1546541655
= c(3.12, 9.61, 5.47, 1.64,
4.29, 4.69, 7.06, 5.25, 5.92, 9.06, 9.79, 0.28, 2.24, 8.23, 5.22,
9.32, 8.29, 4.29, 8.17, 8.74, 8.63, 2.78, 3.18, 0.46, 1.49, 4.22,
8.27, 0.64, 0.92, 9.83, 2.12, 6.87, 1.31, 3.6, 0.41, 2.97, 0.49,
6.12, 0.43, 2.35, 4.84, 7.13, 2.43, 2.03, 4.29, 8.24, 3.1, 9.21,
4.43, 3.3, 0.41, 7.99, 3.43, 2.36, 9.07, 2.27, 5.66, 3.93, 6.09,
0.81, 2.03, 6.26, 2.87, 8.78, 5.57, 4.06, 8.15, 0.47, 5.2, 2.56,
7.22, 8.82, 3.01, 0.27, 6.87, 4.03, 3.48, 2.75, 6.5, 7.43, 0.53,
5.17, 3.08, 3.28, 2.25, 0.69, 0.97, 4.31, 9.31, 3.23, 9.02, 9.95,
5.24, 9.54, 3.71, 1.04), 41684561
= c(4.19, 5.63, 8.76, 6.71,
2.86, 8.13, 5.15, 0.95, 5.65, 6.23, 3.05, 7.25, 8.9, 1.68, 6.08,
5.19, 4.22, 2.45, 1.65, 8.47, 5.17, 1.73, 4.27, 0.45, 4.77, 3.69,
7.19, 8.67, 3.93, 5.35, 8.41, 6.01, 3.72, 3.24, 0.18, 9.57, 8.31,
7.99, 3.75, 0.26, 9.29, 5.54, 3.68, 0.21, 2.42, 6.79, 1.45, 0.62,
3.31, 2.17, 0.75, 6.81, 6.97, 0.27, 0.29, 7.42, 1.74, 0.65, 3.3,
9.39, 6.8, 0.71, 4.75, 7.91, 4.22, 0.73, 8.11, 2.2, 1.05, 1.73,
8.76, 6.93, 1.63, 4.37, 1.21, 3.13, 5.72, 4.2, 3.22, 8.62, 2.28,
6.31, 8.96, 6.27, 1.95, 6.01, 9.12, 2.95, 8.55, 3.99, 4.58, 4.15,
6.96, 3, 1.39, 4.96), 5165156
= c(0.35, 7.25, 2.23, 2.64, 3.07,
4.07, 3.3, 6.72, 0.5, 7.97, 6.95, 3.19, 3.07, 5.51, 4.53, 1.07,
3.43, 0.61, 5.75, 6.25, 0.14, 6.98, 1.04, 2.33, 2.01, 5.17, 2.02,
1.4, 7.6, 7.73, 3.97, 0.13, 6.3, 8.21, 7.75, 9.81, 4.89, 5.11,
3.38, 6.93, 3.28, 7.58, 8.69, 1.24, 7.6, 6.13, 5.98, 5.59, 1.59,
4.13, 5.7, 0.65, 3.16, 7, 4.33, 0.37, 7.51, 6.15, 7.08, 9.09,
7.48, 6.24, 0.74, 2.99, 4.06, 2.95, 7.37, 9.15, 8.25, 8.13, 8.41,
0.94, 4.01, 8.21, 7.39, 3.08, 2.89, 0.84, 2.29, 1, 2.52, 5.14,
1.94, 3.65, 7, 4.38, 7.86, 8.49, 2.76, 0.24, 1.34, 9.43, 2.26,
9.56, 4.71, 0.31), 654165165
= c(5.92, 4.89, 9.23, 9.54, 3.67,
2.93, 3.52, 8.85, 0.64, 1.75, 5.27, 0.91, 1.77, 0.54, 5.73, 6.86,
2.3, 2.98, 8.78, 0.51, 5.29, 5.88, 9.49, 9.84, 8.62, 5.86, 6.75,
3.71, 2.58, 9.45, 9.32, 1.41, 6.59, 9.61, 1.01, 8.48, 9.71, 2.35,
1.1, 7.58, 5.96, 5.99, 6.26, 9.11, 4.32, 7.97, 6.55, 3.83, 7.99,
3.56, 6.98, 1.29, 1.99, 9.07, 0.36, 9.72, 8.77, 0.36, 3.92, 3.1,
3.09, 8.03, 9.91, 4.7, 7.91, 4.92, 5.9, 9.32, 9.58, 8.75, 0.9,
1.21, 2.1, 4.3, 2.53, 5.68, 1.86, 3.28, 5.8, 1.81, 8.59, 8.32,
9.39, 5.15, 8.57, 3.07, 4.3, 8.29, 6.08, 0.19, 3.2, 5.51, 7.57,
5.58, 9.3, 4.89)), row.names = c(NA, -96L), class = c("tbl_df",
"tbl", "data.frame"))
I need to get it to this final form:
structure(list(Month = c(3, 3, 3, 3), Day = c(1, 2, 3, 4), 4654128
= c(9.37,
9.47, 9.16, 9.43), 28598546541
= c(9.77, 9.84, 8.66, 9.55),
1546541655
= c(9.61, 9.21, 8.82, 9.54), 41684561
= c(8.76,
9.29, 8.76, 8.96), 5165156
= c(7.25, 8.69, 9.09, 9.43),
654165165
= c(9.54, 9.61, 9.72, 9.3)), row.names = c(NA,
-4L), class = c("tbl_df", "tbl", "data.frame"))
This is the example dataset. The original data set is for a whole year and has the data at a 5 minute frequency and has thousands of customers. If I can get the logic to work for the data set above I should be able to get the bigger data set too. Thank you so much for your help!