I am trying to interpolate and extrapolate certain economic data using existing data

This is my code, but when I run it I get the following error:
Error in data.frame(State = c("Alabama", "California", "Texas", "Florida", :
arguments imply differing number of rows: 50, 51

I do not know how to fix this error

Define the population density data as a data frame (from the provided table)

# Population density per state for 2010 and 2020, one row per state.
# All three vectors must have the same length (50), otherwise data.frame()
# stops with "arguments imply differing number of rows".
# Fix: Pop_Density_2020 previously held 51 values because 57.1 was listed twice
# in a row (right after Arkansas' 57.9). One copy has been removed so the
# remaining values line up with the states again.
# NOTE(review): confirm against the source table that 57.1 belongs to Iowa and
# 69.8 to Vermont.
pop_density_data <- data.frame(
  State = c("Alabama", "California", "Texas", "Florida", "New York", "Ohio", "Wisconsin", "Idaho",
            "Georgia", "Connecticut", "Pennsylvania", "Colorado", "North Carolina", "Utah", "Massachusetts",
            "Arizona", "Louisiana", "Illinois", "Minnesota", "Kansas", "Hawaii", "Indiana", "Michigan",
            "Alaska", "Virginia", "New Jersey", "Kentucky", "Arkansas", "Iowa", "Vermont", "Tennessee",
            "Nebraska", "Maryland", "Montana", "Mississippi", "Maine", "South Carolina", "Wyoming",
            "Oklahoma", "Missouri", "Nevada", "Delaware", "Oregon", "Washington", "Rhode Island",
            "West Virginia", "North Dakota", "New Hampshire", "South Dakota", "New Mexico"),
  Pop_Density_2010 = c(94.4, 239.1, 96.3, 350.6, 411.2, 282.3, 105, 19, 168.4, 738.1, 283.9, 48.5, 196.1,
                       33.6, 839.4, 56.3, 104.9, 231.1, 66.6, 34.9, 211.8, 181, 174.8, 1.2, 202.6, 1195.5,
                       109.9, 56, 54.5, 67.9, 153.9, 23.8, 594.8, 6.8, 63.2, 43.1, 153.9, 5.8, 54.7, 87.1,
                       24.6, 460.8, 39.9, 101.2, 1018.1, 77.1, 9.7, 147, 10.7, 17),
  Pop_Density_2020 = c(99.2, 253.7, 111.6, 401.4, 428.7, 288.8, 108.8, 22.3, 185.6, 744.7, 290.6, 55.7, 214.7,
                       39.7, 901.2, 62.9, 107.8, 230.8, 71.7, 35.9, 226.6, 189.4, 178, 1.3, 218.6, 1263, 114.1,
                       57.9, 57.1, 69.8, 167.6, 25.5, 636.1, 7.4, 63.1, 44.2, 170.2, 5.9, 57.7, 89.5,
                       28.3, 508, 44.1, 115.9, 1061.4, 74.6, 11.3, 153.8, 11.7, 17.5)
)

Define the years for interpolation

# Every calendar year from 2010 through 2023, as an integer vector.
years <- seq.int(2010L, 2023L)

Check the total number of rows needed (50 states × 14 years = 700 rows)

# Dimensions of the long-format result: one row per (state, year) pair.
num_states <- dim(pop_density_data)[1L]
num_years <- length(years)

Create an empty data frame to store interpolated values (ensure correct repetition)

# Long-format skeleton, ordered state by state: each state name is repeated
# once per year, and the full run of years is cycled once per state.
pop_density_df <- data.frame(
  State = rep(pop_density_data$State, times = rep.int(num_years, num_states)),
  Year = rep.int(years, num_states)
)

Perform interpolation with extrapolation for each state

# For each state, take the straight line through its 2010 and 2020 densities
# and evaluate it at every entry of `years`. Years between 2010 and 2020 are
# linearly interpolated; years after 2020 continue along the same line.
# Why not approx(..., rule = 2)? Outside the data range that setting repeats
# the value at the nearest endpoint, so 2021-2023 would all equal the 2020
# figure rather than being extrapolated.
# Returns a list with one numeric vector of length(years) per state, in the
# same order as the rows of pop_density_data.
interpolated_values_list <- lapply(seq_len(num_states), function(i) {
  density_2010 <- pop_density_data$Pop_Density_2010[i]
  density_2020 <- pop_density_data$Pop_Density_2020[i]
  annual_change <- (density_2020 - density_2010) / (2020 - 2010)
  density_2010 + annual_change * (years - 2010)
})

Combine interpolated values into the data frame

# Flatten the per-state vectors into one column. The list is in state order
# with years running inside each element, matching the State/Year row layout.
pop_density_df[["Pop_Density"]] <- unlist(
  interpolated_values_list,
  use.names = FALSE
)

View the final interpolated data frame

# Print the assembled State / Year / Pop_Density table to the console.
print(pop_density_df)

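In the code below, I reproduce your error, then pull the individual vectors out of the code that builds the data frame. Pop_Density_2020 has 51 values, while the other two vectors have 50. Inspect your data for Pop_Density_2020 and find the extra value, or find the values missing from the other two vectors. The most likely culprit is the 57.1 that appears twice in a row (positions 29 and 30 of Pop_Density_2020): with one copy removed, every later value lines up with a plausible 2010 counterpart.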

# Reproduce the reported error on purpose: data.frame() needs all columns to
# have the same length, but here State and Pop_Density_2010 have 50 values
# while Pop_Density_2020 has 51, so the call stops with the error shown below.
pop_density_data <- data.frame(
  State = c("Alabama", "California", "Texas", "Florida", "New York", "Ohio", "Wisconsin", "Idaho",
            "Georgia", "Connecticut", "Pennsylvania", "Colorado", "North Carolina", "Utah", "Massachusetts",
            "Arizona", "Louisiana", "Illinois", "Minnesota", "Kansas", "Hawaii", "Indiana", "Michigan",
            "Alaska", "Virginia", "New Jersey", "Kentucky", "Arkansas", "Iowa", "Vermont", "Tennessee",
            "Nebraska", "Maryland", "Montana", "Mississippi", "Maine", "South Carolina", "Wyoming",
            "Oklahoma", "Missouri", "Nevada", "Delaware", "Oregon", "Washington", "Rhode Island",
            "West Virginia", "North Dakota", "New Hampshire", "South Dakota", "New Mexico"),
  Pop_Density_2010 = c(94.4, 239.1, 96.3, 350.6, 411.2, 282.3, 105, 19, 168.4, 738.1, 283.9, 48.5, 196.1,
                       33.6, 839.4, 56.3, 104.9, 231.1, 66.6, 34.9, 211.8, 181, 174.8, 1.2, 202.6, 1195.5,
                       109.9, 56, 54.5, 67.9, 153.9, 23.8, 594.8, 6.8, 63.2, 43.1, 153.9, 5.8, 54.7, 87.1,
                       24.6, 460.8, 39.9, 101.2, 1018.1, 77.1, 9.7, 147, 10.7, 17),
  Pop_Density_2020 = c(99.2, 253.7, 111.6, 401.4, 428.7, 288.8, 108.8, 22.3, 185.6, 744.7, 290.6, 55.7, 214.7,
                       39.7, 901.2, 62.9, 107.8, 230.8, 71.7, 35.9, 226.6, 189.4, 178, 1.3, 218.6, 1263, 114.1,
                       57.9, 57.1, 57.1, 69.8, 167.6, 25.5, 636.1, 7.4, 63.1, 44.2, 170.2, 5.9, 57.7, 89.5,
                       28.3, 508, 44.1, 115.9, 1061.4, 74.6, 11.3, 153.8, 11.7, 17.5)
)
#> Error in data.frame(State = c("Alabama", "California", "Texas", "Florida", : arguments imply differing number of rows: 50, 51

# The three arguments of the failing data.frame() call, assigned as
# stand-alone vectors (data unchanged) so their lengths can be compared.
State <- c(
  "Alabama", "California", "Texas", "Florida", "New York",
  "Ohio", "Wisconsin", "Idaho", "Georgia", "Connecticut",
  "Pennsylvania", "Colorado", "North Carolina", "Utah", "Massachusetts",
  "Arizona", "Louisiana", "Illinois", "Minnesota", "Kansas",
  "Hawaii", "Indiana", "Michigan", "Alaska", "Virginia",
  "New Jersey", "Kentucky", "Arkansas", "Iowa", "Vermont",
  "Tennessee", "Nebraska", "Maryland", "Montana", "Mississippi",
  "Maine", "South Carolina", "Wyoming", "Oklahoma", "Missouri",
  "Nevada", "Delaware", "Oregon", "Washington", "Rhode Island",
  "West Virginia", "North Dakota", "New Hampshire", "South Dakota", "New Mexico"
)

Pop_Density_2010 <- c(
  94.4, 239.1, 96.3, 350.6, 411.2, 282.3, 105, 19, 168.4, 738.1,
  283.9, 48.5, 196.1, 33.6, 839.4, 56.3, 104.9, 231.1, 66.6, 34.9,
  211.8, 181, 174.8, 1.2, 202.6, 1195.5, 109.9, 56, 54.5, 67.9,
  153.9, 23.8, 594.8, 6.8, 63.2, 43.1, 153.9, 5.8, 54.7, 87.1,
  24.6, 460.8, 39.9, 101.2, 1018.1, 77.1, 9.7, 147, 10.7, 17
)

# Deliberately left as posted in the question: this vector is one value too long.
Pop_Density_2020 <- c(
  99.2, 253.7, 111.6, 401.4, 428.7, 288.8, 108.8, 22.3, 185.6, 744.7,
  290.6, 55.7, 214.7, 39.7, 901.2, 62.9, 107.8, 230.8, 71.7, 35.9,
  226.6, 189.4, 178, 1.3, 218.6, 1263, 114.1, 57.9, 57.1, 57.1,
  69.8, 167.6, 25.5, 636.1, 7.4, 63.1, 44.2, 170.2, 5.9, 57.7,
  89.5, 28.3, 508, 44.1, 115.9, 1061.4, 74.6, 11.3, 153.8, 11.7,
  17.5
)
length(State)
#> [1] 50
length(Pop_Density_2010)
#> [1] 50
length(Pop_Density_2020)
#> [1] 51

Created on 2024-10-24 with reprex v2.1.1