Hi,
I'm new to R and having trouble with the as.numeric function in RStudio. I have a variable "TOT_EMP_Ind" that is a character variable; it doesn't appear that any of its values contain commas or other symbols, and there is no missing data. When I try "as.numeric", I get a warning that NAs were introduced by coercion, and "TOT_EMP_Ind" remains a character variable. I'm not sure what I'm missing!
dput(Nat_Ind_Total2)
structure(list(YEAR = c(2012, 2012, 2012, 2012, 2012, 2012, 2013,
2013, 2013, 2013, 2013, 2013, 2014, 2014, 2014, 2014, 2014, 2014,
2015, 2015, 2015, 2015, 2015, 2015, 2016, 2016, 2016, 2016, 2016,
2016, 2017, 2017, 2017, 2017, 2017, 2017, 2018, 2018, 2018, 2018,
2018, 2018, 2019, 2019, 2019, 2019, 2019, 2019, 2020, 2020, 2020,
2020, 2020, 2020, 2021, 2021, 2021, 2021, 2021, 2021, 2022, 2022,
2022, 2022, 2022, 2022, 2023, 2023, 2023, 2023, 2023, 2023),
AREA_TITLE = c("U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.",
"U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.",
"U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.",
"U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.",
"U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.",
"U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.",
"U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.",
"U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.",
"U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.",
"U.S.", "U.S."), INDUSTRY = c("Soc", "Gov", "Ins", "Amb",
"Hosp", "Total", "Ins", "Total", "Soc", "Gov", "Amb", "Hosp",
"Ins", "Amb", "Total", "Soc", "Gov", "Hosp", "Hosp", "Ins",
"Amb", "Soc", "Gov", "Total", "Total", "Hosp", "Ins", "Amb",
"Soc", "Gov", "Hosp", "Soc", "Ins", "Amb", "Total", "Gov",
"Amb", "Hosp", "Soc", "Gov", "Ins", "Total", "Ins", "Total",
"Amb", "Hosp", "Soc", "Gov", "Amb", "Ins", "Total", "Hosp",
"Soc", "Gov", "Amb", "Hosp", "Ins", "Total", "Soc", "Gov",
"Ins", "Amb", "Hosp", "Gov", "Total", "Soc", "Ins", "Amb",
"Hosp", "Soc", "Gov", "Total"), TOT_EMP_Ind = c("10470",
"7110", "500", "6490", "3410", "38020", "570", "45800", "11940",
"7880", "8950", "3690", "400", "9980", "47880", "13080",
"8060", "4080", "4600", "480", "10510", "12040", "8190",
"48130", "51900", "5730", "720", "10900", "13520", "8920",
"5750", "14170", "940", "11330", "54760", "9460", "10610",
"5770", "14120", "11310", "1110", "56130", "0", "58950",
"11760", "5240", "14220", "11210", "11430", "2580", "58670",
"5390", "13950", "11340", "11020", "5220", "3610", "61010",
"12740", "13150", "3410", "11150", "6480", "11660", "61300",
"13220", "2300", "11950", "6400", "12870", "11880", "58550"
), H_MEAN = c(14.95, 19.38, 24.94, 18.44, 21.73, 18.02, 24,
18.1, 15.53, 19.45, 17.64, 21.66, 23.72, 17.95, 18.35, 15.78,
19.79, 21.4, 22.41, 23.24, 18.31, 17.11, 20.82, 19.3, 19.8,
23.11, 23.17, 18.94, 17.37, 20.9, 23.9, 18.16, 22.12, 19.62,
20.36, 21.63, 20.16, 23.85, 18.48, 22.54, 23.5, 20.9, 26.47,
21.34, 20.58, 24.43, 18.97, 22.96, 21.32, 26.94, 22.12, 24.77,
19.92, 23.49, 22.71, 25.9, 28.11, 22.97, 20.33, 24.05, 28.96,
22.91, 27.53, 24.81, 23.99, 20.82, 30.75, 23.93, 28.66, 22.52,
26.4, 25.3), H_MEDIAN = c(14.48, 17.81, 23.73, 16.9, 20.49,
16.64, 23.92, 16.64, 14.62, 17.96, 16.36, 20.28, 23.97, 16.56,
16.76, 14.76, 18.21, 19.86, 20.93, 22.37, 16.7, 15.96, 18.86,
17.45, 17.95, 21.73, 22, 17.24, 16.55, 18.97, 22.28, 17.27,
20.96, 17.88, 18.45, 19.59, 18.38, 22.04, 17.6, 20.76, 22.03,
19.01, 26.03, 19.41, 18.78, 22.72, 17.8, 21.27, 19.43, 26.18,
20.19, 23.15, 18.35, 21.87, 21.39, 23.68, 29.39, 22.4, 18.6,
22.79, 28.97, 21.26, 26.15, 22.97, 22.21, 19.67, 30.27, 22.18,
26.48, 21.83, 24.3, 23.17), TOT_EMP_Nat = c(38020, 38020,
38020, 38020, 38020, 38020, 45800, 45800, 45800, 45800, 45800,
45800, 47880, 47880, 47880, 47880, 47880, 47880, 48130, 48130,
48130, 48130, 48130, 48130, 51900, 51900, 51900, 51900, 51900,
51900, 54760, 54760, 54760, 54760, 54760, 54760, 56130, 56130,
56130, 56130, 56130, 56130, 58950, 58950, 58950, 58950, 58950,
58950, 58670, 58670, 58670, 58670, 58670, 58670, 61010, 61010,
61010, 61010, 61010, 61010, 61300, 61300, 61300, 61300, 61300,
61300, 58550, 58550, 58550, 58550, 58550, 58550)), row.names = c(NA,
-72L), class = "data.frame")
str(Nat_Ind_Total2)
'data.frame': 72 obs. of 7 variables:
$ YEAR : num 2012 2012 2012 2012 2012 ...
$ AREA_TITLE : chr "U.S." "U.S." "U.S." "U.S." ...
$ INDUSTRY : chr "Soc" "Gov" "Ins" "Amb" ...
$ TOT_EMP_Ind: chr "10470" "7110" "500" "6490" ...
$ H_MEAN : num 14.9 19.4 24.9 18.4 21.7 ...
$ H_MEDIAN : num 14.5 17.8 23.7 16.9 20.5 ...
$ TOT_EMP_Nat: num 38020 38020 38020 38020 38020 ...
as.numeric("TOT_EMP_Ind")
[1] NA
Warning message:
NAs introduced by coercion
str(Nat_Ind_Total2)
'data.frame': 72 obs. of 7 variables:
$ YEAR : num 2012 2012 2012 2012 2012 ...
$ AREA_TITLE : chr "U.S." "U.S." "U.S." "U.S." ...
$ INDUSTRY : chr "Soc" "Gov" "Ins" "Amb" ...
$ TOT_EMP_Ind: chr "10470" "7110" "500" "6490" ...
$ H_MEAN : num 14.9 19.4 24.9 18.4 21.7 ...
$ H_MEDIAN : num 14.5 17.8 23.7 16.9 20.5 ...
$ TOT_EMP_Nat: num 38020 38020 38020 38020 38020 ...