RNHANES Package Help and dataset

Hi, I am confused about why a new data set containing only the 5 variables I want is not being created.
How do I go from nhanes.2014, which is the big data set, to a smaller one (nhanes.little in the code below)?

The "RNHANES" package reads NHANES files into RStudio

library (package = "RNHANES")

Reassigned values to data set nhanes.2014

nhanes.2014 <- nhanes_load_data(file_name = "WHQMEC_H", year = "2013-2014",
demographics = TRUE)

#Check Summary

#Renaming a smaller data set
nhanes.little <- nhanes.2014 %>%

#Selecting "RIDAGEYR, RIAGENDR,WHQ030M, WHQ500, WHQ520)
select(RIDAGEYR, RIAGENDR, WHQ030M, WHQ500, WHQ520)%>%

Changing Age = numeric, and everything else to be factor

mutate(RIDAGEYR = as.numeric(RIDAGEYR)) %>%
mutate(RIAGENDR = as.factor(RIAGENDR)) %>%

WHQ030M:How do you consider your weight (Factor)

Removing "don't know" option choice

mutate(WHQ030M = as.factor(WHQ030M)) %>%
mutate(WHQ030M = na_if(x = WHQ030M, y = 9)) %>%

WHQ500: Trying to do about weight (Factor)

Removing Not trying to do anything about your weight (4), refused (7),

Do not know (9)

mutate(WHQ500 = as.factor(WHQ500)) %>%
mutate(WHQ500 = na_if(x = WHQ500, y = 7)) %>%
mutate(WHQ500 = na_if(x = WHQ500, y = 9)) %>%

WHQ520: How often tried to lose weight

Removing Refused (7), Do not know (9)

mutate(WHQ520 = as.factor(WHQ520)) %>%
mutate(WHQ520 = na_if(x = WHQ520, y = 7)) %>%
mutate(WHQ520 = na_if(x = WHQ520, y = 9)) %>%

mutate(RIAGENDR = recode_factor(RIAGENDR,
1 = "Male",
2 = "Female")) %>%

mutate(WHQ030M = recode_factor(WHQ030M,
1 = "fat or overweight",
2 = "too thin",
3 = "about the right weight"))%>%

mutate(WHQ500 = recode_factor(WHQ500,
1 = "lose weight",
2 = "gain Weight",
3 = "stay the same weight",
4 = "not trying to do anything about your weight" )) %>%

mutate(WHQ520 = recode_factor(WHQ520,
1 = "never",
2 = "somtimes",
3 = "a lot" )) %>%
drop_na() %>%
rename(gender = RIAGENDR) %>%
rename(age = RIDAGEYR) %>%
rename(body_image = WHQ030M) %>%
rename(percieved_weight = WHQ500) %>%
rename(self_inervention = WHQ520) %>%

summary(object = nhanes.little)

In future, try to put your code in a code chunk. You can do this by putting the code within 3 backticks "```"

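I think the main problem with your code is the trailing pipe (`%>%`) after the last `rename()`, right before `summary()`: it makes `summary()` the final step of the pipeline, so the smaller data set is never assigned to `nhanes.little` on its own. Additionally, you need to put the numeric codes in backticks when using `recode_factor()` (for example `1` = "Male" must be written with backticks around the 1). Also keep in mind that NHANES is survey data: any analysis should use the sampling weights and design variables, so you should always keep those variables. To determine which weight is appropriate to use, check out the documentation here: NHANES Tutorials - Module 3 - Weighting (Selecting the correct weight in NHANES)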

I don't suggest dropping records with NAs (`drop_na()`): this isn't good for variance estimation, and it drops every record that has an NA in any of the selected variables, even when its other variables are observed.

Here's a working reprex to help get you started:

library(RNHANES)
#> Warning: package 'RNHANES' was built under R version 4.1.1
library(tidyverse)
#> Warning: package 'tibble' was built under R version 4.1.1
#> Warning: package 'readr' was built under R version 4.1.1
# Load NHANES file WHQMEC_H for the 2013-2014 cycle; with demographics = TRUE
# the DEMO_H file is fetched as well (see the download log below).
nhanes.2014 <- nhanes_load_data(
   file_name = "WHQMEC_H",
   year = "2013-2014",
   demographics = TRUE
)
#> Downloading WHQMEC_H.XPT to C:\Users\sazimmer\AppData\Local\Temp\RtmpecCJGs/WHQMEC_H.XPT
#> Downloading DEMO_H.XPT to C:\Users\sazimmer\AppData\Local\Temp\RtmpecCJGs/DEMO_H.XPT
#> Caching CSV to C:\Users\sazimmer\AppData\Local\Temp\RtmpecCJGs/DEMO_H.csv
#Check Summary

# Build a smaller data set from nhanes.2014.
#
# Special response codes (refused / don't know) are turned into NA while the
# columns are still numeric, i.e. BEFORE they become factors. Calling
# na_if(<factor>, 9) leaves an empty "9" level behind in the factor, and
# dplyr >= 1.1.0 rejects it outright because 9 cannot be cast to the factor.
nhanes.little <- nhanes.2014 %>%

   # Keep the five analysis variables plus the survey design variables
   # (PSU, strata, and every weight column starting with "WT")
   select(
      RIDAGEYR, RIAGENDR, WHQ030M, WHQ500, WHQ520,
      SDMVPSU, SDMVSTRA, starts_with("WT")
   ) %>%

   # Age = numeric, everything else becomes a factor
   mutate(RIDAGEYR = as.numeric(RIDAGEYR)) %>% #not sure why this is done, it's already a number
   mutate(RIAGENDR = as.factor(RIAGENDR)) %>%

   # WHQ030M: How do you consider your weight (Factor)
   # "Don't know" (9) becomes NA
   mutate(WHQ030M = na_if(x = WHQ030M, y = 9)) %>%
   mutate(WHQ030M = as.factor(WHQ030M)) %>%

   # WHQ500: Trying to do about weight (Factor)
   # Refused (7) and Do not know (9) become NA; code 4 ("not trying to do
   # anything about your weight") is kept and labelled below
   mutate(WHQ500 = na_if(x = WHQ500, y = 7)) %>%
   mutate(WHQ500 = na_if(x = WHQ500, y = 9)) %>%
   mutate(WHQ500 = as.factor(WHQ500)) %>%

   # WHQ520: How often tried to lose weight
   # Refused (7) and Do not know (9) become NA
   mutate(WHQ520 = na_if(x = WHQ520, y = 7)) %>%
   mutate(WHQ520 = na_if(x = WHQ520, y = 9)) %>%
   mutate(WHQ520 = as.factor(WHQ520)) %>%

   # Label the codes; numeric codes must be quoted with backticks
   mutate(RIAGENDR = recode_factor(RIAGENDR,
                                   `1` = "Male",
                                   `2` = "Female")) %>%

   mutate(WHQ030M = recode_factor(WHQ030M,
                                  `1` = "fat or overweight",
                                  `2` = "too thin",
                                  `3` = "about the right weight")) %>%

   mutate(WHQ500 = recode_factor(WHQ500,
                                 `1` = "lose weight",
                                 `2` = "gain Weight",
                                 `3` = "stay the same weight",
                                 `4` = "not trying to do anything about your weight")) %>%

   mutate(WHQ520 = recode_factor(WHQ520,
                                 `1` = "never",
                                 `2` = "somtimes",
                                 `3` = "a lot")) %>%

   # Friendlier column names; no trailing pipe after this last step
   rename(
      gender = RIAGENDR,
      age = RIDAGEYR,
      body_image = WHQ030M,
      percieved_weight = WHQ500,
      self_inervention = WHQ520
   )

summary(nhanes.little)
#>       age           gender                     body_image  
#>  Min.   : 8.00   Male  :790   fat or overweight     : 254  
#>  1st Qu.: 9.00   Female:762   too thin              : 114  
#>  Median :11.00                about the right weight:1109  
#>  Mean   :11.34                NA's                  :  75  
#>  3rd Qu.:13.00                                             
#>  Max.   :15.00                                             
#>                                     percieved_weight self_inervention
#>  lose weight                                :589     never   :585    
#>  gain Weight                                :179     somtimes:688    
#>  stay the same weight                       :396     a lot   :204    
#>  not trying to do anything about your weight:312     NA's    : 75    
#>  NA's                                       : 76                     
#>                                                                      
#>     SDMVPSU         SDMVSTRA      WTINT2YR         WTMEC2YR     
#>  Min.   :1.000   Min.   :104   Min.   :  4545   Min.   :  4533  
#>  1st Qu.:1.000   1st Qu.:107   1st Qu.: 10518   1st Qu.: 10904  
#>  Median :1.000   Median :111   Median : 14342   Median : 14645  
#>  Mean   :1.464   Mean   :111   Mean   : 20786   Mean   : 21504  
#>  3rd Qu.:2.000   3rd Qu.:115   3rd Qu.: 19927   3rd Qu.: 20648  
#>  Max.   :2.000   Max.   :118   Max.   :102078   Max.   :104556

Created on 2021-09-27 by the reprex package (v2.0.1)

Here's a much more compact example which includes a good practice of checking your derived variables:

library(RNHANES)
#> Warning: package 'RNHANES' was built under R version 4.1.1
library(tidyverse)
#> Warning: package 'tibble' was built under R version 4.1.1
#> Warning: package 'readr' was built under R version 4.1.1
# Load NHANES file WHQMEC_H for the 2013-2014 cycle; with demographics = TRUE
# the DEMO_H file is fetched as well (see the download log below).
nhanes.2014 <- nhanes_load_data(
   file_name = "WHQMEC_H",
   year = "2013-2014",
   demographics = TRUE
)
#> Downloading WHQMEC_H.XPT to C:\Users\sazimmer\AppData\Local\Temp\RtmpOYC7E8/WHQMEC_H.XPT
#> Downloading DEMO_H.XPT to C:\Users\sazimmer\AppData\Local\Temp\RtmpOYC7E8/DEMO_H.XPT
#> Caching CSV to C:\Users\sazimmer\AppData\Local\Temp\RtmpOYC7E8/DEMO_H.csv
#Check Summary

# Build the reduced data set. The raw coded columns are kept next to the
# derived factors so the recodes can be cross-checked afterwards.
nhanes.little <- nhanes.2014 %>%
   as_tibble() %>%
   # Analysis variables (age renamed on the way in) plus design variables
   select(
      age = RIDAGEYR, RIAGENDR, WHQ030M, WHQ500, WHQ520,
      SDMVPSU, SDMVSTRA, starts_with("WT")
   ) %>%
   mutate(
      # gender: label the two codes
      gender = recode_factor(RIAGENDR, `1` = "Male", `2` = "Female"),
      # body_image: code 9 -> NA, then label
      body_image = WHQ030M %>%
         na_if(y = 9) %>%
         recode_factor(
            `1` = "fat or overweight",
            `2` = "too thin",
            `3` = "about the right weight"
         ),
      # perceived_weight: codes 7 and 9 -> NA, then label
      perceived_weight = WHQ500 %>%
         na_if(y = 7) %>%
         na_if(y = 9) %>%
         recode_factor(
            `1` = "lose weight",
            `2` = "gain Weight",
            `3` = "stay the same weight",
            `4` = "not trying to do anything about your weight"
         ),
      # self_inervention: codes 7 and 9 -> NA, then label
      self_inervention = WHQ520 %>%
         na_if(y = 7) %>%
         na_if(y = 9) %>%
         recode_factor(
            `1` = "never",
            `2` = "somtimes",
            `3` = "a lot"
         )
   )


# Cross-tabulate every derived factor against the raw code it came from;
# each raw code should map to exactly one label (or to NA).
count(nhanes.little, gender, RIAGENDR)
#> # A tibble: 2 x 3
#>   gender RIAGENDR     n
#>   <fct>     <dbl> <int>
#> 1 Male          1   790
#> 2 Female        2   762
count(nhanes.little, body_image, WHQ030M)
#> # A tibble: 5 x 3
#>   body_image             WHQ030M     n
#>   <fct>                    <dbl> <int>
#> 1 fat or overweight            1   254
#> 2 too thin                     2   114
#> 3 about the right weight       3  1109
#> 4 <NA>                         9     1
#> 5 <NA>                        NA    74
count(nhanes.little, perceived_weight, WHQ500)
#> # A tibble: 7 x 3
#>   perceived_weight                            WHQ500     n
#>   <fct>                                        <dbl> <int>
#> 1 lose weight                                      1   589
#> 2 gain Weight                                      2   179
#> 3 stay the same weight                             3   396
#> 4 not trying to do anything about your weight      4   312
#> 5 <NA>                                             7     1
#> 6 <NA>                                             9     1
#> 7 <NA>                                            NA    74
count(nhanes.little, self_inervention, WHQ520)
#> # A tibble: 5 x 3
#>   self_inervention WHQ520     n
#>   <fct>             <dbl> <int>
#> 1 never                 1   585
#> 2 somtimes              2   688
#> 3 a lot                 3   204
#> 4 <NA>                  9     1
#> 5 <NA>                 NA    74
# Final data set: derived variables plus design variables, raw codes dropped
nhanes.little.2 <- select(
   nhanes.little,
   age, gender, body_image, perceived_weight, self_inervention,
   SDMVPSU, SDMVSTRA, starts_with("WT")
)

summary(nhanes.little.2)
#>       age           gender                     body_image  
#>  Min.   : 8.00   Male  :790   fat or overweight     : 254  
#>  1st Qu.: 9.00   Female:762   too thin              : 114  
#>  Median :11.00                about the right weight:1109  
#>  Mean   :11.34                NA's                  :  75  
#>  3rd Qu.:13.00                                             
#>  Max.   :15.00                                             
#>                                     perceived_weight self_inervention
#>  lose weight                                :589     never   :585    
#>  gain Weight                                :179     somtimes:688    
#>  stay the same weight                       :396     a lot   :204    
#>  not trying to do anything about your weight:312     NA's    : 75    
#>  NA's                                       : 76                     
#>                                                                      
#>     SDMVPSU         SDMVSTRA      WTINT2YR         WTMEC2YR     
#>  Min.   :1.000   Min.   :104   Min.   :  4545   Min.   :  4533  
#>  1st Qu.:1.000   1st Qu.:107   1st Qu.: 10518   1st Qu.: 10904  
#>  Median :1.000   Median :111   Median : 14342   Median : 14645  
#>  Mean   :1.464   Mean   :111   Mean   : 20786   Mean   : 21504  
#>  3rd Qu.:2.000   3rd Qu.:115   3rd Qu.: 19927   3rd Qu.: 20648  
#>  Max.   :2.000   Max.   :118   Max.   :102078   Max.   :104556

Created on 2021-09-27 by the reprex package (v2.0.1)

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.