Removing single cells containing NA in a model?

Hey, I'm very new to R and I have come across an error which I understand (variable lengths differ - I assume it is due to the many NAs), but don't know how to fix.
I want to create a model so that I can then run an ANOVA. My data unfortunately contains lots of NAs (there is not a single row without one), and all the approaches I have found remove whole rows from the data, which I don't want. Is there any way to remove just single cells? I have tried na.rm=TRUE, but it doesn't work; I'm not sure if that is because the function isn't compatible with it.

# Read the recovery data from CSV and preview the first rows.
datarec<-read.csv(file = "recovery.csv")
head(datarec)

# Convert each of these columns to a factor, one column at a time.
datarec$ID<-as.factor(datarec$ID)
# NOTE(review): the column is spelled `Maintanance` here, but the model
# formula below refers to `Maintenance` -- confirm which spelling the data uses.
datarec$Maintanance <- as.factor(datarec$Maintanance)
datarec$blinking<-as.factor(datarec$blinking)
datarec$muzzle<-as.factor(datarec$muzzle)
datarec$limb<-as.factor(datarec$limb)
datarec$sternal<-as.factor(datarec$sternal)
datarec$standing<-as.factor(datarec$standing)

# Fit a linear model with `Maintenance` as response and `blinking` as predictor.
# NOTE(review): `Maintenance` does not match the `Maintanance` column converted
# above. Also, lm() handles missing values through its `na.action` argument
# rather than `na.rm` -- see ?lm.
rec.m<-lm(Maintenance~blinking, data = datarec)

Hi @alantini ,

Could you do the following to help folks here reproduce the errors you're seeing?

  1. Run dput(datarec).
  2. Copy the output it produces in the console.
  3. Post the output here, between a pair of triple backticks, like this:
```
paste here
```

(This creates a code block that allows folks to copy code easily.)

Sure!

structure(list(ID = structure(c(29L, 10L, 17L, 10L, 29L, 22L, 
26L, 27L, 11L, 19L, 9L, 19L, 4L, 9L, 23L, 11L, 1L, 18L, 25L, 
3L, 6L, 7L, 14L, 15L, 18L, 24L, 24L, 24L, 24L, 28L, 4L, 2L, 2L, 
5L, 6L, 8L, 12L, 13L, 16L, 20L, 21L), levels = c("Bac Ha", "Ben", 
"Binh Yen", "Dua", "Freddie", "Hoa Lan", "Hoa Tra", "Hung", "Khe", 
"La", "Lam", "Lim", "Long", "May", "Misa", "Mui", "Nhai", "Nhi Nho", 
"Oi", "Ot", "San ", "Sang", "Thai Giang", "Thai Van", "Thia La", 
"Thom", "Thu", "Trang", "Xoai"), class = "factor"), Sex = c("female", 
"female", "female", "female", "female", "female", "female", "female", 
"male", "male", "male", "male", "male", "male", "male", "male", 
"female", "female", "female", "female", "female", "female", "female", 
"female", "female", "female", "female", "female", "female", "female", 
"male", "male", "male", "male", "male", "male", "male", "male", 
"male", "male", "male"), Maintanance = structure(c(2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L), levels = c("MeK", "P_CRI"), class = "factor"), 
    blinking = structure(c(6L, NA, NA, 2L, 2L, 2L, 1L, 2L, 2L, 
    1L, NA, NA, NA, 1L, 3L, 2L, NA, 4L, NA, 5L, 3L, NA, 7L, 5L, 
    NA, 4L, NA, 4L, 3L, NA, 2L, 2L, 5L, 3L, 6L, 3L, NA, 4L, 4L, 
    4L, 3L), levels = c("0", "5", "10", "15", "20", "25", "45"
    ), class = "factor"), muzzle = structure(c(NA, NA, 1L, 2L, 
    2L, NA, 2L, NA, 4L, 1L, NA, 3L, NA, 1L, 3L, 1L, NA, 5L, NA, 
    5L, 4L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA), levels = c("5", "10", "15", 
    "20", "25"), class = "factor"), limb = structure(c(NA, NA, 
    5L, 2L, 5L, 5L, 2L, 3L, 5L, 2L, NA, 3L, NA, 2L, 4L, 1L, NA, 
    4L, NA, NA, NA, NA, NA, NA, NA, NA, NA, 6L, NA, NA, 2L, 2L, 
    7L, 3L, NA, NA, NA, NA, 5L, 3L, NA), levels = c("5", "10", 
    "15", "20", "30", "40", "45"), class = "factor"), sternal = structure(c(NA, 
    NA, 3L, NA, NA, NA, 1L, NA, NA, NA, NA, 2L, NA, 5L, NA, 4L, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 6L, NA, NA, 
    NA, 7L, 2L, 7L, NA, NA, NA, 3L, 8L, NA), levels = c("20", 
    "25", "35", "45", "50", "75", "85", "95"), class = "factor"), 
    standing = structure(c(NA, NA, 2L, NA, NA, NA, 1L, NA, NA, 
    NA, NA, NA, NA, 2L, NA, 2L, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    3L, NA), levels = c("30", "55", "110"), class = "factor")), row.names = c(NA, 
-41L), class = "data.frame")
1 Like

I get no errors when I run your code:

original data, saved as `datarec`
structure(list(ID = structure(c(29L, 10L, 17L, 10L, 29L, 22L, 
26L, 27L, 11L, 19L, 9L, 19L, 4L, 9L, 23L, 11L, 1L, 18L, 25L, 
3L, 6L, 7L, 14L, 15L, 18L, 24L, 24L, 24L, 24L, 28L, 4L, 2L, 2L, 
5L, 6L, 8L, 12L, 13L, 16L, 20L, 21L), levels = c("Bac Ha", "Ben", 
"Binh Yen", "Dua", "Freddie", "Hoa Lan", "Hoa Tra", "Hung", "Khe", 
"La", "Lam", "Lim", "Long", "May", "Misa", "Mui", "Nhai", "Nhi Nho", 
"Oi", "Ot", "San ", "Sang", "Thai Giang", "Thai Van", "Thia La", 
"Thom", "Thu", "Trang", "Xoai"), class = "factor"), Sex = c("female", 
"female", "female", "female", "female", "female", "female", "female", 
"male", "male", "male", "male", "male", "male", "male", "male", 
"female", "female", "female", "female", "female", "female", "female", 
"female", "female", "female", "female", "female", "female", "female", 
"male", "male", "male", "male", "male", "male", "male", "male", 
"male", "male", "male"), Maintenance = structure(c(2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L), levels = c("MeK", "P_CRI"), class = "factor"), 
    blinking = structure(c(6L, NA, NA, 2L, 2L, 2L, 1L, 2L, 2L, 
    1L, NA, NA, NA, 1L, 3L, 2L, NA, 4L, NA, 5L, 3L, NA, 7L, 5L, 
    NA, 4L, NA, 4L, 3L, NA, 2L, 2L, 5L, 3L, 6L, 3L, NA, 4L, 4L, 
    4L, 3L), levels = c("0", "5", "10", "15", "20", "25", "45"
    ), class = "factor"), muzzle = structure(c(NA, NA, 1L, 2L, 
    2L, NA, 2L, NA, 4L, 1L, NA, 3L, NA, 1L, 3L, 1L, NA, 5L, NA, 
    5L, 4L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA), levels = c("5", "10", "15", 
    "20", "25"), class = "factor"), limb = structure(c(NA, NA, 
    5L, 2L, 5L, 5L, 2L, 3L, 5L, 2L, NA, 3L, NA, 2L, 4L, 1L, NA, 
    4L, NA, NA, NA, NA, NA, NA, NA, NA, NA, 6L, NA, NA, 2L, 2L, 
    7L, 3L, NA, NA, NA, NA, 5L, 3L, NA), levels = c("5", "10", 
    "15", "20", "30", "40", "45"), class = "factor"), sternal = structure(c(NA, 
    NA, 3L, NA, NA, NA, 1L, NA, NA, NA, NA, 2L, NA, 5L, NA, 4L, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 6L, NA, NA, 
    NA, 7L, 2L, 7L, NA, NA, NA, 3L, 8L, NA), levels = c("20", 
    "25", "35", "45", "50", "75", "85", "95"), class = "factor"), 
    standing = structure(c(NA, NA, 2L, NA, NA, NA, 1L, NA, NA, 
    NA, NA, NA, NA, 2L, NA, 2L, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    3L, NA), levels = c("30", "55", "110"), class = "factor")), row.names = c(NA, 
-41L), class = "data.frame") -> datarec
# Same model as in the question, fitted with the `Maintenance` spelling used in
# this copy of the data; the response is a factor, hence the warnings below.
lm(Maintenance~blinking, data = datarec)
#> Warning in model.response(mf, "numeric"): using type = "numeric" with a factor
#> response will be ignored
#> Warning in Ops.factor(y, z$residuals): '-' not meaningful for factors
#> 
#> Call:
#> lm(formula = Maintenance ~ blinking, data = datarec)
#> 
#> Coefficients:
#> (Intercept)    blinking5   blinking10   blinking15   blinking20   blinking25  
#>      2.0000      -0.2500      -0.8333      -1.0000      -1.0000      -0.5000  
#>  blinking45  
#>     -1.0000

Created on 2024-07-18 with reprex v2.0.2

I can't seem to recreate the issue, but I notice there are two spellings of Maintenance/Maintanance in your code.

that is a little embarrassing! thanks though!

Sorry — I should have mentioned that I made exactly the correction that @keithn pointed out when I posted this.

P.S.: Out of curiosity, is there a reason why you convert all the columns to factors?

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.