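chisq.test() treats the empty strings in Smoking.Status as a third category (an extra factor level alongside "smoking" and "non-smoking"), which is why the original test has df = 2 instead of df = 1. You can see this by running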
str(Test1)
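on the Test1 object in my code; the observed table has an unnamed row that holds the counts for the empty strings. I replaced the empty strings with the word "blank" to show that the test result is unchanged when the empty category is given an explicit name. I also compared filtering out the rows with empty strings against replacing the empty strings with NA, to show that those two approaches give the same result as each other.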
# Example data: a 47-row data frame (this looks like dput() output -- confirm
# against the original data source if it matters).
#   Smoking.Status: character; "smoking", "non-smoking", or "" (empty string)
#   Post.score:     numeric score, dichotomised further down with cut()
data_1 <- structure(list(Smoking.Status = c("smoking", "smoking", "smoking",
"smoking", "smoking", "non-smoking", "smoking", "non-smoking",
"non-smoking", "non-smoking", "smoking", "non-smoking", "non-smoking",
"smoking", "non-smoking", "smoking", "smoking", "non-smoking",
"non-smoking", "", "", "", "", "", "", "", "non-smoking", "",
"", "non-smoking", "smoking", "non-smoking", "non-smoking", "smoking",
"non-smoking", "non-smoking", "non-smoking", "non-smoking", "non-smoking",
"", "non-smoking", "smoking", "non-smoking", "non-smoking", "smoking",
"non-smoking", "smoking"),
Post.score = c(1.309408341, 7.213930348,
25.26690391, 12.92719168, 8.702064897, 5.556698909, 16.09399246,
8.097784568, 4.505119454, 1.120709783, 1.708011387, 5.040871935,
0.937744204, 6.898584906, 16.31768953, 5.823792932, 3.003754693,
1.416005149, 44.515357, 4.358683314, 5.233572398, 0.376175549,
38.43137255, 22.97383535, 1.367088608, 7.234251969, 8.444902163,
5.696202532, 6.324262169, 3.12922542, 8.610271903, 53.125, 4.962950198,
7.529843893, 2.871287129, 3.155728333, 15.67839196, 3.181336161,
3.718393654, 3.9408867, 29.10839161, 21.28337983, 7.73073889,
12.6340882, 18.53658537, 17.49837978, 15.8557047)),
row.names = c(NA, 47L), class = "data.frame")
# Original test
# Dichotomise Post.score into a two-level factor. cut() uses right-closed
# intervals by default, so (0, 5] -> "none" and (5, 100] -> "disease"; a score
# outside (0, 100] would become NA. The argument is spelled out as `breaks`
# rather than relying on partial matching of `br`.
data_1$disease <- cut(
  data_1$Post.score,
  breaks = c(0, 5, 100),
  labels = c("none", "disease")
)
# Smoking.Status still contains "" at this point, and chisq.test() tabulates it
# as a third category: the table below is 3 x 2, hence df = 2. The output
# header shows no Yates' continuity correction despite correct = TRUE.
Test1 <- chisq.test(data_1$Smoking.Status, data_1$disease, correct = TRUE)
#> Warning in chisq.test(data_1$Smoking.Status, data_1$disease, correct = TRUE):
#> Chi-squared approximation may be incorrect
Test1
#>
#> Pearson's Chi-squared test
#>
#> data: data_1$Smoking.Status and data_1$disease
#> X-squared = 2.5837, df = 2, p-value = 0.2748
# The first (unnamed) row of the observed table holds the "" counts.
Test1$observed
#> data_1$disease
#> data_1$Smoking.Status none disease
#> 4 6
#> non-smoking 10 12
#> smoking 3 12
# New column in which every "" is replaced by the word "blank"; all other
# values are carried over unchanged.
data_1$Smoking_3level <- replace(
  data_1$Smoking.Status,
  data_1$Smoking.Status == "",
  "blank"
)
Test2 <- chisq.test(
  data_1$Smoking_3level,
  data_1$disease,
  correct = TRUE
)
#> Warning in chisq.test(data_1$Smoking_3level, data_1$disease, correct = TRUE):
#> Chi-squared approximation may be incorrect
# Same statistic, df and p-value as Test1: naming the empty category does not
# change the test.
Test2
#>
#> Pearson's Chi-squared test
#>
#> data: data_1$Smoking_3level and data_1$disease
#> X-squared = 2.5837, df = 2, p-value = 0.2748
Test2$observed
#> data_1$disease
#> data_1$Smoking_3level none disease
#> blank 4 6
#> non-smoking 10 12
#> smoking 3 12
# Keep only the rows whose Smoking.Status is not the empty string.
data_1_filtered <- data_1[data_1$Smoking.Status != "", , drop = FALSE]
# 37 of the original 47 rows remain.
nrow(data_1_filtered)
#> [1] 37
Test3 <- chisq.test(
  data_1_filtered$Smoking.Status,
  data_1_filtered$disease,
  correct = TRUE
)
# With the empty category gone the table is 2 x 2, so the output reports
# Yates' continuity correction and df = 1.
Test3
#>
#> Pearson's Chi-squared test with Yates' continuity correction
#>
#> data: data_1_filtered$Smoking.Status and data_1_filtered$disease
#> X-squared = 1.5418, df = 1, p-value = 0.2144
Test3$observed
#> data_1_filtered$disease
#> data_1_filtered$Smoking.Status none disease
#> non-smoking 10 12
#> smoking 3 12
# Recode "" as a missing value instead of dropping the rows.
data_1$SmokingNA <- replace(
  data_1$Smoking.Status,
  data_1$Smoking.Status == "",
  NA_character_
)
Test4 <- chisq.test(
  data_1$SmokingNA,
  data_1$disease,
  correct = TRUE
)
# Same result as Test3: the NA entries do not appear in the observed table.
Test4
#>
#> Pearson's Chi-squared test with Yates' continuity correction
#>
#> data: data_1$SmokingNA and data_1$disease
#> X-squared = 1.5418, df = 1, p-value = 0.2144
Test4$observed
#> data_1$disease
#> data_1$SmokingNA none disease
#> non-smoking 10 12
#> smoking 3 12
Created on 2021-11-30 by the reprex package (v2.0.1)