Hi @Lilip,
Welcome to the RStudio Community Forum.
I noticed it's been 6 days since you posted your question. If you haven't solved it yet, then this code may help:
suppressPackageStartupMessages(library(tidyverse))
# Example input: the OP's data, with the "pop3" counts made more extreme so
# the effect is easier to see. One row per SNP; each population contributes
# an allele count column and an allele number column.
main <- data.frame(
  SNP                = c("SNP_A", "SNP_B", "SNP_C"),
  pop1_allele_count  = c(1, 3, 5),
  pop1_allele_number = rep(100, 3),
  pop2_allele_count  = c(5, 7, 18),
  pop2_allele_number = rep(100, 3),
  pop3_allele_count  = c(2, 3, 14),
  pop3_allele_number = rep(100, 3)
)
print(main)
#> SNP pop1_allele_count pop1_allele_number pop2_allele_count
#> 1 SNP_A 1 100 5
#> 2 SNP_B 3 100 7
#> 3 SNP_C 5 100 18
#> pop2_allele_number pop3_allele_count pop3_allele_number
#> 1 100 2 100
#> 2 100 3 100
#> 3 100 14 100
# Step 1: stack every "pop*" column into name/value pairs, then split names
# such as "pop1_allele_count" into pop / allele / type (separate() splits on
# non-alphanumeric characters by default, i.e. the underscores here).
tmp.df <- main %>%
  pivot_longer(cols = contains("pop")) %>%
  separate(col = name, into = c("pop", "allele", "type"))

# Step 2: spread "type" back out so each SNP x pop row carries both `count`
# and `number`, then derive `absent` as number minus count.
long.df <- tmp.df %>%
  pivot_wider(id_cols = c("SNP", "pop"), names_from = "type") %>%
  mutate(absent = number - count)

long.df
#> # A tibble: 9 x 5
#> SNP pop count number absent
#> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 SNP_A pop1 1 100 99
#> 2 SNP_A pop2 5 100 95
#> 3 SNP_A pop3 2 100 98
#> 4 SNP_B pop1 3 100 97
#> 5 SNP_B pop2 7 100 93
#> 6 SNP_B pop3 3 100 97
#> 7 SNP_C pop1 5 100 95
#> 8 SNP_C pop2 18 100 82
#> 9 SNP_C pop3 14 100 86
# Manual chi-squared statistic per SNP. The expected count for each population
# is the per-SNP mean of the observed counts (equal expected counts).
long.df %>%
  group_by(SNP) %>%
  mutate(expected = mean(count)) %>%
  # Yates's continuity correction, switched on when the expected count is < 5.
  # NOTE(review): Yates's correction is conventionally used for 1-df (2 x 2)
  # tables; chisq.test() below does not apply it here, so the two results
  # differ for SNP_A and SNP_B.
  mutate(correction = ifelse(expected < 5, 0.5, 0)) %>%
  # The correction must shrink the ABSOLUTE deviation |O - E|. Subtracting it
  # from the signed deviation inflates the statistic whenever count < expected.
  # It is capped at |O - E| (as stats::chisq.test does) so it can never push
  # the deviation past zero.
  mutate(
    deviation = abs(count - expected),
    chisq = (deviation - pmin(correction, deviation))^2 / expected
  ) %>%
  select(-deviation) %>%
  print(.) %>%
  summarise(sum_chisq = sum(chisq),
            df = length(count) - 1)
#> # A tibble: 9 x 8
#> # Groups: SNP [3]
#> SNP pop count number absent expected correction chisq
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 SNP_A pop1 1 100 99 2.67 0.5 0.510
#> 2 SNP_A pop2 5 100 95 2.67 0.5 1.26
#> 3 SNP_A pop3 2 100 98 2.67 0.5 0.0104
#> 4 SNP_B pop1 3 100 97 4.33 0.5 0.160
#> 5 SNP_B pop2 7 100 93 4.33 0.5 1.08
#> 6 SNP_B pop3 3 100 97 4.33 0.5 0.160
#> 7 SNP_C pop1 5 100 95 12.3 0 4.36
#> 8 SNP_C pop2 18 100 82 12.3 0 2.60
#> 9 SNP_C pop3 14 100 86 12.3 0 0.225
#> # A tibble: 3 x 3
#> SNP sum_chisq df
#> <chr> <dbl> <dbl>
#> 1 SNP_A 1.78 2
#> 2 SNP_B 1.40 2
#> 3 SNP_C 7.19 2
# Same comparison using stats::chisq.test() (goodness of fit against equal
# probabilities). The test is run ONCE per SNP and kept in a list column;
# every reported field is then read from that single result. Calling
# chisq.test() separately for each field repeats the work and emits each
# "approximation may be incorrect" warning four times.
long.df %>%
  group_by(SNP) %>%
  summarise(test = list(chisq.test(count)),
            stat = test[[1]]$statistic,
            df = test[[1]]$parameter,
            pval = test[[1]]$p.value,
            method = test[[1]]$method) %>%
  select(-test)
#> Warning in chisq.test(count): Chi-squared approximation may be incorrect
#> Warning in chisq.test(count): Chi-squared approximation may be incorrect
#> # A tibble: 3 x 5
#> SNP stat df pval method
#> <chr> <dbl> <dbl> <dbl> <chr>
#> 1 SNP_A 3.25 2 0.197 Chi-squared test for given probabilities
#> 2 SNP_B 2.46 2 0.292 Chi-squared test for given probabilities
#> 3 SNP_C 7.19 2 0.0275 Chi-squared test for given probabilities
Created on 2022-02-10 by the reprex package (v2.0.1)
Hope this helps.