Thank you. I work in R363, (Dada2 tutorial 1.16.) to get OTU table of Illumina Miseq sequences. Following the tutorial I got the results pasted below. Most concerning is the taxonomy Eukaryotes" note. We work on 16SrRNA sequences. What possibly went wrong?
path <- "~/EKTO"
list.files(path)
[1] "Sample_TA-2423-Rhizo2Ser205-1-EKTO" "Sample_TA-2423-Rhizo2Ser205-2-EKTO"
[3] "Sample_TA-2423-Rhizo2Ser205-3-EKTO" "Sample_TA-2423-Rhizo2Ser211-1-EKTO"
[5] "Sample_TA-2423-Rhizo2Ser211-2-EKTO" "Sample_TA-2423-Rhizo2Ser211-3-EKTO"
[7] "Sample_TA-2423-Rhizo2Ser229-1-EKTO" "Sample_TA-2423-Rhizo2Ser229-2-EKTO"
[9] "Sample_TA-2423-Rhizo2Ser229-3-EKTO" "Sample_TA-2423-Rhizo2Ser42-1-EKTO"
[11] "Sample_TA-2423-Rhizo2Ser42-2-EKTO" "Sample_TA-2423-Rhizo2Ser42-3-EKTO"
[13] "Sample_TA-2423-Rhizo2Ser72-1-EKTO" "Sample_TA-2423-Rhizo2Ser72-2-EKTO"
[15] "Sample_TA-2423-Rhizo2Ser72-3-EKTO" "Sample_TA-2423-Rhizo2Ser97-1-EKTO"
[17] "Sample_TA-2423-Rhizo2Ser97-2-EKTO" "Sample_TA-2423-Rhizo2Ser97-3-EKTO"
fnFs <- sort(list.files(path, pattern="_R1_001.fastq", full.names = TRUE))
fnRs <- sort(list.files(path, pattern="R2_001.fastq", full.names = TRUE))
sample.names <- sapply(strsplit(basename(fnFs), ""), [
, 1)
plotQualityProfile(fnFs[1:2])
Error in $<-.data.frame
(*tmp*
, "minScore", value = Inf) :
replacement has 1 row, data has 0
In addition: Warning message:
In min(anndf$minScore) : no non-missing arguments to min; returning Inf
list.files(path)
[1] "TA-2423-Rhizo2Ser205-1-EKTO_S76_L001_R1_001.fastq"
[2] "TA-2423-Rhizo2Ser205-1-EKTO_S76_L001_R2_001.fastq"
[3] "TA-2423-Rhizo2Ser205-2-EKTO_S77_L001_R1_001.fastq"
[4] "TA-2423-Rhizo2Ser205-2-EKTO_S77_L001_R2_001.fastq"
[5] "TA-2423-Rhizo2Ser205-3-EKTO_S78_L001_R1_001.fastq"
[6] "TA-2423-Rhizo2Ser205-3-EKTO_S78_L001_R2_001.fastq"
[7] "TA-2423-Rhizo2Ser211-1-EKTO_S82_L001_R1_001.fastq"
[8] "TA-2423-Rhizo2Ser211-1-EKTO_S82_L001_R2_001.fastq"
[9] "TA-2423-Rhizo2Ser211-2-EKTO_S83_L001_R1_001.fastq"
[10] "TA-2423-Rhizo2Ser211-2-EKTO_S83_L001_R2_001.fastq"
[11] "TA-2423-Rhizo2Ser211-3-EKTO_S84_L001_R1_001.fastq"
[12] "TA-2423-Rhizo2Ser211-3-EKTO_S84_L001_R2_001.fastq"
[13] "TA-2423-Rhizo2Ser229-1-EKTO_S88_L001_R1_001.fastq"
[14] "TA-2423-Rhizo2Ser229-1-EKTO_S88_L001_R2_001.fastq"
[15] "TA-2423-Rhizo2Ser229-2-EKTO_S89_L001_R1_001.fastq"
[16] "TA-2423-Rhizo2Ser229-2-EKTO_S89_L001_R2_001.fastq"
[17] "TA-2423-Rhizo2Ser229-3-EKTO_S90_L001_R1_001.fastq"
[18] "TA-2423-Rhizo2Ser229-3-EKTO_S90_L001_R2_001.fastq"
[19] "TA-2423-Rhizo2Ser42-1-EKTO_S73_L001_R1_001.fastq"
[20] "TA-2423-Rhizo2Ser42-1-EKTO_S73_L001_R2_001.fastq"
[21] "TA-2423-Rhizo2Ser42-2-EKTO_S74_L001_R1_001.fastq"
[22] "TA-2423-Rhizo2Ser42-2-EKTO_S74_L001_R2_001.fastq"
[23] "TA-2423-Rhizo2Ser42-3-EKTO_S75_L001_R1_001.fastq"
[24] "TA-2423-Rhizo2Ser42-3-EKTO_S75_L001_R2_001.fastq"
[25] "TA-2423-Rhizo2Ser72-1-EKTO_S79_L001_R1_001.fastq"
[26] "TA-2423-Rhizo2Ser72-1-EKTO_S79_L001_R2_001.fastq"
[27] "TA-2423-Rhizo2Ser72-2-EKTO_S80_L001_R1_001.fastq"
[28] "TA-2423-Rhizo2Ser72-2-EKTO_S80_L001_R2_001.fastq"
[29] "TA-2423-Rhizo2Ser72-3-EKTO_S81_L001_R1_001.fastq"
[30] "TA-2423-Rhizo2Ser72-3-EKTO_S81_L001_R2_001.fastq"
[31] "TA-2423-Rhizo2Ser97-1-EKTO_S85_L001_R1_001.fastq"
[32] "TA-2423-Rhizo2Ser97-1-EKTO_S85_L001_R2_001.fastq"
[33] "TA-2423-Rhizo2Ser97-2-EKTO_S86_L001_R1_001.fastq"
[34] "TA-2423-Rhizo2Ser97-2-EKTO_S86_L001_R2_001.fastq"
[35] "TA-2423-Rhizo2Ser97-3-EKTO_S87_L001_R1_001.fastq"
[36] "TA-2423-Rhizo2Ser97-3-EKTO_S87_L001_R2_001.fastq"
fnFs <- sort(list.files(path, pattern="_R1_001.fastq", full.names = TRUE))
fnRs <- sort(list.files(path, pattern="R2_001.fastq", full.names = TRUE))
sample.names <- sapply(strsplit(basename(fnFs), ""), [
, 1)
plotQualityProfile(fnFs[1:2])
plotQualityProfile(fnRs[1:2])
filtFs <- file.path(path, "filtered", paste0(sample.names, "_F_filt.fastq.gz"))
filtRs <- file.path(path, "filtered", paste0(sample.names, "_R_filt.fastq.gz"))
names(filtFs) <- sample.names
names(filtRs) <- sample.names
out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, truncLen=c(240,160),
Creating output directory: C:/Users/salme/Documents/EKTO/filtered
Multithreading has been DISABLED, as forking is not supported on .Platform$OS.type 'windows'
head(out)
reads.in reads.out
TA-2423-Rhizo2Ser205-1-EKTO_S76_L001_R1_001.fastq 88630 85678
TA-2423-Rhizo2Ser205-2-EKTO_S77_L001_R1_001.fastq 109849 106941
TA-2423-Rhizo2Ser205-3-EKTO_S78_L001_R1_001.fastq 94610 91885
TA-2423-Rhizo2Ser211-1-EKTO_S82_L001_R1_001.fastq 81891 79259
TA-2423-Rhizo2Ser211-2-EKTO_S83_L001_R1_001.fastq 98852 95893
TA-2423-Rhizo2Ser211-3-EKTO_S84_L001_R1_001.fastq 90015 87377
errF <- learnErrors(filtFs, multithread=TRUE)
110317440 total bases in 459656 reads from 5 samples will be used for learning the error rates.
errR <- learnErrors(filtRs, multithread=TRUE)
102606240 total bases in 641289 reads from 7 samples will be used for learning the error rates.
plotErrors(errF, nominalQ=TRUE)
Warning messages:
1: Transformation introduced infinite values in continuous y-axis
2: Transformation introduced infinite values in continuous y-axis
dadaFs <- dada(filtFs, err=errF, multithread=FALSE)
Sample 1 - 85678 reads in 79448 unique sequences.
Sample 2 - 106941 reads in 98414 unique sequences.
Sample 3 - 91885 reads in 85360 unique sequences.
Sample 4 - 79259 reads in 75700 unique sequences.
Sample 5 - 95893 reads in 91470 unique sequences.
Sample 6 - 87377 reads in 84853 unique sequences.
Sample 7 - 94256 reads in 78848 unique sequences.
Sample 8 - 82848 reads in 80306 unique sequences.
Sample 9 - 94140 reads in 88000 unique sequences.
Sample 10 - 82671 reads in 80536 unique sequences.
Sample 11 - 93457 reads in 85821 unique sequences.
Sample 12 - 95864 reads in 89124 unique sequences.
Sample 13 - 112104 reads in 93457 unique sequences.
Sample 14 - 89341 reads in 86036 unique sequences.
Sample 15 - 86509 reads in 84584 unique sequences.
Sample 16 - 97714 reads in 95629 unique sequences.
Sample 17 - 90339 reads in 86049 unique sequences.
Sample 18 - 91580 reads in 88658 unique sequences.
dadaRs <- dada(filtRs, err=errR, multithread=FALSE)
Sample 1 - 85678 reads in 71398 unique sequences.
Sample 2 - 106941 reads in 89689 unique sequences.
Sample 3 - 91885 reads in 78918 unique sequences.
Sample 4 - 79259 reads in 69791 unique sequences.
Sample 5 - 95893 reads in 84368 unique sequences.
Sample 6 - 87377 reads in 79817 unique sequences.
Sample 7 - 94256 reads in 65342 unique sequences.
Sample 8 - 82848 reads in 74098 unique sequences.
Sample 9 - 94140 reads in 78025 unique sequences.
Sample 10 - 82671 reads in 73187 unique sequences.
Sample 11 - 93457 reads in 74845 unique sequences.
Sample 12 - 95864 reads in 79513 unique sequences.
Sample 13 - 112104 reads in 72618 unique sequences.
Sample 14 - 89341 reads in 80322 unique sequences.
Sample 15 - 86509 reads in 80833 unique sequences.
Sample 16 - 97714 reads in 89734 unique sequences.
Sample 17 - 90339 reads in 77334 unique sequences.
Sample 18 - 91580 reads in 80687 unique sequences.
dadaFs[[1]]
dada-class: object describing DADA2 denoising results
50 sequence variants were inferred from 79448 input unique sequences.
Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
mergers <- mergePairs(dadaFs, filtFs, dadaRs, filtRs, verbose=TRUE)
18 paired-reads (in 3 unique pairings) successfully merged out of 70679 (in 1830 pairings) input.
3 paired-reads (in 1 unique pairings) successfully merged out of 90154 (in 3619 pairings) input.
20 paired-reads (in 2 unique pairings) successfully merged out of 76526 (in 2254 pairings) input.
28 paired-reads (in 4 unique pairings) successfully merged out of 60738 (in 1199 pairings) input.
21 paired-reads (in 4 unique pairings) successfully merged out of 78903 (in 2280 pairings) input.
7 paired-reads (in 1 unique pairings) successfully merged out of 68155 (in 1823 pairings) input.
6 paired-reads (in 1 unique pairings) successfully merged out of 70121 (in 20330 pairings) input.
3 paired-reads (in 1 unique pairings) successfully merged out of 58963 (in 1368 pairings) input.
7 paired-reads (in 2 unique pairings) successfully merged out of 73902 (in 2913 pairings) input.
17 paired-reads (in 2 unique pairings) successfully merged out of 57570 (in 1167 pairings) input.
19 paired-reads (in 3 unique pairings) successfully merged out of 73063 (in 3546 pairings) input.
23 paired-reads (in 4 unique pairings) successfully merged out of 74906 (in 2309 pairings) input.
10 paired-reads (in 2 unique pairings) successfully merged out of 90416 (in 27086 pairings) input.
30 paired-reads (in 1 unique pairings) successfully merged out of 73657 (in 2717 pairings) input.
23 paired-reads (in 3 unique pairings) successfully merged out of 62131 (in 1980 pairings) input.
23 paired-reads (in 3 unique pairings) successfully merged out of 72161 (in 2170 pairings) input.
13 paired-reads (in 2 unique pairings) successfully merged out of 69085 (in 2599 pairings) input.
5 paired-reads (in 1 unique pairings) successfully merged out of 69425 (in 2254 pairings) input.
head(mergers[[1]])
sequence
651 TGGGGTCCTAGAAAGCCTAAATTGTAACAAATTCAATCAAATTCAAGTATTGGGATCTGTGGATTAAATAATTTGAAACTGTTTTTAATTTTCCTCTACAGTGTCATTTATACTACCTTCACCACAGAGCTATGGATGCGCAAGAAGATCTGCTGTCCCAGGACATCACCCTGACCGTACTAATTCCGGGAGGACAGGAGACCACAGCCACGGTGCATGGCAGGTAAGCCAGAGTCTAAG
1046 ACAGCTTCCTGCCATTATGAACAAGATACTGGCTATCACGGTTTTTGTTGCTTTCCTTGCAACTGGGTATGTACACACTCACATAAACATGAACAAACTAACTTCATAATTAAATTATCACTTATTCAATTATTAAATGTATTATTTTGCCTTGTTTCAAAGGTCATACGCTAAGAAGCAATAATGAAACTGAATGTAGCTCCTCATGAAGTGTGAAATTAAGTATTTTGCTCTTTTTCGTTGCA
1399 TGCAGACTCCTCACCACAAGAAAGTGCCAACATGAATGCTGCCAACCACTTAGGGGCCGGCACTCTATGTGCCATTTGTGGGGACCGGGCCACAGGAAAGCACTACGGGGCCTCAAGCTGTGATGGATGCAAGGGGTTCTTCAGACGCAGTGTACGCAAAAACCACAGATCGGAAGAGCACACGTCTGAACTCCAGTCACGCTCATGAATCTCGTATGCCGTCTTCTGCTTGAAAAAAAA
abundance forward reverse nmatch nmismatch nindel prefer accept
651 11 4 4 160 0 0 2 TRUE
1046 5 7 8 155 0 0 1 TRUE
1399 2 10 17 160 0 0 1 TRUE
sequence
function (nvec)
unlist(lapply(nvec, seq_len))
<bytecode: 0x0000000043a42a68>
<environment: namespace:base>
seqtab <- makeSequenceTable(mergers)
dim(seqtab)
[1] 18 14
Inspect distribution of sequence lengths
table(nchar(getSequences(seqtab)))
240 244 245 246 247 254 255
7 2 1 1 1 1 1
seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE, ver
seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread=FALSE, verbose=TRUE)
Identified 0 bimeras out of 14 input sequences.
dim(seqtab.nochim)
[1] 18 14
sum(seqtab.nochim)/sum(seqtab)
[1] 1
getN <- function(x) sum(getUniques(x))
track <- cbind(out, sapply(dadaFs, getN), sapply(dadaRs, getN), sapply(mergers, getN), rowSums(seqtab.nochim))
colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim")
rownames(track) <- sample.names
head(track)
input filtered denoisedF denoisedR merged nonchim
TA-2423-Rhizo2Ser205-1-EKTO 88630 85678 72916 81381 18 18
TA-2423-Rhizo2Ser205-2-EKTO 109849 106941 91901 103048 3 3
TA-2423-Rhizo2Ser205-3-EKTO 94610 91885 77732 88214 20 20
TA-2423-Rhizo2Ser211-1-EKTO 81891 79259 62509 74931 28 28
TA-2423-Rhizo2Ser211-2-EKTO 98852 95893 80573 91586 21 21
TA-2423-Rhizo2Ser211-3-EKTO 90015 87377 70476 82730 7 7
taxa <- assignTaxonomy(seqtab.nochim,"~/EKTO/silva_nr_v138_train_set.fa.gz")
taxa <- addSpecies(taxa, "~/EKTO/silva_species_assignment_v138.fa.gz")
taxa.print <- taxa
rownames(taxa.print) <- NULL
head(taxa.print)
Kingdom Phylum Class Order Family Genus Species
[1,] "Eukaryota" NA NA NA NA NA NA
[2,] NA NA NA NA NA NA NA
[3,] "Eukaryota" NA NA NA NA NA NA
[4,] "Eukaryota" NA NA NA NA NA NA
[5,] "Eukaryota" NA NA NA NA NA NA
[6,] "Eukaryota" NA NA NA NA NA NA