Hi @Ringyao. You can use the subsetByOverlaps
function from the GenomicRanges
package. First, construct GRanges
objects from the tables x and y, then subset x by y with the subsetByOverlaps
function. Note that every row of your table y has a start greater than its end, so the code below uses the smaller of the two values as the range start and the larger as the range end. The following code does not return the result you want because none of the rows in the table x you provided overlap any range in table y. Please check the sample data in the question carefully.
library(tidyverse)
library(GenomicRanges)
# Query positions: one chromosome name and one single-base coordinate per row.
# stringsAsFactors = FALSE keeps seqnames as a character column on any R version.
x <- data.frame(
  seqnames = c(
    "Chr1", "Chr1", "Chr10", "Chr11", "Chr2",
    "Chr2", "Chr3", "Chr3", "Chr7", "Chr9"
  ),
  Region = c(
    18898802, 9452880, 20320957, 17429568, 30343082,
    1693851, 5283755, 9219325, 8580489, 29139452
  ),
  stringsAsFactors = FALSE
)
x
#> seqnames Region
#> 1 Chr1 18898802
#> 2 Chr1 9452880
#> 3 Chr10 20320957
#> 4 Chr11 17429568
#> 5 Chr2 30343082
#> 6 Chr2 1693851
#> 7 Chr3 5283755
#> 8 Chr3 9219325
#> 9 Chr7 8580489
#> 10 Chr9 29139452
# Target intervals: chromosome name plus start/end coordinates per row.
# In this sample data every start is larger than the matching end.
# stringsAsFactors = FALSE keeps seqnames as a character column on any R version.
y <- data.frame(
  seqnames = c(
    "Chr1", "Chr2", "Chr3", "Chr3", "Chr4", "Chr4", "Chr7", "Chr9"
  ),
  start = c(
    18898811, 1693855, 5283764, 9219334,
    1836000, 1380604, 6794948, 29139456
  ),
  end = c(
    18898810, 1693854, 5283763, 9219333,
    1835949, 1380553, 6794947, 29139455
  ),
  stringsAsFactors = FALSE
)
y
#> seqnames start end
#> 1 Chr1 18898811 18898810
#> 2 Chr2 1693855 1693854
#> 3 Chr3 5283764 5283763
#> 4 Chr3 9219334 9219333
#> 5 Chr4 1836000 1835949
#> 6 Chr4 1380604 1380553
#> 7 Chr7 6794948 6794947
#> 8 Chr9 29139456 29139455
# Build the target ranges from y. The sample rows list start after end, so
# take the element-wise minimum/maximum to obtain a valid start <= end pair.
# pmin()/pmax() are vectorised over whole columns, and the columns are picked
# by name rather than by position.
yGR <- GRanges(
  seqnames = y$seqnames,
  ranges = IRanges(start = pmin(y$start, y$end), end = pmax(y$start, y$end))
)
# Each position in x becomes a width-1 range so it can be tested for overlap.
xGR <- GRanges(
  seqnames = x$seqnames,
  ranges = IRanges(start = x$Region, end = x$Region)
)
# Keep only the x ranges that overlap at least one y range, as a data frame.
as.data.frame(subsetByOverlaps(xGR, yGR))
#> Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
#> - in 'x': Chr10, Chr11
#> - in 'y': Chr4
#> Make sure to always combine/compare objects based on the same reference
#> genome (use suppressWarnings() to suppress this warning).
#> [1] seqnames start end width strand
#> <0 rows> (or 0-length row.names)
Created on 2019-10-15 by the reprex package (v0.3.0)