I ran this program in Rstudio and was prompted with cairo error "Error writing output stream".
This is a message indicating an error:
Zcp: sample number is 14...Error in cairo_pdf(filename = "outputfile/boxplot_omics2.pdf", width = 16, :
unable to start device 'cairo_pdf'
此外: Warning message:
In cairo_pdf(filename = "outputfile/boxplot_omics2.pdf", width = 16, :
cairo error 'error while writing to output stream'
This is the code I ran:
#数据读取和准备:
rm(list = ls())#清除所有变量
library(tidyverse)
library(openxlsx)
library(OmicsPLS)
library(parallel)
#entering comparison group information:
#菌群
sample.omics1 <- c("LS1","LS2","LS3","LS4","LS5","LS6","LS7",
"FS1","FS2","FS3","FS4","FS5","FS6","FS7")#metabonomics
#代谢组
sample.omics2 <- c("LS1","LS2","LS3","LS4","LS5","LS6","LS7",
"FS1","FS2","FS3","FS4","FS5","FS6","FS7")#RNA
#reading data
files.xlsx <- fs::dir_ls("inputfile",recurse = TRUE,glob = ".xlsx")
files_names.xlsx <- str_extract(files.xlsx,"(?<=\/).(?=\.)");files_names.xlsx
dat.omics1 <- map_dfc(files.xlsx,read.xlsx) %>%
column_to_rownames(var = "Index") %>% select(all_of(sample.omics1)) %>%
t() %>%
as.data.frame()#meta--y
rownames(dat.omics1)
files.csv <- fs::dir_ls("inputfile",recurse = TRUE,glob = ".csv")
files_names.csv <- str_extract(files.csv,"(?<=\/).(?=\.)");files_names.csv
dat.omics2 <- map_dfc(files.csv,read_csv) %>%
column_to_rownames(var = "genus_taxonomy") %>% #select(matches("fpkm")) %>%
#set_names(str_remove_all(names(.), ":fpkm"))%>%
select(all_of(sample.omics2)) %>%
t() %>%
as.data.frame()#RNA--x
rownames(dat.omics2)
analysis.func <- function(dat.omics2=dat.omics2,#x data
dat.omics1=dat.omics1,#y data
sample.omics1=sample.omics1,#selected sample of y data
sample.omics2=sample.omics2,#selected sample of x data
type="sig",# "sig": different results; "all": all data
top.omics.x=10,#the top numbers of x data
top.omics.y=10,#the top numbers of y data
nr_cores=4,##the number of cores used
percent=0.75){#percent: IQR:0.00 0.25 0.50 0.75 1.00
if(length(sample.omics1)==length(sample.omics2)){
cat(paste0("\nZcp: sample number is ",length(sample.omics1),"..."))
switch(type,
"sig"={
cairo_pdf(filename ="outputfile/boxplot_omics2.pdf",
width = 16, height = 8)
boxplot(log2(t(dat.omics2)+1), col = rainbow(23), nr.rm=TRUE)
graphics.off()
cairo_pdf(filename ="outputfile/boxplot_omics1.pdf",
width = 16, height = 8)
boxplot(log2(t(dat.omics1)+1), col = rainbow(23), nr.rm=TRUE)
graphics.off()
#UV scaling
dat.omics1.uvscal <-log2(dat.omics1+1) %>% scale(scale=F)
dat.omics2.uvscal <- log2(dat.omics2+1) %>% scale(scale=F)
#modeling and fitting
set.seed(332684.5*6)
CV=crossval_o2m(dat.omics2.uvscal, # x.data=====> Minimal CV error is at ax
dat.omics1.uvscal, # y.data=====> Minimal CV error is at ay
2:5,1:3,1:3,nr_cores = nr_cores,#the number of cores used
nr_folds = nrow(dat.omics1.uvscal)) #It is recommended that at least ten folds are used. Too few folds
#(but not less than two) result in unreliable estimates.
x <- CV[["Original"]] %>% as.data.frame()
x[x!=min(x,na.rm = T)] <- NA
x1 <- x %>% filter(!if_all(where(is.numeric),is.na))
str(x1)
x.data.specific.componets <- rownames(x1) %>% str_replace("ax=","") %>%
as.numeric()
y.data.specific.componets <- x1 %>% t() %>% as.data.frame() %>%
filter(!if_all(where(is.numeric),is.na)) %>% rownames()%>%
str_extract("(?<=ay=).*(?=\\.)")%>%
as.numeric()
y.data.specific.componets %>%
str()
joint <- x1 %>% t() %>% as.data.frame() %>%
filter(!if_all(where(is.numeric),is.na)) %>% rownames() %>%
str_extract("\\.a=.*") %>%
str_extract("\\d") %>%
as.numeric()
joint%>%
str()
cat(paste0("\nZcp: Following the advice of the last CV output:\n joint value: ",joint,";\n"))
cat(paste0("\nZcp: Following the advice of the last CV output:\n omics1 componets: ",x.data.specific.componets,";\n"))
cat(paste0("\nZcp: Following the advice of the last CV output:\n omics2 componets: ",y.data.specific.componets,";\n"))
modelfit<-o2m(dat.omics2.uvscal, # rows as samples and columns as variables
dat.omics1.uvscal,# rows as samples and columns as variables
joint,# joint value
x.data.specific.componets ,# x.data-specific componets
y.data.specific.componets)#y.data-specific componets
print(modelfit)
summary (modelfit)
#x selecting
all.omics.x <- loadings(modelfit, "Xjoint", 1:2) %>% as.data.frame()
all.omics.x<- all.omics.x %>% mutate("score"=sqrt(V1^2+V2^2))
all.omics.x <- all.omics.x %>% arrange(desc(score)) %>%
mutate("omics"=rep("x.omics",times=nrow(.)))
all.omics.x%>%
write.csv("outputfile/all_xjiont_loading.csv")
important.omics.x <- all.omics.x[1:top.omics.x,]
important.omics.x%>%
write.csv(paste0("outputfile/top",top.omics.x,"_xjiont_loading.csv"))
#y selecting
all.omics.y <- loadings(modelfit, "Yjoint", 1:2) %>% as.data.frame()
all.omics.y<- all.omics.y %>% mutate("score"=sqrt(V1^2+V2^2))
all.omics.y <- all.omics.y %>% arrange(desc(score)) %>%
mutate("omics"=rep("y.omics",times=nrow(.)))
all.omics.y %>%
write.csv("outputfile/all_yjiont_loading.csv")
important.omics.y <- all.omics.y[1:top.omics.y,]
important.omics.y %>%
write.csv(paste0("outputfile/top",top.omics.y,"_yjiont_loading.csv"))
return(modelfit)
},
"all"={
#calculate the maximum of gene expression per each gene and take the top
max.exp <- dat.omics2 %>% apply(2,max)
max.exp.selected <- max.exp %>% quantile(percent)
#take the IQR of each gene and take the top genes
IQR.exp <- dat.omics2 %>% apply ( 2, IQR, na.rm=TRUE)
IQR.exp.selected <- IQR.exp %>% quantile (percent)
#selected genes/percent are the intersection of the two previous sets
filter <- ( intersect ( which (max.exp > max.exp.selected),
which (IQR.exp > IQR.exp.selected)))
dat.omics2 <- dat.omics2[,filter]
cairo_pdf(filename ="outputfile/boxplot_omics2.pdf",
width = 16, height = 8)
boxplot(log2(t(dat.omics2)+1), col = rainbow(23), nr.rm=TRUE)
graphics.off()
cairo_pdf(filename ="outputfile/boxplot_omics1.pdf",
width = 16, height = 8)
boxplot(log2(t(dat.omics1)+1), col = rainbow(23), nr.rm=TRUE)
graphics.off()
#UV scaling
dat.omics1.uvscal <-log2(dat.omics1+1) %>% scale(scale=F)
dat.omics2.uvscal <- log2(dat.omics2+1) %>% scale(scale=F)
#modeling and fitting
set.seed(332684.5*6)
CV=crossval_o2m(dat.omics2.uvscal, # x.data=====> Minimal CV error is at ax
dat.omics1.uvscal, # y.data=====> Minimal CV error is at ay
2:5,1:3,1:3,nr_cores = nr_cores,#the number of cores used
nr_folds = nrow(dat.omics1.uvscal)) #It is recommended that at least ten folds are used. Too few folds
#(but not less than two) result in unreliable estimates.
x <- CV[["Original"]] %>% as.data.frame()
x[x!=min(x,na.rm = T)] <- NA
x1 <- x %>% filter(!if_all(where(is.numeric),is.na))
str(x1)
x.data.specific.componets <- rownames(x1) %>% str_replace("ax=","") %>%
as.numeric()
y.data.specific.componets <- x1 %>% t() %>% as.data.frame() %>%
filter(!if_all(where(is.numeric),is.na)) %>% rownames()%>%
str_extract("(?<=ay=).*(?=\\.)")%>%
as.numeric()
y.data.specific.componets %>%
str()
joint <- x1 %>% t() %>% as.data.frame() %>%
filter(!if_all(where(is.numeric),is.na)) %>% rownames() %>%
str_extract("\\.a=.*") %>%
str_extract("\\d") %>%
as.numeric()
joint%>%
str()
cat(paste0("\nZcp: Following the advice of the last CV output:\n joint value: ",joint,";\n"))
cat(paste0("\nZcp: Following the advice of the last CV output:\n omics1 componets: ",x.data.specific.componets,";\n"))
cat(paste0("\nZcp: Following the advice of the last CV output:\n omics2 componets: ",y.data.specific.componets,";\n"))
modelfit<-o2m(dat.omics2.uvscal, # rows as samples and columns as variables
dat.omics1.uvscal,# rows as samples and columns as variables
joint,# joint value
x.data.specific.componets ,# x.data-specific componets
y.data.specific.componets)#y.data-specific componets
print(modelfit)
summary (modelfit)
#x selecting
all.omics.x <- loadings(modelfit, "Xjoint", 1:2) %>% as.data.frame()
all.omics.x<- all.omics.x %>% mutate("score"=sqrt(V1^2+V2^2))
all.omics.x <- all.omics.x %>% arrange(desc(score)) %>%
mutate("omics"=rep("x.omics",times=nrow(.)))
all.omics.x%>%
write.csv("outputfile/all_xjiont_loading.csv")
important.omics.x <- all.omics.x[1:top.omics.x,]
important.omics.x%>%
write.csv(paste0("outputfile/top",top.omics.x,"_xjiont_loading.csv"))
#y selecting
all.omics.y <- loadings(modelfit, "Yjoint", 1:2) %>% as.data.frame()
all.omics.y<- all.omics.y %>% mutate("score"=sqrt(V1^2+V2^2))
all.omics.y <- all.omics.y %>% arrange(desc(score)) %>%
mutate("omics"=rep("y.omics",times=nrow(.)))
all.omics.y %>%
write.csv("outputfile/all_yjiont_loading.csv")
important.omics.y <- all.omics.y[1:top.omics.y,]
important.omics.y %>%
write.csv(paste0("outputfile/top",top.omics.y,"_yjiont_loading.csv"))
return(modelfit)
})
}else{
stop("\nZcp: please ensureing the number of samples is consistent in each omics...\n")
}
}
#调用函数:
modelfit <- analysis.func(dat.omics2=dat.omics2,#x data
dat.omics1=dat.omics1,#y data
sample.omics1=sample.omics1,#selected sample of y data
sample.omics2=sample.omics2,#selected sample of x data
type="all",# "sig": different results; "all": all data
top.omics.x=10,#the top numbers of x data
top.omics.y=10,#the top numbers of y data
nr_cores=8,#the number of cores used
percent=0.75)#quantile