"Cannot Open Connection" Error

InkyMooncake · November 27, 2022, 1:01am

set_colnames_remove_unk_chrx <- function(df, x){
  colnames(df) <- c("Chromosome", "?", "?2", "Index", "SNP1", "SNP2")
  return(df[df$Chromosome == x, ])
}

unpack <- function(l){
  return(l[[1]])
}

sig_test <- function(chr, path="", save=FALSE){
  options(warn=-1)
  weight <- read.csv("C:\\Users\\noree\\OneDrive - The University of Chicago\\Genetics Lab-Info and RMD file\\Genetics 20187 Dog GWAS files\\Major\\weight2022.csv")
  names <- weight$namegetw
  
  #Read in raw data files named {dog_name}.tped
  dfs <- lapply(paste0(names, '.tped'), read.table, stringsAsFactors=FALSE)
  dfs <- lapply(dfs, set_colnames_remove_unk_chrx, chr)
  names(dfs) <- names
  
  #Bin by weight
  large_names <- weight[weight$weight > 35, 'name']
  small_names <- weight[weight$weight <= 35, 'name']
  large_df <- dfs[large_names]
  small_df <- dfs[small_names]
  rm(dfs)#free up memory
  
  
  small_df <- Map(function(x, i) setNames(x, ifelse(names(x) %in% c("Chromosome", "Index", "?"), names(x), sprintf('%s.%d', names(x), i))), small_df, seq_along(small_df))
  large_df <- Map(function(x, i) setNames(x, ifelse(names(x) %in% c("Chromosome", "Index", "?"), names(x), sprintf('%s.%d', names(x), i))), large_df, seq_along(large_df))
  
  
  small_df <- Reduce(function(dtf1, dtf2) merge(dtf1, dtf2, by = c("Chromosome", "Index", "?"), all = TRUE), small_df)
  large_df <- Reduce(function(dtf1, dtf2) merge(dtf1, dtf2, by = c("Chromosome", "Index", "?"), all = TRUE), large_df)
  small_df[is.na(small_df)] <- 0
  large_df[is.na(large_df)] <- 0
  
  nsample <- nrow(small_df) 

  small_snps2 <- list()
  large_snps2 <- list()
  for(i in 1:nsample){
    s <- ""
    for(j in 1:length(small_names)){
      s <- paste0(s, small_df[i, sprintf('%s.%d', "SNP1", j)], small_df[i, sprintf('%s.%d', "SNP2", j)])
    }
    small_snps2 <- append(small_snps2, s)
    
    l <- ""
    for(j in 1:length(large_names)){
      l <- paste0(l, large_df[i, sprintf('%s.%d', "SNP1", j)], large_df[i, sprintf('%s.%d', "SNP2", j)])
      
    }
    large_snps2 <- append(large_snps2, l)
  }
  
  r
  small_splitSNPs <- lapply(small_snps2, strsplit, "")
  small_splitSNPs <- lapply(small_splitSNPs, unpack)
  large_splitSNPs <- lapply(large_snps2, strsplit, "")
  large_splitSNPs <- lapply(large_splitSNPs, unpack)
  
  
  small_single_count <- lapply(lapply(small_splitSNPs, factor, levels=c("A", "T", "C", "G")), table)
  large_single_count <- lapply(lapply(large_splitSNPs, factor, levels=c("A", "T", "C", "G")), table)
  write.csv(small_single_count, paste0(path,  "small_single_count", chr, ".csv"), row.names = FALSE)
  write.csv(large_single_count, paste0(path, "large_single_count", chr, ".csv"), row.names = FALSE)
  rm(small_splitSNPs)
  rm(large_splitSNPs)
  
  p_values <- numeric(nsample)
  for(i in 1:nsample){
    
    t <- rbind(small_single_count[[i]],large_single_count[[i]])
    
    remove <- c()
    for(c in 1:ncol(t)){
      if(sum(t[,c] == numeric(2)) == 2){
        remove <- append(remove, c)
      }
    }
    t<-t[,remove*-1]
    
    if(all(is.na(t))){
      p_values[i] <- NA
    }else{
      p_values[i] <- chisq.test(t)$p.value
    }
  }
  return(data.frame("pvals"=p_values,"indices"=small_df$Index[1:nsample]))
}
#This first code chunk has been assigned to me. The only thing that I have altered is that I have typed the full working directory for the variable "weight"

out1 <- sig_test(1)
out2 <- sig_test(2)
out3 <- sig_test(3)
out4 <- sig_test(4)

pvals1 <- out1$pvals[-1*which(is.na(out1$pvals))]
pvals2 <- out2$pvals[-1*which(is.na(out2$pvals))]
pvals3 <- out3$pvals[-1*which(isfi.na(out3$pvals))]
pvals4 <- out3$pvals[-1*which(is.na(out4$pvals))]

indices1 <- out1$indices
indices2 <- out2$indices
indices3 <- out3$indices
indices4 <- out4$indices
# Whenever I run this chunk, I get an error message, "Error in file(file, "rt"), cannot open the connection," with the traceback as follows:
          "4. file(file, "rt")
3. FUN(X[[i]], ...)
2. lapply(paste0(names, ".tped"), read.table, stringsAsFactors = FALSE)
1. sig_test(1)"

technocrat · November 27, 2022, 5:17am

Is isfi.na() the function you intend?

InkyMooncake · November 27, 2022, 5:32am

I have corrected that error, but the main problem is not resolved.

technocrat · November 27, 2022, 5:53am

Try copying this into your working directory to avoid potential problems resolving the filename.

InkyMooncake · November 27, 2022, 6:09am

When in input this into setwd(), I get a "cannot change working directory" error

technocrat · November 27, 2022, 6:12am

I'm not a Windows user, so I'm guessing that OneDrive is at fault. My suggestion is to make a copy of

into the directory shown by getwd().

InkyMooncake · November 27, 2022, 6:15am

What do you mean "make a copy of the code into the working directory"? Do you mean replace the working directory with a copy of my code?

technocrat · November 27, 2022, 6:44am

Sorry to be unclear—put a copy of the OneDrive file ending in weight2022.csv into the same directory shown by the command getwd(), which will be somewhere that your code is running, either by default or by opening a project or otherwise. You don't want to replace the contents of the working direction, just add the one file. Do that and confirm in the Files pane or with the system browser. Then

weight <- read.csv("weight2022.csv")

should work.

InkyMooncake · November 27, 2022, 7:13am

On my computer, R has trouble finding the file I want unless I write the full directory.
On a related note, my instructor mentioned that the "weight2022.csv" may be hard to read for some computers, so they suggested that we put this line of code "at the beginning of your "sig_test" function (after the function name)"

colnames(weight) <- c("name", "weight")

Where exactly do I put this line, and what other adjustments would I have to make?

technocrat · November 27, 2022, 7:23am

OK, are you using RStudio or the R console from terminal?

may be what

is supposed to fix. colnames() assigns variable names.

There is a package that is supposed to cut through the OneDrive fog. I can't test it (Windows free since long time passing) but it might help fetch a local copy of the file for you.

InkyMooncake · November 27, 2022, 7:52am

I am using R studio, also I realize that

names <- weight$namegetw

should be

names <- weight$name

Nonetheless fixing that mistake will not improve the situation. I will take your advice with the package.

system · December 18, 2022, 7:53am

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.