Error while parsing a JSON file and converting it to a table format

Let me know once you have looked at the entire `receiver` object.

# libraries
library(jsonlite)

# functions
# Numeric value stored after the ":" in element `x` of the global `batch`.
#
# The element is split on ":", each piece is whitespace-trimmed, every comma
# is removed from the second piece, and the remainder is converted with
# as.numeric().
#
# x: position in `batch` to read.
# Returns a numeric value (NA when there is no second piece or it is not a
# number).
get_addn <- function(x) {
  fields <- trimws(strsplit(batch[x], ":")[[1]])
  digits <- gsub(",", "", fields[2])
  as.numeric(digits)
}

# Gene identifier(s) stored after the ":" in element `x` of the global `batch`.
#
# `batch` is indexed as a character vector (`batch[x]`), the same way the
# other get_* helpers read it. Indexing `batch[[1]][x]` instead selects the
# first string and then asks for its x-th element, which is NA for any x > 1.
#
# The text after the first ":" is reduced to its upper-case letters and
# digits; every run of other characters becomes a single ",". Delimiters at
# the start or end of the text therefore leave a leading/trailing ",".
#
# x: position in `batch` to read.
# Returns a character value (NA when the element has no ":" part).
get_gene <- function(x) {
  value <- strsplit(batch[x], ":")[[1]][2]
  codes <- gsub("[^A-Z0-9]+", ",", value)
  trimws(codes)
}


# Note text stored before the ":" in element `x` of the global `batch`.
#
# The element is split on ":", the first piece is whitespace-trimmed, and its
# first and last characters are dropped.
#
# x: position in `batch` to read.
# Returns a character value.
get_note <- function(x) {
  label <- trimws(strsplit(batch[x], ":")[[1]])[1]
  gsub("^.|.$", "", label)
}

# Summary text stored after the ":" in element `x` of the global `batch`.
#
# Only the piece between the first and second ":" is used. A leading
# space-plus-quote and a trailing quote-plus-comma are first turned into
# commas, then every comma in the text is removed.
#
# x: position in `batch` to read.
# Returns a character value.
get_summary <- function(x) {
  value <- strsplit(batch[x], ":")[[1]][2]
  marked <- gsub("^ \"|\",$", ",", value)
  gsub(",", "", marked)
}

# Build a one-row data frame from fixed positions of the global `batch`.
#
# The row holds the gene (element 3), the summary (element 4) and, for each
# of elements 6 through 20, the note text and its numeric value. Columns are
# named `notes<i>` / `addn<i>` for every position, matching the `receiver`
# template; this includes `notes7`, which was previously spelled `note7`.
#
# x: accepted so the function can be called once per loop index, but not
#    used; the row is always read from the same positions of `batch`.
# Returns a data frame with one row and 32 columns.
populate_receiver <- function(x) {
  item_positions <- 6:20
  new_row <- data.frame(
    gene    = get_gene(3),
    summary = get_summary(4)
  )
  # Add the columns pairwise to keep the order
  # notes6, addn6, notes7, addn7, ...
  for (pos in item_positions) {
    new_row[[paste0("notes", pos)]] <- get_note(pos)
    new_row[[paste0("addn", pos)]] <- get_addn(pos)
  }
  new_row
}

# data

# raw JSON fixture, served from a GitHub gist
gist <- "https://gist.githubusercontent.com/technocrat/7c6602553011f31dabddf1f12077f6a2/raw/1eb5fb0e673df107844d8048d81aede5568f9f85/test_json.json"

# preprocessing

# Parse the gist, round-trip each top-level element through
# toJSON()/fromJSON() with auto_unbox = TRUE, and keep the first result.
intake <- read_json(gist)
batch <- sapply(
  intake,
  \(item) fromJSON(toJSON(item, auto_unbox = TRUE))
)[[1]]

# create receiver to add processed items

# Zero-row template fixing the column names and types of the result table:
# gene and summary, then one text/number pair (notes<i>, addn<i>) for each
# of the items 6 through 20.
receiver <- data.frame(
  gene    = character(),
  summary = character(),
  notes6  = character(), addn6  = numeric(),
  notes7  = character(), addn7  = numeric(),
  notes8  = character(), addn8  = numeric(),
  notes9  = character(), addn9  = numeric(),
  notes10 = character(), addn10 = numeric(),
  notes11 = character(), addn11 = numeric(),
  notes12 = character(), addn12 = numeric(),
  notes13 = character(), addn13 = numeric(),
  notes14 = character(), addn14 = numeric(),
  notes15 = character(), addn15 = numeric(),
  notes16 = character(), addn16 = numeric(),
  notes17 = character(), addn17 = numeric(),
  notes18 = character(), addn18 = numeric(),
  notes19 = character(), addn19 = numeric(),
  notes20 = character(), addn20 = numeric()
)

# main

# Build one row per element of `batch` and bind them all in a single call.
# Growing `receiver` with rbind() inside a loop copies the whole data frame
# on every pass. With an empty `batch`, `receiver` is returned unchanged.
receiver <- do.call(
  rbind,
  c(list(receiver), lapply(seq_along(batch), populate_receiver))
)

# showing just the last row of the entire object
# (the data frame is passed as `x` directly; `tail(x = , 1)` behind a pipe
# supplies an empty `x` and pushes the data frame into `...`)
tail(receiver, n = 1)
#>    gene
#> 25 <NA>
#>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               summary
#> 25 AAK1 (AP2 associated kinase 1) is a gene that encodes a protein kinase involved in endocytosis a process by which cells internalize molecules and particles from their environment. The AAK1 protein regulates the binding of the AP2 complex to clathrin and the plasma membrane which is crucial for the formation of clathrin-coated vesicles. This process is important for the internalization of proteins and receptors from the cell surface and their subsequent recycling or degradation.
#>                                                     notes6 addn6
#> 25 This gene is specifically associated with the test case     1
#>                                                             note7 addn7
#> 25 This gene is specifically associated with the biology of cells     1
#>                                                               notes8 addn8
#> 25 This gene is specifically associated with the biology of proteins     1
#>                                                                  notes9 addn9
#> 25 This gene is specifically associated with the biology of metabolites     1
#>                                             notes10 addn10
#> 25 This gene is involved in mediating the induction      2
#>                                       notes11 addn11
#> 25 This gene is implicated in mediated damage      2
#>                                          notes12 addn12
#> 25 This gene is involved in mediating attachment      3
#>                                                  notes13 addn13
#> 25 This gene is implicated in the process of replication      2
#>                                     notes14 addn14
#> 25 This gene is involved in mediating entry      3
#>                                      notes15 addn15
#> 25 This gene is implicated in the impairment      2
#>                                                notes16 addn16
#> 25 This gene is involved in conferring drug resistance      2
#>                                                        notes17 addn17
#> 25 This gene is involved in mediating the response to blockage      3
#>                                                       notes18 addn18
#> 25 This gene is involved in mediating the initiation of cells      2
#>                                            notes19 addn19
#> 25 This gene is involved in mediating the blockage      4
#>                                                        notes20 addn20
#> 25 This gene is involved in mediating the priming of the cells      3

Created on 2024-01-08 with reprex v2.0.2

1 Like

@technocrat yes, that's correct.

OK, good luck. Kinda unwieldy table!

1 Like

@technocrat thank you very much for the continuous support.

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.