Duplicates getting created when rearranging data.

kuttan98 · June 16, 2022, 4:52am

Hi. I am having a data of students and a few parameters. I am changing the arrangement of data. But I am getting few error messages and output is shown as list-cols.
I want to remove the duplicates and the response must also be proper (it must be either 0 or 1).
How can I overcome this?

library(tidyverse)
library(janitor)
#> 
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#> 
#>     chisq.test, fisher.test
data1<-tibble::tribble(
  ~enumerator,                ~en_name, ~clusterid,      ~schoolid,          ~teacherid,         ~cal_name1,           ~cal_name2,                  ~cal_name3,       ~cal_name4,               ~cal_name5, ~cal_name6,        ~q_1, ~q_1_1, ~q_1_2, ~q_1_3, ~q_1_4, ~q_1_5, ~q_1_6, ~q_1_7, ~q_1_8, ~q_1_9, ~q_1_10, ~q_1_11,      ~q_2, ~q_2_1, ~q_2_2, ~q_2_3, ~q_2_4, ~q_2_5, ~q_2_6, ~q_2_7, ~q_2_8, ~q_2_9, ~q_2_10, ~q_2_11,       ~q_3, ~q_3_1, ~q_3_2, ~q_3_3, ~q_3_4, ~q_3_5, ~q_3_6, ~q_3_7, ~q_3_8, ~q_3_9, ~q_3_10, ~q_3_11,       ~q_4, ~q_4_1, ~q_4_2, ~q_4_3, ~q_4_4, ~q_4_5, ~q_4_6, ~q_4_7, ~q_4_8, ~q_4_9, ~q_4_10, ~q_4_11,         ~q_5, ~q_5_1, ~q_5_2, ~q_5_3, ~q_5_4, ~q_5_5, ~q_5_6, ~q_5_7, ~q_5_8, ~q_5_9, ~q_5_10, ~q_5_11, ~q_6, ~q_6_1, ~q_6_2, ~q_6_3, ~q_6_4, ~q_6_5, ~q_6_6, ~q_6_7, ~q_6_8, ~q_6_9, ~q_6_10, ~q_6_11,
     "Ben001",   "Gangavva B Hiremath",   "Katnur", "GHPS Giriyal",     "J B Savadatti", "Pallavi.R.Ingale",    "Sachin.B.Jadhav",      "Shravani Muraganavar", "Pavitra Salgar", "Sannidhi Haranashikari",         NA,         "1",     1L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,      0L,      0L,       "4",     0L,     0L,     0L,     1L,     0L,     0L,     0L,     0L,     0L,      0L,      0L,         NA,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,      0L,      0L,      "5 8",     0L,     0L,     0L,     0L,     1L,     0L,     0L,     1L,     0L,      0L,      0L,          "6",     0L,     0L,     0L,     0L,     0L,     1L,     0L,     0L,     0L,      0L,      0L,   NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,      NA,      NA,
     "Ben001",   "Gangavva B Hiremath",   "Katnur", "GHPS Giriyal", "Sumanagala G Kore", "Pallavi.R.Ingale",    "Sachin.B.Jadhav",      "Shravani Muraganavar", "Pavitra Salgar", "Sannidhi Haranashikari",         NA,       "4 8",     0L,     0L,     0L,     1L,     0L,     0L,     0L,     1L,     0L,      0L,      0L,    "7 10",     0L,     0L,     0L,     0L,     0L,     0L,     1L,     0L,     0L,      1L,      0L,    "3 7 9",     0L,     0L,     1L,     0L,     0L,     0L,     1L,     0L,     1L,      0L,      0L,        "6",     0L,     0L,     0L,     0L,     0L,     1L,     0L,     0L,     0L,      0L,      0L,        "4 8",     0L,     0L,     0L,     1L,     0L,     0L,     0L,     1L,     0L,      0L,      0L,   NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,      NA,      NA,
     "Ben002", "Hemalata B. Bhajantri", "Byahatti", "DPEP Kusugal",         "Manjula A",   "Anand Byahatti", "Apsana. S Hubballi", "Fathima Begum.R.Kawalikai",               NA,                       NA,         NA,    "1 2 10",     1L,     1L,     0L,     0L,     0L,     0L,     0L,     0L,     0L,      1L,      0L,   "2 6 9",     0L,     1L,     0L,     0L,     0L,     1L,     0L,     0L,     1L,      0L,      0L,  "4 5 7 9",     0L,     0L,     0L,     1L,     1L,     0L,     1L,     0L,     1L,      0L,      0L,    "1 5 9",     1L,     0L,     0L,     0L,     1L,     0L,     0L,     0L,     1L,      0L,      0L, "4 6 8 9 10",     0L,     0L,     0L,     1L,     0L,     1L,     0L,     1L,     1L,      1L,      0L,   NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,      NA,      NA,
     "Ben002", "Hemalata B. Bhajantri", "Byahatti", "DPEP Kusugal",         "Manjula A",   "Anand Byahatti", "Apsana. S Hubballi", "Fathima Begum.R.Kawalikai",               NA,                       NA,         NA, "8 9 10 11",     0L,     0L,     0L,     0L,     0L,     0L,     0L,     1L,     1L,      1L,      1L, "3 4 5 9",     0L,     0L,     1L,     1L,     1L,     0L,     0L,     0L,     1L,      0L,      0L, "3 5 9 11",     0L,     0L,     1L,     0L,     1L,     0L,     0L,     0L,     1L,      0L,      1L, "4 7 9 10",     0L,     0L,     0L,     1L,     0L,     0L,     1L,     0L,     1L,      1L,      0L,      "3 4 7",     0L,     0L,     1L,     1L,     0L,     0L,     1L,     0L,     0L,      0L,      0L,   NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,     NA,      NA,      NA
  )

Students <- data1 |> select(schoolid, starts_with("cal_name"))
Students <- Students |> 
  pivot_longer(cols = -schoolid,names_to = "ID",values_to = "Name") |> 
  mutate(ID=str_remove(ID,"cal_name")) |> 
  filter(!is.na(Name))

Scores <- data1 |> select(schoolid,matches("q_\\d+_\\d+"))
Scores <- Scores |> 
  pivot_longer(cols = -schoolid,names_to = "ID",values_to = "Score") |> 
  mutate(Q=str_remove(ID,"_\\d+"),
         ID=str_extract(ID,"\\d+")) |> 
  filter(!is.na(Score)) |>
  pivot_wider(names_from = "Q",values_from = "Score")
#> Warning: Values are not uniquely identified; output will contain list-cols.
#> * Use `values_fn = list` to suppress this warning.
#> * Use `values_fn = length` to identify where the duplicates arise
#> * Use `values_fn = {summary_fun}` to summarise duplicates
Final <- inner_join(Students,Scores,by=c("ID","schoolid"))
Created on 2022-06-16 by the reprex package (v2.0.1)

system · July 7, 2022, 4:52am

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.