Hi. I am having a data of students and a few parameters. I am changing the arrangement of data. But I am getting few error messages and output is shown as list-cols.
I want to remove the duplicates and the response must also be proper (it must be either 0 or 1).
How can I overcome this?
library(tidyverse)
library(janitor)
#>
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#>
#> chisq.test, fisher.test
data1<-tibble::tribble(
~enumerator, ~en_name, ~clusterid, ~schoolid, ~teacherid, ~cal_name1, ~cal_name2, ~cal_name3, ~cal_name4, ~cal_name5, ~cal_name6, ~q_1, ~q_1_1, ~q_1_2, ~q_1_3, ~q_1_4, ~q_1_5, ~q_1_6, ~q_1_7, ~q_1_8, ~q_1_9, ~q_1_10, ~q_1_11, ~q_2, ~q_2_1, ~q_2_2, ~q_2_3, ~q_2_4, ~q_2_5, ~q_2_6, ~q_2_7, ~q_2_8, ~q_2_9, ~q_2_10, ~q_2_11, ~q_3, ~q_3_1, ~q_3_2, ~q_3_3, ~q_3_4, ~q_3_5, ~q_3_6, ~q_3_7, ~q_3_8, ~q_3_9, ~q_3_10, ~q_3_11, ~q_4, ~q_4_1, ~q_4_2, ~q_4_3, ~q_4_4, ~q_4_5, ~q_4_6, ~q_4_7, ~q_4_8, ~q_4_9, ~q_4_10, ~q_4_11, ~q_5, ~q_5_1, ~q_5_2, ~q_5_3, ~q_5_4, ~q_5_5, ~q_5_6, ~q_5_7, ~q_5_8, ~q_5_9, ~q_5_10, ~q_5_11, ~q_6, ~q_6_1, ~q_6_2, ~q_6_3, ~q_6_4, ~q_6_5, ~q_6_6, ~q_6_7, ~q_6_8, ~q_6_9, ~q_6_10, ~q_6_11,
"Ben001", "Gangavva B Hiremath", "Katnur", "GHPS Giriyal", "J B Savadatti", "Pallavi.R.Ingale", "Sachin.B.Jadhav", "Shravani Muraganavar", "Pavitra Salgar", "Sannidhi Haranashikari", NA, "1", 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, "4", 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, "5 8", 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, "6", 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
"Ben001", "Gangavva B Hiremath", "Katnur", "GHPS Giriyal", "Sumanagala G Kore", "Pallavi.R.Ingale", "Sachin.B.Jadhav", "Shravani Muraganavar", "Pavitra Salgar", "Sannidhi Haranashikari", NA, "4 8", 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, "7 10", 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, "3 7 9", 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, "6", 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, "4 8", 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
"Ben002", "Hemalata B. Bhajantri", "Byahatti", "DPEP Kusugal", "Manjula A", "Anand Byahatti", "Apsana. S Hubballi", "Fathima Begum.R.Kawalikai", NA, NA, NA, "1 2 10", 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, "2 6 9", 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, "4 5 7 9", 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, "1 5 9", 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, "4 6 8 9 10", 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
"Ben002", "Hemalata B. Bhajantri", "Byahatti", "DPEP Kusugal", "Manjula A", "Anand Byahatti", "Apsana. S Hubballi", "Fathima Begum.R.Kawalikai", NA, NA, NA, "8 9 10 11", 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, "3 4 5 9", 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, "3 5 9 11", 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, "4 7 9 10", 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, "3 4 7", 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
)
Students <- data1 |> select(schoolid, starts_with("cal_name"))
Students <- Students |>
pivot_longer(cols = -schoolid,names_to = "ID",values_to = "Name") |>
mutate(ID=str_remove(ID,"cal_name")) |>
filter(!is.na(Name))
Scores <- data1 |> select(schoolid,matches("q_\\d+_\\d+"))
Scores <- Scores |>
pivot_longer(cols = -schoolid,names_to = "ID",values_to = "Score") |>
mutate(Q=str_remove(ID,"_\\d+"),
ID=str_extract(ID,"\\d+")) |>
filter(!is.na(Score)) |>
pivot_wider(names_from = "Q",values_from = "Score")
#> Warning: Values are not uniquely identified; output will contain list-cols.
#> * Use `values_fn = list` to suppress this warning.
#> * Use `values_fn = length` to identify where the duplicates arise
#> * Use `values_fn = {summary_fun}` to summarise duplicates
Final <- inner_join(Students,Scores,by=c("ID","schoolid"))
Created on 2022-06-16 by the reprex package (v2.0.1)