Practice in data cleanup

Hello, I am new to data science and want to practice my data cleanup skills in R. I was hoping to get some advice on how to reshape the following example data into a format that would be considered tidy. Any thoughts would be greatly appreciated.

> dput(quilt_block)
structure(list(Color = c("black printed", "blue plaid", "blue/aqua", 
"coral", "coral/green", "cream", "dark gray", "green", "leopard", 
"light blue", "light gray", "lime green", "maroon", "medium blue", 
"pale pink/yellow", "pink", "pink setting fabric", "pink/white", 
"purple", "red", "rust", "rust/spinning print", "solid black", 
"solid blue", "stripe fabric", "yellow", "brown? Maybe rust? - rust called out in day 11", 
"Gray? Dark or light?", "Gray fabrics? Dark and light? - day 10 specifies light gray floral and gray large print", 
"Aqua? Blue/Aqua?", "coral? Coral green?", "light gray floral? Light gray?", 
"Gray large print? Dark gray?"), Quantity...2 = c(2, NA, 2, NA, 
NA, 9, 4, 3, 4, 8, NA, 4, 4, 5, 2, 4, NA, 2, 4, NA, 4, NA, 8, 
2, NA, 4, 2, 2, 4, 12, 10, 8, 4), Width...3 = c(1.5, NA, 1.5, 
NA, NA, 4.5, 1.5, 1.5, 1.5, 1.5, NA, 1.5, 3, 1.5, 1.5, 1.5, NA, 
1.5, 1.5, NA, 3.5, NA, 1.5, 1.5, NA, 1.5, 1.5, 1.5, 1.5, 2.5, 
3, 2.5, 2.5), Length...4 = c(15, NA, 16, NA, NA, 4.5, 10, 15, 
10, 7, NA, 1.5, 3, 15, 16, 7, NA, 18, 10, NA, 3.5, NA, 3, 15, 
NA, 10, 18, 15, 1.5, 2.5, 3, 2.5, 2.5), `Block number...5` = c(3, 
NA, 5, NA, NA, 5, 2, 4, 2, 4, NA, 5, 9, 4, 5, 4, NA, 3, 2, NA, 
11, NA, 11, 3, NA, 2, 3, 4, 5, 7, 7, 10, 10), `Block name...6` = c("checkerboard blocks", 
NA, "plaid strip block", NA, NA, "plaid strip block", "strip blocks", 
"more checkerboard blocks", "strip blocks", "more checkerboard blocks", 
NA, "plaid strip block", "half square triangle block", "more checkerboard blocks", 
"plaid strip block", "more checkerboard blocks", NA, "checkerboard blocks", 
"strip blocks", NA, "half square triangle cross blocks", NA, 
"half square triangle cross blocks", "checkerboard blocks", NA, 
"strip blocks", "checkerboard blocks", "more checkerboard blocks", 
"plaid strip block", "diagonal half square triangle block", "diagonal half square triangle block", 
"four patch + half square triangle", "four patch + half square triangle"
), Quantity...7 = c(4, NA, NA, NA, NA, 8, 4, 2, NA, NA, NA, 8, 
8, 4, 8, 4, NA, NA, NA, NA, NA, NA, 2, 10, NA, 4, NA, NA, NA, 
NA, 4, 2, 2), Width...8 = c(3.5, NA, NA, NA, NA, 1.5, 3.5, 1.5, 
NA, NA, NA, 4, 2.5, 1.5, 4, 1.5, NA, NA, NA, NA, NA, NA, 1.5, 
3, NA, 1.5, NA, NA, NA, NA, 3.5, 5.5, 5.5), Length...9 = c(3.5, 
NA, NA, NA, NA, 3, 3.5, 16, NA, NA, NA, 4, 2.5, 1.5, 4, 1.5, 
NA, NA, NA, NA, NA, NA, 1.5, 3, NA, 7, NA, NA, NA, NA, 3.5, 5.5, 
5.5), `Block number...10` = c(11, NA, NA, NA, NA, 11, 11, 5, 
NA, NA, NA, 6, 9, 5, 6, 5, NA, NA, NA, NA, NA, NA, 11, 7, NA, 
4, NA, NA, NA, NA, 11, 10, 10), `Block name...11` = c("half square triangle cross blocks", 
NA, NA, NA, NA, "half square triangle cross blocks", "half square triangle cross blocks", 
"plaid strip block", NA, NA, NA, "Half square triangle block", 
"Half square triangle block", "plaid strip block", "Half square triangle block", 
"plaid strip block", NA, NA, NA, NA, NA, NA, "half square triangle cross blocks", 
"diagonal half square triangle block", NA, "more checkerboard blocks", 
NA, NA, NA, NA, "half square triangle cross blocks", "four patch + half square triangle", 
"four patch + half square triangle"), ...12 = c(2, NA, NA, NA, 
NA, 2, NA, 4, NA, NA, NA, 2, 6, 4, NA, 2, NA, NA, NA, NA, NA, 
NA, NA, 2, NA, 2, NA, NA, NA, NA, NA, NA, NA), ...13 = c(2.5, 
NA, NA, NA, NA, 1.5, NA, 3, NA, NA, NA, 2.5, 2.5, 3, NA, 3, NA, 
NA, NA, NA, NA, NA, NA, 2.5, NA, 1.5, NA, NA, NA, NA, NA, NA, 
NA), ...14 = c(4.5, NA, NA, NA, NA, 1.5, NA, 3, NA, NA, NA, 2.5, 
2.5, 3, NA, 3, NA, NA, NA, NA, NA, NA, NA, 2.5, NA, 16, NA, NA, 
NA, NA, NA, NA, NA), ...15 = c(12, NA, NA, NA, NA, 11, NA, 8, 
NA, NA, NA, 12, 10, 8, NA, 8, NA, NA, NA, NA, NA, NA, NA, 10, 
NA, 5, NA, NA, NA, NA, NA, NA, NA), ...16 = c("bordered blowtie blocks", 
NA, NA, NA, NA, "half square triangle cross blocks", NA, "multi half square triangle block", 
NA, NA, NA, "bordered blowtie blocks", "four patch + half square triangle", 
"multi half square triangle block", NA, "multi half square triangle block", 
NA, NA, NA, NA, NA, NA, NA, "four patch + half square triangle", 
NA, "plaid strip block", NA, NA, NA, NA, NA, NA, NA), ...17 = c(2, 
NA, NA, NA, NA, 4, NA, NA, NA, NA, NA, NA, 8, NA, NA, 4, NA, 
NA, NA, NA, NA, NA, NA, 4, NA, 6, NA, NA, NA, NA, NA, NA, NA), 
    ...18 = c(2.5, NA, NA, NA, NA, 2.5, NA, NA, NA, NA, NA, NA, 
    2.5, NA, NA, 3, NA, NA, NA, NA, NA, NA, NA, 2.5, NA, 3, NA, 
    NA, NA, NA, NA, NA, NA), ...19 = c(6.5, NA, NA, NA, NA, 4.5, 
    NA, NA, NA, NA, NA, NA, 2.5, NA, NA, 3, NA, NA, NA, NA, NA, 
    NA, NA, 2.5, NA, 3, NA, NA, NA, NA, NA, NA, NA), ...20 = c(12, 
    NA, NA, NA, NA, 12, NA, NA, NA, NA, NA, NA, 12, NA, NA, 9, 
    NA, NA, NA, NA, NA, NA, NA, 12, NA, 8, NA, NA, NA, NA, NA, 
    NA, NA), ...21 = c("bordered bowtie blocks", NA, NA, NA, 
    NA, "bordered bowtie blocks", NA, NA, NA, NA, NA, NA, "bordered bowtie blocks", 
    NA, NA, "half square triangle block", NA, NA, NA, NA, NA, 
    NA, NA, "bordered bowtie blocks", NA, "multi half square triangle", 
    NA, NA, NA, NA, NA, NA, NA), ...22 = c(NA, NA, NA, NA, NA, 
    4, NA, NA, NA, NA, NA, NA, 4, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, 4, NA, NA, NA, NA, NA, NA, NA, NA, NA), ...23 = c(NA, 
    NA, NA, NA, NA, 2.5, NA, NA, NA, NA, NA, NA, 1.5, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, 2.5, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA), ...24 = c(NA, NA, NA, NA, NA, 6.5, NA, NA, 
    NA, NA, NA, NA, 1.5, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, 2.5, NA, NA, NA, NA, NA, NA, NA, NA, NA), ...25 = c(NA, 
    NA, NA, NA, NA, 12, NA, NA, NA, NA, NA, NA, 12, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, 12, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA), ...26 = c(NA, NA, NA, NA, NA, "bordered bowtie blocks", 
    NA, NA, NA, NA, NA, NA, "bordered bowtie blocks", NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, "bordered bowtie blocks", 
    NA, NA, NA, NA, NA, NA, NA, NA, NA), ...27 = c(NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA, NA, NA, NA, NA
    ), ...28 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1.5, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA), ...29 = c(NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, 1.5, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
    ...30 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 12, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), ...31 = c(NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, "bordered bowtie blocks", NA, NA, NA, NA, NA, NA, 
    NA, NA, NA)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-33L))

Hi, maybe start with the janitor package.

https://cran.r-project.org/web/packages/janitor/vignettes/janitor.html

Otherwise, you would want to better understand the dataset. There is a colour column, followed by quantity, width, length, block number and block name. Those last five columns repeat across the table, but the colour column doesn't. How do they relate?

So the first column is a list of colored fabrics. The other column names repeat because the same color is used in different blocks and needs to be cut to different dimensions. Would it be beneficial to make this a long table with multiple rows of the same color, but then use the block number/name columns to differentiate?

This topic was automatically closed 42 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.