Tidyverse function problem

Hey,

For my studies I had to reshape some datas. I work on the data of Democracy - Our World in Data in the category "polity". It gives us a .cvs in which we had one ligne for each year for each country

For example :
|1|Afghanistan|AFG|1800|-6|
|2|Afghanistan|AFG|1801|-6|
|3|Afghanistan|AFG|1802|-6|

It had been 3 hours that I tried to obtain something of this shape with the function pivot_wider :
................... 1800. 1801. 1802.
Afghanistan -6 -6 -6

I didn't find how to get this result.

Could you help me please ?

Thnaks !

Here is one solution.

DF <- data.frame(Country = c("Afghanistan","Afghanistan","Afghanistan",
                             "Bolivia","Bolivia","Bolivia"),
                 Abrev = c("AFG","AFG","AFG","BOL","BOL","BOL"),
                 Year = c(1801,1802,1803,1801,1802,1803),
                 Value = c(-6,-6,-6,1,2,3))
DF
#>       Country Abrev Year Value
#> 1 Afghanistan   AFG 1801    -6
#> 2 Afghanistan   AFG 1802    -6
#> 3 Afghanistan   AFG 1803    -6
#> 4     Bolivia   BOL 1801     1
#> 5     Bolivia   BOL 1802     2
#> 6     Bolivia   BOL 1803     3
library(tidyr)
DFwide = DF |> pivot_wider(names_from = "Year", values_from = "Value")
DFwide
#> # A tibble: 2 × 5
#>   Country     Abrev `1801` `1802` `1803`
#>   <chr>       <chr>  <dbl>  <dbl>  <dbl>
#> 1 Afghanistan AFG       -6     -6     -6
#> 2 Bolivia     BOL        1      2      3

Created on 2023-09-26 with reprex v2.0.2

I find something with the command

outcome <- pivot_wider(finaldata, names_from = Year, values_from = democracy_polity)

but when I export the file, I lose some data. I don't have anything front 2013 to 2020 as you can see on the picture attached.

How can I fix that ?

thanks in advance,

I downloaded the data I believe you are using. After pivoting, I found that there is a column for every year but the order is strange.

library(tidyr)
DF <- read.csv("~/R/Play/democracy-index-polity.csv")
DFwide = DF |> pivot_wider(names_from = "Year", values_from = "democracy_polity")
ncol(DFwide)  #How many columns
#> [1] 247
DFwide[1:3, 238:247]  #show last 10 columns
#> # A tibble: 3 × 10
#>   `1792` `1793` `1794` `1795` `1796` `1797` `1798` `1799` `2019` `2020`
#>    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
#> 1     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> 2     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> 3     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
colnames(DFwide) #Show all column names
#>   [1] "Entity" "Code"   "1800"   "1801"   "1802"   "1803"   "1804"   "1805"  
#>   [9] "1806"   "1807"   "1808"   "1809"   "1810"   "1811"   "1812"   "1813"  
#>  [17] "1814"   "1815"   "1816"   "1817"   "1818"   "1819"   "1820"   "1821"  
#>  [25] "1822"   "1823"   "1824"   "1825"   "1826"   "1827"   "1828"   "1829"  
#>  [33] "1830"   "1831"   "1832"   "1833"   "1834"   "1835"   "1836"   "1837"  
#>  [41] "1838"   "1839"   "1840"   "1841"   "1842"   "1843"   "1844"   "1845"  
#>  [49] "1846"   "1847"   "1848"   "1849"   "1850"   "1851"   "1852"   "1853"  
#>  [57] "1854"   "1855"   "1856"   "1857"   "1858"   "1859"   "1860"   "1861"  
#>  [65] "1862"   "1863"   "1864"   "1865"   "1866"   "1867"   "1868"   "1869"  
#>  [73] "1870"   "1871"   "1872"   "1873"   "1874"   "1875"   "1876"   "1877"  
#>  [81] "1878"   "1879"   "1880"   "1881"   "1882"   "1883"   "1884"   "1885"  
#>  [89] "1886"   "1887"   "1888"   "1889"   "1890"   "1891"   "1892"   "1893"  
#>  [97] "1894"   "1895"   "1896"   "1897"   "1898"   "1899"   "1900"   "1901"  
#> [105] "1902"   "1903"   "1904"   "1905"   "1906"   "1907"   "1908"   "1909"  
#> [113] "1910"   "1911"   "1912"   "1913"   "1914"   "1915"   "1916"   "1917"  
#> [121] "1918"   "1919"   "1920"   "1921"   "1922"   "1923"   "1924"   "1925"  
#> [129] "1926"   "1927"   "1928"   "1929"   "1930"   "1931"   "1932"   "1933"  
#> [137] "1934"   "1935"   "1936"   "1937"   "1938"   "1939"   "1940"   "1941"  
#> [145] "1942"   "1943"   "1944"   "1945"   "1946"   "1947"   "1948"   "1949"  
#> [153] "1950"   "1951"   "1952"   "1953"   "1954"   "1955"   "1956"   "1957"  
#> [161] "1958"   "1959"   "1960"   "1961"   "1962"   "1963"   "1964"   "1965"  
#> [169] "1966"   "1967"   "1968"   "1969"   "1970"   "1971"   "1972"   "1973"  
#> [177] "1974"   "1975"   "1976"   "1977"   "1978"   "1989"   "1990"   "1991"  
#> [185] "1992"   "1993"   "1994"   "1995"   "1996"   "1997"   "1998"   "1999"  
#> [193] "2000"   "2014"   "2015"   "2016"   "2017"   "2018"   "1979"   "1980"  
#> [201] "1981"   "1982"   "1983"   "1984"   "1985"   "1986"   "1987"   "1988"  
#> [209] "2001"   "2002"   "2003"   "2004"   "2005"   "2006"   "2007"   "2008"  
#> [217] "2009"   "2010"   "2011"   "2012"   "2013"   "1776"   "1777"   "1778"  
#> [225] "1779"   "1780"   "1781"   "1782"   "1783"   "1784"   "1785"   "1786"  
#> [233] "1787"   "1788"   "1789"   "1790"   "1791"   "1792"   "1793"   "1794"  
#> [241] "1795"   "1796"   "1797"   "1798"   "1799"   "2019"   "2020"
sort(colnames(DFwide)) #sort the column names
#>   [1] "1776"   "1777"   "1778"   "1779"   "1780"   "1781"   "1782"   "1783"  
#>   [9] "1784"   "1785"   "1786"   "1787"   "1788"   "1789"   "1790"   "1791"  
#>  [17] "1792"   "1793"   "1794"   "1795"   "1796"   "1797"   "1798"   "1799"  
#>  [25] "1800"   "1801"   "1802"   "1803"   "1804"   "1805"   "1806"   "1807"  
#>  [33] "1808"   "1809"   "1810"   "1811"   "1812"   "1813"   "1814"   "1815"  
#>  [41] "1816"   "1817"   "1818"   "1819"   "1820"   "1821"   "1822"   "1823"  
#>  [49] "1824"   "1825"   "1826"   "1827"   "1828"   "1829"   "1830"   "1831"  
#>  [57] "1832"   "1833"   "1834"   "1835"   "1836"   "1837"   "1838"   "1839"  
#>  [65] "1840"   "1841"   "1842"   "1843"   "1844"   "1845"   "1846"   "1847"  
#>  [73] "1848"   "1849"   "1850"   "1851"   "1852"   "1853"   "1854"   "1855"  
#>  [81] "1856"   "1857"   "1858"   "1859"   "1860"   "1861"   "1862"   "1863"  
#>  [89] "1864"   "1865"   "1866"   "1867"   "1868"   "1869"   "1870"   "1871"  
#>  [97] "1872"   "1873"   "1874"   "1875"   "1876"   "1877"   "1878"   "1879"  
#> [105] "1880"   "1881"   "1882"   "1883"   "1884"   "1885"   "1886"   "1887"  
#> [113] "1888"   "1889"   "1890"   "1891"   "1892"   "1893"   "1894"   "1895"  
#> [121] "1896"   "1897"   "1898"   "1899"   "1900"   "1901"   "1902"   "1903"  
#> [129] "1904"   "1905"   "1906"   "1907"   "1908"   "1909"   "1910"   "1911"  
#> [137] "1912"   "1913"   "1914"   "1915"   "1916"   "1917"   "1918"   "1919"  
#> [145] "1920"   "1921"   "1922"   "1923"   "1924"   "1925"   "1926"   "1927"  
#> [153] "1928"   "1929"   "1930"   "1931"   "1932"   "1933"   "1934"   "1935"  
#> [161] "1936"   "1937"   "1938"   "1939"   "1940"   "1941"   "1942"   "1943"  
#> [169] "1944"   "1945"   "1946"   "1947"   "1948"   "1949"   "1950"   "1951"  
#> [177] "1952"   "1953"   "1954"   "1955"   "1956"   "1957"   "1958"   "1959"  
#> [185] "1960"   "1961"   "1962"   "1963"   "1964"   "1965"   "1966"   "1967"  
#> [193] "1968"   "1969"   "1970"   "1971"   "1972"   "1973"   "1974"   "1975"  
#> [201] "1976"   "1977"   "1978"   "1979"   "1980"   "1981"   "1982"   "1983"  
#> [209] "1984"   "1985"   "1986"   "1987"   "1988"   "1989"   "1990"   "1991"  
#> [217] "1992"   "1993"   "1994"   "1995"   "1996"   "1997"   "1998"   "1999"  
#> [225] "2000"   "2001"   "2002"   "2003"   "2004"   "2005"   "2006"   "2007"  
#> [233] "2008"   "2009"   "2010"   "2011"   "2012"   "2013"   "2014"   "2015"  
#> [241] "2016"   "2017"   "2018"   "2019"   "2020"   "Code"   "Entity"

Created on 2023-09-26 with reprex v2.0.2
Try using the colnames() function on your data set to check if you are seeing the same thing.

On my computer, I have only 223 columns : it haven't any column between 2012 and 2020. And all that data are NA

> library(tidyr)
> DF <- democracy.index.polity
> DFwide = DF |> pivot_wider(names_from = "Year", values_from = "democracy_polity")
> View(DFwide)
> View(DFwide)
> colnames(DFwide)
  [1] "Entity" "Code"   "1800"   "1801"   "1802"   "1803"   "1804"   "1805"   "1806"   "1807"   "1808"   "1809"   "1810"   "1811"  
 [15] "1812"   "1813"   "1814"   "1815"   "1816"   "1817"   "1818"   "1819"   "1820"   "1821"   "1822"   "1823"   "1824"   "1825"  
 [29] "1826"   "1827"   "1828"   "1829"   "1830"   "1831"   "1832"   "1833"   "1834"   "1835"   "1836"   "1837"   "1838"   "1839"  
 [43] "1840"   "1841"   "1842"   "1843"   "1844"   "1845"   "1846"   "1847"   "1848"   "1849"   "1850"   "1851"   "1852"   "1853"  
 [57] "1854"   "1855"   "1856"   "1857"   "1858"   "1859"   "1860"   "1861"   "1862"   "1863"   "1864"   "1865"   "1866"   "1867"  
 [71] "1868"   "1869"   "1870"   "1871"   "1872"   "1873"   "1874"   "1875"   "1876"   "1877"   "1878"   "1879"   "1880"   "1881"  
 [85] "1882"   "1883"   "1884"   "1885"   "1886"   "1887"   "1888"   "1889"   "1890"   "1891"   "1892"   "1893"   "1894"   "1895"  
 [99] "1896"   "1897"   "1898"   "1899"   "1900"   "1901"   "1902"   "1903"   "1904"   "1905"   "1906"   "1907"   "1908"   "1909"  
[113] "1910"   "1911"   "1912"   "1913"   "1914"   "1915"   "1916"   "1917"   "1918"   "1919"   "1920"   "1921"   "1922"   "1923"  
[127] "1924"   "1925"   "1926"   "1927"   "1928"   "1929"   "1930"   "1931"   "1932"   "1933"   "1934"   "1935"   "1936"   "1937"  
[141] "1938"   "1939"   "1940"   "1941"   "1942"   "1943"   "1944"   "1945"   "1946"   "1947"   "1948"   "1949"   "1950"   "1951"  
[155] "1952"   "1953"   "1954"   "1955"   "1956"   "1957"   "1958"   "1959"   "1960"   "1961"   "1962"   "1963"   "1964"   "1965"  
[169] "1966"   "1967"   "1968"   "1969"   "1970"   "1971"   "1972"   "1973"   "1974"   "1975"   "1976"   "1977"   "1978"   "1989"  
[183] "1990"   "1991"   "1992"   "1993"   "1994"   "1995"   "1996"   "1997"   "1998"   "1999"   "2000"   "2014"   "2015"   "2016"  
[197] "2017"   "2018"   "1979"   "1980"   "1981"   "1982"   "1983"   "1984"   "1985"   "1986"   "1987"   "1988"   "2001"   "2002"  
[211] "2003"   "2004"   "2005"   "2006"   "2007"   "2008"   "2009"   "2010"   "2011"   "2012"   "2013"   "2019"   "2020"  

and I have also that :

> DFwide[1:3, 238:247] 
Error in `DFwide[1:3, 238:247]`:
! Can't subset columns past the end.
ℹ Locations 238, 239, 240, …, 246, and 247 don't exist.
ℹ There are only 223 columns.
Run `rlang::last_trace()` to see where the error occurred.

I went to the web page you linked in your first post, set DATASET to Polity, clicked Download and chose Full Data (CSV). I got a file with 18768 rows of data. Is that what you start with? What data processing do you do after that? Please show all the code from loading in the data set to colnames(DFwide).

We have the same data pack. I try to start from the beginning. You fill find below the process.

After downloading the data pack, I load it on R

I load the library with

> library(tidyverse)

after that, I used this command to reshape data

outcome <- pivot_wider(`democracy.index.polity.(2)`, names_from = Year, values_from = democracy_polity)

then, I used

View(outcome)

Finally, I get this

> colnames(outcome)
  [1] "Entity" "Code"   "1800"   "1801"   "1802"   "1803"   "1804"   "1805"   "1806"   "1807"   "1808"   "1809"   "1810"   "1811"  
 [15] "1812"   "1813"   "1814"   "1815"   "1816"   "1817"   "1818"   "1819"   "1820"   "1821"   "1822"   "1823"   "1824"   "1825"  
 [29] "1826"   "1827"   "1828"   "1829"   "1830"   "1831"   "1832"   "1833"   "1834"   "1835"   "1836"   "1837"   "1838"   "1839"  
 [43] "1840"   "1841"   "1842"   "1843"   "1844"   "1845"   "1846"   "1847"   "1848"   "1849"   "1850"   "1851"   "1852"   "1853"  
 [57] "1854"   "1855"   "1856"   "1857"   "1858"   "1859"   "1860"   "1861"   "1862"   "1863"   "1864"   "1865"   "1866"   "1867"  
 [71] "1868"   "1869"   "1870"   "1871"   "1872"   "1873"   "1874"   "1875"   "1876"   "1877"   "1878"   "1879"   "1880"   "1881"  
 [85] "1882"   "1883"   "1884"   "1885"   "1886"   "1887"   "1888"   "1889"   "1890"   "1891"   "1892"   "1893"   "1894"   "1895"  
 [99] "1896"   "1897"   "1898"   "1899"   "1900"   "1901"   "1902"   "1903"   "1904"   "1905"   "1906"   "1907"   "1908"   "1909"  
[113] "1910"   "1911"   "1912"   "1913"   "1914"   "1915"   "1916"   "1917"   "1918"   "1919"   "1920"   "1921"   "1922"   "1923"  
[127] "1924"   "1925"   "1926"   "1927"   "1928"   "1929"   "1930"   "1931"   "1932"   "1933"   "1934"   "1935"   "1936"   "1937"  
[141] "1938"   "1939"   "1940"   "1941"   "1942"   "1943"   "1944"   "1945"   "1946"   "1947"   "1948"   "1949"   "1950"   "1951"  
[155] "1952"   "1953"   "1954"   "1955"   "1956"   "1957"   "1958"   "1959"   "1960"   "1961"   "1962"   "1963"   "1964"   "1965"  
[169] "1966"   "1967"   "1968"   "1969"   "1970"   "1971"   "1972"   "1973"   "1974"   "1975"   "1976"   "1977"   "1978"   "1989"  
[183] "1990"   "1991"   "1992"   "1993"   "1994"   "1995"   "1996"   "1997"   "1998"   "1999"   "2000"   "2014"   "2015"   "2016"  
[197] "2017"   "2018"   "1979"   "1980"   "1981"   "1982"   "1983"   "1984"   "1985"   "1986"   "1987"   "1988"   "2001"   "2002"  
[211] "2003"   "2004"   "2005"   "2006"   "2007"   "2008"   "2009"   "2010"   "2011"   "2012"   "2013"   "1776"   "1777"   "1778"  
[225] "1779"   "1780"   "1781"   "1782"   "1783"   "1784"   "1785"   "1786"   "1787"   "1788"   "1789"   "1790"   "1791"   "1792"  
[239] "1793"   "1794"   "1795"   "1796"   "1797"   "1798"   "1799"   "2019"   "2020"  

It seems weird... I don't understand why I have

"2013"   "1776"   "1777"   "1778"  
[225] "1779"   "1780"   "1781"   "1782"   "1783"   "1784"   "1785"   "1786"   "1787"   "1788"   "1789"   "1790"   "1791"   "1792"  
[239] "1793"   "1794"   "1795"   "1796"   "1797"   "1798"   "1799"   "2019"   "2020"

Thanks a lot for your help

You can get the years in order in the wide data frame if you sort the long data frame by year first.

library(tidyr)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
DF <- read.csv("~/R/Play/democracy-index-polity.csv")
DF <- DF |> arrange(Year)
DFwide = DF |> pivot_wider(names_from = "Year", values_from = "democracy_polity")
ncol(DFwide)  #How many columns
#> [1] 247
DFwide[1:3, 238:247]  #show last 10 columns
#> # A tibble: 3 × 10
#>   `2011` `2012` `2013` `2014` `2015` `2016` `2017` `2018` `2019` `2020`
#>    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
#> 1  10     10     10     10     10      8      8      8         7      5
#> 2  NA     NA     NA     -1     -1     -1     -1     -1        NA     NA
#> 3   2.20   2.02   2.31   2.48   2.60   2.52   2.69   2.56     NA     NA
colnames(DFwide) #Show all column names
#>   [1] "Entity" "Code"   "1776"   "1777"   "1778"   "1779"   "1780"   "1781"  
#>   [9] "1782"   "1783"   "1784"   "1785"   "1786"   "1787"   "1788"   "1789"  
#>  [17] "1790"   "1791"   "1792"   "1793"   "1794"   "1795"   "1796"   "1797"  
#>  [25] "1798"   "1799"   "1800"   "1801"   "1802"   "1803"   "1804"   "1805"  
#>  [33] "1806"   "1807"   "1808"   "1809"   "1810"   "1811"   "1812"   "1813"  
#>  [41] "1814"   "1815"   "1816"   "1817"   "1818"   "1819"   "1820"   "1821"  
#>  [49] "1822"   "1823"   "1824"   "1825"   "1826"   "1827"   "1828"   "1829"  
#>  [57] "1830"   "1831"   "1832"   "1833"   "1834"   "1835"   "1836"   "1837"  
#>  [65] "1838"   "1839"   "1840"   "1841"   "1842"   "1843"   "1844"   "1845"  
#>  [73] "1846"   "1847"   "1848"   "1849"   "1850"   "1851"   "1852"   "1853"  
#>  [81] "1854"   "1855"   "1856"   "1857"   "1858"   "1859"   "1860"   "1861"  
#>  [89] "1862"   "1863"   "1864"   "1865"   "1866"   "1867"   "1868"   "1869"  
#>  [97] "1870"   "1871"   "1872"   "1873"   "1874"   "1875"   "1876"   "1877"  
#> [105] "1878"   "1879"   "1880"   "1881"   "1882"   "1883"   "1884"   "1885"  
#> [113] "1886"   "1887"   "1888"   "1889"   "1890"   "1891"   "1892"   "1893"  
#> [121] "1894"   "1895"   "1896"   "1897"   "1898"   "1899"   "1900"   "1901"  
#> [129] "1902"   "1903"   "1904"   "1905"   "1906"   "1907"   "1908"   "1909"  
#> [137] "1910"   "1911"   "1912"   "1913"   "1914"   "1915"   "1916"   "1917"  
#> [145] "1918"   "1919"   "1920"   "1921"   "1922"   "1923"   "1924"   "1925"  
#> [153] "1926"   "1927"   "1928"   "1929"   "1930"   "1931"   "1932"   "1933"  
#> [161] "1934"   "1935"   "1936"   "1937"   "1938"   "1939"   "1940"   "1941"  
#> [169] "1942"   "1943"   "1944"   "1945"   "1946"   "1947"   "1948"   "1949"  
#> [177] "1950"   "1951"   "1952"   "1953"   "1954"   "1955"   "1956"   "1957"  
#> [185] "1958"   "1959"   "1960"   "1961"   "1962"   "1963"   "1964"   "1965"  
#> [193] "1966"   "1967"   "1968"   "1969"   "1970"   "1971"   "1972"   "1973"  
#> [201] "1974"   "1975"   "1976"   "1977"   "1978"   "1979"   "1980"   "1981"  
#> [209] "1982"   "1983"   "1984"   "1985"   "1986"   "1987"   "1988"   "1989"  
#> [217] "1990"   "1991"   "1992"   "1993"   "1994"   "1995"   "1996"   "1997"  
#> [225] "1998"   "1999"   "2000"   "2001"   "2002"   "2003"   "2004"   "2005"  
#> [233] "2006"   "2007"   "2008"   "2009"   "2010"   "2011"   "2012"   "2013"  
#> [241] "2014"   "2015"   "2016"   "2017"   "2018"   "2019"   "2020"

Created on 2023-09-26 with reprex v2.0.2

I works thank you !

But I still haven't data for 2019 and 2020 as below

The original data set, named DF in my code, only has one country with data in 2019 and 2020, as you see in your result.

DF |> filter(Year == 2020)
         Entity Code Year democracy_polity
1 United States  USA 2020                5

DF |> filter(Year == 2019)
         Entity Code Year democracy_polity
1 United States  USA 2019                7

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.