Unequal bar color intensity with different bar widths

As the bar width is increased for this plot of number of nests over time, more bars appear or overlap. Not sure what is going on here. I need some elucidation. Could this have been done with geom_col? I tried but could not get it to work.

Thanks,
Jeff

# Reprex setup: load packages, read the turtle activity report, parse the
# activity date, and keep only the rows whose activity code is "N".
library(tidyverse)
library("gridExtra")
#> 
#> Attaching package: 'gridExtra'
#> The following object is masked from 'package:dplyr':
#> 
#>     combine
library("reprex")

# Read the CSV straight from the shared link; activity_date arrives as a
# character column (see the column specification printed below).
turtle_activity_gtm_nest_date <-  read_csv("https://www.dropbox.com/s/nkqtfvtug46r4w6/turtle_activity_report_nest_date.csv?dl=1")
#> Rows: 765 Columns: 6
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (4): activity, ref_no, activity_date, species
#> dbl (2): latitude, longitude
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

#view(turtle_activity_gtm_nest_date)
# Add a Date column parsed from the month/day/year text in activity_date.
# Strings that do not match the format become NA.
turtle_activity_gtm_nest_date$activity_nest_date <- 
  as.Date(turtle_activity_gtm_nest_date$activity_date,format="%m/%d/%Y")

#Ignore first nest with odd-ball date for the moment. 

#view(turtle_activity_gtm_nest_date)

# Keep only rows with activity == "N" (presumably nests, going by the
# variable name -- confirm against the data dictionary).
turtle_activity_gtm_only_nest_date <- turtle_activity_gtm_nest_date |> filter(activity=="N")

# Count of rows per date, one dodged bar per species (geom_bar tallies rows).
# Bar width is 0.1 while the dodge width stays at 0.9.
plot1 <-
  ggplot(
    turtle_activity_gtm_only_nest_date,
    aes(x = activity_nest_date, fill = species)
  ) +
  geom_bar(position = position_dodge(width = 0.9), width = 0.1) +
  scale_y_continuous(breaks = 1:12) +
  scale_x_date(date_labels = "%b/%d", date_breaks = "8 day") +
  xlab("") +
  ylab("Number of Nests") +
  labs(caption = "bar width = 0.1") +
  theme(
    # legend.position=c(2022-07-22, 8),  # disabled placement attempt
    plot.caption = element_text(size = 15),
    axis.title = element_text(face = "bold", size = 12),
    axis.text.y = element_text(angle = 0, size = 10, face = "bold")
  )
  
# Same chart as the 0.1-width version, but with bar width 0.2 and x-axis
# breaks every 10 days; the dodge width is still 0.9.
plot2 <-
  ggplot(
    turtle_activity_gtm_only_nest_date,
    aes(x = activity_nest_date, fill = species)
  ) +
  geom_bar(position = position_dodge(width = 0.9), width = 0.2) +
  scale_y_continuous(breaks = 1:12) +
  scale_x_date(date_labels = "%b/%d", date_breaks = "10 day") +
  xlab("") +
  ylab("Number of Nests") +
  labs(caption = "bar width = 0.2") +
  theme(
    # legend.position=c(2022-07-22, 8),  # disabled placement attempt
    plot.caption = element_text(size = 15),
    axis.title = element_text(face = "bold", size = 12),
    axis.text.y = element_text(angle = 0, size = 8, face = "bold")
  )
        
# Same chart again with bar width 0.3 (dodge width 0.9, x-axis breaks every
# 10 days).
plot3 <-
  ggplot(
    turtle_activity_gtm_only_nest_date,
    aes(x = activity_nest_date, fill = species)
  ) +
  geom_bar(position = position_dodge(width = 0.9), width = 0.3) +
  scale_y_continuous(breaks = 1:12) +
  scale_x_date(date_labels = "%b/%d", date_breaks = "10 day") +
  xlab("") +
  ylab("Number of Nests") +
  labs(caption = "bar width = 0.3") +
  theme(
    # legend.position=c(2022-07-22, 8),  # disabled placement attempt
    plot.caption = element_text(size = 15),
    axis.title = element_text(face = "bold", size = 12),
    axis.text.y = element_text(angle = 0, size = 8, face = "bold")
  )
  
# Render the three plots. Each emits the same warning about one removed row;
# presumably that is the single row whose activity_date did not parse and is
# therefore NA in activity_nest_date -- confirm against the data.
plot1
#> Warning: Removed 1 rows containing non-finite values (`stat_count()`).

plot2
#> Warning: Removed 1 rows containing non-finite values (`stat_count()`).

plot3
#> Warning: Removed 1 rows containing non-finite values (`stat_count()`).

Created on 2023-01-05 with reprex v2.0.2

Hello @JeffF ,

Some bars look darker than others because bars overlap, one on top of another. This can be addressed by using the padding argument of the position_dodge2() function. Here is an example, made by modifying the code you provided:

>   ggplot() + 
+   geom_bar(data = turtle_activity_gtm_only_nest_date|>
+              mutate(
+                activity_date = lubridate::mdy(activity_date)
+              ), 
+            aes(x = activity_date, fill = species),
+            #width = 0.1,
+            position = position_dodge2(padding = 0.4)) +
+   scale_x_date(date_breaks = "8 day", 
+                date_labels = "%b/%d",
+                date_minor_breaks = "1 day") +
+   scale_y_continuous(breaks = 1:12) +
+   labs(x="",y="Number of Nests", caption="bar width = 0.1") +
+   theme(axis.text.y = element_text(face = "bold",  
+                                    size = 10, angle = 0),
+         #        legend.position=c(2022-07-22, 8),
+         axis.title = element_text(size = 12, face = "bold"),
+         plot.caption = element_text(size = 15))
Warning messages:
1: Problem while computing `activity_date = lubridate::mdy(activity_date)`.
ℹ  1 failed to parse. 
2: Removed 1 rows containing non-finite values (`stat_count()`). 
> 

Here, instead of letting the bars overlap, position_dodge2() handles the situation with its padding argument, documented as: "Padding between elements at the same position. Elements are shrunk by this proportion to allow space between them. Defaults to 0.1."

The same can be done with geom_col as well, but that would require computing grouped summaries (the number of nests per date and species) before plotting.

I would also try line charts or freqpoly

> turtle_activity_gtm_only_nest_date|>
+   mutate(
+     activity_date = lubridate::mdy(activity_date)
+   )|>
+   group_by(activity_date,species)|>
+   summarise(
+     number_nests = n()
+   )|>
+   ggplot(aes(activity_date,number_nests))+
+   geom_line(aes(colour = species))+
+   geom_point(aes(colour = species))+
+   scale_x_date(date_breaks = "8 day", date_labels = "%b/%d") +
+   scale_y_continuous(breaks = 1:12)
`summarise()` has grouped output by 'activity_date'. You can override using the `.groups` argument.
Warning messages:
1: Problem while computing `activity_date = lubridate::mdy(activity_date)`.
ℹ  1 failed to parse. 
2: Removed 1 row containing missing values (`geom_line()`). 
3: Removed 1 rows containing missing values (`geom_point()`). 
> 

hope this helps

1 Like

Now I get it. On days with only one species, the bars fill up the available space, whereas on days with more than one species the bars become narrower. Given your suggestions, I looked up more on dodge2. I found that if I use position = position_dodge2(preserve = "single"), all the bars are the same width no matter the number of species on a day. I've put my modified code below.

I like bars instead of lines.

I do have a follow up. Why lubridate::mdy(activity_date) ? I am still figuring out how to handle dates.

Thanks,
Jeff

# Reprex setup: load packages, read the turtle activity report, parse the
# activity date, drop rows whose date could not be parsed, and keep only the
# rows whose activity code is "N".
library(tidyverse)
library("reprex")

# Read the CSV straight from the shared link; activity_date arrives as a
# character column (see the column specification printed below).
turtle_activity_gtm_nest_date <- read_csv("https://www.dropbox.com/s/nkqtfvtug46r4w6/turtle_activity_report_nest_date.csv?dl=1")
#> Rows: 765 Columns: 6
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (4): activity, ref_no, activity_date, species
#> dbl (2): latitude, longitude
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# view(turtle_activity_gtm_nest_date)
# Add a Date column parsed from the month/day/year text in activity_date.
# Strings that do not match the format become NA.
turtle_activity_gtm_nest_date$activity_nest_date <-
  as.Date(turtle_activity_gtm_nest_date$activity_date, format = "%m/%d/%Y")

# Ignore first nest with odd-ball date for the moment and remove NA.
# Only activity_nest_date is inspected: calling drop_na() with no columns
# would also discard rows that merely lack a value in some other column
# (for example latitude or longitude), silently lowering the counts.
turtle_activity_gtm_nest_date <-
  drop_na(turtle_activity_gtm_nest_date, activity_nest_date)

# view(turtle_activity_gtm_nest_date)

# Keep only rows with activity == "N" (presumably nests, going by the
# variable name -- confirm against the data dictionary).
turtle_activity_gtm_only_nest_date <- turtle_activity_gtm_nest_date |>
  filter(activity == "N")


# Daily counts per species as dodged bars. preserve = "single" keeps every
# bar the same width regardless of how many species occur on a given day.
# activity_date is converted from text to Date with lubridate::mdy() first.
turtle_activity_gtm_only_nest_date |>
  mutate(activity_date = lubridate::mdy(activity_date)) |>
  ggplot(aes(x = activity_date, fill = species)) +
  # width is left at its default (a fixed width = 0.1 was tried and disabled)
  geom_bar(position = position_dodge2(preserve = "single")) +
  scale_y_continuous(breaks = 1:12) +
  scale_x_date(
    date_minor_breaks = "1 day",
    date_labels = "%b/%d",
    date_breaks = "8 day"
  ) +
  xlab("") +
  ylab("Number of Nests") +
  labs(caption = 'position = position_dodge2(preserve = "single")') +
  theme(
    # legend.position=c(2022-07-22, 8),  # disabled placement attempt
    plot.caption = element_text(size = 15),
    axis.title = element_text(face = "bold", size = 12),
    axis.text.y = element_text(angle = 0, size = 10, face = "bold")
  )

Created on 2023-01-06 with reprex v2.0.2

This is precisely what is happening.

I used {lubridate} because the activity_date column was of character type when I read the CSV, and I wanted to convert it to a date. The mdy() function parses text written in month/day/year order into a Date. Lubridate is an excellent package for handling date-time objects. Here is the link to the package documentation: https://lubridate.tidyverse.org/

Regards,
Ayush

Funny, when I noticed this response, I was in the midst of going through 19 Dates and times . I had figured I needed to dig into date handling after my struggles. I am now starting to get it.

Thanks,
Jeff

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.