Pie chart pie slices not matching legend percentages

library(ggplot2)
library(gridExtra)
library(officer)

Read in data from CSV file

data1 <- read.csv("data11.csv")

Create a new Word document

doc <- read_docx()

Get unique primary breeds

data2 <- unique(data1$Primary.Breed)

Define color palette for the pie chart

colors <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#999999")

Define vector of dates

dates <- as.Date(c("01/01/2010", "01/01/2011", "01/01/2012", "01/01/2013", "01/01/2014", "01/01/2015", "01/01/2016", "01/01/2017", "01/01/2018", "01/01/2019", "01/01/2020", "01/01/2021", "01/01/2022", "01/01/2023"), format = "%m/%d/%Y")

Define age categories and corresponding labels

age_categories <- c("Less than three Months", "Between 3 months and 6 months", "Between 6 months and 12 months", "Greater than 12 months")
AgeCategorySex <- c("Less than three Months M", "Less than three Months F", "Between 3 months and 6 months M", "Between 3 months and 6 months F", "Between 6 months and 12 months M", "Between 6 months and 12 months F", "Greater than 12 months M", "Greater than 12 months F")

Convert DateOfBirth column to Date format

data1$DateOfBirth <- as.Date(data1$DateOfBirth, format = "%m/%d/%Y")

Loop through each breed

for (breed in data2) {
breed_data <- subset(data1, Primary.Breed == breed)

# Initialize vector to store counts for each age category sex
age_counts <- rep(0, length(AgeCategorySex))

# Add AgeCategory column and initialize it
breed_data$AgeCategory <- NA

# Calculate difference in days between each birth date and each date in the vector
for (i in 1:length(dates)) {
  date2 <- dates[i]
  
  # Reset age counts for each date
  age_counts <- rep(0, length(AgeCategorySex))
  
  # Calculate age category for each record
  for (j in 1:nrow(breed_data)) {
    diff <- as.numeric(difftime(date2, breed_data$DateOfBirth[j], units = "days"))
     if (!is.na(diff) && diff>0)
    if (!is.na(diff) && diff < 90 && breed_data$Sex[j] == "M") {
      breed_data$AgeCategory[j] <- "Less than three Months M"
      age_counts[1] <- age_counts[1] + 1
    } else if (!is.na(diff) && diff < 90 && breed_data$Sex[j] == "F") {
      breed_data$AgeCategory[j] <- "Less than three Months F"
      age_counts[2] <- age_counts[2] + 1
    } else if (!is.na(diff) && diff >= 90 && diff < 180 && breed_data$Sex[j] == "M") {
      breed_data$AgeCategory[j] <- "Between 3 months and 6 months M"
      age_counts[3] <- age_counts[3] + 1
    } else if (!is.na(diff) && diff >= 90 && diff < 180 && breed_data$Sex[j] == "F") {
      breed_data$AgeCategory[j] <- "Between 3 months and 6 months F"
      age_counts[4] <- age_counts[4] + 1
    } else if (!is.na(diff) && diff >= 180 && diff < 365 && breed_data$Sex[j] == "M") {
      breed_data$AgeCategory[j] <- "Between 6 months and 12 months M"
      age_counts[5] <- age_counts[5] + 1
    } else if (!is.na(diff) && diff >= 180 && diff < 365 && breed_data$Sex[j] == "F") {
      breed_data$AgeCategory[j] <- "Between 6 months and 12 months F"
      age_counts[6] <- age_counts[6] + 1
    } else if (!is.na(diff) && diff >= 365 && breed_data$Sex[j] == "M") {
      breed_data$AgeCategory[j] <- "Greater than 12 months M"
      age_counts[7] <- age_counts[7] + 1
    } else if (!is.na(diff) && diff >= 365 && breed_data$Sex[j] == "F") {
      breed_data$AgeCategory[j] <- "Greater than 12 months F"
      age_counts[8] <- age_counts[8] + 1
    }
  }
  
  # Calculate total count
  total_count <- sum(age_counts)
  
  # Calculate percentages
  percentages <- paste0(round((age_counts / total_count) * 100, 2), "%")
  
  # Create the pie chart using ggplot2
  chart_title <- paste("Pie chart for", breed, "on", as.character(dates[i]), "(Total:", total_count, ")")
  pie_data <- data.frame(AgeCategorySex, Count = age_counts)
  chart <- ggplot(pie_data, aes(x = "", y = Count, fill = AgeCategorySex)) +
    geom_bar(stat = "identity") +
    coord_polar("y", start = 0) +
    labs(title = chart_title) +
    theme_void()
  
  # Add custom legend with percentages
  custom_legend <- paste(pie_data$AgeCategorySex, percentages, sep = " - ")
  chart <- chart + guides(fill = guide_legend(title = "Age Category (Percentage)")) +
    scale_fill_discrete(labels = custom_legend)
  
  
  
  
  # Save the plot as a PNG file with increased width
  ggsave(filename = paste0(breed, "_plot", date2, ".png"), plot = chart, width = 10, height = 4)
  
  # Add the plot image to the Word document
  doc <- body_add_img(doc, src = paste0(breed, "_plot", date2, ".png"), width = 7, height = 4)
  
  # Print plot for current month and breed
  print(chart)
}

}

Save the Word document

print(doc, target = "Plot.docx")
Picture attached below. Pie chart slices and legend percentages not matching.

Thanks for providing code. Could you kindly take further steps to make it easier for other forum users to help you? Share some representative data that will enable your code to run and show the problematic behaviour.

How do I share data for a reprex?

You might use tools such as the library datapasta, or the base function dput() to share a portion of data in code form, i.e. that can be copied from forum and pasted to R session.

Reprex Guide

Advice.
If you have code that process a files is your issue about reading in files ?
If your issue does not relate to file reading , i.e . You have no problem loading your raw data, and if your problem is not about manipulating/processing it, but only a plotting aspect; then you should modify your example to exclude all file loading code and pre graphing manipulation and substitute that code with example data that you prepared following the attached guide.

structure(list(Village = c("Abeke", "Abeke", "Abeke", "Abeke", 
"Abeke", "Abeke", "Abeke", "Abeke", "Abeke", "Abeke"), AnimalId = c("Abeke/GA00001/2017", 
"Abeke/GA0000116/2009", "Abeke/GA0000116/2016", "Abeke/GA0000121/2013", 
"Abeke/GA0000121/2021", "Abeke/GA0000122/2014", "Abeke/GA0000122/2022", 
"Abeke/GA00002/2017", "Abeke/GA0000216/2009", "Abeke/GA0000216/2016"
), Damidentifier = c("", "", "Abeke/GA00520/2015", "", "", "", 
"Abeke/GA0007021/2020", "", "", "Abeke/GA00520/2015"), Sireidentifier = c("", 
"", "Abeke/GA00141/2015", "", "Abeke/GA0022720/2020", "", "", 
"", "", "Abeke/GA00141/2015"), DateOfBirth = c("04/11/2017", 
"03/25/2009", "12/01/2016", "05/09/2013", "01/15/2021", "05/02/2014", 
"01/08/2022", "05/05/2017", "03/25/2009", "12/01/2016"), MonthOfBirth = c(4L, 
3L, 12L, 5L, 1L, 5L, 1L, 5L, 3L, 12L), YearOfBirth = c(2017L, 
2009L, 2016L, 2013L, 2021L, 2014L, 2022L, 2017L, 2009L, 2016L
), Sex = c("F", "F", "F", "M", "M", "F", "F", "F", "F", "F"), 
    TypeOfBirth = c("Unknown", "Single", "Single", "Twin", "Twin", 
    "Single", "Single", "Unknown", "Twin", "Twin"), CoatColor = c("", 
    "PIED BLACK AND WHITE", "PIED BLACK AND WHITE", "PIED BROWN AND WHITE", 
    "PIED BROWN AND WHITE", "PIED BROWN AND WHITE", "PIED BROWN AND WHITE", 
    "", "PIED BROWN AND WHITE", "PIED BROWN AND WHITE"), Primary.Breed = c("Gumer", 
    "Gumer", "Gumer", "Gumer", "Gumer", "Gumer", "Gumer", "Gumer", 
    "Gumer", "Gumer"), PriGeneticComponent = c(100L, 100L, 100L, 
    100L, 100L, 100L, 100L, 100L, 100L, 100L), SecBreed = c("", 
    "", "", "", "", "", "", "", "", ""), SecGeneticComponent = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), DamPostpartumWeight = c(NA, 
    NA, NA, 25L, 25L, 24L, 24L, NA, NA, NA), DamParityAtDOB = c(NA, 
    NA, NA, 3L, 3L, 3L, 3L, NA, NA, NA), Owner = c("", "", "Abeke/Sadik Kedir/6", 
    "", "Abeke/Mekiya Sherif/56", "", "Abeke/Mohammed Kedir/99", 
    "", "", "Abeke/Nursefa Ahmed/7"), X.Weight..Birth.weight. = c(NA, 
    2.2, 2.2, 2.2, 2.2, 2.4, 2.4, NA, 2.1, 2.1), X.DateWeighed..Birth.weight. = c(NA, 
    39897L, 42705L, 41403L, 44211L, 41761L, 44569L, NA, 39897L, 
    42705L), X.Weight..3.months.weight. = c(NA, NA, 13, NA, 12, 
    NA, NA, NA, NA, 12.5), X.DateWeighed..3.months.weight. = c(NA, 
    NA, 42797L, NA, 44303L, NA, NA, NA, NA, 42797L), X.Weight..6.months.weight. = c(NA, 
    NA, 18L, NA, 18L, NA, NA, NA, NA, 16L), X.DateWeighed..6.months.weight. = c(NA, 
    NA, 42830L, NA, 44395L, NA, NA, NA, NA, 42830L), X.Weight..9.months.weight. = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_), X.DateWeighed..9.months.weight. = c("", 
    "", "", "", "", "", "", "", "", ""), X.Weight..Yearling.weight. = c(NA, 
    NA, 21, NA, NA, NA, NA, NA, NA, 19.5), X.DateWeighed..Yearling.weight. = c("", 
    "", "2017-02-11", "", "", "", "", "", "", "2017-02-11"), 
    X.Weight..Sale.weight. = c(NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_), X.DateWeighed..Sale.weight. = c("", "", "", "", 
    "", "", "", "", "", ""), X.Weight..Selection.weight. = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_), X.DateWeighed..Selection.weight. = c("", 
    "", "", "", "", "", "", "", "", ""), DisposalPrice = c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_), DisposalDate = c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_), DisposalFate = c("", 
    "", "", "", "", "", "", "", "", "")), row.names = c(NA, 10L
), class = "data.frame")

I can upload the whole file if anybody wants

Not a solution but a warning: You should be extremely careful when using the labels= feature, as this will completely overwrite the existing labels with the delivered values! If the new ones aren't in the exact order as the old it will mess up your result!
ggplot for example orders the values alphabetically, in case of stacking (and pie chart) the order seems to be reversed, so the ones starting with "a" are on top, but in the legend at the bottom, etc. Also you have 2 categories with 0%, that aren't in the plot (so in theory no colour is used) but appear in the legend.

See this example:
(Actually I had to reoder the table to make the point)

pie_data = mpg %>% 
  group_by(class) %>% 
  summarise(count = n()) %>% 
  ungroup() %>% 
  mutate(percentage = count/sum(count) * 100) %>% 
  arrange(percentage)

chart = ggplot(pie_data, aes(x = "", y = percentage, 
                     fill = class)) +
  geom_bar(stat = "identity") +
  coord_polar("y", start = 0) +
  #labs(title = chart_title) +
  theme_void()

custom_legend <- paste0(pie_data$class, "-", round(pie_data$percentage, 1), "%")

chart <- chart + guides(fill = guide_legend(title = "Age Category (Percentage)")) +
  scale_fill_discrete(labels = custom_legend)
chart

For that reason I usually try to avoid "labels=". The safest way is to add it to the dataframe directly.

pie_data = mpg %>% 
  group_by(class) %>% 
  summarise(count = n()) %>% 
  ungroup() %>% 
  mutate(percentage = count/sum(count) * 100,
         pie_label = paste0(class, ": ", round(percentage, 1), "%")) %>%   #here
  arrange(percentage)

ggplot(pie_data, aes(x = "", y = percentage, 
                     fill = pie_label)) +  # and here
  geom_bar(stat = "identity") +
  coord_polar("y", start = 0) +
  #labs(title = chart_title) +
  theme_void()

This topic was automatically closed 42 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.