-1

I am trying to plot a grouped bar chart with ggplot2 using the following code and data. However, every time I draw the plot the groups appear in the order LastYear, OneYear, ThreeYear, TwoYear, when they should be (according to the row order in the data frame `xy`) LastYear, OneYear, TwoYear, ThreeYear.

Where am I going wrong in the plot? The only explanation I have is that ggplot is ordering the bar groups alphabetically.

# Dodged bar chart of Gain for each Year, with every bar labelled by its
# Variable name. `group = Gain` puts each distinct Gain value in its own
# dodge group. Year is passed as-is (a character column in `xy`), so this
# is the plot whose x-axis order the question is about.
ggplot(
  data = xy,
  mapping = aes(x = Year, y = Gain, label = Variable, group = Gain)
) +
  # geom_col() draws bar heights straight from y, like stat = "identity".
  geom_col(position = "dodge", fill = "#9C27B0", colour = "black") +
  # Same dodge width as the bars so each label sits over its own bar.
  geom_text(
    position = position_dodge(width = 0.9),
    hjust = -0.05,
    angle = 90
  ) +
  ggtitle("TITLE HERE") +
  guides(fill = FALSE)

Data:

# Example data for the plot: a 44-row object of class grouped_df / tbl_df
# with three columns -- Variable (character), Gain (numeric) and Year
# (character). Looks like dput() output of a tibble grouped by Year.
# The Year column is stored in row order LastYear, OneYear, TwoYear,
# ThreeYear (11 rows each), while the grouping `labels` attribute lists the
# groups as LastYear, OneYear, ThreeYear, TwoYear.
# NOTE(review): the vars/indices/labels attributes appear to be an older
# dplyr grouping layout -- confirm it loads cleanly with the installed dplyr.
xy <- structure(list(Variable = c("SALES.WC", "TL.EQ", "EQ.Turnover", 
    "CA.TA", "CA.CL", "CL.FinExp", "CF.NCL", "DailySALES.EBIT", "logTA", 
    "TL.TA", "EBIT.FinExp", "SALES.WC2", "DailySALES.EBIT2", "TL.EQ2", 
    "CA.CL2", "CF.NCL2", "CA.TA2", "CL.FinExp2", "EQ.Turnover2", 
    "EBIT.FinExp2", "TL.TA2", "logTA2", "TL.EQ3", "DailySALES.EBIT3", 
    "CF.NCL3", "CA.CL3", "SALES.WC3", "EQ.Turnover3", "CL.FinExp3", 
    "CA.TA3", "EBIT.FinExp3", "TL.TA3", "logTA3", "CF.NCL4", "DailySALES.EBIT4", 
    "EBIT.FinExp4", "EQ.Turnover4", "SALES.WC4", "CA.CL4", "CA.TA4", 
    "TL.EQ4", "CL.FinExp4", "TL.TA4", "logTA4"), Gain = c(0.0139475947749436, 
    0.0186058318290004, 0.0223217808071512, 0.0244533394205631, 0.0257086851408409, 
    0.0410344050892873, 0.0530577543373713, 0.119669956742959, 0.153321518346671, 
    0.169880049476648, 0.357999084034564, 0.0210381458789509, 0.0279003966700287, 
    0.0321333141168294, 0.0337989653595418, 0.0450490096266459, 0.0491798397528832, 
    0.0661536580122029, 0.0734706661210229, 0.135767436486281, 0.167947601913985, 
    0.347560966061628, 0.0188783661019333, 0.0197853002129162, 0.0272749207536935, 
    0.0347284667673124, 0.0430992866122089, 0.0435834750808189, 0.0631609275004249, 
    0.0641364686807323, 0.0787809479913172, 0.212397184938682, 0.39417465535996, 
    0.0249669149256489, 0.0284788358072123, 0.0416885482543631, 0.0481161711992678, 
    0.0484199735868597, 0.0494398516158408, 0.0771009818151621, 0.0835709725586987, 
    0.0922406378720892, 0.120398717131196, 0.385578395233662), Year = c("LastYear", 
    "LastYear", "LastYear", "LastYear", "LastYear", "LastYear", "LastYear", 
    "LastYear", "LastYear", "LastYear", "LastYear", "OneYear", "OneYear", 
    "OneYear", "OneYear", "OneYear", "OneYear", "OneYear", "OneYear", 
    "OneYear", "OneYear", "OneYear", "TwoYear", "TwoYear", "TwoYear", 
    "TwoYear", "TwoYear", "TwoYear", "TwoYear", "TwoYear", "TwoYear", 
    "TwoYear", "TwoYear", "ThreeYear", "ThreeYear", "ThreeYear", 
    "ThreeYear", "ThreeYear", "ThreeYear", "ThreeYear", "ThreeYear", 
    "ThreeYear", "ThreeYear", "ThreeYear")), class = c("grouped_df", 
    "tbl_df", "tbl", "data.frame"), row.names = c(NA, -44L), vars = "Year", drop = TRUE, indices = list(
        0:10, 11:21, 33:43, 22:32), group_sizes = c(11L, 11L, 11L, 
    11L), biggest_group_size = 11L, .Names = c("Variable", "Gain", 
    "Year"), labels = structure(list(Year = c("LastYear", "OneYear", 
    "ThreeYear", "TwoYear")), class = "data.frame", row.names = c(NA, 
    -4L), vars = "Year", drop = TRUE, indices = list(c(0L, 1L, 5L, 
    6L, 8L, 15L, 24L, 33L, 36L, 38L, 41L), c(4L, 10L, 12L, 13L, 19L, 
    22L, 27L, 28L, 35L, 37L, 40L), c(7L, 11L, 16L, 20L, 21L, 23L, 
    29L, 31L, 32L, 34L, 42L), c(2L, 3L, 9L, 14L, 17L, 18L, 25L, 26L, 
    30L, 39L, 43L)), group_sizes = c(11L, 11L, 11L, 11L), biggest_group_size = 11L, .Names = "Year"))
user113156
  • 6,761
  • 5
  • 35
  • 81
  • 2
    You could just add `+ scale_x_discrete(limits=unique(xy$Year))` – MrFlick Jun 27 '18 at 18:40
  • Regarding the duplicate: I understand this is probably not a completely unique question, but I was wondering why `ggplot` was ordering the bars the way it was when they are ordered differently in the data frame. – user113156 Jun 27 '18 at 18:42
  • 2
    Character values are converted to factors for plotting. The default order of the levels for a factor is to sort the unique values alphabetically. The order of the data itself doesn't matter. – MrFlick Jun 27 '18 at 18:44

1 Answer

1

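By default, ggplot2 orders the values of a character column alphabetically on a discrete axis (the same order `factor()` gives its levels), regardless of the row order in the data frame. I'd turn `Year` into a factor with the level ordering you want: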

# Year is a character column, so ggplot2 would sort its values alphabetically
# on the x axis. Converting it to a factor with explicit levels pins the
# order to LastYear, OneYear, TwoYear, ThreeYear.
xy %>%
  # `xy` is grouped by Year; drop the grouping before rewriting that column.
  ungroup() %>%
  mutate(
    Year = factor(
      Year,
      levels = c("LastYear", "OneYear", "TwoYear", "ThreeYear")
    )
  ) %>%
  ggplot(aes(x = Year, y = Gain, group = Gain, label = Variable)) +
  # geom_col() is the direct form of geom_bar(stat = "identity").
  geom_col(color = "black", fill = "#9C27B0", position = "dodge") +
  # Same dodge width as the bars so each label sits over its own bar.
  geom_text(
    angle = 90,
    position = position_dodge(width = 0.9),
    hjust = -0.05
  ) +
  ggtitle("TITLE HERE") +
  # `fill = FALSE` is deprecated in current ggplot2; "none" is the
  # supported way to suppress a guide.
  guides(fill = "none")
Melissa Key
  • 4,476
  • 12
  • 21