1

I've been perusing DOT airline data and am trying to create a stacked bar graph of the year over year (YOY) change in each airline's passengers from a specific airport to all other stations.

I also want to order the x-axis by the total number of people (market.ppd) traveling from the specified airport to each station (e.g., this set's origin airport is PHL and its top destination is MCO, followed by Miami, LAS, etc.).

The x-axis stays ordered when the YOY data is solely positive or solely negative, but it defaults back to alphabetical order once I try to stack the bars with both. Some stations only experience a positive YOY change or only a negative YOY change, whereas the example in this post has positive and negative values for each category.

My hunch is that ggplot reverts the levels to alphabetical order once it finds that some of the stations don't have corresponding positive/negative values. Is there any way to retain the ordered levels once I append the negative values to the positive ones for each station?

Plot with only positive values: [image]

Plot with both positive and negative values: [image]

library(ggplot2)

# Year-over-year (YOY) passenger change from one origin airport, one row per
# destination/carrier pair. Rows are listed by descending market.ppd, so the
# order in which destinations first appear is the desired x-axis order.
#   destination     - destination station
#   carrier         - airline code
#   market.ppd      - total passengers travelling to that destination
#                     (repeated on every row of the same destination)
#   YOY             - year-over-year change for that carrier on that route
#   label.placement - y position used for the carrier text label
OD <- data.frame(
      destination = c('MCO', 'MCO', 'MCO', 'MCO', 'MCO', 'MCO', 'MCO', 'Miami', 'Miami', 'Miami', 'Miami', 'Miami', 'Miami', 'LAS', 'LAS', 'LAS', 'LAS', 'Chicago', 'Chicago', 'Chicago', 'Chicago', 'Chicago', 'Chicago', 'Los Angeles', 'Los Angeles', 'Bay Area', 'Bay Area', 'Bay Area', 'Bay Area', 'Bay Area', 'BOS', 'BOS', 'BOS', 'ATL', 'ATL', 'ATL', 'ATL', 'ATL', 'ATL', 'TPA', 'TPA', 'TPA', 'TPA', 'Dallas', 'Dallas', 'Dallas', 'DEN', 'DEN', 'DEN', 'PHX', 'PHX', 'PHX', 'PHX', 'PHX', 'CUN', 'CUN', 'RSW', 'RSW', 'RSW', 'SAN', 'SAN', 'SJU', 'Houston', 'Houston', 'MSY', 'MSP', 'MSP', 'CLT', 'CLT', 'CLT', 'MBJ', 'MBJ', 'PUJ', 'PUJ', 'PUJ'),
      carrier = c('US', 'F9', 'WN', 'AA', 'FL', 'UA', 'DL', 'F9', 'US', 'DL', 'WN', 'UA', 'FL', 'AA', 'US', 'UA', 'NK', 'US', 'NK', 'F9', 'WN', 'UA', 'AA', 'WN', 'VX', 'US', 'DL', 'AA', 'VX', 'UA', 'US', 'B6', 'AA', 'US', 'WN', 'F9', 'DL', 'AA', 'FL', 'US', 'F9', 'WN', 'UA', 'DL', 'WN', 'US', 'US', 'WN', 'DL', 'US', 'WN', 'AA', 'DL', 'UA', 'US', 'F9', 'US', 'WN', 'UA', 'US', 'AA', 'AA', 'DL', 'WN', 'DL', 'F9', 'DL', 'F9', 'US', 'UA', 'AA', 'US', 'AA', 'F9', 'US'),
      market.ppd = c(1242, 1242, 1242, 1242, 1242, 1242, 1242, 1056, 1056, 1056, 1056, 1056, 1056, 645, 645, 645, 645, 641, 641, 641, 641, 641, 641, 526, 526, 498, 498, 498, 498, 498, 492, 492, 492, 482, 482, 482, 482, 482, 482, 478, 478, 478, 478, 399, 399, 399, 333, 333, 333, 298, 298, 298, 298, 298, 243, 243, 232, 232, 232, 213, 213, 205, 198, 198, 173, 163, 163, 160, 160, 160, 152, 152, 147, 147, 147),
      YOY = c(110, 96, 26, 15, -39, -23, -18, 52, 47, 11, -48, -22, -10, 8, -49, -11, -6, 15, 10, 8, 8, -12, -9, 9, -56, 35, 8, 6, -32, -12, 9, 7, 6, 47, 43, 16, 8, 7, -34, 44, 39, 8, -9, 13, 7, -28, 21, 7, 6, 37, 7, 6, -10, -7, 16, 9, 60, -37, -6, 19, -9, 6, 9, -6, -6, 16, -7, 20, 11, -6, 9, -24, 8, -11, -7),
      label.placement = c(55, 158, 219, 239, -20, -50, -71, 26, 75, 105, -24, -59, -75, 4, -25, -55, -63, 8, 20, 30, 38, -6, -17, 4, -28, 17, 39, 46, -16, -38, 4, 12, 19, 23, 68, 98, 110, 118, -17, 22, 64, 87, -5, 6, 17, -14, 10, 24, 31, 18, 40, 47, -5, -14, 8, 20, 30, -19, -40, 9, -5, 3, 4, -3, -3, 8, -4, 10, 26, -3, 5, -12, 4, -6, -15))

# Turn destination into a factor whose levels follow first appearance in the
# data (i.e. descending market.ppd). factor() needs a duplicate-free levels
# vector, so deduplicate with unique() instead of passing the raw column.
OD$destination <- factor(OD$destination, levels = unique(OD$destination))

# Stacked YOY bars per destination. Positive and negative rows are drawn as
# separate layers so gains stack upward from zero and losses stack downward;
# each bar segment gets its carrier code as a text label at label.placement.
ggplot(mapping = aes(x = destination)) +
  geom_bar(
    data = subset(OD, YOY > 0),
    mapping = aes(y = YOY, fill = carrier),
    stat = "identity"
  ) +
  geom_text(
    data = subset(OD, YOY > 0),
    mapping = aes(y = label.placement, label = carrier),
    size = 2
  ) +
  geom_bar(
    data = subset(OD, YOY < 0),
    mapping = aes(y = YOY, fill = carrier),
    stat = "identity"
  ) +
  geom_text(
    data = subset(OD, YOY < 0),
    mapping = aes(y = label.placement, label = carrier),
    size = 2
  ) +
  # Rotate the station names so they do not overlap; labels replace the legend.
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5, size = 10)
  )
Mr. T
  • 11,960
  • 10
  • 32
  • 54
  • 1
    In general, I have had better luck maintaining fill variable order in these sorts of complicated examples by creating two separate data frames (one for the positive values, one negative) and setting the levels of the fill variable in each separately with the specific order I need, rather than subsetting on the fly in each layer. – joran Dec 16 '15 at 20:50
  • @joran I believe the main problem is with x-axis ordering, not fill-color ordering (though I think your suggestion is still a good one!). – Gregor Thomas Dec 16 '15 at 20:52
  • @Gregor Yeah, I probably read a little too quickly. – joran Dec 16 '15 at 20:54

1 Answer

3

You can define an order and then tell ggplot to display the data accordingly:

library(ggplot2)

# Year-over-year (YOY) passenger change from one origin airport, one row per
# destination/carrier pair. Rows are listed by descending market.ppd, so the
# order in which destinations first appear is the desired x-axis order.
#   destination     - destination station
#   carrier         - airline code
#   market.ppd      - total passengers travelling to that destination
#                     (repeated on every row of the same destination)
#   YOY             - year-over-year change for that carrier on that route
#   label.placement - y position used for the carrier text label
OD <- data.frame(
  destination = c('MCO', 'MCO', 'MCO', 'MCO', 'MCO', 'MCO', 'MCO', 'Miami', 'Miami', 'Miami', 'Miami', 'Miami', 'Miami', 'LAS', 'LAS', 'LAS', 'LAS', 'Chicago', 'Chicago', 'Chicago', 'Chicago', 'Chicago', 'Chicago', 'Los Angeles', 'Los Angeles', 'Bay Area', 'Bay Area', 'Bay Area', 'Bay Area', 'Bay Area', 'BOS', 'BOS', 'BOS', 'ATL', 'ATL', 'ATL', 'ATL', 'ATL', 'ATL', 'TPA', 'TPA', 'TPA', 'TPA', 'Dallas', 'Dallas', 'Dallas', 'DEN', 'DEN', 'DEN', 'PHX', 'PHX', 'PHX', 'PHX', 'PHX', 'CUN', 'CUN', 'RSW', 'RSW', 'RSW', 'SAN', 'SAN', 'SJU', 'Houston', 'Houston', 'MSY', 'MSP', 'MSP', 'CLT', 'CLT', 'CLT', 'MBJ', 'MBJ', 'PUJ', 'PUJ', 'PUJ'),
  carrier = c('US', 'F9', 'WN', 'AA', 'FL', 'UA', 'DL', 'F9', 'US', 'DL', 'WN', 'UA', 'FL', 'AA', 'US', 'UA', 'NK', 'US', 'NK', 'F9', 'WN', 'UA', 'AA', 'WN', 'VX', 'US', 'DL', 'AA', 'VX', 'UA', 'US', 'B6', 'AA', 'US', 'WN', 'F9', 'DL', 'AA', 'FL', 'US', 'F9', 'WN', 'UA', 'DL', 'WN', 'US', 'US', 'WN', 'DL', 'US', 'WN', 'AA', 'DL', 'UA', 'US', 'F9', 'US', 'WN', 'UA', 'US', 'AA', 'AA', 'DL', 'WN', 'DL', 'F9', 'DL', 'F9', 'US', 'UA', 'AA', 'US', 'AA', 'F9', 'US'),
  market.ppd = c(1242, 1242, 1242, 1242, 1242, 1242, 1242, 1056, 1056, 1056, 1056, 1056, 1056, 645, 645, 645, 645, 641, 641, 641, 641, 641, 641, 526, 526, 498, 498, 498, 498, 498, 492, 492, 492, 482, 482, 482, 482, 482, 482, 478, 478, 478, 478, 399, 399, 399, 333, 333, 333, 298, 298, 298, 298, 298, 243, 243, 232, 232, 232, 213, 213, 205, 198, 198, 173, 163, 163, 160, 160, 160, 152, 152, 147, 147, 147),
  YOY = c(110, 96, 26, 15, -39, -23, -18, 52, 47, 11, -48, -22, -10, 8, -49, -11, -6, 15, 10, 8, 8, -12, -9, 9, -56, 35, 8, 6, -32, -12, 9, 7, 6, 47, 43, 16, 8, 7, -34, 44, 39, 8, -9, 13, 7, -28, 21, 7, 6, 37, 7, 6, -10, -7, 16, 9, 60, -37, -6, 19, -9, 6, 9, -6, -6, 16, -7, 20, 11, -6, 9, -24, 8, -11, -7),
  label.placement = c(55, 158, 219, 239, -20, -50, -71, 26, 75, 105, -24, -59, -75, 4, -25, -55, -63, 8, 20, 30, 38, -6, -17, 4, -28, 17, 39, 46, -16, -38, 4, 12, 19, 23, 68, 98, 110, 118, -17, 22, 64, 87, -5, 6, 17, -14, 10, 24, 31, 18, 40, 47, -5, -14, 8, 20, 30, -19, -40, 9, -5, 3, 4, -3, -3, 8, -4, 10, 26, -3, 5, -12, 4, -6, -15))

# Turn destination into a factor whose levels follow first appearance in the
# data (i.e. descending market.ppd). factor() needs a duplicate-free levels
# vector, so deduplicate with unique() instead of passing the raw column.
OD$destination <- factor(OD$destination, levels = unique(OD$destination))

# Explicit x-axis order for the plot: the factor levels are already unique and
# in the intended order, so they can be used directly as scale limits.
neworder <- levels(OD$destination)

# Stacked YOY bars per destination. Positive and negative rows are drawn as
# separate layers so gains stack upward from zero and losses stack downward;
# each bar segment gets its carrier code as a text label at label.placement.
ggplot(mapping = aes(x = destination)) +
  geom_bar(
    data = subset(OD, YOY > 0),
    mapping = aes(y = YOY, fill = carrier),
    stat = "identity"
  ) +
  geom_text(
    data = subset(OD, YOY > 0),
    mapping = aes(y = label.placement, label = carrier),
    size = 2
  ) +
  geom_bar(
    data = subset(OD, YOY < 0),
    mapping = aes(y = YOY, fill = carrier),
    stat = "identity"
  ) +
  geom_text(
    data = subset(OD, YOY < 0),
    mapping = aes(y = label.placement, label = carrier),
    size = 2
  ) +
  # Rotate the station names so they do not overlap; labels replace the legend.
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5, size = 10)
  ) +
  # Pin the x-axis to the predefined destination order.
  scale_x_discrete(limits = neworder)
keebock
  • 131
  • 2