0

I have a dataset called data_words that looks as follows (these are just a few lines):

             Word     Cognate TestingMoment     Score freq
1        aambeeld     Cognate         Main2 0.3500000   10
2        aambeeld     Cognate         Main4 0.7670000   10
3           bezem     Cognate         Main2 1.0000000    5
4           bezem     Cognate         Main4 1.0000000    5
5    broodrooster Non-cognate         Main2 0.5428571   14
6    broodrooster Non-cognate         Main4 0.5714286   14
7            buis Non-cognate         Main2 0.4545455   11
8            buis Non-cognate         Main4 0.6363636   11

I'm creating a barplot of it that so far looks as follows: Bar chart

This is the code that generated the plot:

# Overlaid horizontal bar chart of every word's score at the two testing
# moments, filled by cognate status, with `freq` printed beside each word.
#
# Bar order: reorder(Word, -Score) ranks each word by the mean of -Score over
# all of its rows (reorder()'s default summary function is mean), i.e. by the
# average of its Main2 and Main4 scores rather than by either moment alone.
data_words %>%
  # Rows with the highest scores come first in the plotted data.
  # NOTE(review): presumably so the taller bar of each overlaid pair is drawn
  # before the shorter one -- confirm.
  arrange(desc(Score)) %>%
  ggplot(aes(x = reorder(Word, -Score),
             y = Score * 100,
             fill = Cognate)) +
  # geom_col() is the stat = "identity" form of geom_bar();
  # position = "identity" overlays the bars of a word instead of stacking them.
  geom_col(aes(group = TestingMoment,
               colour = TestingMoment),
           position = "identity",
           alpha = 0.5) +
  # One label per word: only the Main4 rows are used, so the label is
  # positioned relative to the Main4 score.
  geom_text(aes(label = freq),
            size = 3,
            position = position_nudge(x = 0.1, y = 3),
            data = filter(data_words, TestingMoment == "Main4")) +
  theme(axis.text.x = element_text(size = 13),
        axis.text.y = element_text(size = 10),
        text = element_text(size = 15)) +
  labs(x = "Word\n", y = "\nAverage score at Main2 and Main4") +
  scale_fill_manual(values = c("#000000", "#56B4E9")) +
  # Both bar outlines are white; the colour scale gets no legend.
  scale_colour_manual(values = c("white", "white"), guide = "none") +
  coord_flip()

There are two things I'm struggling with and haven't been able to solve yet, even though I looked at other questions on this topic:

1) The bars are currently ordered by the average value of Score over Main2 and Main4. This makes the graph quite messy. How can I sort it by either the value at Main2 OR Main4?

2) When point 1 is taken care of, I want those entries with the same score to be sorted alphabetically. The dataframe is already sorted alphabetically, but because I'm flipping the coordinates at the very end, the order of the words is exactly the other way around from how I want them. How can I do this?

Here are all the data:

structure(list(Word = structure(c(1L, 1L, 2L, 2L, 4L, 4L, 5L, 
5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 11L, 11L, 12L, 12L, 13L, 
13L, 14L, 14L, 15L, 15L, 16L, 16L, 17L, 17L, 19L, 19L, 20L, 20L, 
21L, 21L, 22L, 22L, 23L, 23L, 24L, 24L, 25L, 25L, 26L, 26L, 27L, 
27L, 28L, 28L, 29L, 29L, 30L, 30L, 31L, 31L, 32L, 32L, 35L, 35L, 
36L, 36L, 37L, 37L, 38L, 38L, 40L, 40L, 41L, 41L, 42L, 42L, 43L, 
43L, 44L, 44L, 45L, 45L, 46L, 46L, 47L, 47L, 48L, 48L, 49L, 49L, 
50L, 50L, 51L, 51L, 52L, 52L, 53L, 53L, 54L, 54L, 55L, 55L, 56L, 
56L, 57L, 57L, 58L, 58L, 59L, 59L, 60L, 60L, 61L, 61L, 62L, 62L, 
63L, 63L, 64L, 64L, 65L, 65L, 66L, 66L, 67L, 67L, 68L, 68L, 69L, 
69L, 70L, 70L, 71L, 71L, 73L, 73L, 74L, 74L, 75L, 75L, 76L, 76L, 
77L, 77L, 78L, 78L, 79L, 79L, 80L, 80L, 81L, 81L, 82L, 82L, 83L, 
83L), .Label = c("aambeeld", "bezem", "brandblusser", "broodrooster", 
"buis", "citruspers", "dienblad", "dobber", "dweil", "emmer", 
"garde", "gesp", "gieter", "gum", "heggenschaar", "hengel", "hes", 
"kaars", "kapstok", "keppel", "kist", "klapper", "klos", "knikker", 
"knuffel", "kooi", "kous", "kraag", "kroon", "kruiwagen", "kruk", 
"kurk", "kussen", "kwast", "lantaarn", "lessenaar", "mijter", 
"onderzetter", "pak", "passer", "peddel", "pet", "pruik", "puntenslijper", 
"rammelaar", "reddingsvest", "rietje", "rits", "romper", "sambabal", 
"schort", "schroef", "servet", "skelter", "slab", "slang", "slinger", 
"speen", "speldje", "spijker", "spuit", "staf", "stamper", "stelt", 
"stofzuiger", "stokpaard", "stolp", "tamboerijn", "tol", "tooi", 
"toverstaf", "tuinbroek", "tulband", "vergiet", "veter", "vijl", 
"vijzel", "waaier", "wafelijzer", "wip", "zaag", "zeis", "zwemvleugel"
), class = "factor"), Cognate = structure(c(1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 
2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 
1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("Cognate", "Non-cognate"), class = "factor"), 
    TestingMoment = structure(c(2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 
    3L, 2L, 3L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 3L, 2L, 3L, 2L, 2L, 
    3L, 2L, 3L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 
    2L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 2L, 
    3L, 2L, 3L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 
    2L, 3L, 3L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 3L, 2L, 3L, 2L, 2L, 
    3L, 3L, 2L, 2L, 3L, 2L, 3L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 
    3L, 2L, 3L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 3L, 2L, 2L, 3L, 2L, 
    3L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 
    3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 
    3L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L), .Label = c("Post", "Main2", 
    "Main4", "FollowUp"), class = "factor"), Score = c(0.35, 
    0.767, 1, 1, 0.542857142857143, 0.571428571428571, 0.454545454545455, 
    0.636363636363636, 1, 1, 1, 1, 0.866666666666667, 0.666666666666667, 
    1, 1, 0.042, 0.108666666666667, 0.383333333333333, 0.25, 
    0.871538461538462, 0.512307692307692, 0.9175, 1, 1, 1, 0.946666666666667, 
    0.493333333333333, 0.585, 0.5, 0.362307692307692, 0.692307692307692, 
    0.5, 0.416666666666667, 0.003, 1, 1, 1, 0.35, 0.616666666666667, 
    1, 1, 1, 1, 0.785714285714286, 0.714285714285714, 0.227272727272727, 
    0.181818181818182, 1, 0.9, 1, 1, 0.504666666666667, 0.908, 
    0.9375, 1, 0.666666666666667, 0.944444444444444, 0.75, 0.6425, 
    0.686, 0.871333333333333, 0.335, 0.335, 1, 1, 0.666666666666667, 
    0.6, 0.571428571428571, 0.857142857142857, 1, 1, 0.7, 0.6375, 
    0.648666666666667, 0.678666666666667, 0.71, 1, 0.9, 0.8, 
    0.75, 0.583333333333333, 1, 1, 0.83, 0.003, 0.902666666666667, 
    0.917333333333333, 0.261538461538462, 0.384615384615385, 
    0.42, 0.353, 0.93, 0.9475, 0.214285714285714, 0.612142857142857, 
    0.533333333333333, 0.316666666666667, 1, 0.833333333333333, 
    0.857142857142857, 0.785714285714286, 0.732142857142857, 
    0.357142857142857, 0.285714285714286, 0.857142857142857, 
    0.559285714285714, 0.785714285714286, 0.25, 0.25, 0.75, 1, 
    0.903333333333333, 1, 0.573571428571429, 0.532857142857143, 
    1, 1, 1, 1, 0.25, 0.003, 0.916666666666667, 1, 0.316666666666667, 
    0.533333333333333, 0.714285714285714, 0.857142857142857, 
    0.7225, 0.695, 0.593333333333333, 0.279333333333333, 1, 1, 
    0.230769230769231, 0.153846153846154, 0.003, 0.003, 0.269285714285714, 
    0.342857142857143, 0.666666666666667, 0.866666666666667, 
    0.698571428571429, 0.841428571428571, 1, 0.916666666666667, 
    0.757272727272727, 0.533636363636364, 0.555833333333333, 
    0.479166666666667, 0.99, 0.99), freq = c(10L, 10L, 5L, 5L, 
    14L, 14L, 11L, 11L, 3L, 3L, 1L, 1L, 3L, 3L, 1L, 1L, 15L, 
    15L, 15L, 15L, 13L, 13L, 4L, 4L, 1L, 1L, 15L, 15L, 4L, 4L, 
    13L, 13L, 6L, 6L, 1L, 1L, 2L, 2L, 15L, 15L, 1L, 1L, 11L, 
    11L, 14L, 14L, 11L, 11L, 10L, 10L, 1L, 1L, 15L, 15L, 4L, 
    4L, 9L, 9L, 12L, 12L, 15L, 15L, 2L, 2L, 11L, 11L, 15L, 15L, 
    7L, 7L, 1L, 1L, 8L, 8L, 15L, 15L, 1L, 1L, 1L, 1L, 12L, 12L, 
    10L, 10L, 1L, 1L, 15L, 15L, 13L, 13L, 10L, 10L, 8L, 8L, 14L, 
    14L, 15L, 15L, 12L, 12L, 14L, 14L, 14L, 14L, 7L, 7L, 14L, 
    14L, 3L, 3L, 4L, 4L, 3L, 3L, 14L, 14L, 1L, 1L, 8L, 8L, 4L, 
    4L, 12L, 12L, 15L, 15L, 7L, 7L, 4L, 4L, 15L, 15L, 1L, 1L, 
    13L, 13L, 1L, 1L, 14L, 14L, 15L, 15L, 7L, 7L, 12L, 12L, 11L, 
    11L, 12L, 12L, 10L, 10L)), .Names = c("Word", "Cognate", 
"TestingMoment", "Score", "freq"), row.names = c(NA, -152L), class = "data.frame")
joran
  • 169,992
  • 32
  • 429
  • 468
Johanna
  • 1,019
  • 2
  • 9
  • 20
  • _"I don't understand why this question is marked as a duplicate [...] sorted ascending whereas the TO wanted them to be sorted descending"_ So apply the logic in the duplicate in the reverse order? From the duplicate post: "The key with ordering is to set the levels of the factor in the order you want". – msanford Apr 12 '18 at 19:50
  • This is the core of the message: "My problem is that the data in my graph are sorted by the average value of the two levels of TestingMoment, whereas I want them to be sorted by the value of one of the levels." This is not addressed in the 'duplicate' question. – Johanna Apr 12 '18 at 19:52
  • 1
    A small point on the closure/duplicate: we get lots and lots of questions on SO that are presented as "how do I reorder the bars in this ggplot?". The *most general* answer is *always* reorder the factor levels. That doesn't always help if the *real* question turns out to be "how do I reorder the levels in this particular case?". So it would help if (1) people here were a little more open to that issue and (2) when you write your question, focus more tightly on *just* the reordering piece, which wouldn't require all the extraneous ggplot2 code. – joran Apr 12 '18 at 22:46
  • Thank you - noted! – Johanna Apr 13 '18 at 06:38

1 Answer

1

Try this:

library(dplyr)
# Words as a character vector, ranked by ascending Main2 score; equal scores
# are broken by Word (a factor, so by its level order).
Main2_order <- local({
  main2_rows <- filter(data_words, TestingMoment == 'Main2')
  ranked_rows <- arrange(main2_rows, Score, Word)
  as.character(pull(ranked_rows, Word))
})

# Words as a character vector, ranked by ascending Main4 score; equal scores
# are broken by Word (a factor, so by its level order).
Main4_order <- local({
  main4_rows <- filter(data_words, TestingMoment == 'Main4')
  ranked_rows <- arrange(main4_rows, Score, Word)
  as.character(pull(ranked_rows, Word))
})

# Same overlaid bar chart as in the question, but with the word axis ordered
# explicitly: Word is turned into a factor whose levels follow
# rev(Main2_order), i.e. the Main2 ranking from highest to lowest score.
# Use rev(Main4_order) instead to order by the Main4 scores.
data_words %>%
  # Rows with the highest scores come first in the plotted data.
  # NOTE(review): presumably so the taller bar of each overlaid pair is drawn
  # before the shorter one -- confirm.
  arrange(desc(Score)) %>%
  ggplot(aes(x = factor(Word, levels = rev(Main2_order)),
             y = Score * 100,
             fill = Cognate)) +
  # geom_col() is the stat = "identity" form of geom_bar();
  # position = "identity" overlays the bars of a word instead of stacking them.
  geom_col(aes(group = TestingMoment,
               colour = TestingMoment),
           position = "identity",
           alpha = 0.5) +
  # One label per word: only the Main4 rows are used, so the label is
  # positioned relative to the Main4 score.
  geom_text(aes(label = freq),
            size = 3,
            position = position_nudge(x = 0.1, y = 3),
            data = filter(data_words, TestingMoment == "Main4")) +
  theme(axis.text.x = element_text(size = 13),
        axis.text.y = element_text(size = 10),
        text = element_text(size = 15)) +
  labs(x = "Word\n", y = "\nAverage score at Main2 and Main4") +
  scale_fill_manual(values = c("#000000", "#56B4E9")) +
  # Both bar outlines are white; the colour scale gets no legend.
  scale_colour_manual(values = c("white", "white"), guide = "none") +
  coord_flip()

I only used Main2_order in the ggplot code in that example, but you can swap in Main4_order to get the other ordering.

To get more complex ordering by Main4, then Main2 then Word you might try:

library(tidyr)
# Words as a character vector, ranked by ascending Main4 score, then by
# ascending Main2 score, then by Word (a factor, so by its level order).
Main2_Main4_order <- data_words %>%
  select(Word, TestingMoment, Score) %>%
  # Reshape to one row per Word with one score column per testing moment,
  # so both scores can be used as sort keys. pivot_wider() replaces the
  # superseded spread().
  pivot_wider(names_from = TestingMoment, values_from = Score) %>%
  arrange(Main4, Main2, Word) %>%
  pull(Word) %>%
  as.character()
joran
  • 169,992
  • 32
  • 429
  • 468
  • Thank you so much, this code does what I asked for. If you have time, I have one more follow-up question. Would it be possible to first sort by Main4_order, THEN by Main2_order, and only then alphabetically? – Johanna Apr 13 '18 at 06:41
  • Awesome, thank you so much! This is exactly what I needed for my research article :) – Johanna Apr 13 '18 at 18:22