0

I have the following two data frames, each of which holds the bin counts of a histogram.

> x1 <- read.table('d1.txt')
> x2 <- read.table('d2.txt')
> head(x1)
  counts
1      8
2      2
3      5
4      1
5      1
6      4
> head(x2)
  counts
1      7
2      0
3      0
4      3
5      0
6      1


> dput(x1)
structure(list(counts = c(8L, 2L, 5L, 1L, 1L, 4L, 6L, 0L, 2L, 
2L, 1L, 0L, 1L, 1L, 0L, 3L, 0L, 0L, 4L, 2L, 0L, 0L, 0L, 2L, 1L, 
0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 3L, 2L, 1L, 4L, 0L, 0L, 1L, 
0L, 1L, 0L, 4L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 1L, 4L, 4L, 4L, 5L, 
8L, 8L, 13L, 2L, 2L, 7L, 6L, 2L, 6L, 5L, 6L, 20L, 15L, 6L, 8L, 
5L, 2L, 13L, 2L, 1L, 2L, 11L, 0L, 25L, 31L, 12L, 17L, 17L, 7L, 
24L, 14L, 4L, 169L, 34L, 3L, 5L, 42L, 28L, 3L, 15L, 6L, 8L, 12L, 
4L, 6L, 2L, 3L, 6L, 2L, 14L, 0L, 5L, 20L, 3L, 23L, 0L, 5L, 17L, 
11L, 13L, 6L, 1L, 13L, 8L, 7L, 1L, 4L, 1L, 2L, 2L, 2L, 1L, 2L, 
0L, 1L, 1L, 3L, 0L, 2L, 4L, 3L, 1L, 0L, 5L, 2L, 2L, 3L, 4L, 3L, 
0L, 0L, 2L, 8L, 0L, 2L, 0L, 13L, 3L, 1L, 2L, 6L, 3L, 2L, 0L, 
3L, 4L, 2L, 1L, 0L, 5L, 0L, 1L, 2L, 1L, 2L, 3L, 0L, 0L, 2L, 2L, 
0L, 3L, 2L, 3L, 1L, 0L, 0L, 6L, 1L, 0L, 2L, 0L, 2L, 1L, 3L, 1L, 
6L, 2L, 5L, 0L, 0L, 1L, 0L, 1L, 0L, 2L, 2L, 3L, 10L, 4L, 1L, 
3L, 2L, 10L, 9L, 0L, 1L, 3L, 0L, 2L, 0L, 6L, 1L, 4L, 1L, 3L, 
0L, 4L, 2L, 4L, 4L, 0L, 2L, 1L, 0L, 0L, 2L, 1L, 0L, 3L, 4L, 3L, 
2L, 0L, 1L, 2L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 2L, 0L, 2L, 0L, 
1L, 2L, 0L, 0L, 0L, 0L, 2L, 1L, 6L, 0L, 0L, 0L, 0L, 7L, 1L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 1L, 0L, 0L, 2L, 2L, 3L, 4L, 
1L, 2L, 1L, 0L, 1L, 1L, 2L, 3L, 5L, 4L, 1L, 4L, 3L, 1L, 6L, 0L, 
0L, 0L, 9L, 1L, 2L, 0L, 2L, 0L, 3L, 4L, 0L, 3L, 2L, 0L, 0L, 10L, 
0L, 5L, 4L, 3L, 6L, 6L, 9L, 2L, 1L, 1L, 2L, 1L, 0L, 0L, 3L, 6L, 
3L, 0L, 3L, 7L, 5L, 1L, 1L, 2L, 0L, 0L, 1L, 0L, 4L, 1L, 0L, 2L, 
0L, 0L, 0L, 1L, 2L, 0L, 0L, 0L, 2L, 1L, 0L, 0L, 0L, 0L, 6L, 1L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 1L, 3L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 3L, 1L, 0L, 0L, 
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 4L, 
1L, 0L, 1L, 0L, 1L, 1L, 2L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 
0L, 2L, 0L, 0L, 0L)), class = "data.frame", row.names = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", 
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", 
"36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46", 
"47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57", 
"58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68", 
"69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", 
"80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90", 
"91", "92", "93", "94", "95", "96", "97", "98", "99", "100", 
"101", "102", "103", "104", "105", "106", "107", "108", "109", 
"110", "111", "112", "113", "114", "115", "116", "117", "118", 
"119", "120", "121", "122", "123", "124", "125", "126", "127", 
"128", "129", "130", "131", "132", "133", "134", "135", "136", 
"137", "138", "139", "140", "141", "142", "143", "144", "145", 
"146", "147", "148", "149", "150", "151", "152", "153", "154", 
"155", "156", "157", "158", "159", "160", "161", "162", "163", 
"164", "165", "166", "167", "168", "169", "170", "171", "172", 
"173", "174", "175", "176", "177", "178", "179", "180", "181", 
"182", "183", "184", "185", "186", "187", "188", "189", "190", 
"191", "192", "193", "194", "195", "196", "197", "198", "199", 
"200", "201", "202", "203", "204", "205", "206", "207", "208", 
"209", "210", "211", "212", "213", "214", "215", "216", "217", 
"218", "219", "220", "221", "222", "223", "224", "225", "226", 
"227", "228", "229", "230", "231", "232", "233", "234", "235", 
"236", "237", "238", "239", "240", "241", "242", "243", "244", 
"245", "246", "247", "248", "249", "250", "251", "252", "253", 
"254", "255", "256", "257", "258", "259", "260", "261", "262", 
"263", "264", "265", "266", "267", "268", "269", "270", "271", 
"272", "273", "274", "275", "276", "277", "278", "279", "280", 
"281", "282", "283", "284", "285", "286", "287", "288", "289", 
"290", "291", "292", "293", "294", "295", "296", "297", "298", 
"299", "300", "301", "302", "303", "304", "305", "306", "307", 
"308", "309", "310", "311", "312", "313", "314", "315", "316", 
"317", "318", "319", "320", "321", "322", "323", "324", "325", 
"326", "327", "328", "329", "330", "331", "332", "333", "334", 
"335", "336", "337", "338", "339", "340", "341", "342", "343", 
"344", "345", "346", "347", "348", "349", "350", "351", "352", 
"353", "354", "355", "356", "357", "358", "359", "360", "361", 
"362", "363", "364", "365", "366", "367", "368", "369", "370", 
"371", "372", "373", "374", "375", "376", "377", "378", "379", 
"380", "381", "382", "383", "384", "385", "386", "387", "388", 
"389", "390", "391", "392", "393", "394", "395", "396", "397", 
"398", "399", "400", "401", "402", "403", "404", "405", "406", 
"407", "408", "409", "410", "411", "412", "413", "414", "415", 
"416", "417", "418", "419", "420", "421", "422", "423", "424", 
"425", "426", "427", "428", "429", "430", "431", "432", "433", 
"434", "435", "436", "437", "438", "439", "440", "441", "442", 
"443", "444", "445", "446", "447", "448", "449", "450", "451", 
"452", "453", "454", "455", "456", "457", "458", "459", "460", 
"461", "462", "463", "464", "465", "466", "467", "468", "469", 
"470", "471", "472", "473", "474", "475", "476", "477", "478", 
"479", "480", "481", "482", "483", "484", "485", "486", "487", 
"488", "489", "490", "491", "492", "493", "494", "495", "496", 
"497", "498", "499", "500"))

> dput(x2)
structure(list(counts = c(7L, 0L, 0L, 3L, 0L, 1L, 2L, 1L, 2L, 
2L, 1L, 2L, 2L, 2L, 3L, 0L, 2L, 0L, 2L, 4L, 0L, 0L, 6L, 0L, 0L, 
0L, 0L, 4L, 1L, 9L, 2L, 2L, 2L, 1L, 2L, 5L, 6L, 1L, 1L, 9L, 0L, 
0L, 22L, 1L, 0L, 9L, 1L, 3L, 10L, 1L, 1L, 6L, 0L, 1L, 4L, 2L, 
9L, 7L, 10L, 7L, 6L, 12L, 14L, 3L, 6L, 1L, 7L, 2L, 7L, 1L, 8L, 
16L, 5L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 
0L, 1L, 2L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 4L, 1L, 0L, 0L, 0L, 0L, 
0L, 2L, 1L, 1L, 2L, 0L, 1L, 6L, 1L, 0L, 1L, 0L, 5L, 0L, 0L, 0L, 
2L, 3L, 1L, 1L, 2L, 4L, 3L, 3L, 0L, 0L, 1L, 12L, 3L, 1L, 1L, 
14L, 16L, 9L, 0L, 11L, 3L, 20L, 7L, 0L, 5L, 10L, 11L, 4L, 7L, 
2L, 6L, 28L, 32L, 27L, 7L, 28L, 23L, 23L, 4L, 7L, 47L, 8L, 59L, 
18L, 38L, 6L, 7L, 5L, 11L, 5L, 1L, 2L, 3L, 13L, 2L, 22L, 4L, 
30L, 7L, 9L, 4L, 4L, 20L, 16L, 7L, 8L, 8L, 15L, 5L, 6L, 9L, 8L, 
24L, 1L, 5L, 5L, 8L, 8L, 1L, 7L, 0L, 5L, 2L, 0L, 0L, 5L, 3L, 
2L, 1L, 0L, 0L, 1L, 0L, 3L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 2L, 0L, 
1L, 0L, 5L, 2L, 2L, 1L, 0L, 1L, 2L, 1L, 0L, 0L, 1L, 1L, 4L, 1L, 
0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 2L, 1L, 1L, 0L, 1L, 
1L, 0L, 1L, 2L, 0L, 1L, 0L, 0L, 5L, 2L, 0L, 2L, 0L, 1L, 3L, 0L, 
4L, 0L, 4L, 3L, 0L, 1L, 1L, 0L, 0L, 2L, 0L, 2L, 1L, 1L, 1L, 0L, 
1L, 2L, 2L, 0L, 1L, 0L, 0L, 4L, 4L, 12L, 2L, 3L, 9L, 1L, 3L, 
2L, 3L, 5L, 3L, 2L, 0L, 3L, 0L, 2L, 3L, 1L, 7L, 3L, 0L, 2L, 0L, 
3L, 0L, 1L, 2L, 1L, 1L, 4L, 0L, 1L, 0L, 3L, 1L, 3L, 1L, 0L, 2L, 
0L, 0L, 0L, 0L, 2L, 1L, 0L, 0L, 2L, 0L, 3L, 0L, 0L, 5L, 0L, 3L, 
1L, 1L, 0L, 4L, 1L, 5L, 1L, 6L, 0L, 2L, 0L, 0L, 3L, 0L, 1L, 1L, 
1L, 1L, 0L, 0L, 4L, 0L, 0L, 0L, 0L, 3L, 1L, 1L, 2L, 3L, 6L, 2L, 
0L, 1L, 3L, 10L, 3L, 0L, 3L, 1L, 1L, 1L, 0L, 2L, 4L, 4L, 5L, 
0L, 3L, 0L, 1L, 6L, 0L, 5L, 1L, 0L, 2L, 30L, 0L, 0L, 0L, 2L, 
3L, 1L, 1L, 1L, 5L, 3L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 
3L, 0L, 0L, 2L, 1L, 1L, 2L, 2L, 0L, 0L)), class = "data.frame", row.names = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", 
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", 
"36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46", 
"47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57", 
"58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68", 
"69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", 
"80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90", 
"91", "92", "93", "94", "95", "96", "97", "98", "99", "100", 
"101", "102", "103", "104", "105", "106", "107", "108", "109", 
"110", "111", "112", "113", "114", "115", "116", "117", "118", 
"119", "120", "121", "122", "123", "124", "125", "126", "127", 
"128", "129", "130", "131", "132", "133", "134", "135", "136", 
"137", "138", "139", "140", "141", "142", "143", "144", "145", 
"146", "147", "148", "149", "150", "151", "152", "153", "154", 
"155", "156", "157", "158", "159", "160", "161", "162", "163", 
"164", "165", "166", "167", "168", "169", "170", "171", "172", 
"173", "174", "175", "176", "177", "178", "179", "180", "181", 
"182", "183", "184", "185", "186", "187", "188", "189", "190", 
"191", "192", "193", "194", "195", "196", "197", "198", "199", 
"200", "201", "202", "203", "204", "205", "206", "207", "208", 
"209", "210", "211", "212", "213", "214", "215", "216", "217", 
"218", "219", "220", "221", "222", "223", "224", "225", "226", 
"227", "228", "229", "230", "231", "232", "233", "234", "235", 
"236", "237", "238", "239", "240", "241", "242", "243", "244", 
"245", "246", "247", "248", "249", "250", "251", "252", "253", 
"254", "255", "256", "257", "258", "259", "260", "261", "262", 
"263", "264", "265", "266", "267", "268", "269", "270", "271", 
"272", "273", "274", "275", "276", "277", "278", "279", "280", 
"281", "282", "283", "284", "285", "286", "287", "288", "289", 
"290", "291", "292", "293", "294", "295", "296", "297", "298", 
"299", "300", "301", "302", "303", "304", "305", "306", "307", 
"308", "309", "310", "311", "312", "313", "314", "315", "316", 
"317", "318", "319", "320", "321", "322", "323", "324", "325", 
"326", "327", "328", "329", "330", "331", "332", "333", "334", 
"335", "336", "337", "338", "339", "340", "341", "342", "343", 
"344", "345", "346", "347", "348", "349", "350", "351", "352", 
"353", "354", "355", "356", "357", "358", "359", "360", "361", 
"362", "363", "364", "365", "366", "367", "368", "369", "370", 
"371", "372", "373", "374", "375", "376", "377", "378", "379", 
"380", "381", "382", "383", "384", "385", "386", "387", "388", 
"389", "390", "391", "392", "393", "394", "395", "396", "397", 
"398", "399", "400", "401", "402", "403", "404", "405", "406", 
"407", "408", "409", "410", "411", "412", "413", "414", "415", 
"416", "417", "418", "419", "420", "421", "422", "423", "424", 
"425", "426", "427", "428", "429", "430", "431", "432", "433", 
"434", "435", "436", "437", "438", "439", "440", "441", "442", 
"443", "444", "445", "446", "447", "448", "449", "450", "451", 
"452", "453", "454", "455", "456", "457", "458", "459", "460", 
"461", "462", "463", "464", "465", "466", "467", "468", "469", 
"470", "471", "472", "473", "474", "475", "476", "477", "478", 
"479", "480", "481", "482", "483", "484", "485", "486", "487", 
"488", "489", "490", "491", "492", "493", "494", "495", "496", 
"497", "498", "499", "500"))

Plotting the first few rows only, everything looks all right:

library(ggplot2)

# Mirrored bar chart of the first 20 bins only. Each strand gets its own
# small data frame that carries the bin coordinate as a column, so the
# aesthetics refer to columns rather than to free-standing vectors.
part_coord <- -249:-230
part_plus <- data.frame(coordinate = part_coord, counts = x1[["counts"]][1:20])
part_minus <- data.frame(coordinate = part_coord, counts = x2[["counts"]][1:20])

plot1 <- ggplot(mapping = aes(x = coordinate)) +
  # '+' strand counts are drawn upwards from zero ...
  geom_col(data = part_plus, mapping = aes(y = counts, fill = '+')) +
  # ... and '-' strand counts are negated so that they hang below zero.
  geom_col(data = part_minus, mapping = aes(y = -counts, fill = '-')) +
  labs(title = 'plot_part', x = 'coordinate', y = 'counts', fill = 'strand') +
  # Reverse the legend order of the two strand entries.
  guides(fill = guide_legend(reverse = TRUE)) +
  # Centre the title horizontally.
  theme(plot.title = element_text(hjust = 0.5))

# Write the plot to a PNG file using the device's default dimensions.
png("part.png")
print(plot1)
dev.off()

see first image attached

However, when I plot the entire data frames and zoom in on the left-hand end of the plot, things look completely different:

# Same mirrored bar chart as for the partial plot, but over all 500 bins
# (coordinates -249 to 250). The coordinate is stored as a column of each
# strand's data frame so the aesthetics only refer to columns.
whole_coord <- -249:250
whole_plus <- data.frame(coordinate = whole_coord, counts = x1[["counts"]])
whole_minus <- data.frame(coordinate = whole_coord, counts = x2[["counts"]])

plot2 <- ggplot(mapping = aes(x = coordinate)) +
  # '+' strand counts are drawn upwards from zero ...
  geom_col(data = whole_plus, mapping = aes(y = counts, fill = '+')) +
  # ... and '-' strand counts are negated so that they hang below zero.
  geom_col(data = whole_minus, mapping = aes(y = -counts, fill = '-')) +
  labs(title = 'plot_part', x = 'coordinate', y = 'counts', fill = 'strand') +
  # Reverse the legend order of the two strand entries.
  guides(fill = guide_legend(reverse = TRUE)) +
  # Centre the title horizontally.
  theme(plot.title = element_text(hjust = 0.5))

# Write the plot to a PNG file using the device's default dimensions.
png("whole.png")
print(plot2)
dev.off()

see second image attached

see third image attached

Can someone reproduce this? Am I doing something wrong? Is this a bug in ggplot2? Is there a workaround?

mce1
  • 13
  • 5
  • It would be difficult for anyone to reproduce this without your data. Please read [this](https://stackoverflow.com/a/5963610/8449629). – Z.Lin Jan 25 '21 at 04:32
  • Added data to give a reproducible example using dput(). – mce1 Jan 25 '21 at 07:27
  • This could be a rendering issue. Re-sizing the plot yields different appearances, given how thin your bars are. The more fundamental question here, though, is whether you really want to visualise your data this way. At this resolution, wider histogram bins or density plots may be more efficient at conveying the overall distribution of your data. – Z.Lin Jan 25 '21 at 10:33
  • @Z.Lin Thanks for the edit and for the comment. This is just some very sparse data to illustrate the problem. I still think ggplot could and should plot data more accurately than it does here. – mce1 Jan 25 '21 at 15:09

0 Answers0