0

Can someone help me figure out what the mistake is here? I've managed to draw the normal curve on this histogram, but probably with the wrong parameters, as the line is almost flat.

Here's the dataset

# Sample of the data: 4 countries x 25 years (1995-2019) of one growth
# variable, stored as a tibble grouped by `country`. The first year of every
# country has a missing `value`.
# Note: the assignment below uses the ASCII arrow `<-`; a typographic dash in
# its place is a syntax error in R.
solomacro_long <- structure(
  list(
    # `year` is stored as double, so the integer sequence is converted.
    year = as.numeric(rep(1995:2019, times = 4)),
    country = rep(c("Austria", "Belgium", "Bulgaria", "Croatia"), each = 25),
    abv = rep(c("aut", "bel", "bgr", "hrv"), each = 25),
    variable = rep("macro_1_growth", times = 100),
    value = c(
      # Austria
      NA,
      0.0330438627237335, -0.0511254116453409, 0.0137458636086369,
      0.0271686242318601, 0.00829201167178373, 0.0523974163847372,
      -0.0354694651171017, -0.0374494817701539, 0.00146959647767253,
      0.0496202619040307, 0.0302299392831828, 0.0130235783503618,
      -0.0328216723839782, 0.0423434653951649, 0.00220877922815199,
      -0.0128501989805205, -0.00821314138290452, -0.006717458255453,
      -0.0471457219425651, 0.00546133415718586, -0.0298779682348522,
      -0.0487160187410649, 0.0079062937240455, -0.019891693409807,
      # Belgium
      NA,
      -0.0108046160615598, -0.020481626565286, -0.0282886358948745,
      0.00274487354623876, 0.0185421531930665, -0.0253759187152902,
      -0.0263015327275199, -0.0148870282905568, -0.0392297356777268,
      -0.0020267458659039, -0.0541860979692431, 0.0531390906566396,
      0.00568177746283549, 0.0240221453374323, -0.023872987774894,
      0.0323777637704183, 0.00628046178136143, -0.0107871033420092,
      -0.00912306155191778, -0.0276588806542145, 0.000932342467327985,
      -0.0812096241106091, 0.00717414678646944, -0.00760474857650106,
      # Bulgaria
      NA,
      0.543407547351052, -0.5771767697921, -0.0695159060223425,
      0.00383600214018176, 0.0680609856304732, -0.0168169725218662,
      -0.12258474734022, -0.394874977889188, 0.425469287011755,
      0.221653828433469, -0.204587464219887, 0.735243962048276,
      -0.277316187667829, 0.37075452570192, -0.459738030435102,
      0.00847566789099852, -0.0787951118823751, 0.0672889757962081,
      0.0949321714229949, 0.0106549223289458, -0.00838782482855194,
      -0.0979086531462122, 0.0645599176330203, -0.108896480985501,
      # Croatia
      NA,
      0.00105809049367522, 0.0269054811247043, 0.0740537143417594,
      0.0557272510974303, 0.0997243132832437, 0.00910803063993137,
      -0.10155981032987, 0.139470241203099, 0.0095847787755341,
      0.0601496859523869, 0.14813450820569, 0.0371361542556079,
      0.0791355819902153, 0.0210970857693795, 0.148650996393849,
      -0.0860259529634987, -0.0377993657201044, 0.175765906918165,
      0.206969328627839, -0.0545558853033026, 0.0788684920245653,
      -0.0933878080531653, -0.0752863568686467, 0.0479445941743561
    )
  ),
  row.names = c(NA, -100L),
  # Grouping metadata: one entry per country with the row indices it owns.
  groups = structure(
    list(
      country = c("Austria", "Belgium", "Bulgaria", "Croatia"),
      .rows = structure(
        list(1:25, 26:50, 51:75, 76:100),
        ptype = integer(0),
        class = c("vctrs_list_of", "vctrs_vctr", "list")
      )
    ),
    row.names = c(NA, -4L),
    class = c("tbl_df", "tbl", "data.frame"),
    .drop = TRUE
  ),
  class = c("grouped_df", "tbl_df", "tbl", "data.frame")
)

I run the following code, using the whole dataset (7150 observations)

First, I recoded values higher than 200% (i.e. above 2 on the scale of `value`), so that they all fall into a single bar

# Collapse every value above 2.01 onto 2.00 so that all of them end up in one
# histogram bar. which() drops the NA comparisons, so missing values are left
# untouched.
above_cap <- which(solomacro_long$value > 2.01)
solomacro_long$value[above_cap] <- 2.00

Then I set the bin width, the number of observations, and the y-axis breaks

# Width of each histogram bin, on the scale of `value`.
bw <- 0.03
# Number of non-missing observations; used to convert density to counts.
n_obs <- sum(!is.na(solomacro_long$value))
# Tick positions for the secondary (counts) axis.
ybreaks <- seq(from = 0, to = 1500, by = 500)

Then I plotted the histogram, but I can't draw the normal curve properly

# Parameters of the reference normal curve. `value` contains NA entries, so
# na.rm = TRUE is required: without it mean() and sd() both return NA and
# dnorm() has no valid parameters to draw with.
value_mean <- mean(solomacro_long$value, na.rm = TRUE)
value_sd <- sd(solomacro_long$value, na.rm = TRUE)

# Density histogram of `value` with a dashed normal curve, a rug, a secondary
# axis showing counts, and text annotations with the summary statistics.
solomacro_long %>%
  ggplot(aes(x = value)) +
  # Density scale, so that the histogram and dnorm() share the same y units.
  # NOTE(review): newer ggplot2 releases prefer after_stat(density) over
  # ..density.. -- switch once the installed version is confirmed.
  geom_histogram(aes(y = ..density..), binwidth = bw, colour = "black") +
  stat_function(
    fun = dnorm,
    args = list(mean = value_mean, sd = value_sd),
    color = "darkred", linetype = "dashed", size = 0.7
  ) +
  scale_x_continuous(
    limits = c(-1, 2.06),
    breaks = c(-1, 0, 1, 2)
  ) +
  # Secondary axis: density * bin width * number of observations = counts.
  scale_y_continuous(
    "Density",
    sec.axis = sec_axis(
      trans = ~ . * bw * n_obs, name = "Counts", breaks = ybreaks
    )
  ) +
  xlab("Percentage change") +
  # NOTE(review): the y title is set both here and in scale_y_continuous()
  # above -- confirm which label is intended and drop the other.
  ylab("Frequency") +
  geom_rug() +
  annotate("text", x = 2, y = 0.5, size = 3.5, label = "> 200%",
           fontface = "bold") +
  annotate("text", x = 1.7, y = 6.5, size = 3.0,
           label = "D stat: 0.402, p-value < 0.001") +
  annotate("text", x = 1.7, y = 6.2, size = 3.0, label = "L-kurtosis: 0.471") +
  annotate("text", x = 1.7, y = 5.9, size = 3.0, label = "(N = 7150)") +
  theme_light() +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10),
    plot.margin = unit(c(.5, .5, .5, .5), "cm")
  )

Thank you!

tjebo
  • 21,977
  • 7
  • 58
  • 94
  • Does this answer your question? [ggplot2: histogram with normal curve](https://stackoverflow.com/questions/6967664/ggplot2-histogram-with-normal-curve) – tjebo Dec 07 '21 at 11:19
  • I tried several times adjusting the code according to that answer but still can't get the right curve. I don't know which is my mistake and how to fix it – user14514023 Dec 07 '21 at 14:35

0 Answers0