Can someone help me figure out what the mistake is here? I've managed to draw the normal curve on this histogram, but probably with the wrong parameters, as the line is almost flat.
Here's the dataset
# Sample of the data as produced by dput(): 100 rows (4 countries x 25 years,
# 1995-2019) with columns year, country, abv, variable and value.
# The object carries grouped-tibble classes (grouped by country);
# presumably dplyr/tibble must be loaded for it to print and behave as one.
solomacro_long <- structure(list(year = c(1995, 1996, 1997, 1998, 1999, 2000, 2001,
2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
2013, 2014, 2015, 2016, 2017, 2018, 2019, 1995, 1996, 1997, 1998,
1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 1995,
1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017,
2018, 2019, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014,
2015, 2016, 2017, 2018, 2019), country = c("Austria", "Austria",
"Austria", "Austria", "Austria", "Austria", "Austria", "Austria",
"Austria", "Austria", "Austria", "Austria", "Austria", "Austria",
"Austria", "Austria", "Austria", "Austria", "Austria", "Austria",
"Austria", "Austria", "Austria", "Austria", "Austria", "Belgium",
"Belgium", "Belgium", "Belgium", "Belgium", "Belgium", "Belgium",
"Belgium", "Belgium", "Belgium", "Belgium", "Belgium", "Belgium",
"Belgium", "Belgium", "Belgium", "Belgium", "Belgium", "Belgium",
"Belgium", "Belgium", "Belgium", "Belgium", "Belgium", "Belgium",
"Bulgaria", "Bulgaria", "Bulgaria", "Bulgaria", "Bulgaria", "Bulgaria",
"Bulgaria", "Bulgaria", "Bulgaria", "Bulgaria", "Bulgaria", "Bulgaria",
"Bulgaria", "Bulgaria", "Bulgaria", "Bulgaria", "Bulgaria", "Bulgaria",
"Bulgaria", "Bulgaria", "Bulgaria", "Bulgaria", "Bulgaria", "Bulgaria",
"Bulgaria", "Croatia", "Croatia", "Croatia", "Croatia", "Croatia",
"Croatia", "Croatia", "Croatia", "Croatia", "Croatia", "Croatia",
"Croatia", "Croatia", "Croatia", "Croatia", "Croatia", "Croatia",
"Croatia", "Croatia", "Croatia", "Croatia", "Croatia", "Croatia",
"Croatia", "Croatia"), abv = c("aut", "aut", "aut", "aut", "aut",
"aut", "aut", "aut", "aut", "aut", "aut", "aut", "aut", "aut",
"aut", "aut", "aut", "aut", "aut", "aut", "aut", "aut", "aut",
"aut", "aut", "bel", "bel", "bel", "bel", "bel", "bel", "bel",
"bel", "bel", "bel", "bel", "bel", "bel", "bel", "bel", "bel",
"bel", "bel", "bel", "bel", "bel", "bel", "bel", "bel", "bel",
"bgr", "bgr", "bgr", "bgr", "bgr", "bgr", "bgr", "bgr", "bgr",
"bgr", "bgr", "bgr", "bgr", "bgr", "bgr", "bgr", "bgr", "bgr",
"bgr", "bgr", "bgr", "bgr", "bgr", "bgr", "bgr", "hrv", "hrv",
"hrv", "hrv", "hrv", "hrv", "hrv", "hrv", "hrv", "hrv", "hrv",
"hrv", "hrv", "hrv", "hrv", "hrv", "hrv", "hrv", "hrv", "hrv",
"hrv", "hrv", "hrv", "hrv", "hrv"), variable = c("macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth", "macro_1_growth",
"macro_1_growth", "macro_1_growth", "macro_1_growth"), value = c(NA,
0.0330438627237335, -0.0511254116453409, 0.0137458636086369,
0.0271686242318601, 0.00829201167178373, 0.0523974163847372,
-0.0354694651171017, -0.0374494817701539, 0.00146959647767253,
0.0496202619040307, 0.0302299392831828, 0.0130235783503618, -0.0328216723839782,
0.0423434653951649, 0.00220877922815199, -0.0128501989805205,
-0.00821314138290452, -0.006717458255453, -0.0471457219425651,
0.00546133415718586, -0.0298779682348522, -0.0487160187410649,
0.0079062937240455, -0.019891693409807, NA, -0.0108046160615598,
-0.020481626565286, -0.0282886358948745, 0.00274487354623876,
0.0185421531930665, -0.0253759187152902, -0.0263015327275199,
-0.0148870282905568, -0.0392297356777268, -0.0020267458659039,
-0.0541860979692431, 0.0531390906566396, 0.00568177746283549,
0.0240221453374323, -0.023872987774894, 0.0323777637704183, 0.00628046178136143,
-0.0107871033420092, -0.00912306155191778, -0.0276588806542145,
0.000932342467327985, -0.0812096241106091, 0.00717414678646944,
-0.00760474857650106, NA, 0.543407547351052, -0.5771767697921,
-0.0695159060223425, 0.00383600214018176, 0.0680609856304732,
-0.0168169725218662, -0.12258474734022, -0.394874977889188, 0.425469287011755,
0.221653828433469, -0.204587464219887, 0.735243962048276, -0.277316187667829,
0.37075452570192, -0.459738030435102, 0.00847566789099852, -0.0787951118823751,
0.0672889757962081, 0.0949321714229949, 0.0106549223289458, -0.00838782482855194,
-0.0979086531462122, 0.0645599176330203, -0.108896480985501,
NA, 0.00105809049367522, 0.0269054811247043, 0.0740537143417594,
0.0557272510974303, 0.0997243132832437, 0.00910803063993137,
-0.10155981032987, 0.139470241203099, 0.0095847787755341, 0.0601496859523869,
0.14813450820569, 0.0371361542556079, 0.0791355819902153, 0.0210970857693795,
0.148650996393849, -0.0860259529634987, -0.0377993657201044,
0.175765906918165, 0.206969328627839, -0.0545558853033026, 0.0788684920245653,
-0.0933878080531653, -0.0752863568686467, 0.0479445941743561)), row.names = c(NA,
-100L), groups = structure(list(country = c("Austria", "Belgium",
"Bulgaria", "Croatia"), .rows = structure(list(1:25, 26:50, 51:75,
76:100), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr",
"list"))), row.names = c(NA, -4L), class = c("tbl_df", "tbl",
"data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"))
I ran the following code on the whole dataset (7150 observations).
First, I recoded values higher than 200% (i.e. above 2 on the scale of `value`) so that they all fall into a single bar:
# Collapse every value above 2.01 (labelled "> 200%" on the plot) to 2.00 so
# that they all land in a single histogram bar.
# which() drops the NA results of the comparison, so missing values are left
# untouched, and doing it in one step avoids an intermediate sentinel value
# that would have to be matched with a floating-point `==`.
over_cap <- which(solomacro_long$value > 2.01)
solomacro_long$value[over_cap] <- 2.00
Then I set the bin width, the number of observations, and the y-axis breaks:
# Histogram bin width, in the units of `value`.
bw <- 0.03
# Number of non-missing observations; together with `bw` it converts the
# density axis into counts on the secondary axis (density * bw * n_obs).
n_obs <- sum(!is.na(solomacro_long$value))
# Tick positions for the secondary "Counts" axis.
ybreaks <- seq(from = 0, to = 1500, by = 500)
Then I plotted the histogram, but I can't draw the normal curve properly:
# Parameters of the reference normal curve.
# `value` contains NA entries, so na.rm = TRUE is required: without it both
# mean() and sd() return NA and dnorm() has nothing drawable to return.
value_mean <- mean(solomacro_long$value, na.rm = TRUE)
value_sd <- sd(solomacro_long$value, na.rm = TRUE)

solomacro_long %>%
  ggplot(aes(x = value)) +
  # The histogram is drawn on the density scale, i.e. the same scale as
  # dnorm(), so the curve needs no rescaling by bw * n_obs.
  geom_histogram(aes(y = ..density..), binwidth = bw, colour = "black") +
  stat_function(
    fun = dnorm,
    args = list(mean = value_mean, sd = value_sd),
    color = "darkred", linetype = "dashed", size = 0.7
  ) +
  scale_x_continuous(
    limits = c(-1, 2.06),
    breaks = c(-1, 0, 1, 2)
  ) +
  # Primary axis shows density; the secondary axis converts it back to counts.
  # The axis title is set here only: the former ylab("Frequency") conflicted
  # with this name and mislabelled a density axis.
  scale_y_continuous(
    "Density",
    sec.axis = sec_axis(
      trans = ~ . * bw * n_obs, name = "Counts", breaks = ybreaks
    )
  ) +
  xlab("Percentage change") +
  geom_rug() +
  annotate("text", x = 2, y = 0.5, size = 3.5, label = "> 200%", fontface = "bold") +
  annotate("text", x = 1.7, y = 6.5, size = 3.0, label = "D stat: 0.402, p-value < 0.001") +
  annotate("text", x = 1.7, y = 6.2, size = 3.0, label = "L-kurtosis: 0.471") +
  annotate("text", x = 1.7, y = 5.9, size = 3.0, label = "(N = 7150)") +
  theme_light() +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10),
    plot.margin = unit(c(.5, .5, .5, .5), "cm")
  )
Thank you!