I think I don't know exactly how ggplot2 legends work, but I have these few lines of code and I can't make him show one.
Here is the dataset:
dati <-
structure(list(quinquennio = c("1995-2000", "1996-2001", "1997-2002",
"1998-2003", "1999-2004", "2000-2005", "2001-2006", "2002-2007",
"2003-2008", "2004-2009", "2005-2010", "2006-2011", "2007-2012",
"2008-2013", "2009-2014", "2010-2015", "2011-2016", "2012-2017",
"2013-2018"), primo_anno = c(588402L, 586231L, 576434L, 562444L,
585496L, 585351L, 593010L, 617309L, 620897L, 613388L, 616645L,
627166L, 618343L, 604995L, 597915L, 598747L, 614302L, 610468L,
612675L), quinto_anno = c(372728L, 380211L, 387806L, 393974L,
401984L, 394144L, 396725L, 413596L, 417736L, 424143L, 426651L,
431424L, 427015L, 425553L, 430832L, 435158L, 452568L, 456038L,
461120L), quinquennio_ok = c("1995\n2000", "1996\n2001", "1997\n2002",
"1998\n2003", "1999\n2004", "2000\n2005", "2001\n2006", "2002\n2007",
"2003\n2008", "2004\n2009", "2005\n2010", "2006\n2011", "2007\n2012",
"2008\n2013", "2009\n2014", "2010\n2015", "2011\n2016", "2012\n2017",
"2013\n2018"), primo_anno_label = c("588k", "586k", "576k", "562k",
"585k", "585k", "593k", "617k", "620k", "613k", "616k", "627k",
"618k", "604k", "597k", "598k", "614k", "610k", "612k"), quinto_anno_label = c("372k",
"380k", "387k", "393k", "401k", "394k", "396k", "413k", "417k",
"424k", "426k", "431k", "427k", "425k", "430k", "435k", "452k",
"456k", "461k")), .Names = c("quinquennio", "primo_anno", "quinto_anno",
"quinquennio_ok", "primo_anno_label", "quinto_anno_label"), row.names = c(NA,
-19L), spec = structure(list(cols = structure(list(quinquennio = structure(list(), class = c("collector_character",
"collector")), primo_anno = structure(list(), class = c("collector_integer",
"collector")), quinto_anno = structure(list(), class = c("collector_integer",
"collector"))), .Names = c("quinquennio", "primo_anno", "quinto_anno"
)), default = structure(list(), class = c("collector_guess",
"collector"))), .Names = c("cols", "default"), class = "col_spec"), class = c("tbl_df",
"tbl", "data.frame"))
And here is the code:
ggplot(dati) +
geom_text(aes(x=quinquennio_ok, y=primo_anno, label=primo_anno_label, vjust=-1.1), color="dark blue") +
geom_text(aes(x=quinquennio_ok, y=quinto_anno, label=quinto_anno_label, vjust=2), color="dark red") +
geom_segment(
aes(x=quinquennio_ok,
y=primo_anno-4000,
xend=quinquennio_ok,
yend=quinto_anno+10000),
colour="dark blue", size=1, alpha=.4) +
geom_point(aes(x=quinquennio_ok,
y=primo_anno),
size=4, alpha=.5, color="dark blue", show.legend = TRUE) +
geom_point(aes(x=quinquennio_ok,
y=quinto_anno+8000),
size=3, alpha=.5, fill="dark blue", colour="dark blue", shape=25) +
geom_point(aes(x=quinquennio_ok,
y=quinto_anno),
size=3, alpha=.5, color="dark red", show.legend = TRUE) +
theme_minimal() +
theme(legend.position = c(.5,.5),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line.y = element_line(color="light grey"),
panel.grid.minor.y = element_line(color="light grey")) +
scale_y_continuous(breaks=seq(300000,700000,50000), limits=c(350000,650000),
labels = scales::unit_format(unit="k",scale=.001,sep="")) +
labs(x="Quinquenni",
y="Studenti iscritti",
title="Dispersione scolastica in Italia",
subtitle="Dal 1995 al 2018",
caption="Fonte: Report TuttoScuola 2018")
This is what I get:
I'd just like to have a legend to explain that the blue dots are the number of students enrolled on the first year of the five-year period, and the red ones are the students still enrolled on the last year.
If I'm way too far from the solution, I'd appreciate some reference links to study more about ggplot2 and legends.