11

I have this data frame:

# dput() output of the example data frame: 24 rows, one per month from
# Apr 2008 to Mar 2010. The expression is not assigned to a name here; the
# plotting code further down refers to the data frame as `m_summary2`.
# Columns: month_num (running index 1:24), founded_month, founded_year,
# count, month_abb, short_year, proj, and label (the month abbreviation, with
# a newline plus the two-digit year appended on the January rows).
structure(list(month_num = 1:24, founded_month = c(4L, 5L, 6L, 
7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 
10L, 11L, 12L, 1L, 2L, 3L), founded_year = c(2008L, 2008L, 2008L, 
2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2009L, 2009L, 2009L, 
2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 
2010L, 2010L, 2010L), count = c(270L, 222L, 256L, 250L, 277L, 
268L, 246L, 214L, 167L, 408L, 201L, 225L, 203L, 220L, 230L, 225L, 
177L, 207L, 166L, 135L, 116L, 122L, 69L, 42L), month_abb = c("Apr", 
"May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "Jan", 
"Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", 
"Nov", "Dec", "Jan", "Feb", "Mar"), short_year = c("08", "08", 
"08", "08", "08", "08", "08", "08", "08", "09", "09", "09", "09", 
"09", "09", "09", "09", "09", "09", "09", "09", "10", "10", "10"
), proj = c(282, 246, 292, 298, 337, 340, 330, 310, 275, 528, 
333, 369, 359, 388, 410, 417, 381, 423, 394, 375, 368, 386, 345, 
330), label = c("Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", 
"Nov", "Dec", "Jan\n09", "Feb", "Mar", "Apr", "May", "Jun", "Jul", 
"Aug", "Sep", "Oct", "Nov", "Dec", "Jan\n10", "Feb", "Mar")), .Names = c("month_num", 
"founded_month", "founded_year", "count", "month_abb", "short_year", 
"proj", "label"), row.names = c(NA, -24L), class = "data.frame")

And I've got all of this done so far (I know the code is a bit ugly; pointers are appreciated):

# Plot the actual monthly counts (green) and the projected counts (blue)
# against the running month index. Both colours are fixed values set outside
# aes(), so ggplot2 does not generate a legend for them.
col_current <- rgb(0, 172, 0, maxColorValue = 255)
col_proj <- rgb(18, 111, 150, maxColorValue = 255)

# Title spans the first and last month in the data, e.g. "Apr 08 - Mar 10".
last_row <- nrow(m_summary2)
plot_title <- paste(
  "# Startups Founded:",
  m_summary2$month_abb[1], m_summary2$short_year[1],
  "-",
  m_summary2$month_abb[last_row], m_summary2$short_year[last_row]
)

p <- ggplot(m_summary2, aes(x = month_num, y = count))
p +
  geom_line(colour = col_current) +
  geom_point(colour = col_current) +
  # Only `y` changes for the projection; `x` is inherited from ggplot().
  # Bare column names are used instead of m_summary2$... so the layers read
  # from the plot's data argument.
  geom_line(aes(y = proj), colour = col_proj) +
  geom_point(aes(y = proj), colour = col_proj) +
  scale_x_continuous("Month", breaks = m_summary2$month_num, labels = m_summary2$label) +
  scale_y_continuous("# Startups Founded") +
  # ggtitle() replaces opts(title = ...), which is defunct in current ggplot2.
  ggtitle(plot_title)

Now I would like to add a legend to clarify that the blue line is a projection and the green line is the current data. I would like to make the changes without altering the dataframe if possible.

Thanks in advance!

Dan
  • 6,008
  • 7
  • 40
  • 41

3 Answers

5

You can easily achieve this by using melt (in the reshape package). Here is the code you add after you define the data frame.

# Reshape to long format: every column not listed in `id1` (here `count` and
# `proj`) is stacked into a `variable` / `value` pair. Mapping `variable` to
# colour lets ggplot2 build the legend on its own.
id1 <- c("month_num", "founded_month", "founded_year", "month_abb", "short_year", "label")
m_summary3 <- melt(m_summary2, id.vars = id1)
p <- ggplot(m_summary3, aes(x = month_num, y = value, group = variable, colour = variable))
c1 <- rgb(0/255, 172/255, 0/255)      # green: current data
c2 <- rgb(18/255, 111/255, 150/255)   # blue: projection
x_scale <- scale_x_continuous("Month", breaks = m_summary2$month_num, labels = m_summary2$label)
y_scale <- scale_y_continuous("# Startups Founded")

# Colours are matched to series by name rather than by factor-level order,
# and the legend entries get readable labels instead of the column names.
p + geom_line() +
  scale_colour_manual(
    values = c(count = c1, proj = c2),
    breaks = c("count", "proj"),
    labels = c("Current", "Projection")
  ) +
  x_scale + y_scale

Ramnath

Ramnath
  • 54,439
  • 16
  • 125
  • 152
  • so that will work, but I was hoping I could do it without altering the dataframe. I'll edit the question to reflect that. – Dan Apr 06 '10 at 01:19
  • Well, you could define m_summary3 as a temporary data frame just to draw the figure this way. Is there any reason you would not want a temporary data frame? If you don't want to create m_summary3, just pass data = melt(m_summary2, id = id1) to your ggplot command and that should take care of it. – Ramnath Apr 06 '10 at 01:21
  • yeah i get that, and i could continually reshape dataframes, there really is nothing wrong with that approach. I was just hoping there was a way to do it within ggplot. – Dan Apr 06 '10 at 01:23
5

Here is a way to manually annotate your plot. I have assumed that you save the plot that you have printed as p2. So you need to add this code to what you already have.

 # Label each line directly on the plot instead of using a legend.
 # The colours are defined here so this snippet works when added to the
 # question's code on its own.
 c1 <- rgb(0/255, 172/255, 0/255)      # green: current data
 c2 <- rgb(18/255, 111/255, 150/255)   # blue: projection
 # Anchor the labels three months before the last month on the x axis.
 x1 <- max(m_summary2$month_num) - 3
 # Look up the row for that month explicitly; using x1 itself as a row index
 # is only correct while month_num happens to equal the row number.
 label_row <- match(x1, m_summary2$month_num)
 y1 <- m_summary2$count[label_row]
 y2 <- m_summary2$proj[label_row]
 a1 <- annotate("text", x = x1, y = y1, label = "Current", vjust = -2, hjust = 0.2, colour = c1)
 a2 <- annotate("text", x = x1, y = y2, label = "Projection", vjust = -2, hjust = 0.2, colour = c2)
 p2 + a1 + a2

Let me know if this works!

Ramnath
  • 54,439
  • 16
  • 125
  • 152
  • after all of that, i'm going to give you the check mark on your first answer. I might as well start using more tools. Thank you for both of your solutions! – Dan Apr 06 '10 at 02:16
  • reshape and plyr (both by Hadley Wickham) are excellent tools to help you modify the structure of your data. there are great tutorials on both. here is one link: http://www.cerebralmastication.com/2009/10/kicking-ass-with-plry/ hope this helps! – Ramnath Apr 06 '10 at 02:21
  • yeah, i use them both. It just bothers me that they can obscure the human readability of a table sometimes. – Dan Apr 06 '10 at 05:55
  • true dan. but i cant imagine doing any data processing these days without plyr and reshape. it is such a godsend (thanks to hadley!) – Ramnath Apr 14 '10 at 15:13
1

This is another way to add a legend manually. This allows you to pick what color belongs to each legend name and can be used as a template. This is the explicit legend.

x <- seq_len(10)
y <- x^2
z <- x^3
values <- data.frame(x = x, y = y, z = z)
# The colour must be mapped inside aes() for a legend entry to be created;
# the string given there is the key that scale_colour_manual() looks up.
ggplot(data = values, mapping = aes(x = x)) +
  geom_line(mapping = aes(y = y, colour = "x^2")) +
  geom_line(mapping = aes(y = z, colour = "x^3")) +
  scale_colour_manual(
    name = "",
    values = c("x^2" = "cornflowerblue", "x^3" = "lightgreen")
  )

This is a better way to define color variables. You tidy your data before the visualization. This is the implicit legend.

library(tidyverse)

# Simulated index levels for ten consecutive days starting today. No seed is
# set, so the values differ from run to run.
sp500 <- rnorm(10, mean = 2400, sd = 50)
nasdaq <- rnorm(10, mean = 6250, sd = 100)
date <- seq(Sys.Date(), Sys.Date() + 9, by = 1)

dataMatrix <- tibble(sp500, nasdaq, date)

dataMatrix %>%
  # Stack the two index columns into an `index` (column name) / `price`
  # (value) pair; `index` is then used to colour the lines.
  # pivot_longer() is the current replacement for the superseded gather().
  pivot_longer(cols = c(sp500, nasdaq), names_to = "index", values_to = "price") %>%
  ggplot(aes(x = date, y = price, colour = index)) +
  geom_line() +
  # Customize the legend. Colours and labels are tied to the series by name
  # (via `values` names and `breaks`) rather than by alphabetical level order.
  scale_colour_manual(
    name = "Index",
    values = c(nasdaq = "blue", sp500 = "red"),
    breaks = c("nasdaq", "sp500"),
    labels = c("Nasdaq", "S&P 500")
  ) +
  # Customize the plot descriptions.
  labs(
    title = "FINANCIAL MARKETS",
    subtitle = "USA INDEXES",
    caption = "MJR",
    x = "Date",
    y = "Price"
  )
mjr2000
  • 110
  • 5