1

Here is the code I have for what I thought was a simple line graph

# Line chart with point markers: `years` mapped to x, `calculations` mapped
# to y, and every observation placed in a single group.
ggplot(
    top15andAllDatasummary.df,
    aes(years, calculations, group = 1)
) +
    geom_line() +
    geom_point()

And I got this error:

Error: Aesthetics must be either length 1 or the same as the data (16): x, y, group

I have data in a data frame in R. My x-axis was going to be years and the y-axis was going to be some calculations (16 of them) I constructed per year.

Edited to add

# Wide-format summary table as printed by dput(): 6 rows (one per summary
# statistic, identified by row.names) and 15 numeric columns (one per year,
# named "2001" through "2015").
structure(list(`2001` = c(349.315750645518, 217.47436370343, 
5.17963850977499, 126.661748432313, 57, 39), `2002` = c(703.26693877551, 
429.92, 9.32897959183673, 264.017959183673, 161, 108), `2003` = c(314.897774687065, 
193.792420027816, 4.08936022253129, 117.015994436718, 54, 37), 
    `2004` = c(305.988451086957, 190.680027173913, 3.87839673913043, 
    111.430027173913, 55, 38), `2005` = c(118.528015659408, 74.3175923660387, 
    1.50942011255199, 42.7010031808172, 10, 8), `2006` = c(120.531992244304, 
    73.8279205041202, 1.54362578768783, 45.1604459524964, 10, 
    8), `2007` = c(113.973899988451, 69.7619817530893, 1.44693382607691, 
    42.7649844092851, 10, 8), `2008` = c(110.676242590059, 67.3693570451436, 
    1.36285909712722, 41.9440264477884, 9, 7), `2009` = c(101.965558714192, 
    63.1446534003936, 1.22982724688388, 37.5910780669145, 9, 
    7), `2010` = c(93.9744360902256, 59.8894736842105, 1.14199785177229, 
    32.9429645542427, 9, 7), `2011` = c(91.8911316298046, 58.5660296328108, 
    1.15675327464033, 32.1683487223534, 9, 7), `2012` = c(91.2302181013592, 
    58.598356337583, 1.16773785691708, 31.4641239068591, 8, 6
    ), `2013` = c(87.1390443392165, 55.0509040034438, 1.10277658200603, 
    30.9853637537667, 8, 6), `2014` = c(85.7812132234942, 56.0456831068792, 
    1.09725045469134, 28.6382796619236, 8, 6), `2015` = c(88.331452900479, 
    58.526237360298, 1.22362959020756, 28.5815859499734, 8, 6
    )), .Names = c("2001", "2002", "2003", "2004", "2005", "2006", 
"2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", 
"2015"), row.names = c("AllDataMeanByYear", "AllDataMeanAggAssault", 
"AllDataMeanMurderManSlaughter", "AllDataMeanRobbery", "AllDataMedianByYear", 
"AllDataMedianAggAssault"), class = "data.frame")


All Code:

 ## Total
## Script overview: adds a `total` column to `lwdata`, derives the `top15`
## subset, computes per-year means and medians (via tapply on `Year`) for both
## data sets, rounds them to two decimals, stacks them into summary matrices
## and data frames, and finally plots the Chicago rows of `top15`.
## Columns read: murdermanslaughter, Robbery, Aggravated_assault, Year, City.
lwdata$total <- lwdata$murdermanslaughter + lwdata$Robbery +
  lwdata$Aggravated_assault
## Data Calculations Top 15
## The cutoff is the 15th value of `total` once rows are ordered by Year and
## then total, both descending; every row whose total reaches that cutoff is
## kept, whatever its year.
## NOTE(review): this is a single global threshold, not a per-year top 15 --
## confirm that this is the intended selection.
top15 <- lwdata[
  lwdata$total >=
    lwdata$total[order(lwdata$Year, lwdata$total, decreasing = TRUE)][15],
]
## Top 15 Means
Top15MeanByYear <- tapply(top15$total, top15$Year, mean)
Top15MeanAggAssault <- tapply(top15$Aggravated_assault, top15$Year, mean)
Top15MeanMurderManSlaughter <- tapply(top15$murdermanslaughter, top15$Year, mean)
Top15MeanRob <- tapply(top15$Robbery, top15$Year, mean)
## All Data Means
AllDataMeanByYear <- tapply(lwdata$total, lwdata$Year, mean)
AllDataMeanAggAssault <- tapply(lwdata$Aggravated_assault, lwdata$Year, mean)
AllDataMeanMurderManSlaughter <- tapply(lwdata$murdermanslaughter, lwdata$Year, mean)
AllDataMeanRobbery <- tapply(lwdata$Robbery, lwdata$Year, mean)
## Top 15 Medians
Top15MedianByYear <- tapply(top15$total, top15$Year, median)
Top15MedianAggAssault <- tapply(top15$Aggravated_assault, top15$Year, median)
Top15MedianMurderManSlaughter <- tapply(top15$murdermanslaughter, top15$Year, median)
Top15MedianRob <- tapply(top15$Robbery, top15$Year, median)
## All Data Medians
AllDataMedianByYear <- tapply(lwdata$total, lwdata$Year, median)
AllDataMedianAggAssault <- tapply(lwdata$Aggravated_assault, lwdata$Year, median)
AllDataMedianMurderManSlaughter <- tapply(lwdata$murdermanslaughter, lwdata$Year, median)
AllDataMedianRobbery <- tapply(lwdata$Robbery, lwdata$Year, median)
## Rounding Data To Two Decimal Points
Top15MeanByYear <- round(Top15MeanByYear, digits = 2)
Top15MeanAggAssault <- round(Top15MeanAggAssault, digits = 2)
Top15MeanMurderManSlaughter <- round(Top15MeanMurderManSlaughter, digits = 2)
Top15MeanRob <- round(Top15MeanRob, digits = 2)
AllDataMeanByYear <- round(AllDataMeanByYear, digits = 2)
AllDataMeanAggAssault <- round(AllDataMeanAggAssault, digits = 2)
## Fixed: this line used to round AllDataMeanAggAssault a second time, which
## left AllDataMeanMurderManSlaughter unrounded.
AllDataMeanMurderManSlaughter <- round(AllDataMeanMurderManSlaughter, digits = 2)
AllDataMeanRobbery <- round(AllDataMeanRobbery, digits = 2)
Top15MedianByYear <- round(Top15MedianByYear, digits = 2)
Top15MedianAggAssault <- round(Top15MedianAggAssault, digits = 2)
Top15MedianMurderManSlaughter <- round(Top15MedianMurderManSlaughter, digits = 2)
Top15MedianRob <- round(Top15MedianRob, digits = 2)
AllDataMedianByYear <- round(AllDataMedianByYear, digits = 2)
AllDataMedianAggAssault <- round(AllDataMedianAggAssault, digits = 2)
AllDataMedianMurderManSlaughter <- round(AllDataMedianMurderManSlaughter, digits = 2)
AllDataMedianRobbery <- round(AllDataMedianRobbery, digits = 2)
## Summaries
## rbind() stacks the per-year vectors as rows, so each result is a matrix
## with one row per statistic and one column per year.
AllDataSummary <- rbind(
  AllDataMeanByYear, AllDataMeanAggAssault,
  AllDataMeanMurderManSlaughter, AllDataMeanRobbery,
  AllDataMedianByYear, AllDataMedianAggAssault,
  AllDataMedianMurderManSlaughter, AllDataMedianRobbery
)
Top15Summary <- rbind(
  Top15MeanByYear, Top15MeanAggAssault,
  Top15MeanMurderManSlaughter, Top15MeanRob,
  Top15MedianByYear, Top15MedianAggAssault,
  Top15MedianMurderManSlaughter, Top15MedianRob
)
Top15andAllDatasummary <- rbind(AllDataSummary, Top15Summary)
## Class of New Items
class(AllDataSummary)
class(Top15Summary)
## Fixed: the object is named with a capital "T"; the lower-case spelling
## used here before refers to an object this script never creates.
class(Top15andAllDatasummary)
## Converting Matrices to Data Frames
AllDataSummary.df <- as.data.frame(AllDataSummary)
Top15Summary.df <- as.data.frame(Top15Summary)
Top15andAllDatasummary.df <- as.data.frame(Top15andAllDatasummary)
## Checking of New Classes
class(AllDataSummary.df)
class(Top15Summary.df)
class(Top15andAllDatasummary.df)
## Verifications for Names of New Components
colnames(Top15andAllDatasummary.df)
rownames(Top15andAllDatasummary.df)
## New Components
## Both vectors hold the column names (the years) and live in the workspace,
## not as columns of the data frame.
## NOTE(review): `calculations` is identical to `years`; rownames() (the
## statistic names) was presumably intended -- confirm before relying on it.
years <- colnames(Top15andAllDatasummary.df)
calculations <- colnames(Top15andAllDatasummary.df)
## Chicago
Chicago <- top15[which(top15$City == "Chicago"), ]
## Basic Plots
plot(
  y = Chicago$total, x = Chicago$Year, type = "l",
  xlab = "Year", ylab = "Total       Violent Crime (minus rape)",
  main = "Chicago-Specific Data", col = "blue"
)
## Data Types for Chicago
str(Chicago)

link to full >100K set of data is here

Laura Walker
  • 307
  • 2
  • 6
  • 16
  • 1
    Would help to have data. Please read http://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example – s_baldur Oct 09 '16 at 17:40
  • I'm really new to R and I have to say I'm struggling with trying to get you this data even after looking at that site. I don't want to get kicked off because of this – Laura Walker Oct 09 '16 at 17:50
  • Please, try again. You won't get kicked off, but it is very difficult to help you without the data. Read about the function `dput` (in the link provided by @snoram). If you still find this hard, you can upload your data in a CSV file somewhere we can download it from. In the meantime, at least call `head(yourdata)` and show us the output so we have at least some idea. Also, connect ggplot layers by a single `+` and not two. – jakub Oct 09 '16 at 18:01
  • So your `data.frame` is `top15andAllDatasummary.df`.Could you do `dput(top15andAllDatasummary.df)` or `dput(head(top15andAllDatasummary.df))` and paste the results in your question as code? – s_baldur Oct 09 '16 at 18:02
  • I ran your code and it is giving me `Error in eval(expr, envir, enclos) : object 'years' not found`. This is logical, since *there is no variable called `years` in your data.* Please repeat the procedure with `dput` but this time provide the data on which you actually call the plot. – jakub Oct 09 '16 at 18:33
  • That's odd...because I ran this: > ## New Components years <- colnames(Top15andAllDatasummary.df) calculations <- colnames(Top15andAllDatasummary.df) – Laura Walker Oct 09 '16 at 18:38
  • Is this at all helpful? > str(Top15andAllDatasummary.df) 'data.frame': 16 obs. of 15 variables: $ 2001: num 349.32 217.47 5.18 126.66 57 ... $ 2002: num 703.27 429.92 9.33 264.02 161 ... $ 2003: num 314.9 193.79 4.09 117.02 54 ... $ 2004: num 305.99 190.68 3.88 111.43 55 ... $ 2005: num 118.53 74.32 1.51 42.7 10 ... $ 2006: num 120.53 73.83 1.54 45.16 10 ... $ 2007: num 113.97 69.76 1.45 42.76 10 ... $ 2008: num 110.68 67.37 1.36 41.94 9 ... $ 2009: num 101.97 63.14 1.23 37.59 9 ... (There were rows for 2010-15 but I ran out of space – Laura Walker Oct 09 '16 at 18:47
  • One problem I can see is this: In that ggplot call, you need to specify the variable for `x`. This needs to be the same length as your data - meaning the number of rows (16) - which it isn't, because the variable `years` has a different length (15). Also, please include *all the code* you have used directly in the question (so that we know what you have tried so far.) If you want years to be on the x axis, you need to restructure your data. – jakub Oct 09 '16 at 18:50
  • i've added all of my code and really hope it's not too overwhelming – Laura Walker Oct 09 '16 at 19:05

2 Answers2

6

Your data frame (let's call it df) has a column for each year, and rownames for each of your calculated variables. This is "wide" data, where each row has multiple data values. ggplot is meant to work with "long" data, in which each row has a single column containing a data value, and other columns that tell us things about that data point (i.e., what variable the data point represents and what year it's from).

The tidyverse library of packages, by Hadley Wickham (who also wrote ggplot), makes it easy to transform data from wide to long and back again.

library(tidyverse)

# Copy the row names into a `variable` column, then stack every other column
# into key/value pairs: `year` receives the former column name and `value`
# the cell contents.
# Note: gather() is superseded by pivot_longer() in current tidyr.
df.new <- df %>%
    mutate(variable = rownames(df)) %>%
    gather(key = year, value = value, -variable)

                        variable year      value
1              AllDataMeanByYear 2001 349.315751
2          AllDataMeanAggAssault 2001 217.474364
3  AllDataMeanMurderManSlaughter 2001   5.179639
4             AllDataMeanRobbery 2001 126.661748
5            AllDataMedianByYear 2001  57.000000
6        AllDataMedianAggAssault 2001  39.000000
7              AllDataMeanByYear 2002 703.266939
8          AllDataMeanAggAssault 2002 429.920000
9  AllDataMeanMurderManSlaughter 2002   9.328980
10            AllDataMeanRobbery 2002 264.017959
11           AllDataMedianByYear 2002 161.000000
12       AllDataMedianAggAssault 2002 108.000000
13             AllDataMeanByYear 2003 314.897775
14         AllDataMeanAggAssault 2003 193.792420
15 AllDataMeanMurderManSlaughter 2003   4.089360
16            AllDataMeanRobbery 2003 117.015994
17           AllDataMedianByYear 2003  54.000000
18       AllDataMedianAggAssault 2003  37.000000
19             AllDataMeanByYear 2004 305.988451
20         AllDataMeanAggAssault 2004 190.680027
... and 70 more rows

This long data can then be sent to ggplot. Note that your original attempt used a variable called "years", which did not exist in the data frame. R (and ggplot) have no way of knowing that your column names (2001:2015) somehow magically represent years.

# One line per `variable`, coloured by `variable`, with `year` on the x-axis
# and `value` on the y-axis; the plot object is stored and then printed.
plot.years <- ggplot(
    df.new,
    aes(year, value, colour = variable, group = variable)
) +
    geom_line()
print(plot.years)

enter image description here

jdobres
  • 11,339
  • 1
  • 17
  • 37
3

Based on your data, I would do this:

library(tidyr)
# Store the row names in a regular `variable` column (this modifies
# top15andAllDatasummary.df in place), then reshape to long format: the
# former column names go to `years`, the cell values to `calculations`.
top15andAllDatasummary.df[["variable"]] <- rownames(top15andAllDatasummary.df)
df.long <- gather(
    top15andAllDatasummary.df,
    key = years,
    value = calculations,
    -variable
)

The point of this gather call is to restructure your data into this form:

head(df.long)
#                        variable years calculations
# 1             AllDataMeanByYear  2001   349.315751
# 2         AllDataMeanAggAssault  2001   217.474364
# 3 AllDataMeanMurderManSlaughter  2001     5.179639
# 4            AllDataMeanRobbery  2001   126.661748
# 5           AllDataMedianByYear  2001    57.000000
# 6       AllDataMedianAggAssault  2001    39.000000

Having done that, we can proceed to plotting:

# Lines plus point markers, one series per `variable`, coloured by `variable`.
ggplot(
    df.long,
    aes(years, calculations, colour = variable, group = variable)
) +
    geom_line() +
    geom_point()

Is this your desired result?

jakub
  • 4,774
  • 4
  • 29
  • 46
  • Oh my goodness, jakub, thank you so much! off to look into ways to give you credit or help scores or points... I may want to edit things but i'm hoping i have it from here! – Laura Walker Oct 09 '16 at 19:27
  • @laura It would be useful for others if you "accept" the answer (by clicking the check mark). In case of further questions regarding this particular piece of code, ask in the comments. – jakub Oct 09 '16 at 19:34
  • How would you subset AllDataMeanByYear? This didn't work: subset(AllDataSummary.df.new, subset=variable=“AllDataMeanByYear”) -> AllDataMeanByYea.Graph Error: unexpected '=' in "subset(AllDataSummary.df.new, subset=variable=" – Laura Walker Oct 09 '16 at 20:21
  • Try reading the help page for `subset` - you can invoke the help page by calling `?subset` in R. Look at the examples, they should give you a hint. – jakub Oct 09 '16 at 21:06
  • I tried that before the question i posted and i still got that error: > subset(df.long, variable==AllDataMeanByYear) Error in eval(expr, envir, enclos) : dims [product 15] do not match the length of object [240] – Laura Walker Oct 09 '16 at 21:17
  • First, I think you are missing quotes around `"AllDataMeanByYear"`. – jakub Oct 09 '16 at 21:20
  • Thank you... one last question promise. If I wanted to add another variable to this? subset(df.long, variable==AllDataMeanByYear) – Laura Walker Oct 09 '16 at 23:22
  • If you want to satisfy multiple conditions (i.e. one variable equals x AND another variable equals y) try `subset(df.long, variable == "AllDataMeanByYear" & AnotherVariable == "somevalue")`. You can read more about logical operators (such as `&` or `|`) on [this page](http://www.statmethods.net/management/operators.html). It also has a [chapter on subsetting, including the subset function](http://www.statmethods.net/management/subset.html). – jakub Oct 10 '16 at 09:02
  • @laura as an addition to my previous comment, you can select multiple rows ("observations") from a single variable using the `%in%` operator (as in `variable %in% c("value1", "value2")`). – jakub Oct 10 '16 at 09:28
  • can someone please look at my question? https://stackoverflow.com/questions/65042472/r-tibble-vs-ggplot2-plotting-graphs thanks –  Nov 27 '20 at 19:26