1

I'm trying to plot a graph showing a regression line for the whole dataset (in my case, hematology data for different strains of male mice) as well as regression lines for individual strains. I saw a similar post (Regression line for the entire dataset together with regression lines based on groups in R ggplot2 ?), but it doesn't address colors of the regression lines, so it doesn't really help. I've assigned a color from rainbow to each strain to tell them apart on the graph. So ideally, the color of the points for a strain and the color of the regression line for that strain should match.

Edit: Sorry Jaap, I deleted all the ML$, and I get errors for variables not found. I added data=ML, and that still doesn't work. Also, when I added group=Color, the colors are still wrong. This is the original data ML:

ML <- structure(list(Strain = structure(c(15L, 15L, 15L, 15L, 33L, 
33L, 33L, 33L, 33L, 33L, 33L, 33L, 17L, 17L, 17L, 17L, 18L, 18L, 
18L, 9L, 9L, 9L, 35L, 35L, 35L, 35L, 28L, 28L, 28L, 28L, 2L, 
2L, 2L, 2L, 1L, 1L, 1L, 1L, 12L, 12L, 12L, 12L, 23L, 23L, 23L, 
21L, 21L, 21L, 8L, 8L, 8L, 8L, 22L, 22L, 22L, 11L, 11L, 11L, 
11L, 13L, 13L, 13L, 13L, 3L, 3L, 3L, 3L, 14L, 14L, 14L, 22L, 
22L, 22L, 22L, 29L, 29L, 29L, 29L, 32L, 32L, 32L, 32L, 21L, 21L, 
21L, 21L, 24L, 24L, 24L, 24L, 16L, 16L, 16L, 3L, 3L, 3L, 3L, 
6L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 6L, 6L, 14L, 14L, 14L, 
14L, 31L, 31L, 20L, 20L, 26L, 26L, 26L, 26L, 4L, 4L, 4L, 5L, 
18L, 18L, 18L, 18L, 23L, 23L, 23L, 23L, 27L, 27L, 8L, 8L, 29L, 
29L, 29L, 29L, 17L, 17L, 34L, 27L, 27L, 27L, 27L, 9L, 9L, 9L, 
9L, 10L, 10L, 10L, 10L, 6L, 6L, 6L, 4L, 4L, 7L, 7L, 7L, 7L, 20L, 
20L, 20L, 20L, 25L, 25L, 25L, 25L, 30L, 30L, 30L, 30L, 19L, 19L, 
19L, 19L), .Label = c("129X1/SvJ", "A/J", "AXB10/PgnJ", "AXB13/PgnJ", 
"AXB15/PgnJ", "AXB19a/PgnJ", "AXB4/PgnJ", "AXB8/PgnJ", "BALB/cByJ", 
"BALB/cJ", "BTBRT+tf/J", "BXA1/PgnJ", "BXA12/PgnJ", "BXA13/PgnJ", 
"BXA14/PgnJ", "BXA16/PgnJ", "BXA4/PgnJ", "BXA8/PgnJ", "BXD40/TyJ", 
"BXD5/TyJ", "BXD75/RwwJ", "BXH10/TyJ", "BXH14/TyJ", "BXH8/TyJ", 
"C3H/HeJ", "C57BL/6J", "C58/J", "CXB12/HiAJ", "CXB2/ByJ", "DBA/2J", 
"LP/J", "NOD/ShiL_+J", "NOR/LtJ", "NZB/BINJ", "SJL/J"), class = "factor"), 
    Color = structure(c(9L, 9L, 9L, 9L, 28L, 28L, 28L, 28L, 28L, 
    28L, 28L, 28L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 20L, 20L, 
    20L, 26L, 26L, 26L, 26L, 19L, 19L, 19L, 19L, 31L, 31L, 31L, 
    31L, 25L, 25L, 25L, 25L, 14L, 14L, 14L, 14L, 2L, 2L, 2L, 
    4L, 4L, 4L, 22L, 22L, 22L, 22L, 3L, 3L, 3L, 16L, 16L, 16L, 
    16L, 7L, 7L, 7L, 7L, 32L, 32L, 32L, 32L, 8L, 8L, 8L, 3L, 
    3L, 3L, 3L, 21L, 21L, 21L, 21L, 29L, 29L, 29L, 29L, 4L, 4L, 
    4L, 4L, 1L, 1L, 1L, 1L, 10L, 10L, 10L, 32L, 32L, 32L, 32L, 
    35L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 35L, 35L, 8L, 8L, 8L, 
    8L, 30L, 30L, 5L, 5L, 15L, 15L, 15L, 15L, 33L, 33L, 33L, 
    34L, 12L, 12L, 12L, 12L, 2L, 2L, 2L, 2L, 17L, 17L, 22L, 22L, 
    21L, 21L, 21L, 21L, 11L, 11L, 27L, 17L, 17L, 17L, 17L, 20L, 
    20L, 20L, 20L, 18L, 18L, 18L, 18L, 35L, 35L, 35L, 33L, 33L, 
    24L, 24L, 24L, 24L, 5L, 5L, 5L, 5L, 13L, 13L, 13L, 13L, 23L, 
    23L, 23L, 23L, 6L, 6L, 6L, 6L), .Label = c("#000FFFFF", "#003AFFFF", 
    "#0066FFFF", "#0092FFFF", "#00BDFFFF", "#00E9FFFF", "#00FF0FFF", 
    "#00FF3AFF", "#00FF66FF", "#00FF92FF", "#00FFBDFF", "#00FFE9FF", 
    "#1D00FFFF", "#1DFF00FF", "#4900FFFF", "#49FF00FF", "#7500FFFF", 
    "#75FF00FF", "#A000FFFF", "#A0FF00FF", "#CC00FFFF", "#CCFF00FF", 
    "#F800FFFF", "#F8FF00FF", "#FF0000FF", "#FF002CFF", "#FF0057FF", 
    "#FF0083FF", "#FF00AFFF", "#FF00DBFF", "#FF2C00FF", "#FF5700FF", 
    "#FF8300FF", "#FFAF00FF", "#FFDB00FF"), class = "factor"), 
    y = c(48.4, 48.3, 49.9, 47.4, 48, 43, 41.6, 41.5, 43.4, 42.7, 
    42.3, 42.6, 54.2, 58.8, 60.1, 56.8, 41.6, 43.1, 43.1, 51.5, 
    54.2, 50.6, 46.2, 57, 52.1, 51.1, 59.8, 65.9, 58.8, 58.1, 
    54.6, 52.3, 53.9, 54.6, 51.8, 53.8, 60.5, 50.8, 41.7, 40.2, 
    40.6, 38.8, 47.6, 48.8, 46.9, 50.8, 48.5, 49.1, 39.4, 41.4, 
    41.1, 40.2, 43.6, 39.3, 45.2, 47.8, 59, 60.3, 45.8, 38.8, 
    39.1, 39.9, 40, 48.3, 49.5, 46.3, 45.6, 58.8, 57.7, 59.3, 
    56.1, 54.3, 46.6, 42.4, 47.9, 46.2, 43.9, 47.9, 61.3, 50.3, 
    49.1, 58.1, 47, 44.9, 48.1, 51.9, 58.9, 56.3, 61.9, 58.2, 
    50, 52.2, 45.9, 44.2, 47.2, 48.5, 44.8, 41.6, 46.1, 43.9, 
    49.5, 49.4, 50.7, 47.6, 48, 41.5, 39.9, 59.7, 59.1, 58, 57.4, 
    48.7, 55.4, 43.5, 46.4, 41.2, 45.5, 39.3, 47.3, 51.9, 48.8, 
    56.7, 51, 42.5, 41.6, 40.6, 41.5, 48.8, 47.9, 48.9, 47, 40.4, 
    41.7, 39.8, 41.6, 50.9, 46.6, 49.9, 45.3, 52.4, 59.6, 54.7, 
    39.5, 42.8, 39.9, 40.9, 54.3, 53.8, 53.6, 54.8, 53.6, 56.7, 
    56.7, 54.1, NA, NA, NA, NA, NA, 47.8, 51.6, 46.7, 47.5, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
    ), x = c(26.6, 29.4, 26.8, 22.9, 9.5, 7.8, 5.6, 7.8, 9.1, 
    8.3, 8.3, 6.8, 24.9, 29, 30.7, 46.2, 26.7, 29.6, 25.5, 29.5, 
    34, 44.8, 6, 9.1, 7.2, 15.2, 44.1, 27.4, 43.5, 36.6, 37.5, 
    40, 34, 39.5, 25.3, 38, 25.8, 23.8, 22.9, 21.6, 16.4, 16.9, 
    28.1, 27.5, 36.2, 8.5, 17.9, 26.1, 13.9, 14.3, 27.6, 25.2, 
    26.4, 16, 17.5, 5, 6.6, 6.1, 3.9, 16.2, 17.5, 20.1, 13.4, 
    13.9, 18.6, 10.1, 11.7, 32.2, 35.9, 31.7, 19.8, 27.8, 17, 
    18.3, 36.2, 26.5, 20.6, 22.6, 11.3, 12.4, 7.4, 8.5, 16.2, 
    17.3, 19.7, 9.3, 20.3, 17.5, 44.9, 26.4, 21.4, 10.4, 13.6, 
    12.1, 20.6, 34.8, 5.5, 12.8, 18.3, 12.6, 18, 20.3, 19, 21.5, 
    17.1, 11.8, 10.6, 47, 45.5, 50.4, 46.4, 5.6, 6.1, 22.9, 21.7, 
    8.3, 9, 7.2, 11.2, 14.2, 12.8, 41.4, 10.4, 29.9, 17.2, 16.7, 
    20.9, 25.1, 17.1, 18.2, 11.2, 4.8, 7.4, 14.5, 13.5, 23.4, 
    19.1, 20, 18, 33.8, 15.8, 4.1, 7.2, 4.9, 5, 6, 40.3, 39.2, 
    25.8, 31.5, 48.2, 45.8, 43.3, 53.6, NA, NA, NA, NA, NA, 18.9, 
    18.5, 22, 24, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA)), .Names = c("Strain", "Color", "y", 
"x"), row.names = c(NA, -179L), class = "data.frame")

Here are the other variables that will be mentioned in my code.

# Axis labels live in variables because this plot is meant to be produced in a
# loop over many other pairs of variables.
NameX <- "HCT(%)"
NameY <- "MCV(fl)"
# One rainbow colour per strain; there are 35 strains.
MyRainbow <- rainbow(n = 35)

The correct colors for points should be like this:

enter image description here

I'm trying to add the regression lines for each strain in ggplot2, but I either get the wrong colors, or I get the right colors for the points but the colors of the regression lines don't match those of the points. I'm not satisfied when the colors are not the ones I designated, even though the colors of the points match those of the corresponding regression lines, since I want to keep the color representing each strain consistent throughout my project (so other similar posts on Stack Overflow didn't answer my question when it comes to this). Also, I reordered the strains in the dataframe ML, and that didn't make a change.

Here's the code for the scenario in which the colors are wrong but they match:

# Variant 1: colour is mapped inside aes() for both the points and the
# per-strain fits, so both layers are coloured through scale_color_manual().
ggplot(ML, aes(y=y, x=x)) + 
  # ML$Color inside aes() is used as a grouping variable, not as literal colours.
  geom_point(na.rm=T, aes(color=ML$Color)) +
  # One lm fit per distinct Color value (se=F: no confidence band).
  geom_smooth(method="lm", na.rm=T, se=F, formula=y~x, aes(color=ML$Color)) +
  # Overall lm fit across all rows, drawn in black with a confidence band.
  geom_smooth(method="lm", na.rm=T, se=T, formula=y~x, color="black", lwd=0.8) +
  # The colours actually drawn come from MyRainbow, not from the hex strings in
  # ML$Color. NOTE(review): unnamed `values` are presumably handed out in the
  # order of the Color factor levels, which would explain colours that match
  # between layers but differ from the designated ones -- confirm.
  # guide=F suppresses the legend.
  scale_color_manual("Strain", values=MyRainbow, labels=ML$Strain, guide=F) +
  # Label placed at the horizontal midpoint of x and at the maximum y.
  # NOTE(review): lm_eqn() is not defined in the code shown here; with parse=T
  # it presumably returns a plotmath expression string -- confirm.
  geom_text(x=(max(ML$x,na.rm=T)+min(ML$x,na.rm=T))/2, y=max(ML$y, na.rm=T), label=lm_eqn(ML), size=4, parse=T) +
  labs(x=paste(NameX), y=paste(NameY))

The image created:

enter image description here

Here's the code by which the points have the correct colors, but the colors of the regression lines don't match those of the points, and the colors of the regression lines are the same as those in the previous scenario:

# Variant 2: the points get their colour set directly (outside aes()), while
# the per-strain fits still map colour inside aes() and therefore go through
# scale_color_manual(). The two layers are coloured by different mechanisms.
ggplot(ML, aes(y=y, x=x)) + 
  # color= outside aes(): one value per row of ML, used as given.
  geom_point(na.rm=T, color=ML$Color) +
  # color inside aes(): ML$Color acts as a grouping variable; the drawn colours
  # are taken from the manual scale below.
  geom_smooth(method="lm", na.rm=T, se=F, formula=y~x, aes(color=ML$Color)) +
  # Overall lm fit across all rows, drawn in black with a confidence band.
  geom_smooth(method="lm", na.rm=T, se=T, formula=y~x, color="black", lwd=0.8) +
  # Only the mapped layer (the per-strain fits) is affected by this scale.
  scale_color_manual("Strain", values=MyRainbow, labels=ML$Strain, guide=F) +
  # NOTE(review): lm_eqn() is not defined in the code shown here -- confirm
  # what it returns.
  geom_text(x=(max(ML$x,na.rm=T)+min(ML$x,na.rm=T))/2, y=max(ML$y, na.rm=T), label=lm_eqn(ML), size=4, parse=T) +
  labs(x=paste(NameX), y=paste(NameY))

The image produced can be found here:

Then when I do this:

# Variant 3: colour is set directly (outside aes()) on the per-strain fits as
# well. This is the variant reported to fail with
# "Incompatible lengths for set aesthetics: colour".
ggplot(ML, aes(y=y, x=x)) + 
  geom_point(na.rm=T, color=ML$Color) +
  # color= outside aes() receives one value per row of ML, and no grouping
  # aesthetic is mapped in this layer. NOTE(review): the error presumably
  # arises because a set aesthetic must have length 1 or match the layer's
  # computed data, which for a smooth is not one row per observation -- confirm.
  geom_smooth(method="lm", na.rm=T, se=F, formula=y~x, color=ML$Color) +
  # Overall lm fit across all rows, drawn in black with a confidence band.
  geom_smooth(method="lm", na.rm=T, se=T, formula=y~x, color="black", lwd=0.8) +
  # No layer maps colour inside aes() in this variant.
  scale_color_manual("Strain", values=MyRainbow, labels=ML$Strain, guide=F) +
  # NOTE(review): lm_eqn() is not defined in the code shown here -- confirm
  # what it returns.
  geom_text(x=(max(ML$x,na.rm=T)+min(ML$x,na.rm=T))/2, y=max(ML$y, na.rm=T), label=lm_eqn(ML), size=4, parse=T) +
  labs(x=paste(NameX), y=paste(NameY))

I get an error: Incompatible lengths for set aesthetics: colour What's going wrong here? How can I turn the regression lines into the right colors?

Thank you very much for your help!

Community
  • 1
  • 1
Lambda Lu
  • 11
  • 1
  • 4
  • 1
    Without data it is difficult to help! – Robert Oct 25 '15 at 08:56
  • 1
    A few points: 1) You can remove the `ML$` parts from your `ggplot` code; they are unnecessary. 2) You can try whether adding `group=Color` gives a different result. 3) Please add some example data that [reproduce your problem](http://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example/5963610) (follow the link for some instructions on how to do that). – Jaap Oct 25 '15 at 08:56
  • 2
    @Jaap is spot on. Do not use `$` inside `aes`. That's probably your whole problem. – Roland Oct 25 '15 at 09:18

1 Answers1

1

Using the data you provided in your question (and the code below the plot)

bam

# Drop the rows with missing values so every layer works on the same
# complete observations.
df.ML <- na.omit(ML)

# Named lookup (strain name -> hex colour) built from the data itself.
# scale_colour_manual() matches a *named* `values` vector to the levels of the
# mapped variable by name, so each strain is drawn in exactly the colour stored
# in its Color column. An unnamed vector such as the raw per-row column
# `df.ML$Color` is matched by position instead, which does not preserve the
# Strain/Color pairing.
strain_key <- unique(df.ML[, c("Strain", "Color")])
strain_colours <- setNames(as.character(strain_key$Color),
                           as.character(strain_key$Strain))

ggplot(df.ML, aes(y = y, x = x)) +
  geom_point(aes(colour = Strain), size = 1, alpha = .8) +
  guides(colour = guide_legend(nrow = 16)) +
  # One regression line per strain, mapped to the same colour scale as the
  # points so that line and point colours agree.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE,
              aes(colour = Strain, group = Strain)) +
  # Overall regression line across all strains, with its confidence band.
  geom_smooth(method = "lm", formula = y ~ x, colour = "black", se = TRUE) +
  scale_colour_manual(values = strain_colours)

(retaining my initial answer in case this answers someone else's question).

Using the example you link to in your question, adding `colour = group` to the second geom_smooth() gives me this plot (code below):

bam2

library(plyr)
library(ggplot2)

# Reproducible toy data: 50 points spread over five groups (a-e). Within each
# group, y is x times a single random slope drawn from [-1, 1], plus Gaussian
# noise.
set.seed(1)
toy_points <- data.frame(group = letters[1:5], x = rnorm(50))
alldata <- ddply(toy_points, "group", mutate,
                 y = runif(1, -1, 1) * x + rnorm(10))

# Points and per-group fits share the colour mapping; the overall fit is black.
ggplot(alldata, aes(x = x, y = y)) +
  geom_point(aes(colour = group, shape = group), size = 3, alpha = .8) +
  geom_smooth(aes(linetype = group, colour = group, group = group),
              method = "lm", se = FALSE, size = 1) +
  geom_smooth(method = "lm", size = 1, colour = "black", se = FALSE) +
  theme_bw()
Community
  • 1
  • 1
Eric Fail
  • 8,191
  • 8
  • 72
  • 128
  • Sorry Eric, I can get the color of the lines to match that of the points, but my further trouble is that it's not the color I designated by a variable in my dataframe called Color, but the one that ggplot2 assigned. – Lambda Lu Oct 25 '15 at 18:19
  • I see, would it be possible for you to share some data with `dput()`? – Eric Fail Oct 25 '15 at 18:50
  • any chance you could share your data using `dput()`, make it a lot easier to grab it. – Eric Fail Oct 26 '15 at 14:42
  • OK, thanks for your patience. Now I've put the `dput()` version. – Lambda Lu Oct 27 '15 at 00:10