I'm trying to plot a graph showing a regression line for the whole dataset (in my case, hematology data for different strains of male mice) as well as regression lines for the individual strains. I saw a similar post (Regression line for the entire dataset together with regression lines based on groups in R ggplot2 ?), but it doesn't address the colors of the regression lines, so it doesn't really help. I've assigned a color from rainbow() to each strain to tell them apart on the graph.
Ideally, the color of the points for a strain and the color of the regression line for that strain should match.
Edit: Sorry Jaap, I deleted all the ML$, and I get errors for variables not found. I added data=ML, and that still doesn't work. Also, when I added group=Color, the colors are still wrong. This is the original data ML:
# ML: the example data, a data.frame with 179 rows (dput output).
#   Strain - factor with 35 levels (the strain names listed in .Label).
#   Color  - factor with 35 levels; the levels are hex colour strings listed
#            in sorted order, so Color's level order is NOT Strain's level
#            order (e.g. the first rows are Strain level 15 but Color level 9).
#   y, x   - numeric measurements; both contain NA values in the later rows.
ML <- structure(list(Strain = structure(c(15L, 15L, 15L, 15L, 33L,
33L, 33L, 33L, 33L, 33L, 33L, 33L, 17L, 17L, 17L, 17L, 18L, 18L,
18L, 9L, 9L, 9L, 35L, 35L, 35L, 35L, 28L, 28L, 28L, 28L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 12L, 12L, 12L, 12L, 23L, 23L, 23L,
21L, 21L, 21L, 8L, 8L, 8L, 8L, 22L, 22L, 22L, 11L, 11L, 11L,
11L, 13L, 13L, 13L, 13L, 3L, 3L, 3L, 3L, 14L, 14L, 14L, 22L,
22L, 22L, 22L, 29L, 29L, 29L, 29L, 32L, 32L, 32L, 32L, 21L, 21L,
21L, 21L, 24L, 24L, 24L, 24L, 16L, 16L, 16L, 3L, 3L, 3L, 3L,
6L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 6L, 6L, 14L, 14L, 14L,
14L, 31L, 31L, 20L, 20L, 26L, 26L, 26L, 26L, 4L, 4L, 4L, 5L,
18L, 18L, 18L, 18L, 23L, 23L, 23L, 23L, 27L, 27L, 8L, 8L, 29L,
29L, 29L, 29L, 17L, 17L, 34L, 27L, 27L, 27L, 27L, 9L, 9L, 9L,
9L, 10L, 10L, 10L, 10L, 6L, 6L, 6L, 4L, 4L, 7L, 7L, 7L, 7L, 20L,
20L, 20L, 20L, 25L, 25L, 25L, 25L, 30L, 30L, 30L, 30L, 19L, 19L,
19L, 19L), .Label = c("129X1/SvJ", "A/J", "AXB10/PgnJ", "AXB13/PgnJ",
"AXB15/PgnJ", "AXB19a/PgnJ", "AXB4/PgnJ", "AXB8/PgnJ", "BALB/cByJ",
"BALB/cJ", "BTBRT+tf/J", "BXA1/PgnJ", "BXA12/PgnJ", "BXA13/PgnJ",
"BXA14/PgnJ", "BXA16/PgnJ", "BXA4/PgnJ", "BXA8/PgnJ", "BXD40/TyJ",
"BXD5/TyJ", "BXD75/RwwJ", "BXH10/TyJ", "BXH14/TyJ", "BXH8/TyJ",
"C3H/HeJ", "C57BL/6J", "C58/J", "CXB12/HiAJ", "CXB2/ByJ", "DBA/2J",
"LP/J", "NOD/ShiL_+J", "NOR/LtJ", "NZB/BINJ", "SJL/J"), class = "factor"),
Color = structure(c(9L, 9L, 9L, 9L, 28L, 28L, 28L, 28L, 28L,
28L, 28L, 28L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 20L, 20L,
20L, 26L, 26L, 26L, 26L, 19L, 19L, 19L, 19L, 31L, 31L, 31L,
31L, 25L, 25L, 25L, 25L, 14L, 14L, 14L, 14L, 2L, 2L, 2L,
4L, 4L, 4L, 22L, 22L, 22L, 22L, 3L, 3L, 3L, 16L, 16L, 16L,
16L, 7L, 7L, 7L, 7L, 32L, 32L, 32L, 32L, 8L, 8L, 8L, 3L,
3L, 3L, 3L, 21L, 21L, 21L, 21L, 29L, 29L, 29L, 29L, 4L, 4L,
4L, 4L, 1L, 1L, 1L, 1L, 10L, 10L, 10L, 32L, 32L, 32L, 32L,
35L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 35L, 35L, 8L, 8L, 8L,
8L, 30L, 30L, 5L, 5L, 15L, 15L, 15L, 15L, 33L, 33L, 33L,
34L, 12L, 12L, 12L, 12L, 2L, 2L, 2L, 2L, 17L, 17L, 22L, 22L,
21L, 21L, 21L, 21L, 11L, 11L, 27L, 17L, 17L, 17L, 17L, 20L,
20L, 20L, 20L, 18L, 18L, 18L, 18L, 35L, 35L, 35L, 33L, 33L,
24L, 24L, 24L, 24L, 5L, 5L, 5L, 5L, 13L, 13L, 13L, 13L, 23L,
23L, 23L, 23L, 6L, 6L, 6L, 6L), .Label = c("#000FFFFF", "#003AFFFF",
"#0066FFFF", "#0092FFFF", "#00BDFFFF", "#00E9FFFF", "#00FF0FFF",
"#00FF3AFF", "#00FF66FF", "#00FF92FF", "#00FFBDFF", "#00FFE9FF",
"#1D00FFFF", "#1DFF00FF", "#4900FFFF", "#49FF00FF", "#7500FFFF",
"#75FF00FF", "#A000FFFF", "#A0FF00FF", "#CC00FFFF", "#CCFF00FF",
"#F800FFFF", "#F8FF00FF", "#FF0000FF", "#FF002CFF", "#FF0057FF",
"#FF0083FF", "#FF00AFFF", "#FF00DBFF", "#FF2C00FF", "#FF5700FF",
"#FF8300FF", "#FFAF00FF", "#FFDB00FF"), class = "factor"),
y = c(48.4, 48.3, 49.9, 47.4, 48, 43, 41.6, 41.5, 43.4, 42.7,
42.3, 42.6, 54.2, 58.8, 60.1, 56.8, 41.6, 43.1, 43.1, 51.5,
54.2, 50.6, 46.2, 57, 52.1, 51.1, 59.8, 65.9, 58.8, 58.1,
54.6, 52.3, 53.9, 54.6, 51.8, 53.8, 60.5, 50.8, 41.7, 40.2,
40.6, 38.8, 47.6, 48.8, 46.9, 50.8, 48.5, 49.1, 39.4, 41.4,
41.1, 40.2, 43.6, 39.3, 45.2, 47.8, 59, 60.3, 45.8, 38.8,
39.1, 39.9, 40, 48.3, 49.5, 46.3, 45.6, 58.8, 57.7, 59.3,
56.1, 54.3, 46.6, 42.4, 47.9, 46.2, 43.9, 47.9, 61.3, 50.3,
49.1, 58.1, 47, 44.9, 48.1, 51.9, 58.9, 56.3, 61.9, 58.2,
50, 52.2, 45.9, 44.2, 47.2, 48.5, 44.8, 41.6, 46.1, 43.9,
49.5, 49.4, 50.7, 47.6, 48, 41.5, 39.9, 59.7, 59.1, 58, 57.4,
48.7, 55.4, 43.5, 46.4, 41.2, 45.5, 39.3, 47.3, 51.9, 48.8,
56.7, 51, 42.5, 41.6, 40.6, 41.5, 48.8, 47.9, 48.9, 47, 40.4,
41.7, 39.8, 41.6, 50.9, 46.6, 49.9, 45.3, 52.4, 59.6, 54.7,
39.5, 42.8, 39.9, 40.9, 54.3, 53.8, 53.6, 54.8, 53.6, 56.7,
56.7, 54.1, NA, NA, NA, NA, NA, 47.8, 51.6, 46.7, 47.5, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), x = c(26.6, 29.4, 26.8, 22.9, 9.5, 7.8, 5.6, 7.8, 9.1,
8.3, 8.3, 6.8, 24.9, 29, 30.7, 46.2, 26.7, 29.6, 25.5, 29.5,
34, 44.8, 6, 9.1, 7.2, 15.2, 44.1, 27.4, 43.5, 36.6, 37.5,
40, 34, 39.5, 25.3, 38, 25.8, 23.8, 22.9, 21.6, 16.4, 16.9,
28.1, 27.5, 36.2, 8.5, 17.9, 26.1, 13.9, 14.3, 27.6, 25.2,
26.4, 16, 17.5, 5, 6.6, 6.1, 3.9, 16.2, 17.5, 20.1, 13.4,
13.9, 18.6, 10.1, 11.7, 32.2, 35.9, 31.7, 19.8, 27.8, 17,
18.3, 36.2, 26.5, 20.6, 22.6, 11.3, 12.4, 7.4, 8.5, 16.2,
17.3, 19.7, 9.3, 20.3, 17.5, 44.9, 26.4, 21.4, 10.4, 13.6,
12.1, 20.6, 34.8, 5.5, 12.8, 18.3, 12.6, 18, 20.3, 19, 21.5,
17.1, 11.8, 10.6, 47, 45.5, 50.4, 46.4, 5.6, 6.1, 22.9, 21.7,
8.3, 9, 7.2, 11.2, 14.2, 12.8, 41.4, 10.4, 29.9, 17.2, 16.7,
20.9, 25.1, 17.1, 18.2, 11.2, 4.8, 7.4, 14.5, 13.5, 23.4,
19.1, 20, 18, 33.8, 15.8, 4.1, 7.2, 4.9, 5, 6, 40.3, 39.2,
25.8, 31.5, 48.2, 45.8, 43.3, 53.6, NA, NA, NA, NA, NA, 18.9,
18.5, 22, 24, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA)), .Names = c("Strain", "Color", "y",
"x"), row.names = c(NA, -179L), class = "data.frame")
Here are the other variables that will be mentioned in my code.
# Axis titles live in variables because this plot is meant to be produced in
# a loop over many other variables.
NameX <- "HCT(%)"
NameY <- "MCV(fl)"
# One rainbow colour per strain; 35 because there are 35 strains.
MyRainbow <- rainbow(n = 35)
The correct colors for points should be like this:
I'm trying to add the regression lines for each strain in ggplot2, but I either get the wrong colors, or I get the right colors for the points while the colors of the regression lines don't match those of the points. Having the points match their regression lines is not enough if the colors are not the ones I designated, because I want the color representing each strain to stay consistent throughout my project (which is why other similar posts on Stack Overflow didn't answer my question). Also, I reordered the strains in the dataframe ML, and that didn't make a difference.
Here's the code for the scenario in which the colors are wrong but they match:
# Attempt 1: points and per-strain lines share colours, but not the designated
# ones.
# Both layers MAP colour to the factor ML$Color, so the manual scale decides
# what is drawn: it hands out MyRainbow to the levels of Color in level order.
# Those levels are the hex strings in sorted order, not strain order, so the
# colour drawn for a strain is generally not the hex value stored for it.
# NOTE(review): labels = ML$Strain supplies one label per row rather than one
# per level; presumably this goes unnoticed only because the legend is off.
# NOTE(review): mapping colour to Strain with values = MyRainbow, or keeping
# Color with an identity colour scale, looks like the intended setup - confirm.
ggplot(data = ML, mapping = aes(x = x, y = y)) +
  geom_point(mapping = aes(colour = ML$Color), na.rm = TRUE) +
  # one straight-line fit per colour group
  geom_smooth(
    mapping = aes(colour = ML$Color),
    method = "lm", formula = y ~ x, se = FALSE, na.rm = TRUE
  ) +
  # single fit over all rows, drawn in black with its confidence band
  geom_smooth(
    method = "lm", formula = y ~ x, se = TRUE, na.rm = TRUE,
    colour = "black", lwd = 0.8
  ) +
  scale_color_manual(
    "Strain",
    values = MyRainbow, labels = ML$Strain, guide = FALSE
  ) +
  # label from lm_eqn() (defined elsewhere), placed at mid-x and maximum y
  geom_text(
    x = (max(ML$x, na.rm = TRUE) + min(ML$x, na.rm = TRUE)) / 2,
    y = max(ML$y, na.rm = TRUE),
    label = lm_eqn(ML), size = 4, parse = TRUE
  ) +
  labs(x = paste(NameX), y = paste(NameY))
The image created:
Here's the code with which the points have the correct colors but the colors of the regression lines don't match those of the points; the regression lines have the same colors as in the previous scenario:
# Attempt 2: points show the designated colours, per-strain lines do not.
# The point layer SETS colour (outside aes()), so the hex values held in
# ML$Color are used as given and bypass the colour scale. The per-strain
# smooth still MAPS colour to the factor ML$Color, so its colours come from
# the manual scale, which hands out MyRainbow to Color's levels in level
# order (sorted hex strings, not strain order). Hence the two layers disagree.
# NOTE(review): labels = ML$Strain supplies one label per row rather than one
# per level; presumably this goes unnoticed only because the legend is off.
ggplot(data = ML, mapping = aes(x = x, y = y)) +
  geom_point(colour = ML$Color, na.rm = TRUE) +
  # one straight-line fit per colour group
  geom_smooth(
    mapping = aes(colour = ML$Color),
    method = "lm", formula = y ~ x, se = FALSE, na.rm = TRUE
  ) +
  # single fit over all rows, drawn in black with its confidence band
  geom_smooth(
    method = "lm", formula = y ~ x, se = TRUE, na.rm = TRUE,
    colour = "black", lwd = 0.8
  ) +
  scale_color_manual(
    "Strain",
    values = MyRainbow, labels = ML$Strain, guide = FALSE
  ) +
  # label from lm_eqn() (defined elsewhere), placed at mid-x and maximum y
  geom_text(
    x = (max(ML$x, na.rm = TRUE) + min(ML$x, na.rm = TRUE)) / 2,
    y = max(ML$y, na.rm = TRUE),
    label = lm_eqn(ML), size = 4, parse = TRUE
  ) +
  labs(x = paste(NameX), y = paste(NameY))
The image produced can be found here:
Then when I do this:
# Attempt 3: fails with "Incompatible lengths for set aesthetics: colour".
# Here the first smooth layer SETS colour to the whole ML$Color vector instead
# of mapping it. With no colour or group mapping left, that layer no longer
# splits the data by strain.
# NOTE(review): presumably the error arises because a set aesthetic must have
# length 1 or match the rows the layer draws, and the fitted line does not
# have one row per row of ML - confirm against the ggplot2 documentation.
# NOTE(review): no layer maps colour any more, so the manual colour scale
# below appears to have nothing to act on.
ggplot(data = ML, mapping = aes(x = x, y = y)) +
  geom_point(colour = ML$Color, na.rm = TRUE) +
  geom_smooth(
    method = "lm", formula = y ~ x, se = FALSE, na.rm = TRUE,
    colour = ML$Color
  ) +
  # single fit over all rows, drawn in black with its confidence band
  geom_smooth(
    method = "lm", formula = y ~ x, se = TRUE, na.rm = TRUE,
    colour = "black", lwd = 0.8
  ) +
  scale_color_manual(
    "Strain",
    values = MyRainbow, labels = ML$Strain, guide = FALSE
  ) +
  # label from lm_eqn() (defined elsewhere), placed at mid-x and maximum y
  geom_text(
    x = (max(ML$x, na.rm = TRUE) + min(ML$x, na.rm = TRUE)) / 2,
    y = max(ML$y, na.rm = TRUE),
    label = lm_eqn(ML), size = 4, parse = TRUE
  ) +
  labs(x = paste(NameX), y = paste(NameY))
I get an error: "Incompatible lengths for set aesthetics: colour". What's going wrong here? How can I turn the regression lines into the right colors?
Thank you very much for your help!