1

I wrote the below code to create this cumulative plot but for some reason, the green line is showing thicker than the other lines on the plot.

enter image description here

Below is the code I am using:

# Empirical CDF of the PM2.5 change for the rows of data1 in one group
group_ecdf <- function(group) {
  ecdf(data1[data1$Group==group,]$Change_in_PM2.5)
}

# The first group opens the plot and sets the axes, labels and title
plot(group_ecdf("0-25"),
     main="ΔPM2.5 (µg/m³) distribution across Minority rate ranges",
     xlab="ΔPM2.5 (µg/m³)",
     ylab="Cumulative Proportion",
     xlim=c(-1,1),
     col="orange")

# The remaining groups are overlaid on the same axes
lines(group_ecdf("25-50"), col="#CC6666")
lines(group_ecdf("50-75"), col="#9999CC")
lines(group_ecdf("75-100"), col="#66CC99")

# Dashed vertical reference line at zero change
abline(v=0, lty=2, lwd=1, col="black")

# Legend placed in the rectangle spanned by the two (x, y) corners, no box
legend(x = c(0.8, 1), y = c(0, 0.45),
       legend=c("0-25","25-50","50-75","75-100"),  # one label per group
       col=c("orange", "#CC6666", "#9999CC", "#66CC99"),  # same order as labels
       pch=15,  # filled square marker
       bty="n",
       cex = 0.77, x.intersp = 0.3, y.intersp = 0.3)

Any idea why this would be the case?

Sample data looks like this:

enter image description here

Dput output:

structure(list(Minority_rate = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.41, 0, 0, 0, 0.5 ), Change_in_PM2.5 = c(-0.2465, -0.2424, -0.2332, -0.2313, -0.224, -0.2142, -0.2056, -0.1947, -0.1911, -0.1865, -0.1859, -0.1761, -0.1725, -0.1593, -0.1577, -0.1532, -0.1531, -0.1413, -0.1332, -0.1294, -0.119, -0.1159, -0.1153, -0.0993, -0.0962, -0.499, -0.0859, -0.0817, -0.0806, -0.4755), Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 3L), .Label = c("0-25", "25-50", "50-75", "75-100"), class = "factor"), Population = c(29308L, 36379L, 29395L, 29582L, 29588L, 23079L, 29546L, 29608L, 75403L, 36379L, 29629L, 36068L, 77297L, 81857L, 29186L, 34215L, 57007L, 36264L, 79050L, 31984L, 24278L, 59723L, 36197L, 21931L, 21257L, 77362L, 36396L, 67234L, 29504L, 186205L), Population_weighted_ChangeinPM2.5 = c(-5.4e-06, -6.6e-06, -5.13e-06, -5.12e-06, -4.96e-06, -3.7e-06, -4.54e-06, -4.31e-06, -1.08e-05, -5.07e-06, -4.12e-06, -4.75e-06, -9.97e-06, -9.75e-06, -3.44e-06, -3.92e-06, -6.53e-06, -3.83e-06, -7.88e-06, -3.1e-06, -2.16e-06, -5.18e-06, -3.12e-06, -1.63e-06, -1.53e-06, -2.89e-05, -2.34e-06, -4.11e-06, -1.78e-06, -6.62e-05)), row.names = c(NA, 30L), class = "data.frame")

T.P.
  • 63
  • 5
  • 1
    It looks like the green line is a series of dots/markers. Can’t explain it and without sample data can’t reproduce it either. – Dave2e Sep 14 '22 at 02:50
  • But the green line is set up similar to the other lines. – T.P. Sep 14 '22 at 17:22
  • It's not allowing me to add the CSV file, but I just added a screenshot of the sample data to the description. – T.P. Sep 14 '22 at 22:11
  • 2
    Paste in the output of `dput(head(data1, 30))`; this will be the first 30 rows of your data frame. Then it will be easier to cut and paste the data. – Dave2e Sep 14 '22 at 22:13
  • You can share your data with the `dput` function: https://youtu.be/3EID3P1oisg?t=35 – Shawn Hemelstrand Sep 14 '22 at 22:15

1 Answer

1

I am not sure how you originally created the above plot, since the default option just plots markers.
It seems that adding `verticals = TRUE` and `do.points = FALSE` to the plot statements creates the lines which you are looking for.

Try this:

# Cumulative distribution of the PM2.5 change, one step line per group,
# all drawn on the same axes.
# The group labels and colours are listed once and shared with the legend
# so the curves and the legend cannot drift apart.
groups <- c("0-25","25-50","50-75","75-100")
group_cols <- c("orange", "#CC6666", "#9999CC", "#66CC99")

first_curve <- TRUE
for (i in seq_along(groups)) {
  values <- data1$Change_in_PM2.5[data1$Group %in% groups[i]]
  values <- values[!is.na(values)]

  # ecdf() stops with an error when it is given no values, so a group
  # without any rows is reported and skipped instead of aborting the plot
  if (length(values) == 0L) {
    warning("no data for group ", groups[i], "; curve skipped", call. = FALSE)
    next
  }

  # verticals = TRUE joins the steps with vertical segments and
  # do.points = FALSE suppresses the marker at every observation. By default
  # markers are drawn only when the ECDF has fewer than 1000 knots
  # (see ?plot.stepfun), which makes small groups look like a thicker line.
  if (first_curve) {
    # The first curve that has data opens the plot: axes, labels and title
    plot(ecdf(values),
         xlim=c(-1,1),
         xlab="ΔPM2.5 (µg/m³)",
         ylab="Cumulative Proportion",
         main="ΔPM2.5 (µg/m³) distribution across Minority rate ranges",
         col=group_cols[i], verticals = TRUE, do.points = FALSE)
    first_curve <- FALSE
  } else {
    lines(ecdf(values),
          col=group_cols[i], verticals = TRUE, do.points = FALSE)
  }
}

# Dashed vertical reference line at zero change
abline(v=0, col="black", lty=2, lwd=1)
legend(x = c(0.8, 1), y = c(0, 0.45),
       legend=groups,  # text in the legend
       cex = 0.9, x.intersp = 0.3, y.intersp = 0.99,
       col=group_cols,  # point colors
       pch=19,bty="n")  # specify the point type to be a filled circle
Dave2e
  • 22,192
  • 18
  • 42
  • 50