22

Edit Updated the question title to reflect that this problem can be generalised to "any two lines", and does not necessarily need to be a fixed y in one line.

Considering the following polygon plot:

ggplot(df, aes(x=year,y=afw)) +
  geom_polygon() +
  scale_x_continuous("", expand=c(0,0), breaks=seq(1910,2010,10)) +
  theme_bw()

enter image description here

However, i want to fill this with two different colors. For example red for the black areas above 0 and blue for the black areas below 0. Unfortunately, using fill=col doesn't fill the correct areas.

I tried the following code (I added the geom_line in order to illustrate where the border of the fill should be):

ggplot(df, aes(x=year,y=afw)) +
  geom_line() +
  geom_polygon(aes(fill=col), alpha=0.5) +
  scale_x_continuous("", expand=c(0,0), breaks=seq(1910,2010,10)) +
  theme_bw()

which gives: enter image description here

As you can see, it's filling a lot more than it's supposed to do. How can I solve this?

The data:

df <- structure(list(year = c(1901, 1901, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2013, 2013), afw = c(0, 0, -0.246246074793035, -2.39463317156723, -2.39785897801884, 0.840850699400514, -0.843020268341422, -3.02043962318013, -0.033342848986583, -2.04947188124465, -0.00431059092206709, 2.49568940907793, 1.96988295746503, 2.26665715101342, 0.986011989723095, 1.79568940907793, 2.06665715101342, -0.601084784470454, -3.21076220382529, 2.65052811875535, 0.46988295746503, -1.09140736511562, 0.0505281187553526, 1.41827005423922, -2.80108478447045, 0.611818441335997, -1.83011704253497, -0.30753639737368, -4.43011704253497, -0.897858978018841, 1.98601198972309, -0.965600913502712, 0.0795603768198685, 0.308592634884385, -5.33011704253497, 4.00214102198116, -0.594633171567228, 0.0698829574650297, -1.60753639737368, -2.81398801027691, -2.21398801027691, -2.4365686554382, 1.53439908649729, 1.06665715101342, -1.87205252640594, -0.688181558664002, 0.0569797316585783, -3.51398801027691, 0.979560376819868, 0.289237796174707, 1.24085069940051, -4.39140736511562, 1.13117328004567, -1.72689123608336, 2.20214102198116, 2.27310876391664, 1.46665715101342, 2.18278618327148, -0.23011704253497, 1.50536682843277, 1.17633457036826, -0.0785041393091639, -1.54947188124465, -3.85269768769626, -4.31398801027691, -0.80753639737368, 1.27956037681987, 1.2376248929489, 0.195689409077933, -3.38172994576078, -4.88172994576078, -0.675278332857551, 2.25375392520697, 0.0924636026263199, -0.446246074793035, 4.06988295746503, 0.350528118755352, -1.48172994576078, 1.81504424778761, -1.42689123608336, 2.22472166714245, 0.376334570368256, -3.88495575221239, 0.211818441335998, 0.586011989723094, 1.14407650585213, 2.55697973165858, 1.92794747359406, 1.20214102198116, 3.83439908649729, 1.64407650585213, 0.986011989723095, 0.753753925206965, 0.508592634884385, 1.911818441336, 2.11504424778761, -4.06560091350271, -2.58495575221239, 1.80859263488438, 1.37956037681987, 1.58923779617471, 1.88601198972309, -0.323665429631744, -0.291407365115615, 0.818270054239223, 0.0569797316585783, 0.795689409077933, 3.32472166714245, 0.595689409077933, -0.733342848986583, -0.955923494147874, -4.32689123608336, 3.29891521552955, 1.85697973165858, 2.74407650585213, 0, 0), col = structure(c(1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L), .Label = c("B", "A"), class = "factor")), .Names = c("year", "afw", "col"), class = c("tbl_df", "data.frame"), row.names = c(NA, -117L))

Note: as you can see in the data, there are 3 rows for both 1901 and 2013. I did this because I wanted to get the fill right. Although the black fill is correct, I seem not to get a working solution with colors.

The original dataset:

orig <- structure(list(year = c(1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013), afw = c(-0.246246074793035, -2.39463317156723, -2.39785897801884, 0.840850699400514, -0.843020268341422, -3.02043962318013, -0.033342848986583, -2.04947188124465, -0.00431059092206709, 2.49568940907793, 1.96988295746503, 2.26665715101342, 0.986011989723095, 1.79568940907793, 2.06665715101342, -0.601084784470454, -3.21076220382529, 2.65052811875535, 0.46988295746503, -1.09140736511562, 0.0505281187553526, 1.41827005423922, -2.80108478447045, 0.611818441335997, -1.83011704253497, -0.30753639737368, -4.43011704253497, -0.897858978018841, 1.98601198972309, -0.965600913502712, 0.0795603768198685, 0.308592634884385, -5.33011704253497, 4.00214102198116, -0.594633171567228, 0.0698829574650297, -1.60753639737368, -2.81398801027691, -2.21398801027691, -2.4365686554382, 1.53439908649729, 1.06665715101342, -1.87205252640594, -0.688181558664002, 0.0569797316585783, -3.51398801027691, 0.979560376819868, 0.289237796174707, 1.24085069940051, -4.39140736511562, 1.13117328004567, -1.72689123608336, 2.20214102198116, 2.27310876391664, 1.46665715101342, 2.18278618327148, -0.23011704253497, 1.50536682843277, 1.17633457036826, -0.0785041393091639, -1.54947188124465, -3.85269768769626, -4.31398801027691, -0.80753639737368, 1.27956037681987, 1.2376248929489, 0.195689409077933, -3.38172994576078, -4.88172994576078, -0.675278332857551, 2.25375392520697, 0.0924636026263199, -0.446246074793035, 4.06988295746503, 0.350528118755352, -1.48172994576078, 1.81504424778761, -1.42689123608336, 2.22472166714245, 0.376334570368256, -3.88495575221239, 0.211818441335998, 0.586011989723094, 1.14407650585213, 2.55697973165858, 1.92794747359406, 1.20214102198116, 3.83439908649729, 1.64407650585213, 0.986011989723095, 0.753753925206965, 0.508592634884385, 1.911818441336, 2.11504424778761, -4.06560091350271, -2.58495575221239, 1.80859263488438, 1.37956037681987, 1.58923779617471, 1.88601198972309, -0.323665429631744, -0.291407365115615, 0.818270054239223, 0.0569797316585783, 0.795689409077933, 3.32472166714245, 0.595689409077933, -0.733342848986583, -0.955923494147874, -4.32689123608336, 3.29891521552955, 1.85697973165858, 2.74407650585213)), .Names = c("year", "afw"), class = c("tbl_df", "data.frame"), row.names = c(NA, -113L))
tjebo
  • 21,977
  • 7
  • 58
  • 94
Jaap
  • 81,064
  • 34
  • 182
  • 193

4 Answers4

19

Get the indices where the y value of two consecutive time steps have different sign. Use linear interpolation between these points to generate new x values where y is zero.

First, a smaller example to make it easier to get a feeling for the linear interpolation and which points are added to the original data:

# original data
d <- data.frame(x = 1:6,
                y = c(-1, 2, 1, 2, -1, 1))

# coerce to data.table
library(data.table)
setDT(d)

# make sure data is ordered by x
setorder(d, x)

# add a grouping variable
# only to keep track of original and interpolated points in this example
d[ , g := "orig"]

# interpolation
d2 = d[ , {
  ix = .I[c(FALSE, abs(diff(sign(d$y))) == 2)]
  if(length(ix)){
    pred_x = sapply(ix, function(i) approx(x = y[c(i-1, i)], y = x[c(i-1, i)], xout = 0)$y)
    rbindlist(.(.SD, data.table(x = pred_x, y = 0, g = "new")))} else .SD
}]

d2   
#           x  y  grp
# 1  1.000000 -1 orig
# 2  2.000000  2 orig
# 3  3.000000  1 orig
# 4  4.000000  2 orig
# 5  5.000000 -1 orig
# 6  6.000000  1 orig
# 13 1.333333  0  new
# 11 4.666667  0  new
# 12 5.500000  0  new

Plot with original and new points differentiated by color:

ggplot(data = d2, aes(x = x, y = y)) +
  geom_area(data = d2[y <= 0], fill = "red", alpha = 0.2) +
  geom_area(data = d2[y >= 0], fill = "blue", alpha = 0.2) +
  geom_point(aes(color = g), size = 4) +
  scale_color_manual(values = c("red", "black")) +
  theme_bw()

enter image description here


Apply on OP's data:

d = as.data.table(orig)
# setorder(d, year)

d2 = d[ , {
  ix = .I[c(FALSE, abs(diff(sign(d$afw))) == 2)]
  if(length(ix)){
    pred_yr = sapply(ix, function(i) approx(afw[c(i-1, i)], year[c(i-1, i)], xout = 0)$y)
    rbindlist(.(.SD, data.table(year = pred_yr, afw = 0)))} else .SD}]

ggplot(data = d2, aes(x = year, y = afw)) +
  geom_area(data = d2[afw <= 0], fill = "red") +
  geom_area(data = d2[afw >= 0], fill = "blue") +
  theme_bw()

enter image description here


In reply to @Jason Whythe's comment, the method above can be modified to account for grouped data. The interpolation is made within each group, and the plot is facetted by group:

# data grouped by 'id' 
d = data.table(
  id = rep(c("a", "b", "c"), c(6, 5, 4)),
  x = as.numeric(c(1:6, 1:5, 1:4)),
  y = c(-1, 2, 1, 2, -1, 1,
        0, -2, 0, -1, -2, 
        2, 1, -1, 1.5))

# again, this variable is just added for illustration 
d[ , g := "orig"]

d2 = d[ , {
  ix = .I[c(FALSE, abs(diff(sign(.SD$y))) == 2)]
  if(length(ix)){
    pred_x = sapply(ix, function(i) approx(x = d$y[c(i-1, i)], y = d$x[c(i-1, i)], xout = 0)$y)
    rbindlist(.(.SD, data.table(x = pred_x, y = 0, g = "new")))} else .SD
}, by = id]

ggplot(data = d2, aes(x = x, y = y)) +
  facet_wrap(~ id) +
  geom_area(data = d2[y <= 0], fill = "red", alpha = 0.2) +
  geom_area(data = d2[y >= 0], fill = "blue", alpha = 0.2) +
  geom_point(aes(color = g), size = 4) +
  scale_color_manual(values = c("red", "black")) +
  theme_bw()

enter image description here


For an alternative base solution adapted from @kohske's answer here (credits to him), see previous edits.

Henrik
  • 65,555
  • 14
  • 143
  • 159
  • Also see [base plot solution for `y != 0`](https://stackoverflow.com/a/54427945/6574038). – jay.sf Jan 29 '19 at 19:11
  • @Henrik, say the data set "orig" considered above was just the data for one particular unit (or location, or similar), in a data set new.orig of multiple units. Any thoughts on how you would apply your solution in a facet_wrap situation? Would you start with a group_by(new.orig,unit) and apply a modified rx function to each group in that? – Jason Whyte Jun 23 '20 at 06:42
  • 1
    @JasonWhyte Thanks for your feedback. I have added an example which accounts for grouped data. Also note that I updated the interpolation procedure. Please let me know if anything is unclear. Cheers. – Henrik Jun 23 '20 at 14:46
18

So this is not perfect and I'm interested to see what others come up with...

The reason for the "multiple" colored areas is that a single polygon is bounded by the data points and the data points are not actually zero.

To solve this, we can interpolate using approx(). For a perfect solution, you would need to determine exactly where the line crosses zero.

interp <- approx(orig$year, orig$afw, n=10000)

orig2 <- data.frame(year=interp$x, afw=interp$y)
orig2$col[orig2$afw >= 0] <- "pos"
orig2$col[orig2$afw < 0] <- "neg"

ggplot(orig2, aes(x=year, y=afw)) +
  geom_area(aes(fill=col)) +
  geom_line() +
  geom_hline(yintercept=0)

Solution

However, you will see this still has issues when you zoom:

Zoomed


To elaborate on my statement above (and further illustrate the original "problem/issue"), consider what happens when you plot each of the original positive and negative datasets separately:

p1 <- ggplot(subset(orig, col == "neg"), aes(x = year, y = afw)) +
  geom_area(aes(fill=col)) +
  scale_fill_manual(values = c("#FF3030", "#00CC66"))

p2 <- ggplot(subset(orig, col == "pos"), aes(x = year, y = afw)) +
  geom_area(aes(fill=col)) +
  scale_fill_manual(values = c("#00CC66", "#FF3030"))

library(gridExtra)

grid.arrange(p2, p1)

Multiple Plots


Of course, you could always solve this by utilizing a different type of visualization:

ggplot(data = orig, aes(x = year, y = afw)) +
  geom_bar(stat = "identity", aes(fill=col), colour = "white")

Alternate Solution

JasonAizkalns
  • 20,243
  • 8
  • 57
  • 116
  • I think your final comment on a geom_bar() solution is very sensible. I'm looking at differences between regularly spaced time points, so there's no need to interpolate to find x-axis crossings. Also, your geom_bar() solution is much easier to implement. – Jason Whyte Jun 24 '20 at 09:55
4

As this "polygon" plot is in fact a line plot with fill below or above, one can make use of ggh4x::stat_difference. The advantage is the simplicity of the code, and you can use the original data.

An alternative package is {ggbraid} which (as of Feb 2023) is not on CRAN for R >= 4.2.2 and you will need to install the development version.

Another alternative is to use two ribbons, but the result is not the most satisfying without a rather involved computation of intersections (see third option below).

With the ggh4x package

orig <- structure(list(year = c(1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013), afw = c(-0.246246074793035, -2.39463317156723, -2.39785897801884, 0.840850699400514, -0.843020268341422, -3.02043962318013, -0.033342848986583, -2.04947188124465, -0.00431059092206709, 2.49568940907793, 1.96988295746503, 2.26665715101342, 0.986011989723095, 1.79568940907793, 2.06665715101342, -0.601084784470454, -3.21076220382529, 2.65052811875535, 0.46988295746503, -1.09140736511562, 0.0505281187553526, 1.41827005423922, -2.80108478447045, 0.611818441335997, -1.83011704253497, -0.30753639737368, -4.43011704253497, -0.897858978018841, 1.98601198972309, -0.965600913502712, 0.0795603768198685, 0.308592634884385, -5.33011704253497, 4.00214102198116, -0.594633171567228, 0.0698829574650297, -1.60753639737368, -2.81398801027691, -2.21398801027691, -2.4365686554382, 1.53439908649729, 1.06665715101342, -1.87205252640594, -0.688181558664002, 0.0569797316585783, -3.51398801027691, 0.979560376819868, 0.289237796174707, 1.24085069940051, -4.39140736511562, 1.13117328004567, -1.72689123608336, 2.20214102198116, 2.27310876391664, 1.46665715101342, 2.18278618327148, -0.23011704253497, 1.50536682843277, 1.17633457036826, -0.0785041393091639, -1.54947188124465, -3.85269768769626, -4.31398801027691, -0.80753639737368, 1.27956037681987, 1.2376248929489, 0.195689409077933, -3.38172994576078, -4.88172994576078, -0.675278332857551, 2.25375392520697, 0.0924636026263199, -0.446246074793035, 4.06988295746503, 0.350528118755352, -1.48172994576078, 1.81504424778761, -1.42689123608336, 2.22472166714245, 0.376334570368256, -3.88495575221239, 0.211818441335998, 0.586011989723094, 1.14407650585213, 2.55697973165858, 1.92794747359406, 1.20214102198116, 3.83439908649729, 1.64407650585213, 0.986011989723095, 0.753753925206965, 0.508592634884385, 1.911818441336, 2.11504424778761, -4.06560091350271, -2.58495575221239, 1.80859263488438, 1.37956037681987, 1.58923779617471, 1.88601198972309, -0.323665429631744, -0.291407365115615, 0.818270054239223, 0.0569797316585783, 0.795689409077933, 3.32472166714245, 0.595689409077933, -0.733342848986583, -0.955923494147874, -4.32689123608336, 3.29891521552955, 1.85697973165858, 2.74407650585213)), .Names = c("year", "afw"), class = c("tbl_df", "data.frame"), row.names = c(NA, -113L))

library(ggh4x)
#> Loading required package: ggplot2

ggplot(orig, aes(x = year)) +
  ## ymin can be set to any level of reference
  ggh4x::stat_difference(aes(ymin = 0, ymax = afw)) +
  geom_line(aes(y = afw)) +
  labs(fill = NULL)

Or use the ggbraid package

## as of Feb 2023, the current CRAN version does not work with R >= 4.2.2
# remotes::install_github("nsgrantham/ggbraid")

library(ggbraid)
library(ggplot2)
ggplot(orig, aes(x = year)) +
  geom_line(aes(y = afw)) +
  geom_braid(aes(ymin = 0, ymax = afw, fill = afw < 0)) 
#> `geom_braid()` using method = 'line'

alternatively, use two ribbons

The problem here is that the fill does not match the line exactly. If you want to go down that route, and want the fill to match perfectly, you will need to calculate the intersecting points as suggested by user Z.Lin in this answer.

## using this more than once, thus I like to add this as a variable
my_lev <- 0

ggplot(data = orig, aes(x = year)) +
  geom_ribbon(aes(
    ymin = my_lev, ymax = ifelse(afw > my_lev, afw, my_lev),
  ), fill = "blue") +
  geom_ribbon(aes(
    ymax = my_lev, ymin = ifelse(afw > my_lev, my_lev, afw)
  ), fill = "red") +
geom_line(aes(y = afw))

Created on 2022-07-14 by the reprex package (v2.0.1)

tjebo
  • 21,977
  • 7
  • 58
  • 94
0
orig 

orig_1 = orig
orig_pos <- ifelse(orig_1$afw <= 0, 0, orig_1$afw) #positive when y >0

orig_2 = orig
orig_neg <- ifelse(orig2$afw > 0, 0, orig$afw) #negative when y<0


df <- cbind.data.frame(orig, orig_neg, orig_pos) # dataframe of orig_neg < y < orig_pos

ggplot(df)+
  geom_area(aes(year, orig_pos), fill = "blue") +
  geom_area(aes(year, orig_neg), fill = "red") +
  theme_bw()+
  scale_x_continuous("", expand=c(0,0), breaks=seq(1910,2010,10))
  • 1
    Welcome to stackoverflow. Please edit your answer and include a brief explanation of the code and how it helps solve the problem in the question. – bad_coder Mar 24 '20 at 05:04