0

I have a df that holds 3 subgroup means at 8 time points each, except that some subgroups weren't observed at specific times. However, there is no NA entry in my df for those cases: there simply isn't a row for that subgroup at that time point. Because there is no NA, my geom_line connects through these time points even though there is no observation. I find this misleading and would like the geom_line to break wherever a subgroup has no observation.

I tried to insert a new row to stand in for these non-observations.

# Attempted fix: append a placeholder row (row 20) for the missing observation.
# NOTE: c() coerces all of its arguments to one common type, so this vector is
# entirely character; assigning it converts the numeric cycle_year and
# mean_score columns to character.
PE.8Cs[20,]<-c(1,5,"Colleague",NA)

I also tried NA, "NA", and as.integer(NA) as the last element. The 1 in the vector represents the type of observation (not important here, since they are all 1s). The 5 is the time point (out of 8) that is missing for this particular subgroup. Without the additional rows, the df has 19 rows.

Adding this line to the df results in ggplot throwing the error "Discrete value supplied to continuous scale".

Here's the df:

structure(list(item = c("1", "1", "1", "1", "1", "1", "1", "1", 
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"), cycle_year = c(1, 
2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 8, 8, 8), Group = structure(c(3L, 
1L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 2L, 3L, 1L, 2L, 3L, 2L, 3L, 1L, 
2L, 3L), .Label = c("Colleague", "Comparison", "TL"), class = "factor"), 
    mean_score = c(2, 3, 2.73684210526316, 2.8, 2.45, 1.875, 
    1.86153846153846, 2.22388059701493, 1.65573770491803, 2.25, 
    3.0625, 2.75, 2.5, 2.53846153846154, 2.5, 2.83720930232558, 
    2.43333333333333, 2.53846153846154, 2.03703703703704)), .Names = c("item", 
"cycle_year", "Group", "mean_score"), row.names = c(NA, 19L), class = "data.frame")

Here's the plot:

# Line-and-point chart of mean_score per data collection interval, one colour
# per Group, with intervals 5-8 shaded by a gray background rectangle.
plot7.PE.8Cs <- ggplot() +
  # Shaded band behind the second half of the intervals.
  annotate(
    "rect",
    xmin = 4.5, xmax = 8.5, ymin = -Inf, ymax = Inf,
    alpha = .5, fill = "gray"
  ) +
  geom_line(
    data = PE.8Cs,
    aes(x = cycle_year, y = mean_score, color = Group),
    size = 1.5
  ) +
  geom_point(
    data = PE.8Cs,
    aes(x = cycle_year, y = mean_score, color = Group),
    size = 3
  ) +
  labs(
    title = "Procedural Explanation\nTwo Year Data Collection Intervals, by Subgroup",
    x = "Data Collection Interval",
    y = "Indicator Mean"
  ) +
  theme_gdocs() +
  # Zoom without dropping data outside the limits.
  coord_cartesian(xlim = c(1, 8), ylim = c(1, 4)) +
  theme(
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 12),
    axis.text.x = element_text(size = 11, angle = 315, vjust = 0)
  ) +
  # One tick per interval, labelled with the interval's date range.
  scale_x_continuous(
    breaks = 1:8,
    labels = c(
      "8.1.16-\n10.15.16", "10.16.16-\n12.31.16",
      "1.1.17-\n3.15.17", "3.16.17-\n6.30.17",
      "8.1.17-\n10.15.17", "10.16.17-\n12.31.17",
      "1.1.18-\n3.15.18", "3.16.18-\n6.30.18"
    )
  )
plot7.PE.8Cs

Again, I would just like geom_line to break at the time points with no subgroup observations, rather than connect through them.

Cody Gray - on strike
  • 239,200
  • 50
  • 490
  • 574
KLB
  • 57
  • 7
  • 2
    Instead of describing your code, it might be advantageous to copy and paste it. – jay.sf Jun 15 '19 at 07:08
  • [Reproducible Example](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) please. – M-- Jun 15 '19 at 07:21

1 Answers1

0

This could work:

PE.8Cs <- structure(list(item = c("1", "1", "1", "1", "1", "1", "1", "1", 
                        "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"), cycle_year = c(1, 
                                                                                               2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 8, 8, 8), Group = structure(c(3L, 
                                                                                                                                                                          1L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 2L, 3L, 1L, 2L, 3L, 2L, 3L, 1L, 
                                                                                                                                                                          2L, 3L), .Label = c("Colleague", "Comparison", "TL"), class = "factor"), 
               mean_score = c(2, 3, 2.73684210526316, 2.8, 2.45, 1.875, 
                              1.86153846153846, 2.22388059701493, 1.65573770491803, 2.25, 
                              3.0625, 2.75, 2.5, 2.53846153846154, 2.5, 2.83720930232558, 
                              2.43333333333333, 2.53846153846154, 2.03703703703704)), .Names = c("item", 
                                                                                                 "cycle_year", "Group", "mean_score"), row.names = c(NA, 19L), class = "data.frame")

library(tidyverse)

# The Colleague group has no observation at intervals 5 and 7. Append one
# placeholder row per gap with a missing mean_score: geom_line() breaks the
# line at missing values instead of joining the neighbouring observations.
PE.8Cs <- as_tibble(PE.8Cs)
PE.8Cs <- bind_rows(
  PE.8Cs,
  tibble(
    item = "1",
    cycle_year = c(5, 7),
    # Reuse the existing levels so Group stays a factor after binding.
    Group = factor("Colleague", levels = levels(PE.8Cs$Group)),
    # Typed NA so the column stays numeric.
    mean_score = NA_real_
  )
)


plot7.PE.8Cs <- PE.8Cs %>%
  ggplot(aes(x = cycle_year, y = mean_score, color = Group)) +
  # Shaded band behind intervals 5-8.
  annotate("rect", xmin = 4.5, xmax = 8.5, ymin = -Inf, ymax = Inf,
           alpha = .5, fill = "gray") +
  geom_line(size = 1.5) +
  geom_point(size = 3) +
  ylab("Indicator Mean") +
  ggtitle("Procedural Explanation\nTwo Year Data Collection Intervals, by Subgroup") +
  xlab("Data Collection Interval") +
  theme_minimal() +
  coord_cartesian(xlim = c(1, 8), ylim = c(1, 4)) +
  theme(legend.title = element_blank(),
        plot.title = element_text(hjust = 0.5, size = 12),
        axis.text.x = element_text(size = 11, angle = 315, vjust = 0)) +
  scale_x_continuous(breaks = 1:8, labels = c("8.1.16-\n10.15.16","10.16.16-\n12.31.16","1.1.17-\n3.15.17","3.16.17-\n6.30.17",
                                              "8.1.17-\n10.15.17","10.16.17-\n12.31.17","1.1.18-\n3.15.18","3.16.18-\n6.30.18"))
plot7.PE.8Cs
#> Warning: Removed 2 rows containing missing values (geom_point).


And like this if you don't want to add the placeholder rows for the missing observations by hand:

PE.8Cs <- structure(list(item = c("1", "1", "1", "1", "1", "1", "1", "1", 
                        "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"), cycle_year = c(1, 
                                                                                               2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 8, 8, 8), Group = structure(c(3L, 
                                                                                                                                                                          1L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 2L, 3L, 1L, 2L, 3L, 2L, 3L, 1L, 
                                                                                                                                                                          2L, 3L), .Label = c("Colleague", "Comparison", "TL"), class = "factor"), 
               mean_score = c(2, 3, 2.73684210526316, 2.8, 2.45, 1.875, 
                              1.86153846153846, 2.22388059701493, 1.65573770491803, 2.25, 
                              3.0625, 2.75, 2.5, 2.53846153846154, 2.5, 2.83720930232558, 
                              2.43333333333333, 2.53846153846154, 2.03703703703704)), .Names = c("item", 
                                                                                                 "cycle_year", "Group", "mean_score"), row.names = c(NA, 19L), class = "data.frame")
# Build placeholder rows for every cycle_year x Group combination that has no
# row in `tib`.
#
# Args:
#   tib: data frame with factor columns `cycle_year` and `Group`. The factor
#        levels define the full grid of combinations that is checked.
#
# Returns:
#   A tibble with one row per unobserved combination and the columns
#   Group and cycle_year (both character), item (always "1") and
#   mean_score (NA_real_). It has zero rows when nothing is missing.
createMissingTibble <- function(tib) {
  if (!is.factor(tib$cycle_year) || !is.factor(tib$Group)) {
    stop("`cycle_year` and `Group` must both be factors", call. = FALSE)
  }

  # Cross-tabulate the two factors; a count of zero marks a combination that
  # was never observed. Group is listed first so that it varies fastest, which
  # orders the result by cycle_year and then by Group.
  counts <- as.data.frame(
    table(Group = tib$Group, cycle_year = tib$cycle_year),
    stringsAsFactors = FALSE
  )
  absent <- counts[counts$Freq == 0, , drop = FALSE]

  tibble::tibble(
    Group = absent$Group,
    cycle_year = absent$cycle_year,
    item = rep("1", nrow(absent)),
    # Typed NA so the column binds cleanly onto a numeric mean_score.
    mean_score = rep(NA_real_, nrow(absent))
  )
}


library(tidyverse)

# createMissingTibble() reads the factor levels of cycle_year, so convert the
# column to a factor before calling it.
PE.8Cs$cycle_year <- as.factor(PE.8Cs$cycle_year)
PE.8Cs <- bind_rows(PE.8Cs, createMissingTibble(PE.8Cs))

# Convert back via as.character(): as.numeric() on a factor returns the
# internal level codes, which only match the values when they are 1, 2, 3, ...
PE.8Cs$cycle_year <- as.numeric(as.character(PE.8Cs$cycle_year))

# Same chart as before, drawn from the data frame that now contains the
# placeholder rows: mean_score per interval, coloured by Group.
plot7.PE.8Cs <- ggplot(
  PE.8Cs,
  aes(x = cycle_year, y = mean_score, color = Group)
) +
  # Shaded band behind intervals 5-8.
  annotate(
    "rect",
    xmin = 4.5, xmax = 8.5, ymin = -Inf, ymax = Inf,
    alpha = .5, fill = "gray"
  ) +
  geom_line(size = 1.5) +
  geom_point(size = 3) +
  labs(
    title = "Procedural Explanation\nTwo Year Data Collection Intervals, by Subgroup",
    x = "Data Collection Interval",
    y = "Indicator Mean"
  ) +
  theme_minimal() +
  coord_cartesian(xlim = c(1, 8), ylim = c(1, 4)) +
  theme(
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 12),
    axis.text.x = element_text(size = 11, angle = 315, vjust = 0)
  ) +
  # One tick per interval, labelled with the interval's date range.
  scale_x_continuous(
    breaks = 1:8,
    labels = c(
      "8.1.16-\n10.15.16", "10.16.16-\n12.31.16",
      "1.1.17-\n3.15.17", "3.16.17-\n6.30.17",
      "8.1.17-\n10.15.17", "10.16.17-\n12.31.17",
      "1.1.18-\n3.15.18", "3.16.18-\n6.30.18"
    )
  )
plot7.PE.8Cs

Max Teflon
  • 1,760
  • 10
  • 16