Edit: I have added the code below.
First, sorry, I can't think of a good reproducible example at the moment, but I think my question can be answered without one.
My data involves some line graphs taken from a manually operated testing machine. Because it's manually operated we get variable start times and thus the data is not properly "overlapped" with one another.
This was solved previously by using the following code:
# Import the data
x <- read.csv("smoke.csv", header = TRUE, sep = ",")
# Flag the rows in which every value is above zero, then keep only those rows
row_sub <- apply(x, 1, function(values) all(values > 0))
y <- x[row_sub, ]
This worked before because of the small sample size and relatively tight timings. With more samples I'm now getting some 'clipping' in the graphs:
I'm no expert, so excuse the explanation: 'row_sub' is a logical vector that flags the rows of 'x' in which ALL values are > 0, and 'y' keeps only those rows.
The problem with this is illustrated in the attached image right here. We can see the first sample is okay because it probably took the longest to insert into the apparatus. But the operator got better throughout the test, reducing sample feeding times, leading to the extreme clipping seen in sample4.
I know I can easily do this manually by simply deleting the leading zero values for each sample, then clipping the tail end of all the data to make sure they all have equal data points. But I can't figure out how to do it in R.
Edit: Here is the data: http://pastebin.com/iEW4sH2a
# Check & load required packages
# Install any package that is missing, then attach it. Installing alone is
# not enough: a freshly installed package still has to be loaded before its
# functions can be used further down.
required_packages <- c(
  "grid", "ggplot2", "gridExtra", "flux", "matrixStats", "mgcv"
)
for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

# Set working directory, read datafile
# NOTE(review): placeholder path -- replace with the folder holding smoke.csv.
setwd("C location here")
x <- read.csv("smoke.csv", header = TRUE, sep = ",")

# NOTE(review): an earlier comment here mentioned removing a 'time' column,
# but no code did so. The column positions used below assume x holds only the
# 16 sample columns (a1..a4, b1..b4, c1..c4, d1..d4) -- confirm.

# Align every sample on its own start.
# Each column is one sample whose readings stay at 0 until the sample is in
# the apparatus. Dropping every row that has a zero in ANY column clips the
# samples that were fed in quickly, so the leading non-positive readings are
# stripped from each column separately instead. Zeros that occur after a
# sample has started are kept as genuine readings.
drop_leading_zeros <- function(values) {
  first_reading <- which(!is.na(values) & values > 0)[1]
  if (is.na(first_reading)) {
    return(values[0])  # this sample never produced a positive reading
  }
  values[first_reading:length(values)]
}
aligned <- lapply(x, drop_leading_zeros)

# create time axis with appropriate length & attach to df
time <- seq(0, 120, by = 0.2)

# Clip the tails so that all samples have the same number of points, and no
# more points than the time axis can label.
n_keep <- min(lengths(aligned), length(time))
if (n_keep == 0) {
  warning(
    "At least one sample has no positive reading; no rows are left to plot.",
    call. = FALSE
  )
}
y <- as.data.frame(lapply(aligned, function(values) values[seq_len(n_keep)]))
time <- time[seq_len(n_keep)]
z <- cbind(time, y)
# Drop any remaining rows with missing readings
z <- na.omit(z)

#graph parameters
y_max <- 5.0
# Row-wise mean of each group of four replicates (column 1 of z is time)
a.means <- rowMeans(z[, 2:5])
b.means <- rowMeans(z[, 6:9])
c.means <- rowMeans(z[, 10:13])
d.means <- rowMeans(z[, 14:17])
all.data <- cbind(z, a.means, b.means, c.means, d.means)
# Multiple plot function
#
# Draws several plot objects on one page, arranged in a grid.
#
# Arguments:
# - ...: plot objects (e.g. ggplot objects) to draw
# - plotlist: optional list of further plot objects, drawn after those in ...
# - file: not used by this function; kept so existing calls keep working
# - cols: Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
# Called for its side effect of drawing. With no plots at all it draws
# nothing and returns NULL invisibly.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)

  # Nothing to draw. Without this guard the loop below would run over
  # c(1, 0) and fail when looking up a first plot that does not exist.
  if (num_plots == 0) {
    return(invisible(NULL))
  }

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Number of rows needed, calculated from the number of columns
    n_rows <- ceiling(num_plots / cols)
    layout <- matrix(seq_len(cols * n_rows), ncol = cols, nrow = n_rows)
  }

  if (num_plots == 1) {
    print(plots[[1]])
  } else {
    # Set up the page
    grid::grid.newpage()
    grid::pushViewport(
      grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout)))
    )

    # Make each plot, in the correct location
    for (i in seq_len(num_plots)) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(
        plots[[i]],
        vp = grid::viewport(
          layout.pos.row = matchidx$row,
          layout.pos.col = matchidx$col
        )
      )
    }
  }
}
# Area under each group's mean curve over time, rounded to two decimals
a.auc <- round(auc(z[["time"]], a.means), digits = 2)
b.auc <- round(auc(z[["time"]], b.means), digits = 2)
c.auc <- round(auc(z[["time"]], c.means), digits = 2)
d.auc <- round(auc(z[["time"]], d.means), digits = 2)
# Prepare plots
# One panel per sample group: the four replicates as faint points, the group
# mean as a solid line, and the group's AUC value as a text label.
#
# The AUC label is added with annotate() rather than geom_text(). geom_text()
# inherits the plot data and would draw the same label once for every row,
# stacking many copies on top of each other.
a_graph <- ggplot(data = all.data, aes(time)) +
  geom_point(aes(y = a1), alpha = 0.1, color = "indianred") +
  geom_point(aes(y = a2), alpha = 0.1, color = "indianred1") +
  geom_point(aes(y = a3), alpha = 0.1, color = "indianred2") +
  geom_point(aes(y = a4), alpha = 0.1, color = "indianred3") +
  geom_line(aes(y = a.means), size = 1, color = "indianred4") +
  ggtitle("145A: Standard") +
  annotate("text", x = 75, y = 1.5, label = a.auc) +
  scale_x_continuous("Time(s)", limits = c(0, 120)) +
  scale_y_continuous("Smoke(%Opacity)", limits = c(0, y_max))

b_graph <- ggplot(data = all.data, aes(time)) +
  geom_point(aes(y = b1), alpha = 0.1, color = "chartreuse") +
  geom_point(aes(y = b2), alpha = 0.1, color = "chartreuse1") +
  geom_point(aes(y = b3), alpha = 0.1, color = "chartreuse2") +
  geom_point(aes(y = b4), alpha = 0.1, color = "chartreuse3") +
  geom_line(aes(y = b.means), size = 1, color = "chartreuse4") +
  ggtitle("145B: +0.5%") +
  annotate("text", x = 75, y = 1.5, label = b.auc) +
  scale_x_continuous("Time(s)", limits = c(0, 120)) +
  scale_y_continuous("Smoke(%Opacity)", limits = c(0, y_max))

c_graph <- ggplot(data = all.data, aes(time)) +
  geom_point(aes(y = c1), alpha = 0.1, color = "turquoise") +
  geom_point(aes(y = c2), alpha = 0.1, color = "turquoise1") +
  geom_point(aes(y = c3), alpha = 0.1, color = "turquoise2") +
  geom_point(aes(y = c4), alpha = 0.1, color = "turquoise3") +
  geom_line(aes(y = c.means), size = 1, color = "turquoise4") +
  ggtitle("145C: +1.0%") +
  annotate("text", x = 75, y = 1.5, label = c.auc) +
  scale_x_continuous("Time(s)", limits = c(0, 120)) +
  scale_y_continuous("Smoke(%Opacity)", limits = c(0, y_max))

# The title of this panel used to repeat "145A: Standard" from the first
# panel; the fourth sample is 145D.
# NOTE(review): the treatment level for 145D is not known here -- append it
# to the title (as in "145B: +0.5%") once confirmed.
d_graph <- ggplot(data = all.data, aes(time)) +
  geom_point(aes(y = d1), alpha = 0.1, color = "indianred") +
  geom_point(aes(y = d2), alpha = 0.1, color = "indianred1") +
  geom_point(aes(y = d3), alpha = 0.1, color = "indianred2") +
  geom_point(aes(y = d4), alpha = 0.1, color = "indianred3") +
  geom_line(aes(y = d.means), size = 1, color = "indianred4") +
  ggtitle("145D") +
  annotate("text", x = 75, y = 1.5, label = d.auc) +
  scale_x_continuous("Time(s)", limits = c(0, 120)) +
  scale_y_continuous("Smoke(%Opacity)", limits = c(0, y_max))
# Sample names and their AUC values, each held as a one-column data frame
sample_names <- as.data.frame(c("145A", "145B", "145C", "145D"))
sample_auc <- as.data.frame(c(a.auc, b.auc, c.auc, d.auc))
# Side-by-side summary table: column x is the sample name, y is its AUC
sample_all <- cbind(sample_names, sample_auc)
names(sample_all) <- c("x", "y")
# Draw the four panels on one page, two columns wide
multiplot(a_graph, b_graph, c_graph, d_graph, cols = 2)