find expression to ignore condition if not found and carry on with next

Question

I'm importing a data file from a Scientific instrument and initially it produces results in a 6 line format and we later change it to a 2 line format with a different header. It does work just fine when it finds both headers. However, due to being quick we changed the output format to the 2 lines before it wrote the first results. We now get the error message in the batch window: Error in readQuat(file) : object 'head_ln' no found My question is: what would be the expression to skip this part and carry on with the next object? In our case then the 2 line format.I'd like to emphasize that the script has to work for the cases: 6 line format followed by 2 line format and 2 line format only.

My code is

# Title: 
# Purpose: 
# Author: 

#needed Libraries

library(plyr)

# Convert 100:01.782 (M:S.S) to 100.0297 (M.M) or decimal minutes
MS <- function(x, units="mins") {
  f <- function(...) {
    r <- cbind(...)
    return(
      as.difftime(as.numeric(r[1,]), units="mins") +
        as.difftime(as.numeric(r[2,]), units="secs"))
  }
  return(as.numeric(do.call(f, strsplit(x, ":")), units=units))
}


readQuat <- function(file) {
  # Return data
  ret <- list()

  # Read whole file
  lines <- readLines(file)

  # Remove lines that need to be ignored
  skip_ln <- c()
  for(ln in seq_along(lines)) {
    if(grepl("^(00A PROGRAM MODE|\\*\\*\\*)", lines[ln]))
      skip_ln <- c(skip_ln, -ln)
  }
  # Also skip past sample lists
  sl_head <- "ORDER  POS  ID                    CTIME COUNTS CUCNTS MCW  REP  STD STMS  STIME"
  sl_tail <- "  INSTRUMENT NUMBER        1"
  ln <- 1
  while (ln <= length(lines)) {
    if (grepl(sl_head, lines[ln])) {
      while (ln <= length(lines) && !(grepl(sl_tail, lines[ln]))) {
        skip_ln <- c(skip_ln, -ln)
        ln <- ln + 1
      }
    }
    ln <- ln + 1
  }


  # Remove unneeded lines
  lines <- lines[skip_ln]
  #print(lines)

  # Get ID
  ln <- 1
  ID <- NULL
  while (ln <= length(lines)) {
    if (grepl("^    ID:", lines[ln])) {
      obj <- strsplit(lines[ln], ": ")
      ID <- obj[[1]][2]
      break
    }
    ln <- ln + 1
  }
  if(is.null(ID))
    stop("could not find ID in file")
  attr(ret, "ID") <- ID

  # Find lines between ORDER and NUMBER OF CYCLES
  ln <- 1
  start_ln <- NA
  while (ln <= length(lines)) {
    if (grepl("^ORDER", lines[ln])) {
      start_ln <- ln
      break
    }
    ln <- ln + 1
  }
  while (ln <= length(lines)) {
    if (grepl("NUMBER OF CYCLES", lines[ln])) {
      end_ln <- ln - 1
      break
    }
    ln <- ln + 1
  }
  if (is.na(start_ln)) {
    # Reset line number back to top
    ln <- 1
  } else {
    # Read this section as a table
    dat1 <- read.table(text=textConnection(lines[start_ln:end_ln]), header=TRUE, as.is=TRUE)
    dat1$ID <- factor(dat1$ID)
    dat1$CTIME_mins <- MS(dat1$CTIME, units="mins")
    dat1$COUNTS <- factor(dat1$COUNTS)
    dat1$CUCNTS <- factor(dat1$CUCNTS)
    dat1$MCW <- factor(dat1$MCW)
    dat1$REP <- factor(dat1$REP)
    dat1$STIME <- factor(dat1$STIME)
    ret[["dat1"]] <- dat1
  }

  # Find other data sections
  dat2_header <- "CYC POS REP        CTIME    DTIME1    DTIME2   CUCNTS     SQP    SQP%  STIME$"
  #write.csv(dat2_1_header, "dat2_1_header.csv", row.names = FALSE)
  #dat2_2_header
  dat3_header <- "CYC POS REP        CTIME               ID       CPM1    COUNTS1       CPM2    COUNTS2"

  # Find dat2, and gather six lines for each
  while (ln <= length(lines)) {
    if (grepl(dat2_header, lines[ln])) {
      head_ln <- ln
      break
    }
    ln <- ln + 1
  }
  d2a <- c(head_ln)
  d2b <- c(head_ln + 2)
  # don't import the other three lines
  # d2c <- c(head_ln + 4)

  ln <- head_ln + 1
  while (ln <= length(lines)) {
    if (grepl(dat3_header, lines[ln])) {
      head_ln <- ln
      break
    } else if (grepl("^Q\\d+", lines[ln])) {
      d2a <- c(d2a, ln + 1)
      d2b <- c(d2b, ln + 2)
      # d2c <- c(d2c, ln + 3)
      ln <- ln + 6
    } else {
      ln <- ln + 1
    }
  }
  # Read this section as a table
  con <- textConnection(lines[d2a])
  dat2a <- read.table(con, header=TRUE, as.is=TRUE)
  dat2a$CTIME_mins <- MS(dat2a$CTIME, units="mins")

  con <- textConnection(lines[d2b])
  dat2b <- read.table(con, header=TRUE, as.is=TRUE)

  # combine columns into one data.frame
  dat2 <- cbind(dat2a, dat2b)
  #test whether run with same name has been started multiple times and if yes delete unwanted results
  inx <- with(dat2, which(CYC == 1 & POS == 1 & REP == 1))
  inx <- inx[length(inx)]
  dat2 <- if(inx > 1) dat2[-seq_len(inx - 1), ] else dat2

  ret[["dat2"]] <- dat2

  # We should already be on dat3
  stopifnot(grepl(dat3_header, lines[head_ln]))
  ln <- head_ln
  d3a <- c()
  d3b <- c(head_ln)
  ln <- head_ln
  while (ln <= length(lines)) {
    if (grepl("^Q\\d+", lines[ln])) {
      d3a <- c(d3a, ln)
    } else if (grepl("^ *\\d+", lines[ln], perl=TRUE)) {
      d3b <- c(d3b, ln)
    }
    ln <- ln + 1
  }
  # Read this section as a table
  dat3a <- read.table(text=lines[d3a], header=FALSE, as.is=TRUE)
  dat3b <- read.table(text=lines[d3b], header=TRUE, as.is=TRUE)
  dat3b$ID <- factor(dat3b$ID)
  dat3b$CTIME_mins <- MS(dat3b$CTIME, units="mins")
  ret[["dat3a"]] <- dat3a
#  dat3b_Ordered <- dat3b[order(dat3b$POS), ]
  ret[["dat3b"]] <- dat3b
  invisible(ret)
}

#combineData <- function(combine) {
#  combined <- rbind.fill(combine[["dat2"]], combine[["dat3b"]])
#  write.csv(combined, file = "COMBINED.csv", row.names = FALSE)
#  ret[["combined"]] <- combined
#  ret
#}


processData <- function(dat) {

  # Combine data
  combined <- rbind.fill(dat[["dat2"]], dat[["dat3b"]])
  #Order combined data
  combined_ordered <- combined[order(combined$POS), ]
  needed_columns <- combined_ordered[ ,c("ID", "POS", "CYC", "COUNTS2", "CTIME_mins")]
  #header_retrival <- data.frame(unique_header = character())
  header_retrival <- unique(needed_columns[ ,"ID"])
  #create table with sample name as header

  # all_headers <- paste(rep(header_retrival, each=2), c("counts", "time"), sep="_")
  max.cyc <- max(needed_columns$CYC)
  counter_output <- data.frame(CYC=1:max.cyc)
  for(n in header_retrival) {
    sub <- subset(needed_columns, ID==n)
    if (nrow(sub) < max.cyc) {
      na.row <- needed_columns[0,][1,]
      # extend subset with empty rows at end
      sub <- rbind(sub, na.row[rep(1, max.cyc - nrow(sub)),])
    }
    counter_output[,paste(n, "counts", sep="_")] <- sub$COUNTS2
    counter_output[,paste(n, "time", sep="_")] <- sub$CTIME_mins
  }
  counter_output$CYC <- NULL


      counter <- length(header_retrival)
  #counter <- 1
  #for(i in 1:counter) {
  #  header <- header_retrival[i]
    #write.csv(header, "blabla.csv", row.names = FALSE)
  #  header_paste_helper_counts <- paste0(header, "_counts")
  #  header_paste_helper_time <- paste0(header, "_time")
  #  counter_output <- data.frame(header_paste_helper_counts,
  #                               header_paste_helper_time,
  #                               fix.empty.names = FALSE
  #                               )

    #counter_output[ ,i] <- header_paste_helper_counts
    #counter_output[ ,i+1] <- header_paste_helper_time
    #j <- NULL
    #counter2 <- nrow(needed_columns)
    #for(j in 1:counter2) {

    #}
  #}  
  # Export CSV files, etc
  ID <- attr(dat, "ID")

  file2 <- paste0(ID, "_dat2.csv")
  message(paste("Writing", file2))
  write.csv(dat[["dat2"]], file2, row.names = FALSE)

  file3b <- paste0(ID, "_dat3b.csv")
  message(paste("Writing", file3b))
  write.csv(dat[["dat3b"]], file3b, row.names = FALSE)

  file_combined <- paste0(ID, "_combined.csv")
  message(paste("Writing", file_combined))
  write.csv(combined, file_combined, row.names = FALSE)

  file_combined_ordered <- paste0(ID, "_combined_ordered.csv")
  message(paste("Writing", file_combined_ordered))
  write.csv(combined_ordered, file_combined_ordered, row.names = FALSE)

  file_needed_columns <- paste0(ID, "_needed_columns.csv")
  message(paste("Writing", file_needed_columns))
  write.csv(needed_columns, file_needed_columns, row.names = FALSE)

  file_header_retrival <- paste0(ID, "_header_retrival.csv")
  message(paste("Writing", file_header_retrival))
  write.csv(header_retrival, file_header_retrival, row.names = FALSE)

  file_counter <- paste0(ID, "_counter.csv")
  message(paste("Writing", file_counter))
  write.csv(counter, file_counter, row.names = FALSE)

  file_counter_output <- paste0(ID, "_counter_output.csv")
  message(paste("Writing", file_counter_output))
  write.csv(counter_output, file_counter_output, row.names = FALSE)
}



if (!interactive()) {
  # Processing with Rcmd
  args <- commandArgs(trailingOnly = TRUE)
  if (length(args) != 1)
    stop("one file needs to be supplied")
  file <- args[1]
  dat <- readQuat(file)
  processData(dat)
}

Example for 6 line format:

>  CYC POS REP        CTIME    DTIME1    DTIME2   CUCNTS     SQP    SQP%
> STIME
> 
>                ID       CPM1    COUNTS1   CPM1%       CPM2    COUNTS2   CPM2%
> 
>                         CPM3    COUNTS3   CPM3%       CPM4    COUNTS4   CPM4%
> 
>                         CPM5    COUNTS5   CPM5%       CPM6    COUNTS6   CPM6%
> 
>                         CPM7    COUNTS7   CPM7%       CPM8    COUNTS8   CPM8%
> 
> Q010101N.001 20 SEP 2017  9:06    1   1   1     5:00.900    11.827   
> 11.808    55069     .00     .00   0:00
>              WSTD   11429.37      55069     .43   63994.43     308338     .18
>                          .00          0     .00        .00          0     .00
>                          .00          0     .00        .00          0     .00
>                          .00          0     .00        .00          0     .00

Example for 2 line format:

 CYC POS REP        CTIME               ID       CPM1    COUNTS1       CPM2    COUNTS2

Q030301N.001 21 SEP 2017 13:14
   1   3   1   100:01.749               B1        .23         23        .68         67

So basically it cannot find dat2_header and needs to carry on in finding dat3_header. However it needs to be flexible to do both if needed

It's progress, but it's only part of the code. Can you make your example reproducible and show what the expected result would look like? I wonder if `tryCatch` would work for you. — Roman Luštrik, Oct 17 '17 at 12:57
`@RomanLuštrik` I now added the complete script and two examples — Johannes, Oct 18 '17 at 07:00
This seems very verbose. Can you distill your code and example down to bare minimum? [Here are a few tips](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) on how to do that. — Roman Luštrik, Oct 18 '17 at 10:04

find expression to ignore condition if not found and carry on with next

0 Answers0