0

I'm importing a data file from a Scientific instrument and initially it produces results in a 6 line format and we later change it to a 2 line format with a different header. It does work just fine when it finds both headers. However, due to being quick we changed the output format to the 2 lines before it wrote the first results. We now get the error message in the batch window: Error in readQuat(file) : object 'head_ln' no found My question is: what would be the expression to skip this part and carry on with the next object? In our case then the 2 line format.I'd like to emphasize that the script has to work for the cases: 6 line format followed by 2 line format and 2 line format only.

My code is

# Title: 
# Purpose: 
# Author: 

#needed Libraries

library(plyr)

# Convert 100:01.782 (M:S.S) to 100.0297 (M.M) or decimal minutes
MS <- function(x, units="mins") {
  f <- function(...) {
    r <- cbind(...)
    return(
      as.difftime(as.numeric(r[1,]), units="mins") +
        as.difftime(as.numeric(r[2,]), units="secs"))
  }
  return(as.numeric(do.call(f, strsplit(x, ":")), units=units))
}


readQuat <- function(file) {
  # Return data
  ret <- list()

  # Read whole file
  lines <- readLines(file)

  # Remove lines that need to be ignored
  skip_ln <- c()
  for(ln in seq_along(lines)) {
    if(grepl("^(00A PROGRAM MODE|\\*\\*\\*)", lines[ln]))
      skip_ln <- c(skip_ln, -ln)
  }
  # Also skip past sample lists
  sl_head <- "ORDER  POS  ID                    CTIME COUNTS CUCNTS MCW  REP  STD STMS  STIME"
  sl_tail <- "  INSTRUMENT NUMBER        1"
  ln <- 1
  while (ln <= length(lines)) {
    if (grepl(sl_head, lines[ln])) {
      while (ln <= length(lines) && !(grepl(sl_tail, lines[ln]))) {
        skip_ln <- c(skip_ln, -ln)
        ln <- ln + 1
      }
    }
    ln <- ln + 1
  }


  # Remove unneeded lines
  lines <- lines[skip_ln]
  #print(lines)

  # Get ID
  ln <- 1
  ID <- NULL
  while (ln <= length(lines)) {
    if (grepl("^    ID:", lines[ln])) {
      obj <- strsplit(lines[ln], ": ")
      ID <- obj[[1]][2]
      break
    }
    ln <- ln + 1
  }
  if(is.null(ID))
    stop("could not find ID in file")
  attr(ret, "ID") <- ID

  # Find lines between ORDER and NUMBER OF CYCLES
  ln <- 1
  start_ln <- NA
  while (ln <= length(lines)) {
    if (grepl("^ORDER", lines[ln])) {
      start_ln <- ln
      break
    }
    ln <- ln + 1
  }
  while (ln <= length(lines)) {
    if (grepl("NUMBER OF CYCLES", lines[ln])) {
      end_ln <- ln - 1
      break
    }
    ln <- ln + 1
  }
  if (is.na(start_ln)) {
    # Reset line number back to top
    ln <- 1
  } else {
    # Read this section as a table
    dat1 <- read.table(text=textConnection(lines[start_ln:end_ln]), header=TRUE, as.is=TRUE)
    dat1$ID <- factor(dat1$ID)
    dat1$CTIME_mins <- MS(dat1$CTIME, units="mins")
    dat1$COUNTS <- factor(dat1$COUNTS)
    dat1$CUCNTS <- factor(dat1$CUCNTS)
    dat1$MCW <- factor(dat1$MCW)
    dat1$REP <- factor(dat1$REP)
    dat1$STIME <- factor(dat1$STIME)
    ret[["dat1"]] <- dat1
  }

  # Find other data sections
  dat2_header <- "CYC POS REP        CTIME    DTIME1    DTIME2   CUCNTS     SQP    SQP%  STIME$"
  #write.csv(dat2_1_header, "dat2_1_header.csv", row.names = FALSE)
  #dat2_2_header
  dat3_header <- "CYC POS REP        CTIME               ID       CPM1    COUNTS1       CPM2    COUNTS2"

  # Find dat2, and gather six lines for each
  while (ln <= length(lines)) {
    if (grepl(dat2_header, lines[ln])) {
      head_ln <- ln
      break
    }
    ln <- ln + 1
  }
  d2a <- c(head_ln)
  d2b <- c(head_ln + 2)
  # don't import the other three lines
  # d2c <- c(head_ln + 4)

  ln <- head_ln + 1
  while (ln <= length(lines)) {
    if (grepl(dat3_header, lines[ln])) {
      head_ln <- ln
      break
    } else if (grepl("^Q\\d+", lines[ln])) {
      d2a <- c(d2a, ln + 1)
      d2b <- c(d2b, ln + 2)
      # d2c <- c(d2c, ln + 3)
      ln <- ln + 6
    } else {
      ln <- ln + 1
    }
  }
  # Read this section as a table
  con <- textConnection(lines[d2a])
  dat2a <- read.table(con, header=TRUE, as.is=TRUE)
  dat2a$CTIME_mins <- MS(dat2a$CTIME, units="mins")

  con <- textConnection(lines[d2b])
  dat2b <- read.table(con, header=TRUE, as.is=TRUE)

  # combine columns into one data.frame
  dat2 <- cbind(dat2a, dat2b)
  #test whether run with same name has been started multiple times and if yes delete unwanted results
  inx <- with(dat2, which(CYC == 1 & POS == 1 & REP == 1))
  inx <- inx[length(inx)]
  dat2 <- if(inx > 1) dat2[-seq_len(inx - 1), ] else dat2

  ret[["dat2"]] <- dat2

  # We should already be on dat3
  stopifnot(grepl(dat3_header, lines[head_ln]))
  ln <- head_ln
  d3a <- c()
  d3b <- c(head_ln)
  ln <- head_ln
  while (ln <= length(lines)) {
    if (grepl("^Q\\d+", lines[ln])) {
      d3a <- c(d3a, ln)
    } else if (grepl("^ *\\d+", lines[ln], perl=TRUE)) {
      d3b <- c(d3b, ln)
    }
    ln <- ln + 1
  }
  # Read this section as a table
  dat3a <- read.table(text=lines[d3a], header=FALSE, as.is=TRUE)
  dat3b <- read.table(text=lines[d3b], header=TRUE, as.is=TRUE)
  dat3b$ID <- factor(dat3b$ID)
  dat3b$CTIME_mins <- MS(dat3b$CTIME, units="mins")
  ret[["dat3a"]] <- dat3a
#  dat3b_Ordered <- dat3b[order(dat3b$POS), ]
  ret[["dat3b"]] <- dat3b
  invisible(ret)
}

#combineData <- function(combine) {
#  combined <- rbind.fill(combine[["dat2"]], combine[["dat3b"]])
#  write.csv(combined, file = "COMBINED.csv", row.names = FALSE)
#  ret[["combined"]] <- combined
#  ret
#}


processData <- function(dat) {

  # Combine data
  combined <- rbind.fill(dat[["dat2"]], dat[["dat3b"]])
  #Order combined data
  combined_ordered <- combined[order(combined$POS), ]
  needed_columns <- combined_ordered[ ,c("ID", "POS", "CYC", "COUNTS2", "CTIME_mins")]
  #header_retrival <- data.frame(unique_header = character())
  header_retrival <- unique(needed_columns[ ,"ID"])
  #create table with sample name as header

  # all_headers <- paste(rep(header_retrival, each=2), c("counts", "time"), sep="_")
  max.cyc <- max(needed_columns$CYC)
  counter_output <- data.frame(CYC=1:max.cyc)
  for(n in header_retrival) {
    sub <- subset(needed_columns, ID==n)
    if (nrow(sub) < max.cyc) {
      na.row <- needed_columns[0,][1,]
      # extend subset with empty rows at end
      sub <- rbind(sub, na.row[rep(1, max.cyc - nrow(sub)),])
    }
    counter_output[,paste(n, "counts", sep="_")] <- sub$COUNTS2
    counter_output[,paste(n, "time", sep="_")] <- sub$CTIME_mins
  }
  counter_output$CYC <- NULL


      counter <- length(header_retrival)
  #counter <- 1
  #for(i in 1:counter) {
  #  header <- header_retrival[i]
    #write.csv(header, "blabla.csv", row.names = FALSE)
  #  header_paste_helper_counts <- paste0(header, "_counts")
  #  header_paste_helper_time <- paste0(header, "_time")
  #  counter_output <- data.frame(header_paste_helper_counts,
  #                               header_paste_helper_time,
  #                               fix.empty.names = FALSE
  #                               )

    #counter_output[ ,i] <- header_paste_helper_counts
    #counter_output[ ,i+1] <- header_paste_helper_time
    #j <- NULL
    #counter2 <- nrow(needed_columns)
    #for(j in 1:counter2) {

    #}
  #}  
  # Export CSV files, etc
  ID <- attr(dat, "ID")

  file2 <- paste0(ID, "_dat2.csv")
  message(paste("Writing", file2))
  write.csv(dat[["dat2"]], file2, row.names = FALSE)

  file3b <- paste0(ID, "_dat3b.csv")
  message(paste("Writing", file3b))
  write.csv(dat[["dat3b"]], file3b, row.names = FALSE)

  file_combined <- paste0(ID, "_combined.csv")
  message(paste("Writing", file_combined))
  write.csv(combined, file_combined, row.names = FALSE)

  file_combined_ordered <- paste0(ID, "_combined_ordered.csv")
  message(paste("Writing", file_combined_ordered))
  write.csv(combined_ordered, file_combined_ordered, row.names = FALSE)

  file_needed_columns <- paste0(ID, "_needed_columns.csv")
  message(paste("Writing", file_needed_columns))
  write.csv(needed_columns, file_needed_columns, row.names = FALSE)

  file_header_retrival <- paste0(ID, "_header_retrival.csv")
  message(paste("Writing", file_header_retrival))
  write.csv(header_retrival, file_header_retrival, row.names = FALSE)

  file_counter <- paste0(ID, "_counter.csv")
  message(paste("Writing", file_counter))
  write.csv(counter, file_counter, row.names = FALSE)

  file_counter_output <- paste0(ID, "_counter_output.csv")
  message(paste("Writing", file_counter_output))
  write.csv(counter_output, file_counter_output, row.names = FALSE)
}



if (!interactive()) {
  # Processing with Rcmd
  args <- commandArgs(trailingOnly = TRUE)
  if (length(args) != 1)
    stop("one file needs to be supplied")
  file <- args[1]
  dat <- readQuat(file)
  processData(dat)
}

Example for 6 line format:

>  CYC POS REP        CTIME    DTIME1    DTIME2   CUCNTS     SQP    SQP%
> STIME
> 
>                ID       CPM1    COUNTS1   CPM1%       CPM2    COUNTS2   CPM2%
> 
>                         CPM3    COUNTS3   CPM3%       CPM4    COUNTS4   CPM4%
> 
>                         CPM5    COUNTS5   CPM5%       CPM6    COUNTS6   CPM6%
> 
>                         CPM7    COUNTS7   CPM7%       CPM8    COUNTS8   CPM8%
> 
> Q010101N.001 20 SEP 2017  9:06    1   1   1     5:00.900    11.827   
> 11.808    55069     .00     .00   0:00
>              WSTD   11429.37      55069     .43   63994.43     308338     .18
>                          .00          0     .00        .00          0     .00
>                          .00          0     .00        .00          0     .00
>                          .00          0     .00        .00          0     .00

Example for 2 line format:

 CYC POS REP        CTIME               ID       CPM1    COUNTS1       CPM2    COUNTS2

Q030301N.001 21 SEP 2017 13:14
   1   3   1   100:01.749               B1        .23         23        .68         67

So basically it cannot find dat2_header and needs to carry on in finding dat3_header. However it needs to be flexible to do both if needed

Johannes
  • 69
  • 1
  • 7

0 Answers0