0

I would like to import multiple ascii text files with selected rows in a one shot and I want to add file name as an ID number (variable) and then format the data as required (see the image below)

setwd("working_dir")
library(data.table)
library(WriteXLS)
files <- list.files(pattern=".*.asc")
file.list <- sapply(files, function(x)read.csv(x,header=F,skip = 10,nrows=10,stringsAsFactors=F), simplify=FALSE)
df1 <- rbindlist(file.list, idcol="id")[, id := substr(id,1,7)]
WriteXLS(df1,"all_1.xls",Encoding = "latin1")

this is how df1 data frame looks like and i want to format/select only the required data as shown in my deired result picture

df1 <-structure(list(id = c("9864707", "9864707", "9864707", "9864707", 
"9864707", "9864707", "9864707", "9864707", "9864707", "9864707", 
"9864708", "9864708", "9864708", "9864708", "9864708", "9864708", 
"9864708", "9864708", "9864708", "9864708"), V1 = c("Mean irradiance (kW/m²)", 
"1.000", "Pmax", "267.793", "Module voltage", "Voc", "37.552", 
"Module current", "Isc", "9.217", "Mean irradiance (kW/m²)", 
"1.000", "Pmax", "268.211", "Module voltage", "Voc", "38.234", 
"Module current", "Isc", "9.181"), V2 = c("Cell efficiency (%)", 
"18.4", "Module temperature (°C)", "22.2", "", "Vmp", "31.159", 
"", "Imp", "8.735", "Cell efficiency (%)", "18.4", "Module temperature (°C)", 
"22.2", "", "Vmp", "31.208", "", "Imp", "8.735"), V3 = c("Module efficiency (%)", 
"16.4", "", "", "", "Series resistance", "0.1256", "", "Shunt resistance", 
"191.7", "Module efficiency (%)", "16.5", "", "", "", "Series resistance", 
"0.3718", "", "Shunt resistance", "309.8"), V4 = c("Fill factor (%)", 
"77.4", "", "", "", "", "", "", "", "", "Fill factor (%)", "76.4", 
"", "", "", "", "", "", "", "")), .Names = c("id", "V1", "V2", 
"V3", "V4"), row.names = c(NA, -20L), class = c("data.table", 
"data.frame"), .internal.selfref = <pointer: 0x000000000a7b0788>)

my actual result looks like these

enter image description here

my desired result

enter image description here

below is the links of sample ascii text files. could anyone of you help me to get my desired result.

sample file 1 sample file 2

R session info enter image description here

Community
  • 1
  • 1
Chanti
  • 525
  • 1
  • 5
  • 15
  • Links to dropbox and pictures of data are not particularly helpful. You should try to include a minimal [reproducible example](http://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) in the question itself to make it clear what your issue is. – MrFlick Nov 29 '16 at 15:32
  • @MrFlick: Thank you for the suggestion. I updated the question. i provided the reproducibe data to get the desired result – Chanti Nov 29 '16 at 15:42

1 Answers1

1

This is a gross work-around, but it'll work...at least based off the info you provided.

library(dplyr)
library(jsonlite)

# Reproducing the data frame
a <- data.frame(
  id = c("9864707", "9864707", "9864707", "9864707", 
         "9864707", "9864707", "9864707", "9864707", "9864707", "9864707", 
         "9864708", "9864708", "9864708", "9864708", "9864708", "9864708", 
         "9864708", "9864708", "9864708", "9864708"), 
  V1 = c("Mean irradiance (kW/m²)", 
         "1.000", "Pmax", "267.793", "Module voltage", "Voc", "37.552", 
         "Module current", "Isc", "9.217", "Mean irradiance (kW/m²)", 
         "1.000", "Pmax", "268.211", "Module voltage", "Voc", "38.234", 
         "Module current", "Isc", "9.181"), 
  V2 = c("Cell efficiency (%)", 
         "18.4", "Module temperature (°C)", "22.2", "", "Vmp", "31.159", 
         "", "Imp", "8.735", "Cell efficiency (%)", "18.4", 
         "Module temperature (°C)", 
         "22.2", "", "Vmp", "31.208", "", "Imp", "8.735"),
  V3 = c("Module efficiency (%)", 
         "16.4", "", "", "", "Series resistance", "0.1256", "", "Shunt resistance", 
         "191.7", "Module efficiency (%)", "16.5", "", "", "", "Series resistance", 
         "0.3718", "", "Shunt resistance", "309.8"), 
  V4 = c("Fill factor (%)", 
         "77.4", "", "", "", "", "", "", "", "", "Fill factor (%)", "76.4", 
         "", "", "", "", "", "", "", ""),
  stringsAsFactors = FALSE)

# Splitting the data frame into a list of data frames where the id is the key value
b <- split(a, a$id)


# Loop over the list of data frames to apply a cleaning function
c <- lapply(b, function(i){
  # Remove the rows where there is only one identifier and no values; as these
  # are not columns and will result in an unbalanced vector of 14 names, to 12
  # values which we'll get to in a second.
  # 1) Filter the data frame where the cell in column V2 has no characters
  # 2) unlist the data frame to a character vector after removing the id column 
  aa <- i %>% filter(nchar(V2) > 0) %>% select(-id) %>% unlist %>% as.character

  # Remove empty characters
  bb <- aa[nchar(aa) > 0]
  # If we find a letter in the character; we know it's supposed to be a column name
  c_name <- bb[grepl("[A-z]",bb)]

  # Inversely; find the numerics
  d_val <- as.numeric(bb[!grepl("[A-z]",bb)])

  # Bind the numberic values nto a new data frame
  df <- as.data.frame(rbind(d_val),stringsAsFactors = FALSE)

  # name the columns with the names we extracted
  colnames(df) <- c_name

  # Get the unique id of the file (where we split above) and bind it into the
  # above created data frame
  df <- cbind(id = i[['id']][[1]], df)
  # drop the rownames
  row.names(df) <- NULL
  # return the data frame
  df
}) 

# Bind into one data.frame
d <- rbind.pages(c)


> str(d)
# 'data.frame': 2 obs. of  13 variables:
#  $ id                     : chr  "9864707" "9864708"
# $ Mean irradiance (kW/m²): num  1 1
# $ Pmax                   : num  268 268
# $ Voc                    : num  37.6 38.2
# $ Isc                    : num  9.22 9.18
# $ Cell efficiency (%)    : num  18.4 18.4
# $ Module temperature (°C): num  22.2 22.2
# $ Vmp                    : num  31.2 31.2
# $ Imp                    : num  8.73 8.73
# $ Module efficiency (%)  : num  16.4 16.5
# $ Series resistance      : num  0.126 0.372
# $ Shunt resistance       : num  192 310
# $ Fill factor (%)        : num  77.4 76.4
Carl Boneri
  • 2,632
  • 1
  • 13
  • 15
  • "you are awesome". Thank you very much. your solution worked very well. The way you formated the data is awesome, i learned many new points. Thanks #R_GURU – Chanti Nov 30 '16 at 10:03
  • @Chanti thanks for the nice words. I realized after the fact that you said you had multiple files. we're you able to amend the above into a loop that applied to all of them? just curious. – Carl Boneri Nov 30 '16 at 19:41
  • I tried with 6 ascii files and the loop worked well without any errors. In-case if i face any errors with large no of files, i will let you know. thank you once again. – Chanti Nov 30 '16 at 21:26