0

I have written a function that takes any DataFrame and evaluates each column to return a summary table. Now, for any Variable Name that is a factor as classified under the Answer Label column, I would like to shift the Variable Type and Answer Code down one row.

Sample Code:

CreateCodebook <- function(dF){
  numbercols <- length(colnames(dF))

  table <- data.frame()

  for (i in 1:length(colnames(dF))){
    AnswerCode <- if (sapply(dF, is.factor)[i]) 1:nrow(unique(dF[i])) else NA
    AnswerLabel <- if (sapply(dF, is.factor)[i]) as.vector(unique(dF[order(dF[i]),][i])) else "Open ended"
    VariableName <- if (length(AnswerCode) > 1) c(colnames(dF)[i],
                                                  rep(NA,length(AnswerCode) - 1)) else colnames(dF)[i]
    VariableLabel <- if (length(AnswerCode) > 1) c(colnames(dF)[i],
                                                   rep(NA,length(AnswerCode) - 1)) else colnames(dF)[i]
    VariableType <- if (length(AnswerCode) > 1) c(sapply(dF, class)[i],
                                                  rep(NA,length(AnswerCode) - 1)) else sapply(dF, class)[i]

    df = data.frame(VariableName, VariableLabel, AnswerLabel, AnswerCode, VariableType, stringsAsFactors = FALSE)
    names(df) <- c("Variable Name", "Variable Label", "Variable Type", "Answer Code", "Answer Label")
    table <- rbind(table, df)

  }
  rownames(table) <- 1:nrow(table)
  return(table)
}

Using this dataset MASS::anorexia, I get this output from my Function:

  Variable Name Variable Label Variable Type Answer Code Answer Label
1         Treat          Treat           CBT           1       factor
2          <NA>           <NA>          Cont           2         <NA>
3          <NA>           <NA>            FT           3         <NA>
4         Prewt          Prewt    Open ended          NA      numeric
5        Postwt         Postwt    Open ended          NA      numeric

Desired Output:

  Variable Name Variable Label Variable Type Answer Code Answer Label
1         Treat          Treat          <NA>          NA       factor
2          <NA>           <NA>           CBT           1         <NA>
3          <NA>           <NA>          Cont           2         <NA>
4          <NA>           <NA>            FT           3         <NA>
5         Prewt          Prewt    Open ended          NA      numeric
6        Postwt         Postwt    Open ended          NA      numeric
Riley Hun
  • 2,541
  • 5
  • 31
  • 77
  • Make sure you provide [reproducible example](http://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) with sample input to test the function. – MrFlick Apr 21 '17 at 23:21
  • Thank you. I have provided a reproducible example in my post now. – Riley Hun Apr 22 '17 at 00:35

2 Answers2

2

Hope this will work:

CreateCodebook <- function(dF){
    numbercols <- length(colnames(dF))

    table <- data.frame()

    for (i in 1:length(colnames(dF))){
        AnswerCode <- if (sapply(dF, is.factor)[i]) 1:nrow(unique(dF[i])) else NA
        AnswerLabel <- if (sapply(dF, is.factor)[i]) as.vector(unique(dF[order(dF[i]),][i])) else "Open ended"
        VariableName <- if (length(AnswerCode) > 1) c(colnames(dF)[i],
                                                      rep(NA,length(AnswerCode) - 1)) else colnames(dF)[i]
        VariableLabel <- if (length(AnswerCode) > 1) c(colnames(dF)[i],
                                                       rep(NA,length(AnswerCode) - 1)) else colnames(dF)[i]
        VariableType <- if (length(AnswerCode) > 1) c(sapply(dF, class)[i],
                                                      rep(NA,length(AnswerCode) - 1)) else sapply(dF, class)[i]

        df = data.frame(VariableName, VariableLabel, AnswerLabel, AnswerCode, VariableType, stringsAsFactors = FALSE)
        names(df) <- c("Variable Name", "Variable Label", "Variable Type", "Answer Code", "Answer Label")
        table <- rbind(table, df)

    }


    # add a new column of row id
    table$row <- 1:nrow(table)

    # created new rows to be added
    x <- table[which(table$`Answer Label` == 'factor'), ]
    x[, c(1, 2, 5)] <- NA

    # change original factor rows
    table[which(table$`Answer Label` == 'factor'), 3:4] <- NA

    # combine the two data.frame and reorder rows
    table <- rbind(table, x)
    table <- table[order(table$row), -ncol(table)]

    rownames(table) <- 1:nrow(table)
    return(table)
}
mt1022
  • 16,834
  • 5
  • 48
  • 71
1

The following solution needs functions from the dplyr, tidyr, and data.table package.

# Load packages
library(dplyr)
library(tidyr)
library(data.table)

# A function to adjust the output of the CreateCodebook function
Adjust_factor <- function(dF){

  dF2 <- dF %>%
    # Create a new column called Indicator, which is a copy of Answer Label
    mutate(Indicator = `Answer Label`) %>%
    # Impute NA based on the previous and nearest non-NA value
    fill(Indicator) %>%
    # Create run length group number
    mutate(Index = rleid(Indicator))

  # Split the data frame to list based on the Index
  dF_list <- split(dF2, f = dF2$Index)

  # Adjust each data frame subset
  dF_list2 <- lapply(dF_list, function(x){

    if (x$Indicator[1] == "factor"){ # If Indicator is "factor"

      # Copy and bind the first row
      x <- bind_rows(x[1, ], x)
      # Change the content of the first and second row. Replace the value with NA
      x[1, c("Variable Type", "Answer Code")] <- NA
      x[2, c("Variable Name", "Variable Label", "Answer Label")] <- NA
    } 
    return(x)
  })

  # Combine all data frame
  dF3 <- bind_rows(dF_list2) %>%
    # Remove the Indicator and Index column
    select(-Indicator, -Index)

  return(dF3)
}

# Test the function
library(MASS)
data(anorexia)
dat1 <- anorexia
dat2 <- CreateCodebook(dat1)
dat3 <- Adjust_factor(dat2)

test1 <- data.frame(a = c("a", "b", "c"),
                    b = c(1, 2, 3),
                    c = 10:12,
                    d = seq(as.Date("2001-01-01"), as.Date("2001-01-03"), 1),
                    e = c("o", "p", "q"))

test2 <- CreateCodebook(test1)
test3 <- Adjust_factor(test2)
www
  • 38,575
  • 12
  • 48
  • 84
  • Thanks ycw. This is a great solution. I went w/ the other one though because it didn't rely on any external packages and was integrated into my function. – Riley Hun Apr 22 '17 at 03:25