-1

I have a data frame with one column identifying 4 participants and another column in which each cell holds all 88 observations for that participant as a single comma-separated string. Below is a reproducible example of the data frame (shortened to 3 observations per cell):

# Participant identifiers, one per row of the example.
pIndex <- as.numeric(1:4)

# One string per participant; each string packs that participant's
# stimulus labels, separated by ", ".
T1_AllSequence <- c(
  "N11_d1p3_t0p4, N11_d1p3_t0p1, N11_d1p3_t0p3",
  "N11_d0p1_t0p4, N11_d0p1_t0p7, N11_d0p1_t0p5",
  "N7_d1p3_t0p4, N7_d1p3_t0p6, N7_d1p3_t0p4",
  "N7_d0p5_t0p5, N7_d0p5_t0p6, N7_d0p5_t0p5"
)

# cbind() on a numeric and a character vector gives a character matrix, so
# both columns of `Data` end up holding text (factors where
# stringsAsFactors defaults to TRUE) -- pIndex included.
Data <- as.data.frame(
  cbind(pIndex = pIndex, T1_AllSequence = T1_AllSequence)
)
dput(Data)
# structure(list(
#   pIndex = structure(1:4, .Label = c("1", "2", "3", "4"), class = "factor"),
#   T1_AllSequence = structure(c(2L, 1L, 4L, 3L), .Label = c(
#     "N11_d0p1_t0p4, N11_d0p1_t0p7, N11_d0p1_t0p5",
#     "N11_d1p3_t0p4, N11_d1p3_t0p1, N11_d1p3_t0p3",
#     "N7_d0p5_t0p5, N7_d0p5_t0p6, N7_d0p5_t0p5",
#     "N7_d1p3_t0p4, N7_d1p3_t0p6, N7_d1p3_t0p4"
#   ), class = "factor")
# ), class = "data.frame", row.names = c(NA, -4L))

I wrote a function to turn the multiple observations contained in one cell of the column 'T1_AllSequence' into long format (one row per observation).

#### Extracting variables from embedded data #### 

# NOTE(review): `T1` is not created by the example above, which builds `Data`;
# presumably it is the full dataset -- confirm before running.
i = T1$pIndex

# Decode one participant's stimulus sequence into a long data frame.
#
# Reads the comma-separated cell in row `i` of `data$T1_AllSequence`, splits
# it into individual stimulus labels (e.g. "N11_d1p3_t0p4") and derives
# numeric codes from each label.
#
# Arguments:
#   i     A single row index into `data`.
#   data  Data frame with a `T1_AllSequence` column. Defaults to the global
#         `Data`, which is what this function previously always read.
#
# Returns a data frame with one row per label and the columns
#   V1        the label, with surrounding whitespace removed
#   pIndex    the row index `i`
#   order     position of the label within the cell (1, 2, ...)
#   vertex    7 if the label contains "N7", otherwise 11
#   distance  1 / 5 / 9 for "d0p1" / "d0p5" / "d0p9", otherwise 13
#   tension   0..9 for "t0p0".."t0p9", otherwise 10
#
# To process every row and stack the results:
#   do.call(rbind, lapply(seq_len(nrow(Data)), Contours))
Contours <- function(i, data = Data) {
  stopifnot(length(i) == 1L)

  # Map each label to the value of the first pattern it contains; labels
  # containing none of the patterns get `default`.
  code_label <- function(labels, patterns, values, default) {
    codes <- rep(default, length(labels))
    # Walk the patterns back to front so earlier patterns overwrite later
    # ones, i.e. the first listed match wins.
    for (k in rev(seq_along(patterns))) {
      codes[grepl(patterns[k], labels, fixed = TRUE)] <- values[k]
    }
    codes
  }

  cell <- as.character(data[["T1_AllSequence"]][i])
  # strsplit() on a single string returns a one-element list, so the pieces
  # are always at [[1]], whichever row was requested. trimws() drops the
  # blank left behind by the ", " separator.
  labels <- trimws(strsplit(cell, ",", fixed = TRUE)[[1]])

  data.frame(
    V1 = labels,
    # rep() keeps this column the same length as the others, including for
    # an empty cell (zero labels).
    pIndex = rep(i, length(labels)),
    # Derived from the actual number of labels rather than a fixed count.
    order = seq_along(labels),
    vertex = ifelse(grepl("N7", labels, fixed = TRUE), 7, 11),
    distance = code_label(
      labels,
      patterns = c("d0p1", "d0p5", "d0p9"),
      values = c(1, 5, 9),
      default = 13
    ),
    tension = code_label(
      labels,
      patterns = paste0("t0p", 0:9),
      values = 0:9,
      default = 10
    ),
    stringsAsFactors = FALSE
  )
}

Now, I would like to apply my function to all the rows in my dataset, and then bind them together. So far, I tried multiple solutions, which I copy below:

# Attempts to run Contours over every row and combine the results.
# NOTE(review): all of these operate on `T1`, while Contours reads the global
# `Data` -- confirm which object is intended.

# require() returns FALSE instead of stopping when the package is unavailable.
require(plyr)
# Contours(T1) is a call, so its result -- not the function -- is handed to
# ddply.
Stim <- ddply(T1, 1, Contours(T1))
# apply(X, 1, f) hands f the values of each row, whereas Contours uses its
# argument as a row index into `Data`.
Stim <- data.frame(t(apply(as.matrix(T1), 1, Contours)))
# Contours() is called here with no argument rather than passed as a function.
Stim <- apply(T1, 1, Contours())
# Same row-values-versus-row-index mismatch as the as.matrix() attempt.
Stim <- as.data.frame(apply(T1, 1, Contours))
# NOTE(review): MARGIN = 4 refers to a fourth dimension, which a single
# selected column presumably does not have -- confirm what was intended.
Stim <- apply(T1[,442], 4, Contours)

Unfortunately, none of them works. The output I would like to obtain is something like this:

             V1    PID order vertex distance tension
1 N11_d0p9_t0p0      1     1     11        9       0
2  N7_d1p3_t0p0      1     2      7       13       0
3 N11_d1p3_t0p3      1     3     11       13       3
4  N7_d0p5_t0p7      1     4      7        5       7
5  N7_d0p1_t0p1      1     5      7        1       1
6  N7_d0p9_t0p8      1     6      7        9       8

Here you can download the database to replicate the issue: T1database

Any suggestions on how I could do this would be much appreciated.

Martijn Pieters
  • 1,048,767
  • 296
  • 4,058
  • 3,343
Nicole
  • 23
  • 3
  • Please share a [reproducible example](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) including a small example of the data you used. A good way of presenting your data is using `dput()`. Usually the people here don't like to download external databases. – Martin Gal Jul 22 '20 at 11:03
  • Regarding your `Contours()`-function: take a look at `case_when()` to avoid those nested `ifelse`-functions. Your code is much easier to read that way. – Martin Gal Jul 22 '20 at 11:05
  • 2
    Hi Nicole, welcome to SO! You're much more likely to get helpful answers if you can condense your question down. For example, you might want to create some dummy data and a simpler example that more concisely shows the issue you need help with. More info available here: [How do I ask a good question?](https://stackoverflow.com/help/how-to-ask) – wurli Jul 22 '20 at 11:06
  • @MartinGal thanks for your suggestion, I updated the question with a reproducible example. I also tried the 'case_when()' function, but unfortunately it doesn't work. – Nicole Jul 22 '20 at 11:25

1 Answer

1

I tried to reproduce your desired output using dplyr, stringr and tidyr (or just tidyverse):

library(tidyverse)

# Reshape `Data` to one row per stimulus label and decode each label
# (e.g. "N11_d1p3_t0p4") into numeric vertex / distance / tension codes.
Data %>%
  # Turn every comma-separated cell into a list of labels, then give each
  # label its own row.
  mutate(T1_AllSequence = str_split(T1_AllSequence, ",")) %>%
  unnest(T1_AllSequence) %>%
  transmute(
    # str_trim() strips all surrounding whitespace left by the ", "
    # separators, not just the first space found in the string.
    V1 = str_trim(T1_AllSequence),
    PID = pIndex,
    # Numbers rows across the whole table; group by pIndex beforehand if a
    # per-participant counter is wanted instead.
    order = row_number(),
    vertex = ifelse(grepl("N7", V1), 7, 11),
    # case_when() takes the first matching branch; TRUE is the fallback.
    distance = case_when(
      grepl("d0p1", V1) ~ 1,
      grepl("d0p5", V1) ~ 5,
      grepl("d0p9", V1) ~ 9,
      TRUE ~ 13
    ),
    tension = case_when(
      grepl("t0p0", V1) ~ 0,
      grepl("t0p1", V1) ~ 1,
      grepl("t0p2", V1) ~ 2,
      grepl("t0p3", V1) ~ 3,
      grepl("t0p4", V1) ~ 4,
      grepl("t0p5", V1) ~ 5,
      grepl("t0p6", V1) ~ 6,
      grepl("t0p7", V1) ~ 7,
      grepl("t0p8", V1) ~ 8,
      grepl("t0p9", V1) ~ 9,
      TRUE ~ 10
    )
  )

returns

# A tibble: 12 x 6
   V1            PID   order vertex distance tension
   <chr>         <chr> <int>  <dbl>    <dbl>   <dbl>
 1 N11_d1p3_t0p4 1         1     11       13       4
 2 N11_d1p3_t0p1 1         2     11       13       1
 3 N11_d1p3_t0p3 1         3     11       13       3
 4 N11_d0p1_t0p4 2         4     11        1       4
 5 N11_d0p1_t0p7 2         5     11        1       7
 6 N11_d0p1_t0p5 2         6     11        1       5
 7 N7_d1p3_t0p4  3         7      7       13       4
 8 N7_d1p3_t0p6  3         8      7       13       6
 9 N7_d1p3_t0p4  3         9      7       13       4
10 N7_d0p5_t0p5  4        10      7        5       5
11 N7_d0p5_t0p6  4        11      7        5       6
12 N7_d0p5_t0p5  4        12      7        5       5
Martin Gal
  • 16,640
  • 5
  • 21
  • 39
  • 1
    Thank you so so much!! I did try to use grepl in my function but for some reason it didn't work... Your code works wonderfully!!! – Nicole Jul 22 '20 at 13:56