0

My last question got marked as a duplicate, but although I think the questions are similar, I found the answers didn't work for my dataset. None of the options for questions like this seem to work for me and I don't know what I'm doing wrong! I'm looking for a really clear and simple answer, if possible.

I have a data set called rawdata containing the ODs (optical densities) of a bacterial species grown on 10 different carbon sources, measured at various time points. It looks something like this:

Time    Carbon1      Carbon2     Carbon3 
 0        0.1          0.3         0.1
 0        0.2          0.4         0.1
 24       0.4          0.6         0.2   
 24       0.35         0.5         0.2 
 48       0.67         0.8         0.3
 48       0.7          0.8         0.4    

I would like to calculate the mean of each time point for each carbon - e.g. carbon 1 at 0 hrs, at 24 hrs and at 48 hrs etc. I've tried a few things so far but nothing has worked. From what I've read, creating a new data frame that contains all the means could work but I have no idea how to do that, or is there a simpler way?

I tried this option:

data2 <- setDT(rawdata)[, lapply(.SD, mean), by=.(Time), .SDcols=c("Carbon1","Carbon2")]

but got the error

Error during wrapup: invalid subscript type 'list'.

Any ideas why?

This is what I get when I input dput(head(rawdata))

structure(list(Time = c(0, 0, 0, 0, 0, 0), Sucrose = structure(c(3L, 
6L, 4L, 8L, 7L, 5L), .Label = c("0.0755", "0.0761", "0.0766", 
"0.0771", "0.0773", "0.0774", "0.0781", "0.0786", "0.095", "0.09648", 
"0.09776", "0.11422", "0.11688", "0.11964", "0.12182", "0.13038", 
"0.2506", "0.2598", "0.265", "0.2654", "0.266", "0.2798", "0.2926", 
"0.3039", "0.3621", "0.3832", "0.3903", "0.4012", "0.4308", "0.4323", 
"0.4412", "0.4467", "Sucrose"), class = "factor"), Citric.Acid = structure(c(3L, 
8L, 7L, 2L, 5L, 6L), .Label = c("0.0757", "0.0759", "0.076", 
"0.0761", "0.0767", "0.0769", "0.077", "0.079", "0.11856", "0.12232", 
"0.13074", "0.14048", "0.1421", "0.14796", "0.15006", "0.1536", 
"0.3533", "0.3769", "0.3812", "0.3825", "0.3855", "0.3937", "0.3951", 
"0.3998", "0.5836", "0.6081", "0.6343", "0.6622", "0.6782", "0.6836", 
"0.7016", "0.7454", "Citric Acid"), class = "factor"), Furmaric.Acid = structure(c(4L, 
6L, 5L, 2L, 6L, 7L), .Label = c("0.0758", "0.0762", "0.0764", 
"0.0767", "0.0768", "0.0769", "0.0784", "0.11578", "0.12912", 
"0.13042", "0.13996", "0.14836", "0.14912", "0.15432", "0.16052", 
"0.3031", "0.3217", "0.3243", "0.3306", "0.3307", "0.3318", "0.3333", 
"0.3377", "0.4045", "0.4065", "0.4086", "0.4165", "0.4328", "0.4508", 
"0.466", "0.5077", "Furmaric Acid"), class = "factor"), Glucose = structure(c(1L, 
6L, 5L, 3L, 4L, 7L), .Label = c("0.0765", "0.0767", "0.0769", 
"0.0777", "0.078", "0.0789", "0.0802", "0.09422", "0.09506", 
"0.10346", "0.10648", "0.11776", "0.12116", "0.1291", "0.13206", 
"0.2444", "0.258", "0.2656", "0.2682", "0.2707", "0.2765", "0.2808", 
"0.2961", "0.337", "0.3405", "0.3409", "0.3469", "0.3623", "0.3824", 
"0.3875", "0.393", "Glucose"), class = "factor"), Glutamine = structure(c(7L, 
6L, 3L, 3L, 5L, 4L), .Label = c("0.0763", "0.0764", "0.078", 
"0.0781", "0.0786", "0.0789", "0.0832", "0.23338", "0.2527", 
"0.25352", "0.25358", "0.259", "0.261", "0.26916", "0.27426", 
"0.353", "0.3595", "0.3628", "0.3788", "0.389", "0.396", "0.4021", 
"0.4087", "0.4168", "0.4551", "0.4576", "0.4683", "0.4802", "0.4886", 
"0.5184", "0.5405", "Glutamine"), class = "factor"), Histidine = structure(c(4L, 
5L, 7L, 3L, 6L, 1L), .Label = c("0.0768", "0.0769", "0.0772", 
"0.0775", "0.078", "0.0781", "0.0796", "0.0865", "0.08678", "0.08904", 
"0.08908", "0.0892", "0.09098", "0.0948", "0.09814", "0.1442", 
"0.1553", "0.1687", "0.1706", "0.1715", "0.1725", "0.1736", "0.1839", 
"0.2972", "0.3041", "0.3152", "0.321", "0.3217", "0.3226", "0.345", 
"Histidine"), class = "factor"), Arabinose = structure(c(3L, 
2L, 4L, 10L, 6L, 5L), .Label = c("0.0761", "0.0767", "0.0771", 
"0.0772", "0.0775", "0.0781", "0.0786", "0.08356", "0.084", "0.0863", 
"0.0873", "0.08758", "0.08806", "0.08996", "0.09204", "0.1121", 
"0.1123", "0.1173", "0.1195", "0.1207", "0.1263", "0.1269", "0.1297", 
"0.1321", "0.1329", "0.1349", "0.1478", "0.1499", "0.1522", "0.1796", 
"0.18712", "0.223", "Arabinose"), class = "factor"), Maleic.Acid = structure(c(4L, 
6L, 4L, 5L, 2L, 7L), .Label = c("0.0756", "0.0759", "0.0762", 
"0.0768", "0.0772", "0.0774", "0.0794", "0.08174", "0.08244", 
"0.08264", "0.08312", "0.0832", "0.08894", "0.09218", "0.0941", 
"0.1119", "0.1161", "0.1173", "0.1179", "0.1227", "0.124", "0.1243", 
"0.1279", "0.12804", "0.1299", "0.1305", "0.1426", "0.1502", 
"0.1562", "0.1596", "0.1638", "Maleic Acid"), class = "factor"), 
    Serine = structure(c(5L, 7L, 4L, 5L, 3L, 6L), .Label = c("0.0759", 
    "0.0761", "0.0765", "0.0775", "0.0776", "0.0778", "0.0783", 
    "0.0808", "0.0831", "0.08386", "0.0845", "0.0846", "0.08534", 
    "0.08744", "0.0891", "0.0909", "0.09392", "0.1117", "0.1127", 
    "0.1137", "0.1181", "0.1184", "0.1199", "0.1205", "0.1221", 
    "0.1236", "0.1283", "0.1352", "0.1376", "0.1398", "0.1405", 
    "Serine"), class = "factor"), Mix = structure(c(6L, 7L, 4L, 
    5L, 1L, 2L), .Label = c("0.076", "0.0769", "0.0771", "0.0772", 
    "0.0787", "0.0788", "0.0809", "0.08128", "0.08164", "0.0842", 
    "0.0865", "0.08664", "0.08746", "0.08788", "0.09592", "0.1597", 
    "0.1651", "0.1711", "0.1736", "0.1839", "0.1842", "0.1869", 
    "0.1957", "0.2861", "0.3118", "0.3166", "0.3212", "0.3304", 
    "0.3741", "0.383", "0.4112", "Mix"), class = "factor")), .Names = c("Time", 
"Sucrose", "Citric.Acid", "Furmaric.Acid", "Glucose", "Glutamine", 
"Histidine", "Arabinose", "Maleic.Acid", "Serine", "Mix"), .internal.selfref = <pointer: (nil)>, row.names = c(NA, 
6L), class = c("data.table", "data.frame"))

Thanks!

Community
  • 1
  • 1
Roseanna
  • 11
  • 3
  • What programming language is this using? It should have a tag on it so that the question appears on that language's 'unanswered questions' list. – Tom Oakley Nov 06 '17 at 14:22
  • It's r, I thought I'd tagged it – Roseanna Nov 06 '17 at 14:26
  • Your `data.table` solution works for me – pogibas Nov 06 '17 at 14:36
  • 1
    Please provide `dput(head(rawdata))`. You have character or factor columns it seems – David Arenburg Nov 06 '17 at 14:45
  • You've got character values instead of numbers. Check how you've got your data and make sure the types of columns are correct. Use `summary(mydata)` often. Once you've got that correct you'll find answers for how to summarize over a bunch of rows and columns already exist. – Spacedman Nov 06 '17 at 14:54
  • Look at the two dupes above. The first one will help you to convert from factor to numeric and the second has all the info you need in order to solve your problem. – David Arenburg Nov 06 '17 at 14:58

1 Answer

2

melt your data frame, then tapply mean over the index variables:

> head(rawdata)
  Time Carbon1 Carbon2 Carbon3
1    0    0.10     0.3     0.1
2    0    0.20     0.4     0.1
3   24    0.40     0.6     0.2
4   24    0.35     0.5     0.2
5   48    0.67     0.8     0.3
6   48    0.70     0.8     0.4


> d = reshape2::melt(rawdata,id="Time")
> head(d)
  Time variable value
1    0  Carbon1  0.10
2    0  Carbon1  0.20
3   24  Carbon1  0.40
4   24  Carbon1  0.35
5   48  Carbon1  0.67
6   48  Carbon1  0.70


> tapply(d$value, list(d$Time, d$variable), mean)

   Carbon1 Carbon2 Carbon3
0    0.150    0.35    0.10
24   0.375    0.55    0.20
48   0.685    0.80    0.35
Spacedman
  • 92,590
  • 12
  • 140
  • 224
  • Hi, thanks! I think that should work, but when I get to the tapply step I get 40 of these warning messages: 'In mean.default(X[[i]], ...) : argument is not numeric or logical: returning NA'. Do you have any idea what could be causing this? – Roseanna Nov 06 '17 at 14:42
  • 1
    Your data isn't numeric. You've got character values in there. *Always* check your data types. – Spacedman Nov 06 '17 at 14:46