1

I am very new to R as I'm sure will be obvious from my question.

I have a data frame (d) that looks like this:

 dput(d[1:24,])
structure(list(year = c(1967, 1967, 1967, 1967, 1967, 1967, 1967, 
1967, 1968, 1968, 1968, 1968, 1968, 1968, 1968, 1968, 1968, 1968, 
1968, 1968, 1969, 1969, 1969, 1969), month = c(5, 6, 7, 8, 9, 
10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4
), temp = c(16.545, 20.2275, 24.9425, 24.704, 21.5625, 20.3833333333333, 
18.085, 16.325, 13.725, 13.095, 13.07, 15.2525, 16.4933333333333, 
20.64, 23.0375, 22.4766666666667, 21.1975, 20.458, 17.9725, 16.1866666666667, 
13.78, 13.155, 12.822, 14.0666666666667), date = structure(c(-976, 
-945, -915, -884, -853, -823, -792, -762, -731, -700, -671, -640, 
-610, -579, -549, -518, -487, -457, -426, -396, -365, -334, -306, 
-275), class = "Date")), .Names = c("year", "month", "temp", 
"date"), row.names = c("1", "2", "3", "4", "5", "6", "7", "8", 
"9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", 
"20", "21", "22", "23", "24"), class = "data.frame")

From this I have found and stored the average value of "temp" for each month:

# Stored average value of temp for each calendar month.
# One assignment per line: several `<-` statements on a single line without
# a separator do not parse in R.
jan <- 13.80588
feb <- 13.31874
mar <- 13.35263
apr <- 14.31068
may <- 17.00249
jun <- 20.55553
jul <- 23.55765
aug <- 24.55040
sep <- 22.56809
oct <- 20.15921
nov <- 17.70971
dec <- 15.41233

From each of the values in the column "temp" I would like to subtract the average for the corresponding month and add the result to a new column. That is, for every row where month == 1, subtract jan from the temp value in the same row, e.g. d[d$month == 1, 5] <- d$temp[d$month == 1] - jan, and likewise for the other months.

I have tried to do this using a for loop:

 # Attempt: for each row i, subtract that row's monthly mean from temp and
 # store the result in column 5.
 # NOTE(review): d$month[i] == k is a single TRUE/FALSE, so which() yields
 # either 1 or integer(0); the row index used in d[..., 5] is never i itself.
 for (i in 1:nrow(d)){
+   d[which(d$month[i]==1),5]<-c(d$temp[i] - jan)
+   d[which(d$month[i]==2),5]<-c(d$temp[i] - feb)
+   d[which(d$month[i]==3),5]<-c(d$temp[i] - mar)
+   d[which(d$month[i]==4),5]<-c(d$temp[i] - apr)
+   d[which(d$month[i]==5),5]<-c(d$temp[i] - may)
+   d[which(d$month[i]==6),5]<-c(d$temp[i] - jun)
+   d[which(d$month[i]==7),5]<-c(d$temp[i] - jul)
+   d[which(d$month[i]==8),5]<-c(d$temp[i] - aug)
+   d[which(d$month[i]==9),5]<-c(d$temp[i] - sep)
+   d[which(d$month[i]==10),5]<-c(d$temp[i] - oct)
+   d[which(d$month[i]==11),5]<-c(d$temp[i] - nov)
+   d[which(d$month[i]==12),5]<-c(d$temp[i] - dec)
+ }

There were 50 or more warnings (use warnings() to see the first 50)

This results in the correct month being chosen for each row, but not the corresponding temp entry: R uses the temp value in the first row for every calculation. I'm sure there must be an easier way!

Thanks in advance

aizaz
  • 3,056
  • 9
  • 25
  • 57
Ella
  • 11
  • 2

1 Answer

2

R provides much easier ways to do this kind of thing. You can skip creating all those jan, feb, ... variables and just use ddply from the plyr package, which splits a data frame into chunks (in this case one chunk per month), applies a function to each chunk, and combines the results:

# Rebuild the example data frame: 24 rows with columns year, month, temp, date.
df = structure(list(year = c(1967, 1967, 1967, 1967, 1967, 1967, 1967, 1967, 1968, 1968, 1968, 1968, 1968, 1968, 1968, 1968, 1968, 1968, 1968, 1968, 1969, 1969, 1969, 1969), month = c(5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4 ), temp = c(16.545, 20.2275, 24.9425, 24.704, 21.5625, 20.3833333333333, 18.085, 16.325, 13.725, 13.095, 13.07, 15.2525, 16.4933333333333, 20.64, 23.0375, 22.4766666666667, 21.1975, 20.458, 17.9725, 16.1866666666667, 13.78, 13.155, 12.822, 14.0666666666667), date = structure(c(-976, -945, -915, -884, -853, -823, -792, -762, -731, -700, -671, -640, -610, -579, -549, -518, -487, -457, -426, -396, -365, -334, -306, -275), class = "Date")), .Names = c("year", "month", "temp", "date"), row.names = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24"), class = "data.frame")

library(plyr)

# Split df into one piece per month, add temp_normed (each temp minus the
# mean temp of its own month) to every piece, then recombine the pieces.
df <- ddply(
  .data = df,
  .variables = .(month),
  .fun = transform,
  temp_normed = temp - mean(temp)
)

This is an example of a split-apply-combine strategy, which the plyr package is very useful for. Note that the result is ordered by month rather than in the original row order.

Output:

> df
   year month     temp       date temp_normed
1  1968     1 13.72500 1968-01-01 -0.02750000
2  1969     1 13.78000 1969-01-01  0.02750000
3  1968     2 13.09500 1968-02-01 -0.03000000
4  1969     2 13.15500 1969-02-01  0.03000000
5  1968     3 13.07000 1968-03-01  0.12400000
6  1969     3 12.82200 1969-03-01 -0.12400000
7  1968     4 15.25250 1968-04-01  0.59291667
8  1969     4 14.06667 1969-04-01 -0.59291667
9  1967     5 16.54500 1967-05-01  0.02583333
10 1968     5 16.49333 1968-05-01 -0.02583333
11 1967     6 20.22750 1967-06-01 -0.20625000
12 1968     6 20.64000 1968-06-01  0.20625000
13 1967     7 24.94250 1967-07-01  0.95250000
14 1968     7 23.03750 1968-07-01 -0.95250000
15 1967     8 24.70400 1967-08-01  1.11366667
16 1968     8 22.47667 1968-08-01 -1.11366667
17 1967     9 21.56250 1967-09-01  0.18250000
18 1968     9 21.19750 1968-09-01 -0.18250000
19 1967    10 20.38333 1967-10-01 -0.03733333
20 1968    10 20.45800 1968-10-01  0.03733333
21 1967    11 18.08500 1967-11-01  0.05625000
22 1968    11 17.97250 1968-11-01 -0.05625000
23 1967    12 16.32500 1967-12-01  0.06916667
24 1968    12 16.18667 1968-12-01 -0.06916667
Marius
  • 58,213
  • 16
  • 107
  • 105