4

Per each day (TRADEDATE) I have an entry which is uniquely identified by BOOK and COMMODITY. Each entry has a Present Value (PV) which changes daily. I would like to get a column with the difference of the PV to the last calendar day of the previous month. I solved it by means of a loop but I was wondering if anybody can suggest a more elegant (and faster solution):

library(data.table)

bwTab
COMMODITY       BOOK   TRADEDATE PV Desired Column
   1:      CASH HS_OPT_GEN 2012-09-30 66669.68  NA
   2:      CASH HS_OPT_GEN 2012-10-01 76333.83  9664.15
   3:      CASH HS_OPT_GEN 2012-10-02 76333.83  9664.15
   4:      CASH HS_OPT_GEN 2012-10-03 76333.83  9664.15
   5:      CASH HS_OPT_GEN 2012-10-04 76333.83  9664.15
  ---     
3050:       OIL HO_OIL_FIN 2012-09-30 21330.55  NA
  ---     
3066:       OIL HO_OIL_FIN 2012-10-26 42661.28  21330.73
3067:       OIL HO_OIL_FIN 2012-10-27 21330.69  0.14
3068:       OIL HO_OIL_FIN 2012-10-28 21330.68  0.13
3069:       OIL HO_OIL_FIN 2012-10-29 21330.78  0.23

# Here is my solution

# Define a function for last day of previous month
pme <- function(date) {as.Date(paste("01",month(date),year(date),sep="."),"%d.%m.%Y")-1}

difftopme <- function(a) {

if (nrow(bwTab[COMMODITY==a[,COMMODITY] & BOOK==a[,BOOK] & TRADEDATE==pme(a[,STICHTAG]),])==0) {NA} else {
    a[,PV]-bwTab[COMMODITY==a[,COMMODITY] & BOOK==a[,BOOK] & TRADEDATE==pme(a[,TRADEDATE]),PV] }
}

for (i in 1:nrow(bwTab)){a <- difftopme(bwTab[i,]) ; if (i==1){diffPVme <- a} else {diffPVme <- c(a,diffPVme)}}


#########################
dput(bwTab[1000:1010,])
structure(list(COMMODITY = c("ELEC", "ELEC", "ELEC", "ELEC", 
"ELEC", "ELEC", "ELEC", "ELEC", "ELEC", "ELEC", "ELEC"), BOOK = c("HS_OUK_MKT", 
"HS_OUK_MKT", "HS_OUK_MKT", "HS_OUK_MKT", "HS_OUV_EVO", "HS_OUV_EVO", 
"HS_OUV_EVO", "HS_OUV_EVO", "HS_OUV_EVO", "HS_OUV_EVO", "HS_OUV_EVO"
), STICHTAG = structure(c(1353798000, 1353970800, 1354057200, 
1354143600, 1348956000, 1349042400, 1349128800, 1349215200, 1349301600, 
1349388000, 1349474400), class = c("POSIXct", "POSIXt"), tzone = ""), 
    BROKERAGE = c(123406.66, 61791.17, 62229.17, 62492.57, 0, 
    0, 0, 0, 0, 0, 0), DV = c(72873524.86, 38096138.75, 38283589.07, 
    38236199.05, 23171721.81, 23178889.59, 23187553.93, 23187426.98, 
    23173154.67, 23149439.13, 23149469.88), REALIZED = c(47002372.1, 
    23501186.05, 23501186.05, 23501186.05, 22961528, 22961528, 
    22961528, 22961528, 22961528, 22961528, 22961528), PV = c(25871152.76, 
    14594952.7, 14782403.02, 14735013, 210193.81, 217361.59, 
    226025.93, 225898.98, 211626.67, 187911.13, 187941.88), PV_ND = c(25973196.64, 
    14654807.46, 14843080.44, 14795220.35, 210222.01, 217386.44, 
    226048.76, 225920.76, 211641.41, 187919.95, 187949.85), BROKER_R = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0), CREDIT_R = c(0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0), STRUCTURE_R = c(0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0), BROKER_UR_D = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    ), CREDIT_UR_D = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), STRUCTURE_UR_D = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0), BROKER_UN_UND = c(0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0), CREDIT_UN_UND = c(0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0), STRUCTURE_UN_UND = c(0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0)), .Names = c("COMMODITY", "BOOK", "STICHTAG", 
"BROKERAGE", "DV", "REALIZED", "PV", "PV_ND", "BROKER_R", "CREDIT_R", 
"STRUCTURE_R", "BROKER_UR_D", "CREDIT_UR_D", "STRUCTURE_UR_D", 
"BROKER_UN_UND", "CREDIT_UN_UND", "STRUCTURE_UN_UND"), sorted = c("COMMODITY", 
"BOOK", "STICHTAG"), class = c("data.table", "data.frame"), row.names = c(NA, 
-11L), .internal.selfref = <pointer: 0x014024a0>)
ddg
  • 2,493
  • 2
  • 20
  • 23
  • WTF? You use `data.table` (not really necessary with only 3k rows) and than loop over the rows with a `for` loop? Can you please use `dput` to provide a representive part of the data.table? – Roland Dec 19 '12 at 14:19
  • also please clean up your sloppy code ;) if you are too lazy to do it yourself, [watch this video](http://www.screenr.com/Vap8) – Anthony Damico Dec 19 '12 at 14:27
  • Hi Roland, I copied the result of the dput above. I am using the data.table mostly bacause I am used to it (I find the syntax very convenient). – ddg Dec 19 '12 at 15:17
  • the `bwTab` that you have dput is *not* the same as what you have in your example. (ie, no `TRADEDATE`) – Ricardo Saporta Dec 19 '12 at 18:42
  • Hi Ricardo, you are right. I tried to put a simplified example. The TRADEDATE would be the STICHTAG in the dput. – ddg Dec 20 '12 at 08:02

1 Answers1

2
# the zoo library has a year-month class,
# which makes it easy to find the month's end
library(zoo)

# just use the first eight records of mtcars as an example
x <- mtcars[ 1:8 , ]

# as an example,
# stick a bunch of dates onto the x data frame
x$TRADEDATE <- 
    c( '2012-10-31' , '2012-11-17' , '2012-11-30' , '2012-12-15' , '2012-12-13' , '2012-12-15' , '2012-08-31' , '2012-09-22' )

# calculate each date's end-of-month of the previous month
# just subtract by 1/12th to get this!
month.ends <- 
    as.Date( 
        as.yearmon( 
            x$TRADEDATE 
        ) - 1/12 , 
        # frac = 1 indicates "the end of this period" --
        # frac = 0 would be the start.
        frac = 1 
    )

# isolate the rows that exactly match the month end date for each given date
month.end.rows <- 
    # convert the rows to an integer vector
    as.integer( 
        # figure out which rows contain the `month.ends` for every record in the data table
        lapply( 
            # run each value in `month.ends` through..
            month.ends , 
            # this new simple which( x == y ) function
            function( x , y ) which( x == y ) , 
            # where `y` is the full contents of the TRADEDATE column of your data frame
            as.Date( x$TRADEDATE ) 
        ) 
    )

# note that month.end.rows' length == nrow( x )
stopifnot( length( month.end.rows ) == nrow( x ) )

# now just subtract something from the same variable using its respective month end date
x[ , "desired.column" ] <- x[ , "carb" ] - x[ month.end.rows , "carb" ]
Anthony Damico
  • 5,779
  • 7
  • 46
  • 77
  • Hi Anthony, many thanks for your answer however it is not quite what I am after. Per each day (TRADEDATE) I have an entry uniquely identified by BOOK and COMMODITY. Each entry has a Present Value (PV) which evolves daily. I would like to get the difference in the PV of each entry each day compared to the last calendar day of the previous month. What your code gives is the days difference to the month-end given a logical test. – ddg Dec 19 '12 at 15:40
  • @ddg that was not clear. [give this a read](http://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) ;) – Anthony Damico Dec 19 '12 at 17:02