0

I have a dataframe with 5 columns: "Date_Time", "Categ_1", "Categ_2", "Price" and "Volume". The dataframe is ordered by Date_Time. For this ordered dataframe, I would like to calculate the Price percentage change within each group of Categ_1 and Categ_2. I am using the code:

A <- dataset %>% 
    group_by(Categ_1, Categ_2) %>%
    mutate(price_prc_change = (Price/lag(Price)-1)*100)

but the results do not work by group. The % change is calculated based on Price column only and does not take into account Categ_1 or Categ_2.

> dput(droplevels(df[1:50, ]))
structure(list(Date_Time = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 5L, 6L, 
6L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 11L, 11L), .Label = c("2020-04-30 15:26:03.8430000", 
"2020-04-30 15:26:14.5870000", "2020-04-30 15:45:11.4690000", 
"2020-04-30 16:00:37.8760000", "2020-04-30 16:00:39.4960000", 
"2020-04-30 16:01:18.9490000", "2020-04-30 16:02:02.6070000", 
"2020-04-30 16:02:23.2270000", "2020-04-30 16:02:25.3250000", 
"2020-04-30 16:07:34.7980000", "2020-04-30 16:08:33.1210000"), class = "factor"), 
    Categ_1 = structure(c(3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    4L, 4L, 2L, 2L, 4L, 1L, 1L, 3L, 1L, 3L, 5L), .Label = c("ED1", 
    "ED2", "ED3", "ED4", "XHE"), class = "factor"), Categ_2 = structure(c(5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
    6L, 6L, 6L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
    8L, 8L, 8L, 8L, 8L, 8L, 10L, 10L, 3L, 2L, 3L, 2L, 4L, 4L, 
    9L, 9L, 7L, 7L, 1L), .Label = c("H01", "H06Q1", "H07Q1", 
    "H09Q2", "H11", "H12", "H12Q3", "H13", "H15Q4", "H18Q1"), class = "factor"), 
    Price = c(-14.88, -14.88, -14.88, -14.88, -14.89, -14.88, 
    -14.88, -14.88, -14.88, -14.89, -14.48, -14.48, -14.48, -14.48, 
    -14.49, -14.5, -14.48, -14.48, -14.48, -14.48, -14.49, -11.38, 
    -11.38, -11.38, -11.38, -11.38, -11.38, -11.38, -11.39, -11.39, 
    -11.38, -11.38, -11.38, -11.38, -11.38, -11.38, -11.38, -11.39, 
    -11.39, -42, -52.2, 9.6, 9.96, 9.6, 9.96, 9.46, 9.46, 17.86, 
    17.86, 9.56), Volume = c(5.8, 5.8, 5.8, 2.6, 5, 5.8, 5.8, 
    5.8, 2.6, 5, 5.5, 5.5, 5.5, 3.5, 5, 25, 5.5, 5.5, 5.5, 3.5, 
    5, 5.4, 0.6, 4.8, 1.2, 4.2, 1.8, 2, 4, 1, 5.4, 0.6, 4.8, 
    1.2, 4.2, 1.8, 2, 4, 1, 1, 1, 1, 1, 1, 1, 0.1, 0.1, 0.1, 
    0.1, 0.1)), row.names = c(NA, 50L), class = "data.frame")
Oty
  • 37
  • 6
  • Could you please share a sample data using the function `dput` on your dataset and pasting the output in your question? Thanks – Ric S Jun 15 '20 at 15:06
  • Included the sample @Ric S! Thanks – Oty Jun 15 '20 at 15:59
  • Almost always, when `group_by` seems not to work, you've loaded `plyr` after `dplyr` and ignored the warning. [See the related FAQ](https://stackoverflow.com/q/26106146/903061), replace your `mutate` with `dplyr::mutate` to test if this is the case. – Gregor Thomas Jun 15 '20 at 16:02
  • You are very right @GregorThomas, it worked perfectly with `dplyr::mutate` ! Thanks a lot! – Oty Jun 15 '20 at 16:07

0 Answers0