0

I have this data frame: There are 34 letters from A to X and from a to k. Each letter represents a time period so the first period of the day is A and the last period of the day is k. Given that this is financial data, not all letters are present somedays C might be missing, other days I might have only A and B, and this of that nature

set.seed(42)
day_month = rep(seq.Date(as.Date("2006-04-17"), as.Date("2006-04-26"), "day"),35)
day_month = day_month[order(day_month)]
let = rep(c(LETTERS[1:24],letters[1:11]),10)
High = rnorm(350, 14000, 250)
Low = rnorm(350, 13000, 300)
df <- data.frame(day_month, let, High, Low)
df <- df[-1,]
df <- df[-349,]

What I need is to create a variable with the values of high_a, low_a, high_A, low_A, and so on.

The first approach I use was:

df <- df %>% group_by(day_month) %>% summarise(day_month = first(day_month),
                                                                          high_A = nth(High, 2),
                                                                          low_A = nth(Low, 2),
                                                                          high_B = nth(High, 4),
                                                                          low_B = nth(Low, 4),
                                                                          high_D = nth(High, 7),
                                                                          low_D = nth(Low, 7),
                                                                          high_E = nth(High, 9),
                                                                          low_E = nth(Low, 9),
                                                                          high_F = nth(High, 11),
                                                                          low_F = nth(Low, 11),
                                                                          high_G = nth(High, 13),
                                                                          low_G = nth(Low, 13),
                                                                          high_H = nth(High, 15),
                                                                          low_H = nth(Low, 15),
                                                                          high_I = nth(High, 17),
                                                                          low_I = nth(Low, 17),
                                                                          high_J = nth(High, 19),
                                                                          low_J = nth(Low, 19),
                                                                          high_K = nth(High, 21),
                                                                          low_K = nth(Low, 21),
                                                                          high_L = nth(High, 22),
                                                                          low_L = nth(Low, 22),
                                                                          high_M = nth(High, 23),
                                                                          low_M = nth(Low, 23),
                                                                          high_N = nth(High, 24),
                                                                          low_N = nth(Low, 24),
                                                                          high_O = nth(High, 25),
                                                                          low_O = nth(Low, 25),
                                                                          high_P = nth(High, 26),
                                                                          low_P = nth(Low, 26),
                                                                          high_Q = nth(High, 27),
                                                                          low_Q = nth(Low, 27),
                                                                          high_R = nth(High, 28),
                                                                          low_R = nth(Low, 28),
                                                                          high_S = nth(High, 29),
                                                                          low_S = nth(Low, 29),
                                                                          high_T = nth(High, 30),
                                                                          low_T = nth(Low, 30),
                                                                          high_U = nth(High, 31),
                                                                          low_U = nth(Low, 31),
                                                                          high_V = nth(High, 32),
                                                                          low_V = nth(Low, 32),
                                                                          high_W = nth(High, 33),
                                                                          low_W = nth(Low, 33),
                                                                          high_X = nth(High, 34),
                                                                          low_X = nth(Low, 34),
                                                                          high_a = nth(High, 1),
                                                                          low_a = nth(Low, 1),
                                                                          high_b = nth(High, 3),
                                                                          low_b = nth(Low, 3),
                                                                          high_c = nth(High, 5),
                                                                          low_c = nth(Low, 5),
                                                                          high_d = nth(High, 6),
                                                                          low_d = nth(Low, 6),
                                                                          high_e = nth(High, 8),
                                                                          low_e = nth(Low, 8),
                                                                          high_f = nth(High, 9),
                                                                          low_f = nth(Low, 9),
                                                                          high_g = nth(High, 12),
                                                                          low_g = nth(Low, 12),
                                                                          high_h = nth(High, 14),
                                                                          low_h = nth(Low, 14),
                                                                          high_i = nth(High, 16),
                                                                          low_i = nth(Low, 16),
                                                                          high_j = nth(High, 18),
                                                                          low_j = nth(Low, 18),
                                                                          high_k = nth(High, 20),
                                                                          low_k = nth(Low, 20))

This code works but given that some days do not have all observations the data might be inconsistent. I would like to find a function in which I can define the letter I want to use as a condition instead of the row number.

I tried using the same code but instead of nth using subset in the form of: high_A = subset(High, let == "A") But this code creates a data frame without the days with missing observation.

df_2 <- df %>% group_by(day_month) %>% summarise(day_month = first(day_month),
                                                                          high_A = subset(High, let == "A"),
                                                                          low_A = subset(Low, let == "A"),
                                                                          high_B = subset(High, let == "B"),
                                                                          low_B = subset(Low, let == "B"),
                                                                          high_C = subset(High, let == "C"),
                                                                          low_C = subset(Low, let == "C"),
                                                                          high_D = subset(High, let == "D"),
                                                                          low_D = subset(Low, let == "D"),
                                                                          high_E = subset(High, let == "E"),
                                                                          low_E = subset(Low, let == "E"),
                                                                          high_F = subset(High, let == "F"),
                                                                          low_F = subset(Low, let == "F"),
                                                                          high_G = subset(High, let == "G"),
                                                                          low_G = subset(Low, let == "G"),
                                                                          high_H = subset(High, let == "H"),
                                                                          low_H = subset(Low, let == "H"),
                                                                          high_I = subset(High, let == "I"),
                                                                          low_I = subset(Low, let == "I"),
                                                                          high_J = subset(High, let == "J"),
                                                                          low_J = subset(Low, let == "J"),
                                                                          high_K = subset(High, let == "K"),
                                                                          low_K = subset(Low, let == "K"),
                                                                          high_L = subset(High, let == "L"),
                                                                          low_L = subset(Low, let == "L"),
                                                                          high_M = subset(High, let == "M"),
                                                                          low_M = subset(Low, let == "M"),
                                                                          high_N = subset(High, let == "N"),
                                                                          low_N = subset(Low, let == "N"),
                                                                          high_O = subset(High, let == "O"),
                                                                          low_O = subset(Low, let == "O"),
                                                                          high_P = subset(High, let == "P"),
                                                                          low_P = subset(Low, let == "P"),
                                                                          high_Q = subset(High, let == "Q"),
                                                                          low_Q = subset(Low, let == "Q"),
                                                                          high_R = subset(High, let == "R"),
                                                                          low_R = subset(Low, let == "R"),
                                                                          high_S = subset(High, let == "S"),
                                                                          low_S = subset(Low, let == "S"),
                                                                          high_T = subset(High, let == "T"),
                                                                          low_T = subset(Low, let == "T"),
                                                                          high_U = subset(High, let == "U"),
                                                                          low_U = subset(Low, let == "U"),
                                                                          high_V = subset(High, let == "V"),
                                                                          low_V = subset(Low, let == "V"),
                                                                          high_W = subset(High, let == "W"),
                                                                          low_W = subset(Low, let == "W"),
                                                                          high_X = subset(High, let == "X"),
                                                                          low_X = subset(Low, let == "X"),
                                                                          high_a = subset(High, let == "a"),
                                                                          low_a = subset(Low, let == "a"),
                                                                          high_b = subset(High, let == "b"),
                                                                          low_b = subset(Low, let == "b"),
                                                                          high_c = subset(High, let == "c"),
                                                                          low_c = subset(Low, let == "c"),
                                                                          high_d = subset(High, let == "d"),
                                                                          low_d = subset(Low, let == "d"),
                                                                          high_e = subset(High, let == "e"),
                                                                          low_e = subset(Low, let == "e"),
                                                                          high_f = subset(High, let == "f"),
                                                                          low_f = subset(Low, let == "f"),
                                                                          high_g = subset(High, let == "g"),
                                                                          low_g = subset(Low, let == "g"),
                                                                          high_h = subset(High, let == "h"),
                                                                          low_h = subset(Low, let == "h"),
                                                                          high_i = subset(High, let == "i"),
                                                                          low_i = subset(Low, let == "i"),
                                                                          high_j = subset(High, let == "j"),
                                                                          low_j = subset(Low, let == "j"),
                                                                          high_k = subset(High, let == "k"),
                                                                          low_k = subset(Low, let == "k"))

Is there any way I can get the variables I need by getting the values of High and Low base on the let column?

Pastor Soto
  • 336
  • 2
  • 14
  • For ease of understanding and solving, instead of sharing example data with 350 rows and a bunch of letter columns, you could make a minimal example with 20 rows and 3 letter columns? You can still specify that the solution needs to scale up to more columns easily, but it will be easier to see and understand and test solutions with a minimal eample. – Gregor Thomas Jul 23 '21 at 01:47

1 Answers1

1

I think a better approach would be to get the data in wide format using pivot_wider -

library(dplyr)
library(tidyr)

df %>%
  arrange(day_month, let) %>%
  pivot_wider(names_from = let, values_from = c(High, Low)) %>%
  select(day_month, order(sub('.*_', '', names(.))))

# day_month  High_a  Low_a High_A  Low_A High_b  Low_b High_B  Low_B High_c
#   <date>      <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
# 1 2006-04-17 14474. 13434.    NA     NA  13892. 12793. 13859. 12750. 13936.
# 2 2006-04-18 14071. 12427. 13571. 12935. 13908. 12789. 13804. 12909. 14046.
# 3 2006-04-19 13722. 13285. 13739. 13239. 13785. 12824. 13977. 12564. 13717.
# 4 2006-04-20 14045. 13119. 14026. 12904. 14142. 12932. 13894. 13486. 13877.
# 5 2006-04-21 14148. 13673. 13978. 12947. 14358. 13612. 13778. 12965. 13752.
# 6 2006-04-22 14032. 13413. 13962. 13276. 13500. 12532. 13638. 12638. 14083.
# 7 2006-04-23 13933. 13122. 13700. 13176. 13902. 12562. 14509. 13027. 14337.
# 8 2006-04-24 14015. 13062. 14512. 13048. 14143. 13300. 14254. 12870. 14011.
# 9 2006-04-25 13963. 13391. 14206. 13140. 13986. 12550. 13584. 13172. 14121.
#10 2006-04-26 13868. 12893. 13896. 13080. 13995. 12737. 13804. 13323. 14168.
# … with 61 more variables: Low_c <dbl>, High_C <dbl>, Low_C <dbl>,
#   High_d <dbl>, Low_d <dbl>, High_D <dbl>, Low_D <dbl>, High_e <dbl>,
#   Low_e <dbl>, High_E <dbl>, Low_E <dbl>, High_f <dbl>, Low_f <dbl>,
#   High_F <dbl>, Low_F <dbl>, High_g <dbl>, Low_g <dbl>, High_G <dbl>,
#   Low_G <dbl>, High_h <dbl>, Low_h <dbl>, High_H <dbl>, Low_H <dbl>,
#   High_i <dbl>, Low_i <dbl>, High_I <dbl>, Low_I <dbl>, High_j <dbl>,
#   Low_j <dbl>, High_J <dbl>, Low_J <dbl>, High_k <dbl>, Low_k <dbl>,
#   High_K <dbl>, Low_K <dbl>, High_L <dbl>, Low_L <dbl>, High_M <dbl>,
#   Low_M <dbl>, High_N <dbl>, Low_N <dbl>, High_O <dbl>, Low_O <dbl>,
#   High_P <dbl>, Low_P <dbl>, High_Q <dbl>, Low_Q <dbl>, High_R <dbl>,
#   Low_R <dbl>, High_S <dbl>, Low_S <dbl>, High_T <dbl>, Low_T <dbl>,
#   High_U <dbl>, Low_U <dbl>, High_V <dbl>, Low_V <dbl>, High_W <dbl>,
#   Low_W <dbl>, High_X <dbl>, Low_X <dbl>

This would make the values missing for the date as NA.

Ronak Shah
  • 377,200
  • 20
  • 156
  • 213