I have this data frame: There are 34 letters from A to X and from a to k. Each letter represents a time period so the first period of the day is A and the last period of the day is k. Given that this is financial data, not all letters are present somedays C might be missing, other days I might have only A and B, and this of that nature
set.seed(42)
day_month = rep(seq.Date(as.Date("2006-04-17"), as.Date("2006-04-26"), "day"),35)
day_month = day_month[order(day_month)]
let = rep(c(LETTERS[1:24],letters[1:11]),10)
High = rnorm(350, 14000, 250)
Low = rnorm(350, 13000, 300)
df <- data.frame(day_month, let, High, Low)
df <- df[-1,]
df <- df[-349,]
What I need is to create a variable with the values of high_a, low_a, high_A, low_A, and so on.
The first approach I use was:
df <- df %>% group_by(day_month) %>% summarise(day_month = first(day_month),
high_A = nth(High, 2),
low_A = nth(Low, 2),
high_B = nth(High, 4),
low_B = nth(Low, 4),
high_D = nth(High, 7),
low_D = nth(Low, 7),
high_E = nth(High, 9),
low_E = nth(Low, 9),
high_F = nth(High, 11),
low_F = nth(Low, 11),
high_G = nth(High, 13),
low_G = nth(Low, 13),
high_H = nth(High, 15),
low_H = nth(Low, 15),
high_I = nth(High, 17),
low_I = nth(Low, 17),
high_J = nth(High, 19),
low_J = nth(Low, 19),
high_K = nth(High, 21),
low_K = nth(Low, 21),
high_L = nth(High, 22),
low_L = nth(Low, 22),
high_M = nth(High, 23),
low_M = nth(Low, 23),
high_N = nth(High, 24),
low_N = nth(Low, 24),
high_O = nth(High, 25),
low_O = nth(Low, 25),
high_P = nth(High, 26),
low_P = nth(Low, 26),
high_Q = nth(High, 27),
low_Q = nth(Low, 27),
high_R = nth(High, 28),
low_R = nth(Low, 28),
high_S = nth(High, 29),
low_S = nth(Low, 29),
high_T = nth(High, 30),
low_T = nth(Low, 30),
high_U = nth(High, 31),
low_U = nth(Low, 31),
high_V = nth(High, 32),
low_V = nth(Low, 32),
high_W = nth(High, 33),
low_W = nth(Low, 33),
high_X = nth(High, 34),
low_X = nth(Low, 34),
high_a = nth(High, 1),
low_a = nth(Low, 1),
high_b = nth(High, 3),
low_b = nth(Low, 3),
high_c = nth(High, 5),
low_c = nth(Low, 5),
high_d = nth(High, 6),
low_d = nth(Low, 6),
high_e = nth(High, 8),
low_e = nth(Low, 8),
high_f = nth(High, 9),
low_f = nth(Low, 9),
high_g = nth(High, 12),
low_g = nth(Low, 12),
high_h = nth(High, 14),
low_h = nth(Low, 14),
high_i = nth(High, 16),
low_i = nth(Low, 16),
high_j = nth(High, 18),
low_j = nth(Low, 18),
high_k = nth(High, 20),
low_k = nth(Low, 20))
This code works but given that some days do not have all observations the data might be inconsistent. I would like to find a function in which I can define the letter I want to use as a condition instead of the row number.
I tried using the same code but instead of nth
using subset
in the form of: high_A = subset(High, let == "A")
But this code creates a data frame without the days with missing observation.
df_2 <- df %>% group_by(day_month) %>% summarise(day_month = first(day_month),
high_A = subset(High, let == "A"),
low_A = subset(Low, let == "A"),
high_B = subset(High, let == "B"),
low_B = subset(Low, let == "B"),
high_C = subset(High, let == "C"),
low_C = subset(Low, let == "C"),
high_D = subset(High, let == "D"),
low_D = subset(Low, let == "D"),
high_E = subset(High, let == "E"),
low_E = subset(Low, let == "E"),
high_F = subset(High, let == "F"),
low_F = subset(Low, let == "F"),
high_G = subset(High, let == "G"),
low_G = subset(Low, let == "G"),
high_H = subset(High, let == "H"),
low_H = subset(Low, let == "H"),
high_I = subset(High, let == "I"),
low_I = subset(Low, let == "I"),
high_J = subset(High, let == "J"),
low_J = subset(Low, let == "J"),
high_K = subset(High, let == "K"),
low_K = subset(Low, let == "K"),
high_L = subset(High, let == "L"),
low_L = subset(Low, let == "L"),
high_M = subset(High, let == "M"),
low_M = subset(Low, let == "M"),
high_N = subset(High, let == "N"),
low_N = subset(Low, let == "N"),
high_O = subset(High, let == "O"),
low_O = subset(Low, let == "O"),
high_P = subset(High, let == "P"),
low_P = subset(Low, let == "P"),
high_Q = subset(High, let == "Q"),
low_Q = subset(Low, let == "Q"),
high_R = subset(High, let == "R"),
low_R = subset(Low, let == "R"),
high_S = subset(High, let == "S"),
low_S = subset(Low, let == "S"),
high_T = subset(High, let == "T"),
low_T = subset(Low, let == "T"),
high_U = subset(High, let == "U"),
low_U = subset(Low, let == "U"),
high_V = subset(High, let == "V"),
low_V = subset(Low, let == "V"),
high_W = subset(High, let == "W"),
low_W = subset(Low, let == "W"),
high_X = subset(High, let == "X"),
low_X = subset(Low, let == "X"),
high_a = subset(High, let == "a"),
low_a = subset(Low, let == "a"),
high_b = subset(High, let == "b"),
low_b = subset(Low, let == "b"),
high_c = subset(High, let == "c"),
low_c = subset(Low, let == "c"),
high_d = subset(High, let == "d"),
low_d = subset(Low, let == "d"),
high_e = subset(High, let == "e"),
low_e = subset(Low, let == "e"),
high_f = subset(High, let == "f"),
low_f = subset(Low, let == "f"),
high_g = subset(High, let == "g"),
low_g = subset(Low, let == "g"),
high_h = subset(High, let == "h"),
low_h = subset(Low, let == "h"),
high_i = subset(High, let == "i"),
low_i = subset(Low, let == "i"),
high_j = subset(High, let == "j"),
low_j = subset(Low, let == "j"),
high_k = subset(High, let == "k"),
low_k = subset(Low, let == "k"))
Is there any way I can get the variables I need by getting the values of High and Low base on the let
column?