0

I am using the `GroupedMedian` function (i.e. the median of grouped data) given in the following link:

how to calculate the median on grouped dataset? (solution by A5C1D2H2I1M1N2O1R2T1)

For simplicity I will stick to the example of a salary range and counts of people who make that amount of money. I have the following conundrum: imagine I am an accountant with different categories of employees, so I have the same salary range but 60 columns of salary counts, and I have 6 different companies. If I were to use this function as is, I would have to repeat the steps 360 times... manually. That is a lot of copy-pasting.

I have tried the following (my salary ranges are the row names):

GroupedMedian(1:ncol(mydf), mydf$salary, sep="-")

resulting in the following error:

Error in intervals[1, Midrow] : subscript out of bounds

Does anybody have an idea how to calculate the grouped median on every column and perhaps add it to the table as a row below?

UPDATE: As requested, here is the `dput` output for my data frame:

structure(list(Heu1_C = c(0L, 1L, 13L, 9L, 3L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L), Hi1_C = c(0L, 9L, 18L, 10L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L), Hi2_C = c(0L, 8L, 10L, 7L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L), Hi3_R = c(0L, 0L, 2L, 4L, 5L, 2L, 0L, 0L, 0L, 0L, 0L, 
0L), Hi4_I = c(0L, 15L, 9L, 10L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L), Hi5_I = c(0L, 4L, 11L, 18L, 2L, 3L, 0L, 0L, 0L, 0L, 0L, 
0L), Ke1_C = c(0L, 8L, 15L, 13L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L), Ke2_C = c(0L, 12L, 10L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L), Ke3_I = c(0L, 4L, 12L, 8L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
), Ke4_I = c(0L, 5L, 12L, 7L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), 
Ke5_I = c(0L, 0L, 3L, 4L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), 
Ke6_R = c(0L, 0L, 2L, 7L, 4L, 2L, 0L, 0L, 0L, 0L, 0L, 0L), 
Ke7_I = c(0L, 9L, 13L, 13L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
), Ke8_I = c(0L, 8L, 6L, 13L, 3L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L), Ke9_I = c(0L, 6L, 12L, 9L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L), Ke10_S = c(0L, 2L, 5L, 3L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L), Ke11_S = c(0L, 3L, 4L, 5L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L), Ku1_A = c(0L, 1L, 4L, 8L, 8L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L), Ku2_C = c(0L, 9L, 12L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L), Ku3_I = c(0L, 2L, 8L, 17L, 4L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L), Ku4_I = c(1L, 6L, 15L, 12L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L), Ku5_I = c(0L, 6L, 14L, 10L, 0L, 0L, 1L, 0L, 0L, 
0L, 0L, 0L), Ku6_I = c(0L, 10L, 10L, 8L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L), Ku7_R = c(0L, 4L, 5L, 13L, 3L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L), Ku8_R = c(0L, 9L, 9L, 10L, 1L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L), Ku9_R = c(0L, 0L, 0L, 3L, 3L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L), Ku10_I = c(0L, 4L, 10L, 14L, 1L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L), Ru1_I = c(0L, 13L, 11L, 11L, 7L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L), Ru2_I = c(1L, 8L, 11L, 12L, 1L, 
1L, 0L, 0L, 0L, 0L, 0L, 0L), Ru3_C = c(0L, 11L, 13L, 7L, 
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Sch1_C = c(0L, 6L, 7L, 5L, 
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Sch2_I = c(0L, 5L, 8L, 11L, 
4L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Sch3_S = c(0L, 6L, 11L, 
10L, 8L, 1L, 0L, 0L, 0L, 0L, 0L, 0L), Sch4_S = c(0L, 2L, 
1L, 2L, 8L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Se1_C = c(0L, 6L, 
15L, 14L, 4L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Se2_C = c(1L, 
9L, 10L, 12L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Se3_C = c(0L, 
8L, 9L, 8L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Se4_S = c(1L, 
1L, 2L, 12L, 11L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Se5_S = c(0L, 
1L, 3L, 6L, 14L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Se6_S = c(0L, 
0L, 1L, 6L, 15L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StL1_I = c(0L, 
0L, 5L, 10L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StL2_C = c(0L, 
5L, 8L, 7L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StL3_S = c(0L, 
0L, 0L, 2L, 9L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StL4_S = c(0L, 
0L, 0L, 2L, 7L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StN1_C = c(0L, 
2L, 12L, 3L, 4L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StN2_C = c(0L, 
5L, 16L, 10L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StN3_R = c(0L, 
1L, 2L, 10L, 9L, 1L, 0L, 0L, 0L, 0L, 0L, 0L), StN4_R = c(0L, 
0L, 3L, 9L, 11L, 1L, 0L, 0L, 0L, 0L, 0L, 0L), StN5_R = c(1L, 
0L, 0L, 4L, 6L, 1L, 0L, 0L, 0L, 0L, 0L, 0L), StN6_R = c(0L, 
0L, 0L, 5L, 13L, 1L, 0L, 0L, 0L, 0L, 0L, 0L), StN7_R = c(0L, 
0L, 1L, 4L, 7L, 4L, 0L, 0L, 0L, 0L, 0L, 0L), StN8_S = c(0L, 
0L, 1L, 3L, 8L, 2L, 0L, 0L, 0L, 0L, 0L, 0L), StN9_S = c(0L, 
2L, 4L, 4L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StW1_C = c(0L, 
8L, 12L, 8L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StW2_C = c(0L, 
12L, 16L, 8L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StW3_I = c(0L, 
15L, 16L, 10L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StW4_I = c(0L, 
6L, 13L, 5L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StW5_C = c(0L, 
8L, 12L, 12L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StW6_S = c(0L, 
5L, 8L, 8L, 7L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), StW7_S = c(0L, 
0L, 1L, 5L, 10L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("Heu1_C", 
"Hi1_C", "Hi2_C", "Hi3_R", "Hi4_I", "Hi5_I", "Ke1_C", "Ke2_C", 
"Ke3_I", "Ke4_I", "Ke5_I", "Ke6_R", "Ke7_I", "Ke8_I", "Ke9_I", 
"Ke10_S", "Ke11_S", "Ku1_A", "Ku2_C", "Ku3_I", "Ku4_I", "Ku5_I", 
"Ku6_I", "Ku7_R", "Ku8_R", "Ku9_R", "Ku10_I", "Ru1_I", "Ru2_I", 
"Ru3_C", "Sch1_C", "Sch2_I", "Sch3_S", "Sch4_S", "Se1_C", "Se2_C", 
"Se3_C", "Se4_S", "Se5_S", "Se6_S", "StL1_I", "StL2_C", "StL3_S", 
"StL4_S", "StN1_C", "StN2_C", "StN3_R", "StN4_R", "StN5_R", "StN6_R", 
"StN7_R", "StN8_S", "StN9_S", "StW1_C", "StW2_C", "StW3_I", "StW4_I", 
"StW5_C", "StW6_S", "StW7_S"), class = "data.frame", row.names = c("0 - 1", 
"1 - 2", "2 - 3", "3 - 4", "4 - 5", "5 - 6", "6 - 7", "7 - 8", 
"8 - 9", "9 - 10", "10 - 11", "11 - 12"))
Community
  • 1
  • 1
Gmichael
  • 526
  • 1
  • 5
  • 16
  • Tried with apply and creating an object<-1:nrow(mydf), same problems. – Gmichael May 22 '17 at 15:41
  • The `GroupedMedian` function you mention takes as first argument the counts for each salary range whereas you give it a sequence from 1 to your number of columns. You should include a `dput()` of your data in your question to show how your dataframe is structured. – Lamia May 22 '17 at 15:55
  • Structure can be explained very simply: First column=range of salaries, second to nth column: different companies for which I want an individual grouped median. – Gmichael May 23 '17 at 06:35
  • You just need to give the function the correct input: `GroupedMedian(df[,i],rownames(df),sep="-")` gives you the grouped median for column i. To get it for all columns at once, use `apply(df,2,GroupedMedian,rownames(df),sep="-")` – Lamia May 23 '17 at 14:31

0 Answers