I have a data frame that looks something like this
user booking_date origin destination price sale_channel
1 user5 2018-11-01 MAD PMI 58.20 online
2 user7 2018-11-01 DUB MAD 147.50 online
3 user4 2018-11-02 TFS MAD 24.05 online
4 user7 2018-11-01 LPA MAD 37.30 call center
5 user1 2018-11-01 AMS MAD 149.74 travel agency
6 user1 2018-11-01 MAD PMI 19.95 online
Now I want to:
- Filter by a specific date (2018-11-02) which I've done by
df <- df[df$booking_date == '2018-11-02',]
- Group by the user and sum u their total spending on tickets.
I've tried a couple of approaches using group_by or aggregate but all I managed to get was an additional column but not grouping or aggregating the value per user.
- My final output would idealy by something enabling me to extract the 10 users that spend the most on tickets like this for example:
'user1' 'user10' 'user 7' etc.
Example Data
structure(list(user = c("user4", "user5", "user3", "user10",
"user1", "user2", "user7", "user6", "user5", "user6", "user6",
"user7", "user1", "user7", "user4", "user4", "user1", "user7",
"user7", "user8", "user4", "user10", "user4", "user8", "user3",
"user9", "user5", "user2", "user5", "user3", "user3", "user9",
"user6", "user10", "user9", "user5", "user3", "user5", "user7",
"user9", "user2", "user2", "user7", "user10", "user7", "user3",
"user1", "user2", "user8", "user6", "user6", "user10", "user4",
"user7", "user4", "user1", "user4", "user2", "user1", "user7",
"user5", "user4", "user4", "user7", "user10"), booking_date = structure(c(17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837), class = "Date"), origin = c("TFS", "MAD", "TFN", "MAD",
"LYS", "LPA", "MAD", "MAD", "AMS", "MAD", "MAD", "MAD", "MAD",
"LPA", "MAD", "MAD", "MAD", "TXL", "MAD", "MAD", "TXL", "TXL",
"TFS", "MAD", "NAP", "TFS", "PMI", "TLS", "TFS", "NTE", "AMS",
"FUE", "TFN", "CPH", "TFN", "MAD", "SVQ", "SCQ", "SVQ", "MAD",
"PMI", "MAD", "PMI", "MAD", "MAD", "MAD", "MAD", "MAD", "SVQ",
"NCE", "CDG", "MAD", "MAD", "MAD", "MAD", "MAD", "TFN", "LGW",
"LGW", "MAD", "TFN", "MAD", "RNS", "AGP", "CDG"), destination = c("MAD",
"DUB", "MAD", "TFS", "MAD", "MAD", "NAP", "TLS", "MAD", "SCQ",
"LPA", "TFN", "TXL", "MAD", "TFN", "SVQ", "ACE", "MAD", "TLS",
"SCQ", "MAD", "MAD", "MAD", "LPA", "MAD", "MAD", "MAD", "MAD",
"MAD", "MAD", "MAD", "MAD", "MAD", "MAD", "MAD", "CAG", "MAD",
"MAD", "MAD", "LPA", "MAD", "CDG", "MAD", "LPA", "TFS", "TFN",
"PMI", "NAP", "MAD", "MAD", "MAD", "LPA", "LGW", "LPA", "CDG",
"SPC", "MAD", "MAD", "MAD", "SCQ", "MAD", "SVQ", "MAD", "MAD",
"MAD"), price = c(24.0499992371, 41.5400009155, 251.199996948,
15.6000003815, 44.0099983215, 73.8499984741, 115.470001221, 69.4400024414,
81.3899993896, 15.2399997711, 41.1199989319, 274.559997559, 150,
29.3199996948, 332.440002441, 94.9100036621, 97.9800033569, 55.1199989319,
81.7399978638, 4.86000013351, 39.0299987793, 53.6300010681, 39.3199996948,
114.559997559, 65.4000015259, 96.2900009155, 41.75, 28.9099998474,
25.1900005341, 14, 50.3100013733, 47.2999992371, 53.8199996948,
91.3199996948, 77.6800003052, 17.8099994659, 96.5400009155, 27.6900005341,
34.1399993896, 34.3300018311, 15.5600004196, 158.449996948, 45.2999992371,
36.8100013733, 50.6800003052, 62.9000015259, 13.7399997711, 49.4399986267,
66.1100006104, 95.4400024414, 41.8400001526, 69.8300018311, 60.6599998474,
34.3300018311, 97.1399993896, 84.3099975586, 25.8099994659, 185.899993896,
34.8899993896, 304.380004883, 15.1300001144, 29.1399993896, 133.529998779,
208.910003662, 152.960006714), sale_channel = c("online", "online",
"travel agency", "online", "online", "online", "travel agency",
"online", "travel agency", "travel agency", "travel agency",
"online", "travel agency", "online", "online", "travel agency",
"online", "travel agency", "travel agency", "call center", "online",
"online", "online", "travel agency", "travel agency", "travel agency",
"online", "online", "online", "online", "online", "travel agency",
"travel agency", "online", "travel agency", "call center", "online",
"travel agency", "online", "online", "travel agency", "travel agency",
"online", "travel agency", "online", "online", "online", "travel agency",
"online", "travel agency", "travel agency", "online", "online",
"online", "online", "online", "online", "online", "online", "travel agency",
"travel agency", "travel agency", "online", "travel agency",
"online"), total = c(876.0300006858, 250.5000009536, 540.3500022886,
420.1500110628, 424.9299983977, 512.1099882118, 1233.9500045785,
316.9000034332, 250.5000009536, 316.9000034332, 316.9000034332,
1233.9500045785, 424.9299983977, 1233.9500045785, 876.0300006858,
876.0300006858, 424.9299983977, 1233.9500045785, 1233.9500045785,
185.52999830291, 876.0300006858, 420.1500110628, 876.0300006858,
185.52999830291, 540.3500022886, 255.6000022889, 250.5000009536,
512.1099882118, 250.5000009536, 540.3500022886, 540.3500022886,
255.6000022889, 316.9000034332, 420.1500110628, 255.6000022889,
250.5000009536, 540.3500022886, 250.5000009536, 1233.9500045785,
255.6000022889, 512.1099882118, 512.1099882118, 1233.9500045785,
420.1500110628, 1233.9500045785, 540.3500022886, 424.9299983977,
512.1099882118, 185.52999830291, 316.9000034332, 316.9000034332,
420.1500110628, 876.0300006858, 1233.9500045785, 876.0300006858,
424.9299983977, 876.0300006858, 512.1099882118, 424.9299983977,
1233.9500045785, 250.5000009536, 876.0300006858, 876.0300006858,
1233.9500045785, 420.1500110628)), row.names = c(3L,
26L, 37L, 42L, 48L, 82L, 89L, 100L, 112L, 124L, 133L, 144L, 148L,
150L, 166L, 167L, 173L, 182L, 217L, 243L, 259L, 285L, 300L, 304L,
306L, 336L, 341L, 366L, 388L, 397L, 413L, 417L, 423L, 452L, 457L,
473L, 474L, 478L, 482L, 483L, 486L, 496L, 499L, 504L, 510L, 513L,
529L, 531L, 558L, 605L, 615L, 628L, 629L, 664L, 669L, 672L, 684L,
722L, 730L, 752L, 766L, 767L, 779L, 804L, 819L), class = c("data.table",
"data.frame"))
Any recommendations on how to approach this?
Edit: Final Solution
df <- df[df$booking_date == '2018-11-02',]
total <- aggregate(price ~ user, df, sum)
top_10 <- total[order(total$price, decreasing = T), ]
top_10[1:5, 1]
That gives me
"user7" "user4" "user3" "user2" "user1"