I have a dataset containing purchases made by different households across different retailers. For eg
Example Dataset
using dput()
structure(list(Household = c(76, 76, 76, 76, 76, 76, 76,
76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 126,
126, 126, 126, 126, 126, 126, 126, 126), Trip ID = c(1032497498L,
1025776063L, 1029419047L, 1030418100L, 1029502602L, 1034153056L,
1027035051L, 1027533991L, 1033515804L, 1032998207L, 1032066227L,
1028192785L, 1033419039L, 1028730296L, 1027388499L, 1030652869L,
1025638394L, 1034032718L, 1034032718L, 1025678520L, 1029490031L,
1029898838L, 1028024134L, 1030324171L, 1031983761L, 1031983761L,
1033767148L, 1023953965L, 1030954113L, 1030954113L, 1027392968L
), purchase_date = structure(c(1L, 2L, 23L, 50L, 52L, 74L, 77L,
94L, 148L, 158L, 176L, 179L, 196L, 211L, 224L, 246L, 271L, 286L,
286L, 309L, 329L, 346L, 2L, 9L, 46L, 46L, 50L, 58L, 66L, 66L,
68L), .Label = c("2012-01-01", "2012-01-02", "2012-01-03", "2012-01-04",
"2012-01-05", "2012-01-06", "2012-01-07", "2012-01-08", "2012-01-09",
"2012-01-10", "2012-01-11", "2012-01-12", "2012-01-13", "2012-01-14",
"2012-01-15", "2012-01-16", "2012-01-17", "2012-01-18", "2012-01-19",
"2012-01-20", "2012-01-21", "2012-01-22", "2012-01-23", "2012-01-24",
"2012-01-25", "2012-01-26", "2012-01-27", "2012-01-28", "2012-01-29",
"2012-01-30", "2012-01-31", "2012-02-01", "2012-02-02", "2012-02-03",
"2012-02-04", "2012-02-05", "2012-02-06", "2012-02-07", "2012-02-08",
"2012-02-09", "2012-02-10", "2012-02-11", "2012-02-12", "2012-02-13",
"2012-02-14", "2012-02-15", "2012-02-16", "2012-02-17", "2012-02-18",
"2012-02-19", "2012-02-20", "2012-02-21", "2012-02-22", "2012-02-23",
"2012-02-24", "2012-02-25", "2012-02-26", "2012-02-27", "2012-02-28",
"2012-02-29", "2012-03-01", "2012-03-02", "2012-03-03", "2012-03-04",
"2012-03-05", "2012-03-06", "2012-03-07", "2012-03-08", "2012-03-09",
"2012-03-10", "2012-03-11", "2012-03-12", "2012-03-13", "2012-03-14",
"2012-03-15", "2012-03-16", "2012-03-17", "2012-03-18", "2012-03-19",
"2012-03-20", "2012-03-21", "2012-03-22", "2012-03-23", "2012-03-24",
"2012-03-25", "2012-03-26", "2012-03-27", "2012-03-28", "2012-03-29",
"2012-03-30", "2012-03-31", "2012-04-01", "2012-04-02", "2012-04-03",
"2012-04-04", "2012-04-05", "2012-04-06", "2012-04-07", "2012-04-08",
"2012-04-09", "2012-04-10", "2012-04-11", "2012-04-12", "2012-04-13",
"2012-04-14", "2012-04-15", "2012-04-16", "2012-04-17", "2012-04-18",
"2012-04-19", "2012-04-20", "2012-04-21", "2012-04-22", "2012-04-23",
"2012-04-24", "2012-04-25", "2012-04-26", "2012-04-27", "2012-04-28",
"2012-04-29", "2012-04-30", "2012-05-01", "2012-05-02", "2012-05-03",
"2012-05-04", "2012-05-05", "2012-05-06", "2012-05-07", "2012-05-08",
"2012-05-09", "2012-05-10", "2012-05-11", "2012-05-12", "2012-05-13",
"2012-05-14", "2012-05-15", "2012-05-16", "2012-05-17", "2012-05-18",
"2012-05-19", "2012-05-20", "2012-05-21", "2012-05-22", "2012-05-23",
"2012-05-24", "2012-05-25", "2012-05-26", "2012-05-27", "2012-05-28",
"2012-05-29", "2012-05-30", "2012-05-31", "2012-06-01", "2012-06-02",
"2012-06-03", "2012-06-04", "2012-06-05", "2012-06-06", "2012-06-07",
"2012-06-08", "2012-06-09", "2012-06-10", "2012-06-11", "2012-06-12",
"2012-06-13", "2012-06-14", "2012-06-15", "2012-06-16", "2012-06-17",
"2012-06-18", "2012-06-19", "2012-06-20", "2012-06-21", "2012-06-22",
"2012-06-23", "2012-06-24", "2012-06-25", "2012-06-26", "2012-06-27",
"2012-06-28", "2012-06-29", "2012-06-30", "2012-07-01", "2012-07-02",
"2012-07-03", "2012-07-04", "2012-07-05", "2012-07-06", "2012-07-07",
"2012-07-08", "2012-07-09", "2012-07-10", "2012-07-11", "2012-07-12",
"2012-07-13", "2012-07-14", "2012-07-15", "2012-07-16", "2012-07-17",
"2012-07-18", "2012-07-19", "2012-07-20", "2012-07-21", "2012-07-22",
"2012-07-23", "2012-07-24", "2012-07-25", "2012-07-26", "2012-07-27",
"2012-07-28", "2012-07-29", "2012-07-30", "2012-07-31", "2012-08-01",
"2012-08-02", "2012-08-03", "2012-08-04", "2012-08-05", "2012-08-06",
"2012-08-07", "2012-08-08", "2012-08-09", "2012-08-10", "2012-08-11",
"2012-08-12", "2012-08-13", "2012-08-14", "2012-08-15", "2012-08-16",
"2012-08-17", "2012-08-18", "2012-08-19", "2012-08-20", "2012-08-21",
"2012-08-22", "2012-08-23", "2012-08-24", "2012-08-25", "2012-08-26",
"2012-08-27", "2012-08-28", "2012-08-29", "2012-08-30", "2012-08-31",
"2012-09-01", "2012-09-02", "2012-09-03", "2012-09-04", "2012-09-05",
"2012-09-06", "2012-09-07", "2012-09-08", "2012-09-09", "2012-09-10",
"2012-09-11", "2012-09-12", "2012-09-13", "2012-09-14", "2012-09-15",
"2012-09-16", "2012-09-17", "2012-09-18", "2012-09-19", "2012-09-20",
"2012-09-21", "2012-09-22", "2012-09-23", "2012-09-24", "2012-09-25",
"2012-09-26", "2012-09-27", "2012-09-28", "2012-09-29", "2012-09-30",
"2012-10-01", "2012-10-02", "2012-10-03", "2012-10-04", "2012-10-05",
"2012-10-06", "2012-10-07", "2012-10-08", "2012-10-09", "2012-10-10",
"2012-10-11", "2012-10-12", "2012-10-13", "2012-10-14", "2012-10-15",
"2012-10-16", "2012-10-17", "2012-10-18", "2012-10-19", "2012-10-20",
"2012-10-21", "2012-10-22", "2012-10-23", "2012-10-24", "2012-10-25",
"2012-10-26", "2012-10-27", "2012-10-28", "2012-10-29", "2012-10-30",
"2012-10-31", "2012-11-01", "2012-11-02", "2012-11-03", "2012-11-04",
"2012-11-05", "2012-11-06", "2012-11-07", "2012-11-08", "2012-11-09",
"2012-11-10", "2012-11-11", "2012-11-12", "2012-11-13", "2012-11-14",
"2012-11-15", "2012-11-16", "2012-11-17", "2012-11-18", "2012-11-19",
"2012-11-20", "2012-11-21", "2012-11-22", "2012-11-23", "2012-11-24",
"2012-11-25", "2012-11-26", "2012-11-27", "2012-11-28", "2012-11-29",
"2012-11-30", "2012-12-01", "2012-12-02", "2012-12-03", "2012-12-04",
"2012-12-05", "2012-12-06", "2012-12-07", "2012-12-08", "2012-12-09",
"2012-12-10", "2012-12-11", "2012-12-12", "2012-12-13", "2012-12-14",
"2012-12-15", "2012-12-16", "2012-12-17", "2012-12-18", "2012-12-19",
"2012-12-20", "2012-12-21", "2012-12-22", "2012-12-23", "2012-12-24",
"2012-12-25", "2012-12-26", "2012-12-27", "2012-12-28", "2012-12-29"
), class = "factor"), Retailer = c(11024, 11024, 11024,
11024, 11024, 11024, 11024, 11024, 11024, 11024, 11024, 11024,
11024, 11024, 11024, 11024, 2353, 11024, 11024, 2353, 11024,
11024, 63882, 650, 89960, 89960, 650, 89960, 89960, 89960, 650
), Brand = structure(c(19L, 74L, 19L, 48L, 19L, 48L,
19L, 19L, 19L, 48L, 48L, 31L, 46L, 31L, 31L, 48L, 74L, 31L, 74L,
19L, 31L, 19L, 48L, 48L, 31L, 31L, 48L, 31L, 31L, 48L, 48L), .Label = c("ABUNDANCE",
"ALPEN", "AMERICAN BREAKFAST ", "ANNIE'S HOMEGROWN", "ARWHD MLS",
"BARBARA'S", "BEAR NAKED", "BEAR RIVER", "BOB'S RED MILL", "BOKOMO COUNTRY",
"BREAKFAST CHOICE", "BREAKFAST ZONE", "BROOKFARM MACADAMIA ",
"BRUGGEN", "BUCKEYE HEROES", "CADIA", "CASCADIAN FARM ", "CHOCOLATE SPOONERS",
"CTL BR", "DORSET", "ENJOY LIFE PERKY'S CRUNCHY FLX", "EREWHON",
"F-FACTOR", "FAMILIA", "FIELD DAY", "FINAX", "FLEURY FLAKES MARC ANDRE",
"FOOD FOR LIFE EZEKIEL 50", "FORRELLI ", "GEFEN KING", "GENERAL MILLS",
"GERONIMO PEYTON HILLIS REDZONE", "GLUCERNA", "GLUTINO", "GOLDEN FOODS",
"GRANVITA PUFFY'S", "GREENBRIER INT INC-NBL CRN FLK", "HEALTH VALLEY",
"HODGSON MILL", "HOME FAVORITE", "HOSTESS TOASTED OATS", "HSP",
"ISABEL'S WAY ", "JASPER", "JUSTIN VERLANDER'S FASTBLL FLK",
"KASHI", "KAY'S NATURALS BETTER BALANCE", "Kellogg", "KIND",
"KOZY SHACK READY GRAINS", "KRETSCHMER", "LADY LIBERTY", "LIEBER'S",
"LIVING INTENTIONS SPRFD CRL", "LOVE GROWN FOODS", "MAIZORO",
"MANISCHEWITZ", "MILL SELECT ", "MOTHER'S", "MULTIGRAIN SPNRS",
"NASH BROTHERS", "NATURE'S PATH", "NESTLE", "NEW ENGLAND NATURALS",
"NEWMAN'S OWN SWEET ENGH WHT PF", "NUTRISYSTEM NOURISH", "NUTRITIOUS LIVING",
"PAMPA ", "PASKESZ CHOCO RIOS", "PBLC LB BR-NBL HY NT TSD OT DM",
"PEACE", "POST", "PROTEIN PLUS CORN FLAKE", "QUAKER", "RALSTON",
"RLSTN", "SALLY'S ", "SCRUMPTIOUS SPELNDID CRLS CBB", "SEITENBACHER MUESLI",
"SIMPLY FIBER", "SKINNER'S RAISIN BRAN", "SORIANA WHOLE BRAN",
"STREIT'S FRUIT & NUT MUESLI", "SUNBELT", "SWAD", "T. ABRAHAM'S",
"TAANUG CORN FLAKES", "TASTY", "TEMMY'S", "THE", "THREE SISTERS",
"TIKISS SWEETENED PUFFD WHL WHT", "TRU ROOTS", "VOTTO'S", "WEETABIX",
"WHOLESOME GOODNESS", "WILD ROOTS ANCIENT ORIGINS", "WONDER CORN FLAKES",
"YOG ACTIVE"), class = "factor")), row.names = c(90609L, 222436L,
90606L, 688592L, 90607L, 688593L, 90605L, 90604L, 90608L, 668330L,
321377L, 567447L, 945385L, 567445L, 567443L, 892854L, 583186L,
567446L, 583185L, 168138L, 567444L, 60086L, 698120L, 698127L,
3933L, 809409L, 698129L, 15286L, 15284L, 698116L, 319455L), class = "data.frame")
I have to find whether a particular household has bought any brand from anywhere and then bought anything from a particular retailer on the next trip.
So, if, first I chose a retailer, 11024. Then for each trip for eg row 17, I would check whether the next trip is made to retailer 11024. If the next trip is made to the retailer 11024, I would mark row 17 as 1. Then for rows 18 & 19 - they are purchases on the same trip, so I would check row 20, which is the next trip. And, since that trip is not to 11024, I would mark the previous trip (both rows 18 & 19) as 0.
So, the resulting output, if I want to check for retailer "11024", would be
Example Output :
In all those cases when the next trip is made to the retailer 11024, the purchase instances of the previous trip is put to 1. And, whenever the next trip is made to some other retailer, it is put to 0. Trips are sorted by purchase date. Now, it is easy when I have only 1 purchase per trip. I do it by the following code using lead()
e1$next_ret = e1$retailer
e1$next_ret = ifelse(e1$next_ret == 6901,1,0)
temp = e1 %>%
group_by(Household) %>%
mutate(dummy = lead(next_ret, default = 0))
However, when there is more than 1 purchase in a single trip, it doesn't work. Please Help