I have a dataset containing purchases made by different households across different retailers. For eg
Using dput()
structure(list(household_code = c(76, 76, 76, 76, 76, 76, 76,
76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 126,
126, 126, 126, 126, 126, 126, 126, 126), trip_code_uc = c(1032497498L,
1025776063L, 1029419047L, 1030418100L, 1029502602L, 1034153056L,
1027035051L, 1027533991L, 1033515804L, 1032998207L, 1032066227L,
1028192785L, 1033419039L, 1028730296L, 1027388499L, 1030652869L,
1025638394L, 1034032718L, 1034032718L, 1025678520L, 1029490031L,
1029898838L, 1028024134L, 1030324171L, 1031983761L, 1031983761L,
1033767148L, 1023953965L, 1030954113L, 1030954113L, 1027392968L
), purchase_date = structure(c(1L, 2L, 23L, 50L, 52L, 74L, 77L,
94L, 148L, 158L, 176L, 179L, 196L, 211L, 224L, 246L, 271L, 286L,
286L, 309L, 329L, 346L, 2L, 9L, 46L, 46L, 50L, 58L, 66L, 66L,
68L), .Label = c("2012-01-01", "2012-01-02", "2012-01-03", "2012-01-04",
"2012-01-05", "2012-01-06", "2012-01-07", "2012-01-08", "2012-01-09",
"2012-01-10", "2012-01-11", "2012-01-12", "2012-01-13", "2012-01-14",
"2012-01-15", "2012-01-16", "2012-01-17", "2012-01-18", "2012-01-19",
"2012-01-20", "2012-01-21", "2012-01-22", "2012-01-23", "2012-01-24",
"2012-01-25", "2012-01-26", "2012-01-27", "2012-01-28", "2012-01-29",
"2012-01-30", "2012-01-31", "2012-02-01", "2012-02-02", "2012-02-03",
"2012-02-04", "2012-02-05", "2012-02-06", "2012-02-07", "2012-02-08",
"2012-02-09", "2012-02-10", "2012-02-11", "2012-02-12", "2012-02-13",
"2012-02-14", "2012-02-15", "2012-02-16", "2012-02-17", "2012-02-18",
"2012-02-19", "2012-02-20", "2012-02-21", "2012-02-22", "2012-02-23",
"2012-02-24", "2012-02-25", "2012-02-26", "2012-02-27", "2012-02-28",
"2012-02-29", "2012-03-01", "2012-03-02", "2012-03-03", "2012-03-04",
"2012-03-05", "2012-03-06", "2012-03-07", "2012-03-08", "2012-03-09",
"2012-03-10", "2012-03-11", "2012-03-12", "2012-03-13", "2012-03-14",
"2012-03-15", "2012-03-16", "2012-03-17", "2012-03-18", "2012-03-19",
"2012-03-20", "2012-03-21", "2012-03-22", "2012-03-23", "2012-03-24",
"2012-03-25", "2012-03-26", "2012-03-27", "2012-03-28", "2012-03-29",
"2012-03-30", "2012-03-31", "2012-04-01", "2012-04-02", "2012-04-03",
"2012-04-04", "2012-04-05", "2012-04-06", "2012-04-07", "2012-04-08",
"2012-04-09", "2012-04-10", "2012-04-11", "2012-04-12", "2012-04-13",
"2012-04-14", "2012-04-15", "2012-04-16", "2012-04-17", "2012-04-18",
"2012-04-19", "2012-04-20", "2012-04-21", "2012-04-22", "2012-04-23",
"2012-04-24", "2012-04-25", "2012-04-26", "2012-04-27", "2012-04-28",
"2012-04-29", "2012-04-30", "2012-05-01", "2012-05-02", "2012-05-03",
"2012-05-04", "2012-05-05", "2012-05-06", "2012-05-07", "2012-05-08",
"2012-05-09", "2012-05-10", "2012-05-11", "2012-05-12", "2012-05-13",
"2012-05-14", "2012-05-15", "2012-05-16", "2012-05-17", "2012-05-18",
"2012-05-19", "2012-05-20", "2012-05-21", "2012-05-22", "2012-05-23",
"2012-05-24", "2012-05-25", "2012-05-26", "2012-05-27", "2012-05-28",
"2012-05-29", "2012-05-30", "2012-05-31", "2012-06-01", "2012-06-02",
"2012-06-03", "2012-06-04", "2012-06-05", "2012-06-06", "2012-06-07",
"2012-06-08", "2012-06-09", "2012-06-10", "2012-06-11", "2012-06-12",
"2012-06-13", "2012-06-14", "2012-06-15", "2012-06-16", "2012-06-17",
"2012-06-18", "2012-06-19", "2012-06-20", "2012-06-21", "2012-06-22",
"2012-06-23", "2012-06-24", "2012-06-25", "2012-06-26", "2012-06-27",
"2012-06-28", "2012-06-29", "2012-06-30", "2012-07-01", "2012-07-02",
"2012-07-03", "2012-07-04", "2012-07-05", "2012-07-06", "2012-07-07",
"2012-07-08", "2012-07-09", "2012-07-10", "2012-07-11", "2012-07-12",
"2012-07-13", "2012-07-14", "2012-07-15", "2012-07-16", "2012-07-17",
"2012-07-18", "2012-07-19", "2012-07-20", "2012-07-21", "2012-07-22",
"2012-07-23", "2012-07-24", "2012-07-25", "2012-07-26", "2012-07-27",
"2012-07-28", "2012-07-29", "2012-07-30", "2012-07-31", "2012-08-01",
"2012-08-02", "2012-08-03", "2012-08-04", "2012-08-05", "2012-08-06",
"2012-08-07", "2012-08-08", "2012-08-09", "2012-08-10", "2012-08-11",
"2012-08-12", "2012-08-13", "2012-08-14", "2012-08-15", "2012-08-16",
"2012-08-17", "2012-08-18", "2012-08-19", "2012-08-20", "2012-08-21",
"2012-08-22", "2012-08-23", "2012-08-24", "2012-08-25", "2012-08-26",
"2012-08-27", "2012-08-28", "2012-08-29", "2012-08-30", "2012-08-31",
"2012-09-01", "2012-09-02", "2012-09-03", "2012-09-04", "2012-09-05",
"2012-09-06", "2012-09-07", "2012-09-08", "2012-09-09", "2012-09-10",
"2012-09-11", "2012-09-12", "2012-09-13", "2012-09-14", "2012-09-15",
"2012-09-16", "2012-09-17", "2012-09-18", "2012-09-19", "2012-09-20",
"2012-09-21", "2012-09-22", "2012-09-23", "2012-09-24", "2012-09-25",
"2012-09-26", "2012-09-27", "2012-09-28", "2012-09-29", "2012-09-30",
"2012-10-01", "2012-10-02", "2012-10-03", "2012-10-04", "2012-10-05",
"2012-10-06", "2012-10-07", "2012-10-08", "2012-10-09", "2012-10-10",
"2012-10-11", "2012-10-12", "2012-10-13", "2012-10-14", "2012-10-15",
"2012-10-16", "2012-10-17", "2012-10-18", "2012-10-19", "2012-10-20",
"2012-10-21", "2012-10-22", "2012-10-23", "2012-10-24", "2012-10-25",
"2012-10-26", "2012-10-27", "2012-10-28", "2012-10-29", "2012-10-30",
"2012-10-31", "2012-11-01", "2012-11-02", "2012-11-03", "2012-11-04",
"2012-11-05", "2012-11-06", "2012-11-07", "2012-11-08", "2012-11-09",
"2012-11-10", "2012-11-11", "2012-11-12", "2012-11-13", "2012-11-14",
"2012-11-15", "2012-11-16", "2012-11-17", "2012-11-18", "2012-11-19",
"2012-11-20", "2012-11-21", "2012-11-22", "2012-11-23", "2012-11-24",
"2012-11-25", "2012-11-26", "2012-11-27", "2012-11-28", "2012-11-29",
"2012-11-30", "2012-12-01", "2012-12-02", "2012-12-03", "2012-12-04",
"2012-12-05", "2012-12-06", "2012-12-07", "2012-12-08", "2012-12-09",
"2012-12-10", "2012-12-11", "2012-12-12", "2012-12-13", "2012-12-14",
"2012-12-15", "2012-12-16", "2012-12-17", "2012-12-18", "2012-12-19",
"2012-12-20", "2012-12-21", "2012-12-22", "2012-12-23", "2012-12-24",
"2012-12-25", "2012-12-26", "2012-12-27", "2012-12-28", "2012-12-29"
), class = "factor"), retailer_code = c(11024, 11024, 11024,
11024, 11024, 11024, 11024, 11024, 11024, 11024, 11024, 11024,
11024, 11024, 11024, 11024, 2353, 11024, 11024, 2353, 11024,
11024, 63882, 650, 89960, 89960, 650, 89960, 89960, 89960, 650
), Overall_Brand = structure(c(19L, 74L, 19L, 48L, 19L, 48L,
19L, 19L, 19L, 48L, 48L, 31L, 46L, 31L, 31L, 48L, 74L, 31L, 74L,
19L, 31L, 19L, 48L, 48L, 31L, 31L, 48L, 31L, 31L, 48L, 48L), .Label = c("ABUNDANCE",
"ALPEN", "AMERICAN BREAKFAST ", "ANNIE'S HOMEGROWN", "ARWHD MLS",
"BARBARA'S", "BEAR NAKED", "BEAR RIVER", "BOB'S RED MILL", "BOKOMO COUNTRY",
"BREAKFAST CHOICE", "BREAKFAST ZONE", "BROOKFARM MACADAMIA ",
"BRUGGEN", "BUCKEYE HEROES", "CADIA", "CASCADIAN FARM ", "CHOCOLATE SPOONERS",
"CTL BR", "DORSET", "ENJOY LIFE PERKY'S CRUNCHY FLX", "EREWHON",
"F-FACTOR", "FAMILIA", "FIELD DAY", "FINAX", "FLEURY FLAKES MARC ANDRE",
"FOOD FOR LIFE EZEKIEL 50", "FORRELLI ", "GEFEN KING", "GENERAL MILLS",
"GERONIMO PEYTON HILLIS REDZONE", "GLUCERNA", "GLUTINO", "GOLDEN FOODS",
"GRANVITA PUFFY'S", "GREENBRIER INT INC-NBL CRN FLK", "HEALTH VALLEY",
"HODGSON MILL", "HOME FAVORITE", "HOSTESS TOASTED OATS", "HSP",
"ISABEL'S WAY ", "JASPER", "JUSTIN VERLANDER'S FASTBLL FLK",
"KASHI", "KAY'S NATURALS BETTER BALANCE", "Kellogg", "KIND",
"KOZY SHACK READY GRAINS", "KRETSCHMER", "LADY LIBERTY", "LIEBER'S",
"LIVING INTENTIONS SPRFD CRL", "LOVE GROWN FOODS", "MAIZORO",
"MANISCHEWITZ", "MILL SELECT ", "MOTHER'S", "MULTIGRAIN SPNRS",
"NASH BROTHERS", "NATURE'S PATH", "NESTLE", "NEW ENGLAND NATURALS",
"NEWMAN'S OWN SWEET ENGH WHT PF", "NUTRISYSTEM NOURISH", "NUTRITIOUS LIVING",
"PAMPA ", "PASKESZ CHOCO RIOS", "PBLC LB BR-NBL HY NT TSD OT DM",
"PEACE", "POST", "PROTEIN PLUS CORN FLAKE", "QUAKER", "RALSTON",
"RLSTN", "SALLY'S ", "SCRUMPTIOUS SPELNDID CRLS CBB", "SEITENBACHER MUESLI",
"SIMPLY FIBER", "SKINNER'S RAISIN BRAN", "SORIANA WHOLE BRAN",
"STREIT'S FRUIT & NUT MUESLI", "SUNBELT", "SWAD", "T. ABRAHAM'S",
"TAANUG CORN FLAKES", "TASTY", "TEMMY'S", "THE", "THREE SISTERS",
"TIKISS SWEETENED PUFFD WHL WHT", "TRU ROOTS", "VOTTO'S", "WEETABIX",
"WHOLESOME GOODNESS", "WILD ROOTS ANCIENT ORIGINS", "WONDER CORN FLAKES",
"YOG ACTIVE"), class = "factor")), row.names = c(90609L, 222436L,
90606L, 688592L, 90607L, 688593L, 90605L, 90604L, 90608L, 668330L,
321377L, 567447L, 945385L, 567445L, 567443L, 892854L, 583186L,
567446L, 583185L, 168138L, 567444L, 60086L, 698120L, 698127L,
3933L, 809409L, 698129L, 15286L, 15284L, 698116L, 319455L), class = "data.frame")
I have a separate variable for Household, trip ID for a particular purchase instance, retailer, and Brand they purchased. One household may purchase more than 1 brand in 1 trip. I wanted to calculate the repeat purchases in 2 consecutive trips. For example, if a household purchase General Mills and Kellogg both in trip 1 and only General mill in 2nd trip, for general mills, it will be a repeat.
Example output:
Now, it is easy when I have only 1 purchase per trip. I do it by the following code using rle()
e1 = transform(e1, brand_last_dum = ave(as.character(Brand), rleid(Household, Brand), FUN = seq_along))
However, when there is more than 1 purchase, it doesn't work. Data is arranged by the purchase date and more than 1 purchase of the same brand in a trip maybe considered as a different purchase for calculating repeat in the next row. Please Help