17

I have the following 3 tables:

# AggData: 73-row data frame (dput-style literal) with one row per path.
#   Path        - comma-separated sequence of "Brand" / "NonBrand" labels
#   click_count - numeric count per path
#   conv_count  - numeric count per path
#   CR          - matches conv_count / click_count on the rows spot-checked
#                 (e.g. 30938 / 1799265 ~ 0.0172)
AggData <- structure(list(Path = c("NonBrand", "Brand", "NonBrand,NonBrand", 
"Brand,Brand", "NonBrand,NonBrand,NonBrand", "Brand,Brand,Brand", 
"Brand,NonBrand", "NonBrand,Brand", "NonBrand,NonBrand,NonBrand,NonBrand", 
"Brand,Brand,Brand,Brand", "NonBrand,NonBrand,NonBrand,NonBrand,NonBrand", 
"Brand,Brand,Brand,Brand,Brand", "Brand,Brand,NonBrand", "NonBrand,Brand,Brand", 
"Brand,NonBrand,NonBrand", "NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand", 
"NonBrand,NonBrand,Brand", "Brand,NonBrand,Brand", "NonBrand,Brand,NonBrand", 
"NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand", 
"Brand,Brand,Brand,Brand,Brand,Brand", "NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand", 
"NonBrand,Brand,Brand,Brand", "NonBrand,NonBrand,NonBrand,Brand", 
"Brand,Brand,Brand,NonBrand", "Brand,Brand,Brand,Brand,Brand,Brand,Brand", 
"Brand,NonBrand,NonBrand,NonBrand", "NonBrand,NonBrand,Brand,Brand", 
"Brand,Brand,NonBrand,NonBrand", "Brand,NonBrand,Brand,Brand", 
"NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand", 
"Brand,Brand,NonBrand,Brand", "NonBrand,Brand,NonBrand,NonBrand", 
"Brand,Brand,Brand,Brand,Brand,Brand,Brand,Brand", "NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand", 
"NonBrand,NonBrand,Brand,NonBrand", "Brand,NonBrand,NonBrand,Brand", 
"NonBrand,Brand,Brand,Brand,Brand", "NonBrand,NonBrand,NonBrand,NonBrand,Brand", 
"Brand,NonBrand,Brand,NonBrand", "NonBrand,Brand,Brand,NonBrand", 
"Brand,Brand,Brand,Brand,NonBrand", "Brand,NonBrand,NonBrand,NonBrand,NonBrand", 
"Brand,Brand,Brand,Brand,Brand,Brand,Brand,Brand,Brand", "NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand", 
"Brand,NonBrand,Brand,Brand,Brand", "NonBrand,Brand,NonBrand,Brand", 
"Brand,Brand,Brand,NonBrand,Brand", "NonBrand,NonBrand,Brand,Brand,Brand", 
"NonBrand,NonBrand,NonBrand,Brand,Brand", "Brand,Brand,NonBrand,Brand,Brand", 
"Brand,Brand,Brand,NonBrand,NonBrand", "Brand,Brand,Brand,Brand,Brand,Brand,Brand,Brand,Brand,Brand", 
"NonBrand,NonBrand,NonBrand,Brand,NonBrand", "Brand,Brand,NonBrand,NonBrand,NonBrand", 
"NonBrand,Brand,Brand,Brand,Brand,Brand", "NonBrand,Brand,NonBrand,NonBrand,NonBrand", 
"NonBrand,NonBrand,Brand,NonBrand,NonBrand", "NonBrand,NonBrand,NonBrand,NonBrand,NonBrand,Brand", 
"Brand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand", "Brand,Brand,Brand,Brand,Brand,NonBrand", 
"NonBrand,Brand,Brand,NonBrand,NonBrand", "Brand,NonBrand,NonBrand,Brand,Brand", 
"NonBrand,NonBrand,NonBrand,NonBrand,Brand,Brand", "NonBrand,NonBrand,Brand,Brand,Brand,Brand", 
"NonBrand,NonBrand,NonBrand,NonBrand,Brand,NonBrand", "NonBrand,NonBrand,Brand,NonBrand,Brand", 
"Brand,NonBrand,NonBrand,Brand,NonBrand", "NonBrand,NonBrand,NonBrand,Brand,Brand,Brand", 
"NonBrand,Brand,Brand,NonBrand,Brand", "Brand,NonBrand,NonBrand,NonBrand,NonBrand,Brand", 
"Brand,Brand,NonBrand,NonBrand,NonBrand,NonBrand,NonBrand", "Brand,Brand,Brand,Brand,NonBrand,NonBrand,NonBrand"
), click_count = c(1799265, 874478, 198657, 128159, 45728, 30172, 
20520, 17815, 16718, 9479, 6554, 3722, 3561, 3408, 3391, 3366, 
3256, 2526, 1846, 1708, 1682, 1013, 951, 899, 881, 782, 780, 
703, 642, 625, 615, 601, 453, 442, 414, 407, 362, 343, 313, 284, 
281, 281, 271, 269, 268, 229, 223, 218, 215, 212, 204, 162, 161, 
158, 155, 145, 132, 130, 115, 103, 102, 86, 77, 77, 72, 68, 68, 
67, 58, 52, 32, 18, 18), conv_count = c(30938, 19652, 7401, 3803, 
2014, 1072, 1084, 981, 652, 379, 230, 166, 205, 246, 254, 93, 
239, 104, 112, 51, 76, 23, 66, 81, 55, 29, 62, 57, 50, 37, 17, 
33, 38, 17, 8, 41, 33, 30, 24, 16, 26, 18, 16, 17, 7, 21, 10, 
8, 27, 23, 11, 13, 6, 15, 14, 16, 8, 10, 6, 6, 11, 11, 8, 9, 
8, 8, 9, 7, 7, 6, 6, 6, 7), CR = c(0.0171947989873643, 0.0224728352228415, 
0.0372551684561833, 0.0296740767328085, 0.0440430370888733, 0.0355296301206417, 
0.0528265107212476, 0.0550659556553466, 0.0389998803684651, 0.0399831205823399, 
0.0350930729325603, 0.0445996775926921, 0.057568098848638, 0.0721830985915493, 
0.0749041580654674, 0.0276292335115865, 0.0734029484029484, 0.0411718131433096, 
0.0606717226435536, 0.0298594847775176, 0.0451843043995244, 0.0227048371174729, 
0.0694006309148265, 0.0901001112347052, 0.0624290578887628, 0.0370843989769821, 
0.0794871794871795, 0.0810810810810811, 0.0778816199376947, 0.0592, 
0.0276422764227642, 0.0549084858569052, 0.0838852097130243, 0.0384615384615385, 
0.0193236714975845, 0.100737100737101, 0.0911602209944751, 0.0874635568513119, 
0.0766773162939297, 0.0563380281690141, 0.0925266903914591, 0.0640569395017794, 
0.0590405904059041, 0.0631970260223048, 0.0261194029850746, 0.091703056768559, 
0.0448430493273543, 0.036697247706422, 0.125581395348837, 0.108490566037736, 
0.053921568627451, 0.0802469135802469, 0.0372670807453416, 0.0949367088607595, 
0.0903225806451613, 0.110344827586207, 0.0606060606060606, 0.0769230769230769, 
0.0521739130434783, 0.058252427184466, 0.107843137254902, 0.127906976744186, 
0.103896103896104, 0.116883116883117, 0.111111111111111, 0.117647058823529, 
0.132352941176471, 0.104477611940299, 0.120689655172414, 0.115384615384615, 
0.1875, 0.333333333333333, 0.388888888888889)), .Names = c("Path", 
"click_count", "conv_count", "CR"), row.names = c(NA, -73L), class = "data.frame")

another one here:

# breakVector: 2-row data frame.
#   breakVector - factor whose ONLY level is "NonBrand" (both rows hold it)
#   CR          - numeric weights
# The single-level factor is what later clashes with FinalTable's two-level
# factor when the two are compared with `==`.
breakVector <- structure(list(breakVector = structure(c(1L, 1L), .Label = "NonBrand", class = "factor"), 
    CR = c(0.461541302855402, 0.538458697144598)), .Names = c("breakVector", 
"CR"), row.names = c(NA, -2L), class = "data.frame")

and:

# FinalTable: 2-row result table.
#   autribution_category - factor with levels "Brand", "NonBrand"
#                          (row 1 = NonBrand, row 2 = Brand)
#   attributed_result    - numeric accumulator, initialised to 0
FinalTable <- structure(list(autribution_category = structure(c(2L, 1L), .Label = c("Brand", 
"NonBrand"), class = "factor"), attributed_result = c(0, 0)), .Names = c("autribution_category", 
"attributed_result"), row.names = 1:2, class = "data.frame")

when I run the following command:

# Compares the category in row 2 of FinalTable with the category in row 1 of
# breakVector. Both operands are factors; because their level sets differ
# ("Brand", "NonBrand" vs. just "NonBrand"), `==` raises the error shown below.
if (FinalTable [2,1] == breakVector[1,1]) {
     # On a match, add breakVector's weight (row 1, column 2) times the third
     # path's conversion count to the running result for row 2.
     FinalTable$attributed_result[2] <- FinalTable$attributed_result[2] + 
     breakVector[1,2] * AggData$conv_count[3];
     # NOTE(review): `break` is only valid inside a loop; presumably this
     # snippet was lifted from a loop that is not shown here.
     break}

I get the following error:

Error in Ops.factor(FinalTable[2, 1], breakVector[1, 1]) : 
  level sets of factors are different

This is pretty weird: both values that I'm comparing are factors, so I don't see any reason why R can't compare the two levels.

nico
  • 50,859
  • 17
  • 87
  • 112
Yehoshaphat Schellekens
  • 2,305
  • 2
  • 22
  • 49
  • Check the levels of FinalTable[,1] and breakVector[,1]. breakVector[,1] <- factor(breakVector[,1], levels=levels(FinalTable[,1])) #and I guess you will not get the error. – akrun Jul 06 '14 at 10:46

2 Answers2

25

FinalTable[2,1] and breakVector[1,1] do not have the same levels:

> FinalTable[2,1]
[1] Brand
Levels: Brand NonBrand
> breakVector[1,1]
[1] NonBrand
Levels: NonBrand

This is easily fixed by using

# Rebuild the column as a factor with both levels spelled out, so that it has
# the same level set as FinalTable[,1] and `==` between them is allowed.
breakVector[,1] <- factor(breakVector[,1], levels=c("Brand", "NonBrand"))

or, more generally

# Same fix without hard-coding: take the level set directly from FinalTable[,1].
breakVector[,1] <- factor(breakVector[,1], levels=levels(FinalTable[,1]))
nico
  • 50,859
  • 17
  • 87
  • 112
  • 2
    I don't understand why I have to have the same number of levels in both "FinalTable" and "breakVector", as long as R is comparing a specific value in FinalTable[2,1] and breakVector[1,1]? – Yehoshaphat Schellekens Jul 06 '14 at 10:51
  • 7
    @YehoshaphatSchellekens: because you are comparing factors and not strings. From `?factor`: *Only == and != can be used for factors: **a factor can only be compared to another factor with an identical set of levels (not necessarily in the same ordering) or to a character vector**. Ordered factors are compared in the same way, but the general dispatch mechanism precludes comparing ordered and unordered factors.* – nico Jul 06 '14 at 10:54
  • Thanks @nico, this is actually disappointing, since I was looking to avoid the use of `as.character` and I want this algorithm to be applicable to different sets of levels – Yehoshaphat Schellekens Jul 06 '14 at 11:06
  • @Yehoshaphat Schellekens: I added a more general solution, so you don't have to specify the levels manually. – nico Jul 06 '14 at 11:11
  • I didn't know what "levels" are, but found an explanation, which follows, in [another entry](https://stackoverflow.com/questions/20314318/what-are-r-levels): "A factor is a categorical variable that can take only one of a fixed, finite set of possibilities. Those possible categories are the levels." – fraxture Mar 02 '16 at 10:21
  • @ Nico, could you please add the missing square bracket ']' at the end of your last line breakVector[,1] <- factor(breakVector[,1], levels=levels(FinalTable[,1'HERE')). Thank you – HelloWorld Mar 31 '17 at 07:15
2

Perhaps it would be better to compare both variables as strings:

# Compare the two category labels as plain character strings. Unlike a
# factor-to-factor `==`, this does not require both columns to share the same
# set of levels.
if (as.character(FinalTable[2, 1]) == as.character(breakVector[1, 1])) {
  # On a match, add breakVector's weight (row 1, column 2) times the third
  # path's conversion count to the running result for row 2.
  FinalTable[2, "attributed_result"] <- FinalTable[2, "attributed_result"] +
    breakVector[1, 2] * AggData[3, "conv_count"]
  # NOTE(review): `break` is only valid inside a loop; presumably this snippet
  # sits inside a loop that is not shown here.
  break
}