-2
segment1<-c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)
segment2<-c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2)
rating  <-c(1,1,2,2,1,3,1,1,2,2,3,2,2,2,1,1,2,3,3,1,1,2,3,2)

data<-data.frame(segment1,segment2,rating)

df1<-aggregate(
    x = data.frame(list("count"=data$rating)),
    by = data.frame(list("segment1"=data$segment1, "segment2"=data$segment2, "rating"=data$rating)),
    FUN = length
)

df2<-aggregate(
    x = data.frame(list("count"=df1$count)),
    by = data.frame(list("segment1"=df1$segment1, "segment2"=df1$segment2)),
    FUN = sum
)

df2$rating1<-0
df2$rating2<-0
df2$rating3<-0

df1

  segment1 segment2 rating count
1        1        1      1     6
2        1        2      1     3
3        1        1      2     7
4        1        2      2     3
5        1        1      3     2
6        1        2      3     3

df2

  segment1 segment2 count rating1 rating2 rating3
1        1        1    15       0       0       0
2        1        2     9       0       0       0

I need this output for df2

  segment1 segment2 count rating1 rating2 rating3
1        1        1    15       6       7       2
2        1        2     9       3       3       3
dirtycode
  • 63
  • 1
  • 10
  • How are score and avg to be calculated? It would also be a good idea to update your desired output above for the `rating_x` columns to give us a better idea of what you'd like. – Jake Kaupp Oct 23 '17 at 14:12
  • please learn how to ask questions properly here. Help us to help you. – Andre Elrico Oct 23 '17 at 14:20
  • 1
    Please provide your example data in a format that is easier to read into R, e.g. using `dput()`, read [here](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) how to improve your questions. – tobiasegli_te Oct 23 '17 at 14:21
  • If your dataset is bigger than your example (i.e. have different values for segment1), this solution will certainly need reworking, but, using `df2$rating1 = df1[df1$rating==1,]$count`, `df2$rating2 = df1[df1$rating==2,]$count`, `df2$rating3 = df1[df1$rating==3,]$count` produces the output you want. – xraynaud Oct 23 '17 at 14:41

3 Answers3

1

You can do this with just a group_by/summarize combination. You do not need to need to create any intermediate data frames and join them. Here is the code:

data %>% 
  group_by(segment1, segment2) %>%
  summarize(
    count = n(),
    rating1 = sum(rating == 1),
    rating2 = sum(rating == 2),
    rating3 = sum(rating == 3)
  )

This gives you this:

# A tibble: 2 x 6
# Groups:   segment1 [?]
  segment1 segment2 count rating1 rating2 rating3
     <dbl>    <dbl> <int>   <int>   <int>   <int>
1        1        1    15       6       7       2
2        1        2     9       3       3       3
tbradley
  • 2,210
  • 11
  • 20
0

My input to get the same dataframe as you :
(Maybe post the way you aggregate the first dataframe to get the second one)

   > df1 <- read.table(text ="segment1 segment2 rating count
                         1         1        1      1 1415
                         2         2        2      1 272
                         3         1        1      2 1970
                         4         2        2      2 363
                         5         1        1      3 8484
                         6         2        2      3 1465
                         7         1        1      4 33619
                         8         2        2      4 5332
                         9         1        1      5 58173
                         0         2        2      5 12031",header=T)
    > df2 <- read.table(text =" segment1  segment2    score    count   avg rating_1 rating_2 rating_3 rating_4 rating_5
                         1        1         1   456148   103661   4.4        0        0        0        0        0
                         2        2         2    86876    19463   4.5        0        0        0        0        0",header=T)
    > df2
    segment1 segment2  score  count avg rating_1 rating_2 rating_3 rating_4 rating_5
    1        1        1 456148 103661 4.4        0        0        0        0        0
    2        2        2  86876  19463 4.5        0        0        0        0        0

My script maybe isn't the best looking one but it will work for any number of segments only if segment1==segment2==segment3... (like in the example you provided, is that always the case ???)

   > n_seg=2 # number of segments
     n_rat=5 # number of ratings
     for(i in 1:n_seg) # segment
      {
      for(j in 1:n_rat) # rating
        {
        df2[i,5+j]=df2[i,5+j]+df1$count[df1$segment1==i & df1$rating==j]
        }
      }

My output :

> df2
  segment1 segment2  score  count avg rating_1 rating_2 rating_3 rating_4 rating_5
1        1        1 456148 103661 4.4     1415     1970     8484    33619    58173
2        2        2  86876  19463 4.5      272      363     1465     5332    12031
Aurélien.AB
  • 85
  • 10
0

Maybe this will work for you

library(dplyr)
library(tidyr)

df3 <- df2 %>%
         select(segment1, segment2, count)

df3
  # segment1 segment2 count
# 1        1        1    15
# 2        1        2     9


df1 %>%
  mutate(rating = paste0("rating", rating)) %>%
  spread(rating, count) %>%
  left_join(., df3, by=c("segment1", "segment2")) %>%
  select(segment1, segment2, count, rating1, rating2, rating3)
  # segment1 segment2 count rating1 rating2 rating3
# 1        1        1    15       6       7       2
# 2        1        2     9       3       3       3

To save the answer

ans <- df1 %>%
        mutate(rating = paste0("rating", rating)) %>%
        spread(rating, count) %>%
        left_join(., df3, by=c("segment1", "segment2")) %>%
        select(segment1, segment2, count, rating1, rating2, rating3)
tbradley
  • 2,210
  • 11
  • 20
CPak
  • 13,260
  • 3
  • 30
  • 48