0

Suppose I have this dataframe:

df <- data.frame(town = c("town1","town2","town3"),
             totpop = c(1700, 1500, 1200),
             groupAreceived = c(10, 5, 2),
             groupBreceived = c(9, 4, 1),
             groupCreceived = c(8, 3, 0))

that looks like:

df
   town totpop groupAreceived groupBreceived groupCreceived
1 town1   1700             10              9              8
2 town2   1500              5              4              3
3 town3   1200              2              1              0

I want to create a new dataframe with four columns that replicates the town, group, and amount received with the number of rows based on the total population.

I can do this manually for one town:

town1.a <- data.frame(matrix(ncol = 4, nrow = df$totpop[[1]]))
x <- c("totpop", "group", "received", "town")
colnames(town1.a) <- x

totpop <- c(rep(1, df$totpop[[1]]))
group <- c(rep("A", df$totpop[[1]]))
received <- c(rep(df$groupAreceived[[1]], df$totpop[[1]]))
town <- c(rep("Town1", df$totpop[[1]]))

town1.a$totpop <- totpop
town1.a$group <- group
town1.a$received <- received
town1.a$town <- town

 head(town1.a)
  totpop group received  town
1      1     A       10 Town1
2      1     A       10 Town1
3      1     A       10 Town1
4      1     A       10 Town1
5      1     A       10 Town1
6      1     A       10 Town1

This dataframe will have 1700 rows.

How can I automate this code/ use it in a for loop so that it will do the same thing but for each group?

Thanks in advance.

  • `with(reshape(df,matrix(3:ncol(df),1), dir="long",v.names = "Group")->a,\`rownames<-\`(a[rep(1:nrow(a),totpop),],NULL))` – Onyambu Oct 11 '19 at 00:15

2 Answers2

0
library(tidyverse)
df %>%
  # Reshape into longer form
  pivot_longer(cols = -c(town, totpop), names_to = "group", values_to = "received") %>%
  # remove "group" and "received" from the group column
  mutate(group = group %>% str_remove_all("group|received")) %>%
  # make as many copies of each row as "totpop"
  uncount(totpop) %>%
  mutate(totpop = 1)

# A tibble: 13,200 x 4
   town  group received totpop
   <fct> <chr>    <dbl>  <dbl>
 1 town1 A           10      1
 2 town1 A           10      1
 3 town1 A           10      1
 4 town1 A           10      1
 5 town1 A           10      1
 6 town1 A           10      1
 7 town1 A           10      1
 8 town1 A           10      1
 9 town1 A           10      1
10 town1 A           10      1
# ... with 13,190 more rows

The output here has 13,200 rows, corresponding to (1700+1500+1200) in totpop, times three columns.

Jon Spring
  • 55,165
  • 4
  • 35
  • 53
0

In base R, you can use reshape to convert to long format and then replicate the rows.

long_df <- reshape(df, direction = "long", idvar = c("town", "totpop"), 
            varying = list(names(df)[3:5]), v.names = "Group")
output <- long_df[rep(seq_len(nrow(long_df)), long_df$totpop), ]
row.names(output) <- NULL
head(output)

#   town totpop time Group
#1 town1   1700    1    10
#2 town1   1700    1    10
#3 town1   1700    1    10
#4 town1   1700    1    10
#5 town1   1700    1    10
#6 town1   1700    1    10
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213