1

For a sample dataframe:

library(data.table)
# Sample data: one row per region, 20 rows in total (3 AT, 3 BE, 14 DE).
# Every column, including `N` and `result`, is stored as character.
df <- structure(
  list(
    country = rep(c("AT", "BE", "DE"), times = c(3L, 3L, 14L)),
    level = rep("1", 20L),
    region = c(
      "AT2", "AT1", "AT3",
      "BE2", "BE1", "BE3",
      "DE4", "DE3", "DE9", "DE7", "DE1", "DEE", "DEG",
      "DE2", "DED", "DEB", "DEA", "DEF", "DE6", "DE8"
    ),
    N = c(
      "348", "707", "648",
      "952", "143", "584",
      "171", "155", "234", "176", "302", "144", "148",
      "386", "257", "126", "463", "74", "44", "119"
    ),
    result = c(
      "24.43", "26.59", "20.37",
      "23.53", "16.78", "25.51",
      "46.2", "43.23", "41.03", "37.5", "33.44", "58.33", "47.97",
      "34.46", "39.69", "31.75", "36.93", "43.24", "36.36", "43.7"
    )
  ),
  class = c("data.table", "data.frame"),
  row.names = c(NA, -20L)
)

I am using the following code to produce a summary table (here `variable.regions` is my data table, which has the same structure as the sample `df` above):

# Per-country summary of `result`: one output row per country, sorted by
# country. The sample data stores `result` as character strings, and
# min()/max() on strings compare lexicographically (e.g. "9.5" sorts above
# "10.2"), so the values are converted to numeric before aggregating.
variable.country <- setDT(variable.regions)[
  order(country),
  list(
    min_result = min(as.numeric(result)),
    max_result = max(as.numeric(result)),
    level = level[1L]  # first `level` value found within the country
  ),
  by = country
]

I simply want to add another variable to this data table that tells me how many regions (i.e. rows) there are in each country (e.g. AT has 3). How would I get `length` or `dim` to work under these circumstances?

Thanks.

Matt Dowle
  • 58,872
  • 22
  • 166
  • 224
KT_1
  • 8,194
  • 15
  • 56
  • 68

1 Answer

6

We can use `.N` to get the number of rows for each 'country':

# Per-country summary with a row count. `.N` is data.table's built-in symbol
# for the number of rows in the current `by` group, so `len` is the number of
# regions (rows) per country.
# `result` is stored as character in the sample data; convert it to numeric so
# min()/max() compare numbers rather than strings.
setDT(variable.regions)[
  order(country),
  list(
    min_result = min(as.numeric(result)),
    len = .N,
    max_result = max(as.numeric(result)),
    level = level[1L]  # first `level` value found within the country
  ),
  by = country
]
akrun
  • 874,273
  • 37
  • 540
  • 662