Descriptive table of a dataset

Question

I have the following dataset:

d <- structure(list(country = structure(c(9L, 9L, 5L, 8L, 4L, 3L, 
4L, 5L, 1L, 1L, 6L, 5L, 3L, 2L, 7L, 5L, 4L, 7L, 6L, 2L, 1L, 9L, 
6L, 9L, 9L, 7L, 9L, 7L, 6L, 1L, 2L, 9L, 4L, 9L, 5L, 9L, 1L, 7L, 
6L, 5L, 5L, 2L, 1L, 4L, 2L, 9L, 1L, 5L, 2L, 2L, 1L, 9L, 1L, 2L, 
7L, 7L, 7L, 6L, 4L, 6L, 6L, 3L, 3L, 2L, 6L, 5L, 2L, 8L, 1L, 3L, 
3L, 6L, 8L, 4L, 5L, 3L, 8L, 5L, 5L, 1L, 9L, 3L, 1L, 6L, 6L, 5L, 
9L, 3L, 5L, 6L, 9L, 4L, 7L, 9L, 3L, 2L, 7L), levels = c("Austria", 
"Germany", "Sweden", "Spain", "Italy", "France", "Denmark", "Switzerland", 
"Belgium"), class = "factor"), sex = structure(c(1L, 2L, 2L, 
1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 
2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 
2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 
1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 
2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 
2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L), levels = c("Male", 
"Female"), class = "factor"), educ = structure(c(1L, 2L, 2L, 
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 3L, 3L, 3L, 1L, 1L, 2L, 3L, 
1L, 3L, 3L, 2L, 1L, 1L, 3L, 2L, 2L, 3L, 2L, 3L, 3L, 1L, 3L, 2L, 
3L, 1L, 2L, 2L, 1L, 1L, 3L, 2L, 1L, 1L, 2L, 1L, 3L, 3L, 2L, 1L, 
2L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 1L, 1L, 2L, 
1L, 2L, 1L, 3L, 3L, 2L, 1L, 3L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 
1L, 1L, 2L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 2L, 1L, 3L, 3L), levels = c("A", 
"B", "C"), class = c("ordered", "factor")), make_ends = structure(c(3L, 
3L, 5L, 6L, 5L, 5L, 5L, 4L, 6L, 5L, 5L, 4L, 4L, 6L, 6L, 5L, 6L, 
5L, 3L, 5L, 4L, 6L, 4L, 6L, 5L, 5L, 3L, 6L, 6L, 5L, 5L, 3L, 4L, 
6L, 3L, 5L, 6L, 6L, 4L, 3L, 4L, 5L, 3L, 5L, 5L, 5L, 4L, 5L, 6L, 
4L, 5L, 6L, 5L, 6L, 6L, 4L, 6L, 5L, 6L, 5L, 6L, 5L, 6L, 4L, 5L, 
5L, 5L, 3L, 6L, 6L, 5L, 6L, 6L, 4L, 4L, 6L, 6L, 4L, 6L, 4L, 6L, 
5L, 3L, 3L, 4L, 6L, 6L, 5L, 4L, 6L, 5L, 6L, 1L, 6L, 6L, 3L, 5L
), levels = c("1", "2", "3", "4", "5", "6"), class = "factor"), 
    civil_status = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 
    2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 
    2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 
    1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L), levels = c("0", 
    "1"), class = "factor"), sphus = structure(c(3L, 1L, 3L, 
    4L, 2L, 3L, 4L, 5L, 4L, 2L, 1L, 4L, 3L, 3L, 4L, 3L, 4L, 1L, 
    2L, 3L, 4L, 1L, 3L, 5L, 3L, 5L, 3L, 4L, 2L, 4L, 1L, 4L, 2L, 
    3L, 3L, 3L, 3L, 4L, 1L, 3L, 3L, 3L, 5L, 4L, 4L, 2L, 3L, 3L, 
    2L, 3L, 3L, 3L, 1L, 4L, 3L, 2L, 2L, 1L, 2L, 3L, 2L, 3L, 2L, 
    5L, 4L, 5L, 3L, 1L, 3L, 3L, 1L, 1L, 2L, 4L, 3L, 2L, 4L, 2L, 
    3L, 3L, 3L, 1L, 4L, 4L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 4L, 3L, 
    4L, 3L, 3L, 2L), levels = c("1", "2", "3", "4", "5"), class = "factor"), 
    numb_children = c(1L, 2L, 2L, 1L, 2L, 3L, 3L, 1L, 1L, 3L, 
    4L, 3L, 1L, 2L, 3L, 2L, 3L, 2L, 4L, 1L, 1L, 2L, 2L, 1L, 2L, 
    1L, 3L, 4L, 2L, 4L, 2L, 2L, 2L, 1L, 3L, 3L, 2L, 2L, 2L, 3L, 
    1L, 2L, 4L, 2L, 1L, 3L, 1L, 2L, 2L, 2L, 3L, 3L, 1L, 4L, 2L, 
    1L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 
    4L, 4L, 3L, 1L, 1L, 3L, 1L, 2L, 2L, 3L, 2L, 5L, 5L, 6L, 3L, 
    2L, 3L, 1L, 3L, 4L, 4L, 2L, 3L, 4L, 3L, 1L, 4L)), row.names = c(NA, 
-97L), class = "data.frame")

What I am trying to do starting from this dataframe is to create a summary table of my dataset. The table should have , each column with the amount of individuals for each country (column) and each row represent a category of the respective variable. To do that I created the next function:

ftab <- function(pais){
      u1 <- datasamp |> filter(country==pais) |> group_by(sex) |> count(sex)
      u2 <- datasamp |> filter(country==pais) |> group_by(educ) |> count(educ)
      u3 <- datasamp |> filter(country==pais) |> group_by(make_ends) |> count(make_ends)
      u4 <- datasamp |> filter(country==pais) |> group_by(civil_status) |> count(civil_status)
      u5 <- datasamp |> filter(country==pais) |> group_by(sphus) |> count(sphus)
      u6 <- datasamp |> filter(country==pais) |> group_by(numb_children) |> count(numb_children)
      res <- rbind(u1, u2, u3, u4, u5, head(u6))
      return(res)
    }

this function receives as a parameter a country

 ftab("Spain")

and I get the next output:

structure(list(sex = structure(c(1L, 2L, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA), levels = c("Male", "Female"), class = "factor"), 
    n = c(3L, 6L, 8L, 1L, 2L, 4L, 3L, 2L, 7L, 3L, 6L, 1L, 4L, 
    4L), educ = structure(c(NA, NA, 1L, 2L, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA), levels = c("A", "B", "C"), class = c("ordered", 
    "factor")), make_ends = structure(c(NA, NA, NA, NA, 4L, 5L, 
    6L, NA, NA, NA, NA, NA, NA, NA), levels = c("1", "2", "3", 
    "4", "5", "6"), class = "factor"), civil_status = structure(c(NA, 
    NA, NA, NA, NA, NA, NA, 1L, 2L, NA, NA, NA, NA, NA), levels = c("0", 
    "1"), class = "factor"), sphus = structure(c(NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, 2L, 4L, NA, NA, NA), levels = c("1", 
    "2", "3", "4", "5"), class = "factor"), numb_children = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, 2L, 3L)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -14L), groups = structure(list(
    sex = structure(c(1L, 2L, NA), levels = c("Male", "Female"
    ), class = "factor"), .rows = structure(list(1L, 2L, 3:14), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -3L), .drop = TRUE))

It is like this:

enter image description here

However, what I need is to get only one column with the counting of the respective country and the and other column with the categories of the variables. The idea was with this function to create different columns one for each country and at the end join the columns and add a column with the names of the categories of the variable. But I can't do that I don't know how to get the desired output from the function. Maybe there is a easiest way to do this.

Welcome to SO. Please can you show a small example of your desired output? This helps create a reproducible example (https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) — Paul Stafford Allen, Aug 22 '23 at 12:50

score 2 · Answer 1 · answered Aug 22 '23 at 13:18

something along these lines (d being your data)?

library(dplyr)
library(tidyr)

d |> 
  distinct() |> ## remove duplicates
  mutate(across(sex:numb_children, ~ as.character(.x))) |>
  pivot_longer(sex:numb_children, names_to = 'indicator') |>
  count(country, indicator, value) |>
  pivot_wider(names_from = country, values_from = n) |>
  arrange(indicator, value)

+ # A tibble: 23 x 11
   indicator value Austria Germany Sweden Spain Italy France Denmark Switzerland
   <chr>     <chr>   <int>   <int>  <int> <int> <int>  <int>   <int>       <int>
 1 civil_st~ 0           3       4      1     2     1      1       3           2
 2 civil_st~ 1          10       7      9     7    14     12       7           2
 3 educ      A           3       2      5     8    10      4       1           1
 4 educ      B           8       4      2     1     3      4       5           3
## ...

P.S. note that your example data contains duplicates, hence the distinct

I removed the duplicates from OP's input, to make it (slightly) smaller. Feel free to roll back if you think that's not appropriate — Mark, Aug 22 '23 at 13:45

Descriptive table of a dataset

1 Answers1