0

I have a dataset of about 300 projects in which all variables are categorical except the amount spent on each project. The categorical variables are factors with many levels; for instance, one of the variables has 151 levels. I am struggling to decide which visualization approach to use. The variables of interest are institution, impact area, region and programs.

For now I have tried to plot the number of projects within each region and which institutions are involved, but the catch is that a project can involve different institutions from different regions. I also tried mapping with leaflet, but again the outcome is not very informative. Below is a snapshot of the data.

[![enter image description here][1]][1]


# Horizontal bar chart of counts per region, largest region first.
# Needs dplyr, ggplot2 and ggthemes (for theme_clean()) to be attached.
# NOTE(review): grouping by Region *and* ID makes n() the number of rows per
# project within a region, so the stacked bar height is the row count per
# region, not the number of distinct projects. Group by Region alone and use
# n_distinct(ID) if distinct projects are what should be shown.
data_lon_lat |>
  group_by(Region, ID) |>
  summarise(projectsPerRegion = n(), .groups = "drop") |>
  ggplot(
    # reorder() sorts regions by the mean of the negated count; the former
    # `decreasing = T` argument was silently ignored and has been dropped.
    aes(x = reorder(Region, -projectsPerRegion), y = projectsPerRegion)
  ) +
  geom_col() +
  theme_clean() +
  coord_flip()


I am not convinced about the outcome

zephryl
  • 14,633
  • 3
  • 11
  • 30
Lai Jatta
  • 13
  • 3
  • Hello, could you please provide sample data in a copy-pasteable format, [not an image](https://meta.stackoverflow.com/a/285557/17303805), eg by sharing the result of `dput()` for a subset of your data? Also have a look at [How to make a great R reproducible example](https://stackoverflow.com/q/5963269/17303805). – zephryl Mar 08 '23 at 13:43
  • What specifically are you trying to understand or communicate with the visualization? – zephryl Mar 08 '23 at 13:45
  • I want to understand the distribution of projects by organisation and where the organisations are located. – Lai Jatta Mar 08 '23 at 14:17
  • can one paste a table here? – Lai Jatta Mar 08 '23 at 14:20

2 Answers

0

Here is a possible way:

library(ggthemes)
library(RColorBrewer)
library(tidyverse)

# Stacked horizontal bars: one bar per region, one coloured segment per
# project ID, each segment labelled with its row count.
df |>
  count(Region, ID, name = "projectsPerRegion") |>
  ggplot(
    aes(
      x = projectsPerRegion,
      # fct_infreq() orders regions by how often they occur in the counted
      # table; fct_rev() flips that so the most frequent ends up on top.
      y = fct_rev(fct_infreq(Region)),
      fill = as.factor(ID)
    )
  ) +
  geom_col() +
  geom_text(
    aes(label = projectsPerRegion),
    size = 3,
    position = position_stack(vjust = 0.5)
  ) +
  scale_fill_brewer(palette = "Dark2") +
  labs(x = "N", y = "Region", fill = "ID") +
  theme_clean()

data:

# Example data in dput() form: a 40-row tibble with columns ID, Institution,
# Lat, Lon and Region. Lat is numeric, but Lon is stored as character strings
# with a decimal comma (e.g. "17,99440716") and must be converted before it
# can be used as a numeric coordinate. The expression is not assigned here;
# bind it to a name (e.g. `df <- structure(...)`) before running the plots.
structure(list(ID = c(3660, 3660, 3660, 3660, 3660, 3660, 3660, 
4499, 4499, 4499, 4499, 4499, 4499, 4499, 4499, 4499, 4499, 4499, 
4499, 2695, 2695, 2695, 2695, 2695, 2695, 2695, 2695, 2680, 2680, 
2680, 2680, 2684, 2684, 2684, 2684, 2699, 2699, 2699, 2699, 2699
), Institution = c("Kl", "GU", "RSK", "RS", "VS", "RO", "UU", 
"Kl", "RO", "RSK", "UU", "GU", "RO", "VG", "LU", "RS", "RU", 
"RV", "LU", "LU", "Kl", "RSK", "LaB", "AS", "RS", "BLA", "KTS", 
"Kl", "MAB", "RS", "RSK", "DM", "MAb", "SLT", "VG", "UU", "Kl", 
"RU", "UU", "LU"), Lat = c(59.43518273, 57.71045289, 56.02491266, 
59.43518273, 57.71045289, 58.41060411, 59.85981925, 59.43518273, 
58.41060411, 56.02491266, 59.85981925, 57.71045289, 59.27472205, 
57.71045289, 58.41060411, 59.43518273, 59.85981925, 63.82660418, 
56.02491266, 56.02491266, 59.43518273, 56.02491266, 59.85981925, 
56.02491266, 59.43518273, 59.43518273, 59.43518273, 59.43518273, 
59.43518273, 59.43518273, 56.02491266, 59.43518273, 59.43518273, 
59.43518273, 57.71045289, 59.85981925, 59.43518273, 59.85981925, 
63.82660418, 58.41060411), Lon = c("17,99440716", "11,97004898", 
"14,15027361", "17,99440716", "11,97004898", "15,62072862", "17,63887641", 
"17,99440716", "15,62072862", "14,15027361", "17,63887641", "11,97004898", 
"15,21412616", "11,97004898", "15,62072862", "17,99440716", "17,63887641", 
"20,26702294", "14,15027361", "14,15027361", "17,99440716", "14,15027361", 
"17,63887641", "14,15027361", "17,99440716", "17,99440716", "17,99440716", 
"17,99440716", "17,99440716", "17,99440716", "14,15027361", "17,99440716", 
"17,99440716", "17,99440716", "11,97004898", "17,63887641", "17,99440716", 
"17,63887641", "20,26702294", "15,62072862"), Region = c("eastland", 
"west coast", "southcoast", "eastland", "west coast", "east lake", 
"upland", "east coast", "east lake", "southcoast", "upland", 
"west coast", "middle country", "west coast", "east lake", "eastland", 
"upland", "down dykes", "southcoast", "southcoast", "eastland", 
"southcoast", "upland", "southcoast", "eastland", "eastland", 
"eastland", "eastland", "eastland", "eastland", "southcoast", 
"eastland", "eastland", "eastland", "west coast", "upland", "eastland", 
"upland", "down dykes", "east lake")), row.names = c(NA, -40L
), class = c("tbl_df", "tbl", "data.frame"))

enter image description here

TarJae
  • 72,363
  • 6
  • 19
  • 66
0

You could plot the institutions on a map, sized by the number of projects in that region:

library(dplyr)
library(ggplot2)
library(ggrepel)  

# Country outlines used as the base map (map_data() needs the maps package).
scandinavia <- map_data("world") %>% 
  filter(region %in% c(
    "Sweden", "Norway", "Denmark", "Germany", "Poland", "Finland", "Estonia", 
    "Latvia", "Lithuania", "Russia"
  ))

# One row per institution/location, with `n` = number of rows it appears in.
# Uses the question's data frame name (`data_lon_lat`).
n_projects <- data_lon_lat %>% 
  mutate(
    # The example data stores Lon as text with a decimal comma
    # ("17,99440716"); convert it to numeric so it can share a continuous
    # axis with the map polygons.
    Lon = as.numeric(sub(",", ".", as.character(Lon), fixed = TRUE)),
    # "east coast" occurs once in the example data, at the same coordinates
    # as "eastland"; it is treated as a data-entry error and recoded.
    Region = replace(Region, Region == "east coast", "eastland")
  ) %>% 
  count(Region, Lat, Lon, Institution)

# Institutions as repelled labels (text size scaled by n) over the base map,
# with a point marking each location, both filled by region.
ggplot(n_projects, aes(Lon, Lat)) +
  geom_polygon(
    data = scandinavia, 
    aes(long, lat, group = group), 
    color = "gray50", 
    fill = "gray90"
  ) +
  geom_label_repel(
    aes(label = Institution, fill = Region, size = n),
    show.legend = FALSE,
    force = 2,
    max.overlaps = 30,
    min.segment.length = 0,
    seed = 150  # fixed seed keeps the label placement reproducible
  ) +
  geom_point(aes(fill = Region), size = 3, shape = 21) +
  scale_size(range = c(3, 8)) +
  coord_quickmap(xlim = c(10, 22), ylim = c(55, 64)) +
  theme_void() +
  theme(panel.background = element_rect(fill = "#D0E0FF", color = NA))

Note - in the example data, Region had a value of "east coast" that appeared only once and had the same coordinates as "eastland"; I assumed this was an error and changed it to "eastland".

zephryl
  • 14,633
  • 3
  • 11
  • 30