I have a medium-sized data set with 113K rows × 14 columns:
Month District Age Gender Education Disability Religion Occupation JobSeekers
1 2020-01 Dan U17 Male None None Jewish Unprofessional workers 2
2 2020-01 Dan U17 Male None None Muslims Sales and costumer service 1
3 2020-01 Dan U17 Female None None Other Undefined 1
4 2020-01 Dan 18-24 Male None None Jewish Production and construction 1
5 2020-01 Dan 18-24 Male None None Jewish Academic degree 1
6 2020-01 Dan 18-24 Male None None Jewish Practical engineers and technicians 1
GMI ACU NACU NewSeekers NewFiredSeekers
1 0 0 2 0 0
2 0 0 1 0 0
3 0 0 1 0 0
4 0 0 1 0 0
5 0 0 1 0 0
6 0 0 1 1 1
I grouped it into smaller tables that contain only the data I need, using:
# Total every count column per District x Month x Gender x Occupation.
# across() sums all six measures, so the result carries GMI, ACU, NACU,
# NewSeekers and NewFiredSeekers alongside JobSeekers.
# .groups = "drop" returns an ungrouped tibble; without it summarise() only
# peels off the last grouping level and the result stays grouped by
# District, Month and Gender, which silently changes how later verbs behave.
Sorta <- datac %>%
  group_by(District, Month, Gender, Occupation) %>%
  summarise(
    across(
      c(JobSeekers, GMI, ACU, NACU, NewSeekers, NewFiredSeekers),
      sum
    ),
    .groups = "drop"
  )
The outcome:
District Month Gender Occupation JobSeekers GMI ACU NACU NewSeekers NewFiredSeekers
<chr> <chr> <chr> <chr> <int> <int> <int> <int> <int> <int>
1 Dan 2020-01 Female Academic degree 4560 120 2622 1818 863 597
2 Dan 2020-01 Female Agriculture, forestry and fi~ 14 9 2 3 1 0
3 Dan 2020-01 Female Machine Operators and drivers 57 6 10 41 9 6
4 Dan 2020-01 Female Managers 1913 36 969 908 390 310
5 Dan 2020-01 Female Officials and clerks 1702 120 263 1319 344 243
6 Dan 2020-01 Female Practical engineers and tech~ 2847 66 1125 1656 671 504
Then I tried to plot data from this table to show trends, such as the number of unemployed people by district and a timeline showing how unemployment grows over time. Each time, whichever way I tried, I got various errors about the character columns, so I'm asking for your help with plotting character and numeric values together.
Here's the structure:
# dput() of the first 20 rows of the summarised table: one district ("Dan"),
# one month ("2020-01"), ten occupations for each of the two genders.
structure(
  list(
    District = rep("Dan", 20L),
    Month = rep("2020-01", 20L),
    # Rows 1-10 are Female, rows 11-20 are Male.
    Gender = rep(c("Female", "Male"), each = 10L),
    # The same ten occupations, in the same order, for each gender.
    Occupation = rep(
      c(
        "Academic degree",
        "Agriculture, forestry and fishing",
        "Machine Operators and drivers",
        "Managers",
        "Officials and clerks",
        "Practical engineers and technicians",
        "Production and construction",
        "Sales and costumer service",
        "Undefined",
        "Unprofessional workers"
      ),
      times = 2L
    ),
    JobSeekers = c(
      4560L, 14L, 57L, 1913L, 1702L, 2847L, 480L, 3086L, 893L, 1985L,
      2605L, 44L, 1276L, 2236L, 247L, 2249L, 1258L, 2233L, 924L, 2462L
    ),
    GMI = c(
      120L, 9L, 6L, 36L, 120L, 66L, 47L, 396L, 155L, 998L,
      119L, 26L, 240L, 101L, 30L, 111L, 322L, 359L, 309L, 1124L
    ),
    ACU = c(
      2622L, 2L, 10L, 969L, 263L, 1125L, 99L, 392L, 259L, 52L,
      1549L, 1L, 49L, 797L, 44L, 829L, 102L, 202L, 124L, 58L
    ),
    NACU = c(
      1818L, 3L, 41L, 908L, 1319L, 1656L, 334L, 2298L, 479L, 935L,
      937L, 17L, 987L, 1338L, 173L, 1309L, 834L, 1672L, 491L, 1280L
    ),
    NewSeekers = c(
      863L, 1L, 9L, 390L, 344L, 671L, 83L, 622L, 201L, 325L,
      550L, 5L, 239L, 469L, 53L, 525L, 233L, 432L, 212L, 324L
    ),
    NewFiredSeekers = c(
      597L, 0L, 6L, 310L, 243L, 504L, 60L, 375L, 123L, 150L,
      447L, 4L, 196L, 405L, 41L, 429L, 162L, 316L, 124L, 190L
    )
  ),
  row.names = c(NA, -20L),
  class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
  # Grouping metadata: the table is still grouped by District, Month and
  # Gender, giving two groups that cover rows 1-10 and 11-20.
  groups = structure(
    list(
      District = rep("Dan", 2L),
      Month = rep("2020-01", 2L),
      Gender = c("Female", "Male"),
      .rows = list(1:10, 11:20)
    ),
    row.names = c(NA, -2L),
    class = c("tbl_df", "tbl", "data.frame"),
    .drop = TRUE
  )
)
My second question is how I can make a map of 'hotspot' areas of unemployed people / occupations / ages.
Please help!
Update:
# Total every count column per District x Gender x Occupation x Month.
# across() applies sum() to each listed column and keeps the column names,
# replacing six hand-written sum() calls that are easy to get out of sync.
# .groups = "drop" returns an ungrouped tibble; without it the result stays
# grouped by District, Gender and Occupation.
dist.oc.mo <- Cdata %>%
  group_by(District, Gender, Occupation, Month) %>%
  summarise(
    across(
      c(JobSeekers, GMI, ACU, NACU, NewSeekers, NewFiredSeekers),
      sum
    ),
    .groups = "drop"
  )
# Bar chart of job seekers per occupation, with one dodged bar per district.
#
# dist.oc.mo holds one row per District x Gender x Occupation x Month, but the
# plot only maps Occupation (x) and District (fill). With identity bars and
# position = "dodge", all rows sharing an Occupation/District pair are drawn on
# top of each other rather than added up, so each visible bar shows roughly the
# largest single row, not the total. Collapse to one row per bar first.
# NOTE(review): this sums over Gender and Month. If JobSeekers is a monthly
# head count, a person registered in both months is counted twice; add
# facet_wrap(~ Month) and keep Month in group_by() if per-month levels are
# what you want.
p <- dist.oc.mo %>%
  group_by(District, Occupation) %>%
  summarise(JobSeekers = sum(JobSeekers), .groups = "drop") %>%
  ggplot() +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity").
  geom_col(
    mapping = aes(x = Occupation, y = JobSeekers, fill = factor(District)),
    position = "dodge", alpha = 0.7
  ) +
  labs(
    title = "March-April Jobseekers",
    subtitle = "This barchart describes unemployment trend for March and April sorted by jobseekers number and occupation type",
    fill = "District",
    x = "Occupation", y = "JobSeekers"
  ) +
  # Wrap long occupation names so the axis labels do not overlap.
  scale_x_discrete(labels = wrap_format(10)) +
  scale_fill_brewer(palette = "Set1") +
  theme(legend.position = "bottom")
p
[https://i.stack.imgur.com/v0R0V.jpg][1]
Regards, Moshe