I am making a histogram of data using base r and would like to show percentages/proportions, not density, on the y-axis. I realise that there are good arguments against doing so, but in this particular case this is what is required. I have adapted the code suggested in the accepted answer here:
Histogram does not show densities
However, the last line of my adapted version of this code creates a y-axis with unexpected values. I assume this is because I have failed to adapt the last line of code correctly (see reprex below; unfortunately I cannot post the resulting histogram because I don't yet have 10 reputation).
axis(2, at=p2*sum(p1$counts)/100, labels=p2)
I haven’t been able to work out the logic of what is happening. I am pretty new to r and this is my first reprex - constructive criticisms of its execution welcome!
library(tidyverse)
library(ggplot2)
library(scales)
df <- data.frame(
FactorScoreO = c(86.83,81.17,87.17,76.67,84.17,
78.17,82.83,71.67,83,68.33,86.83,78.5,75.17,48,89.33,
92.17,84.33,73.33,99.17,74.33,96.5,73.83,74.33,79,40.5,
26.83,83.67,78.5,95.83,100,64.17,85.83,85.17,56.67,
80.83,96.17,95.83,82.5,79.5,79,91.33,83.33,74.67,74,
82.17,76.83,91.33,93.83,89.17,88.17,65.83,84.33,80,
97.5,58.67,80.67,81,78.83,84.83,80.33,64.83,72,
76.67,96.67,75.83,82.67,66.67,71.83,51.67,73.33,43.67,72,
86,87.5,88.33,74.17,70.67,59.67,94.17,79,65.33,
81.67,54.33,66.33,74.33,68,68.33,73.5,87.5,73.17,85.33,
78.5,72.5,96.67,72.83,85.33,65.17,88.17,83.33,59.17,
96.5,92,60.17,93.33,82.83,86.67,96.67,83.5,88.33,81,
80.83,49.17,76.67,61.83,82.5,76.67,82.17,62.5,83.33,
91.67,85.5,56.33,89.83,61.67,75.83,96.67,98.33,
92.17,100,68.5),
FactorScoreM = c(0,0.25,13.75,35.25,0,43,40.5,15,
15.25,38.5,2.5,0,0,50.25,34,19.25,21,16.5,0,16,0,
26,48.75,30,16.75,55.25,62,40,43.75,0,30.75,6.25,
2.25,49.5,13.75,2,0,6.25,11.25,56.75,52.75,2.5,
26.75,36,43.5,33.25,87.5,3.5,17.5,2.5,23,0,5,25,
52.75,29.25,33,28.75,21.75,11.5,25,48.75,0.25,39.25,15,
30.5,35,50.75,32.5,0,54.75,15.25,6,0,20.25,63.75,
60.5,3,0,11.5,71.25,3.75,53,35.75,51.25,25.75,30.75,
0,0,10.5,46.25,19.5,16.25,1.5,13,23.25,44.75,3.25,
12.5,45,0,0,37,28.25,40.5,5,50,45,0,16.25,7.5,
22,22.5,52,45.5,25,8.5,15.25,11,47.75,0.25,23.5,
18.75,32.5,22.25,35.5,0,64.5,0,0),
Profile = as.factor(c("1","2","3","3",
"2","1","1","3","3","1","3","2","3","1",
"3","3","1","3","3","3","2","1","1","1",
"1","1","1","1","2","2","1","2","1","1",
"1","2","3","1","1","1","1","2","3","1","1",
"1","1","3","3","3","2","2","3","2","1",
"1","2","3","2","3","2","1","2","3","1",
"3","1","1","3","3","1","2","3","2","1",
"1","1","1","3","3","1","3","1","1","1",
"1","1","3","2","1","3","3","3","3","3","3",
"1","3","1","1","3","3","2","2","2","1",
"3","1","3","2","2","1","3","1","3","2",
"3","1","1","2","2","3","3","1","1","1",
"3","1","2","2"))
)
p1 <- hist(df$FactorScoreO[df$Profile=="1"],main="Profile 1", xlab='',ylab='',cex.lab = 2)
p2 <- pretty(p1$counts/sum(p1$counts)*100 )
axis(2, at=p2*sum(p1$counts)/100, labels=p2)
Created on 2021-04-03 by the reprex package (v0.3.0)