0

I am attempting to plot bias of certain estimates in a simulation. The estimates are equal in all rows, but ggplot2 plots the difference as non-zero. I was expecting no blue bars in the graph below (you can ignore the other boxes):

enter image description here

However, if you calculate the differences between the estimated and the simulated values directly with

df$mrdoc2_g2_hat-df$mrdoc2_g2

The output is zero throughout.

Am I hitting a bug in the package (unlikely) or is it a rounding problem?

library(ggplot2)
library(stringr)
library(dplyr)
library(tidyr)

# Read the simulation results; the script expects the `*_hat` estimate columns
# and the reference columns (g1, g2, b1, b2, b3, ra, rc, ...) used below.
df <- read.csv("df.csv")

# Mean bias (estimate minus reference value) per model and parameter.
# Output columns are named `<model>_<parameter>`; the model prefix is split off
# further down and used for colouring and faceting.
#
# NOTE(review): the g* and b* estimates are compared against sqrt(g*) / sqrt(b*),
# not against the raw columns. A manual check such as
# `df$mrdoc2_g2_hat - df$mrdoc2_g2` uses a different reference column, so it can
# be all zeros while the bias computed here is not -- confirm which reference
# is the intended one.
bias <- df %>%
  summarise(
    # mrdoc2 model
    mrdoc2_g1 = mean(mrdoc2_g1_hat - sqrt(g1)),
    mrdoc2_g2 = mean(mrdoc2_g2_hat - sqrt(g2)),
    mrdoc2_b1 = mean(mrdoc2_b1_hat - sqrt(b1)),
    mrdoc2_b3 = mean(mrdoc2_b3_hat - sqrt(b3)),
    mrdoc2_ra = mean(mrdoc2_ra_hat - ra),
    mrdoc2_rc = mean(mrdoc2_rc_hat - rc),
    mrdoc2_re = mean(mrdoc2_re_hat - re),
    mrdoc2_rf = mean(mrdoc2_rf_hat - rf),
    mrdoc2_ey = mean(mrdoc2_ey_hat - ey),
    mrdoc2_ex = mean(mrdoc2_ex_hat - ex),
    mrdoc2_ax = mean(mrdoc2_ax_hat - ax),
    mrdoc2_ay = mean(mrdoc2_ay_hat - ay),
    mrdoc2_cy = mean(mrdoc2_cy_hat - cy),
    mrdoc2_cx = mean(mrdoc2_cx_hat - cx),
    # mrdoc model
    mrdoc_g1 = mean(mrdoc_g1_hat - sqrt(g1)),
    mrdoc_b1 = mean(mrdoc_b1_hat - sqrt(b1)),
    mrdoc_b2 = mean(mrdoc_b2_hat - sqrt(b2)),
    mrdoc_ra = mean(mrdoc_ra_hat - ra),
    mrdoc_rc = mean(mrdoc_rc_hat - rc),
    mrdoc_ey = mean(mrdoc_ey_hat - ey),
    mrdoc_ex = mean(mrdoc_ex_hat - ex),
    mrdoc_ax = mean(mrdoc_ax_hat - ax),
    mrdoc_ay = mean(mrdoc_ay_hat - ay),
    mrdoc_cx = mean(mrdoc_cx_hat - cx),
    mrdoc_cy = mean(mrdoc_cy_hat - cy),
    # doc model
    doc_g1 = mean(doc_g1_hat - sqrt(g1)),
    doc_ra = mean(doc_ra_hat - ra),
    doc_rc = mean(doc_rc_hat - rc),
    doc_ey = mean(doc_ey_hat - ey),
    doc_ex = mean(doc_ex_hat - ex),
    doc_cx = mean(doc_cx_hat - cx),
    doc_cy = mean(doc_cy_hat - cy),
    doc_ax = mean(doc_ax_hat - ax),
    doc_ay = mean(doc_ay_hat - ay)
  ) %>%
  # summarise() returns exactly the bias columns defined above, so all of them
  # are pivoted instead of repeating the full list by hand.
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  # Split "<model>_<parameter>" at the first underscore only. `group` is
  # computed before `key` is overwritten, so both use the original key.
  mutate(
    group = str_split_fixed(key, "_", 2)[, 1],
    key = str_split_fixed(key, "_", 2)[, 2]
  )

# Bar plot of the mean bias per parameter, one facet and fill colour per model.
# Add `filter(value != 0)` to `bias` to hide exactly-zero bars, or
# `scales = "free_y"` to facet_wrap() to give every model its own y axis.
bias %>%
  ggplot(aes(x = key, y = value, fill = group)) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity").
  geom_col(position = "dodge") +
  facet_wrap(~group) +
  theme(legend.position = "bottom") +
  labs(
    x = "Parameters",
    y = "Diff estimated - parameter value",
    title = "Only DoC parameters"
  )

and data (sorry, it is too long for dput as it is from a simulation):

https://paste.linux.chat/?d44333cecd4f1fc2#kDKkGHRJNsTpA3WgrFSK9HtBsKPXbwYVUAUKFs4rbX2

lf_araujo
  • 1,991
  • 2
  • 16
  • 39
  • 4
    Would it be possible for you to make this a **minimal** reproducible example? There is a lot of code here. If you reduce it to the smallest possible example, you will likely get better, faster help. See [here](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) for some tips – jpsmith Feb 28 '23 at 01:07
  • 1
    Probably a rounding problem. Maybe try adding a `mutate( across(where(is.numeric), ~ round(., 10)))` to the data pipeline – Ric Feb 28 '23 at 01:56

0 Answers0