0

I am trying to plot data from the data frame below. I am plotting the MBH variable vs SIG variable.

> dput(head(data))
structure(list(NAME = c("MW", "A1836BCG", "A3565BCG", "Circinus", 
"IC1459", "NGC0221"), HETNAME = c("MW", "PGC049940", "IC4296", 
"ESO097-013", "IC1459", "NGC0221"), SELECTED = c(-3L, 1L, 1L, 
1L, 1L, 1L), RA = c(-1234, 210.424438476562, 204.162719726562, 
213.291305541992, 344.294525146484, 10.6742696762085), DEC = c(-1234, 
-11.6069898605347, -33.9659156799316, -65.338996887207, -36.4624786376953, 
40.8651695251465), DDIST = c(0.00800000037997961, 152.399993896484, 
49.2000007629395, 2.8199999332428, 28.9200000762939, 0.805000007152557
), DISTERR = c(0.00100000004749745, 8.42599964141846, 3.62800002098083, 
0.469999998807907, 3.73900008201599, 0.0299999993294477), MBH = c(6.63299989700317, 
9.57299995422363, 9.11400032043457, 6.05700016021729, 9.39400005340576, 
6.38899993896484), DMBH = c(0.0480000004172325, 0.0610000006854534, 
0.0729999989271164, 0.104999996721745, 0.0790000036358833, 0.193000003695488
), UPPERLIMIT = c(0, 0, 0, 0, 0, 0), SIG = c(2.00449991226196, 
2.458899974823, 2.50729990005493, 2.19580006599426, 2.53150010108948, 
1.8746999502182), DSIG = c(0.0854000002145767, 0.0210999995470047, 
0.0216000005602837, 0.0496999993920326, 0.0220999997109175, 0.017400000244379
), LK = c(-1234, 11.7457618713379, 11.8304710388184, 10.2188024520874, 
11.6403617858887, 9.11000156402588), DLK = c(NaN, 0.0619630999863148, 
0.0749683007597923, 0.117883801460266, 0.10552129894495, 0.0406554006040096
), R50 = c(-1234, 0.894908607006073, 0.977801084518433, 0.176689803600311, 
0.799116313457489, -0.902832627296448), DR50 = c(NaN, 0.0635827034711838, 
0.061846598982811, 0.125314205884933, 0.0776361972093582, 0.0408081002533436
), LKOR = c(-1234, 10.8508558273315, 10.8526668548584, 10.0421104431152, 
10.8412437438965, 10.0128335952759), DLKOR = c(NaN, 0.0388201996684074, 
0.0399360992014408, 0.0895494967699051, 0.0507926009595394, 0.0231896005570889
), C28 = c(-1234, 4.90141534805298, 5.30291795730591, 6.73140382766724, 
5.93526411056519, 4.97699069976807), DC28 = c(NaN, 0.45829850435257, 
0.217240497469902, 0.617741227149963, 0.27682238817215, 0.318778187036514
), LAGN = c(-1234, -8.30424213409424, -9.30169773101807, -11.9125881195068, 
-9.76244735717773, -12.8816366195679), DLAGN = c(NaN, 8.60000000102445e-06, 
3.80000005861802e-06, 9.60000033956021e-06, 6.70000008540228e-06, 
4.80000016978011e-06), CR50LK = c(-1234, 0.00318760005757213, 
0.00392520008608699, 0.0107906004413962, 0.0072910999879241, 
0.0013901999918744), CR50C28 = c(-1234, 0.0170335993170738, 0.008929000236094, 
0.0635076984763145, 0.0151899997144938, 0.0112423002719879), 
    CLKC28 = c(-1234, 0.0133392997086048, 0.00487559987232089, 
    0.0239662993699312, 0.011875499971211, 0.00726540014147758
    ), CLKORC28 = c(-1234, -0.0036943000741303, -0.00405329978093505, 
    -0.0395413003861904, -0.00331449997611344, -0.00397690013051033
    ), TYPE = c("star", "gas", "gas", "maser", "star", "star"
    ), REF = c("ghez08,gillessen09", "dalla-bonta09", "dalla-bonta09", 
    "greenhill03", "cappellari02", "vdb10"), HETMGSSIG = c(-1234, 
    245.100006103516, -1234, -1234, -1234, 63.4000015258789), 
    HETMGSDSIG = c(-1234, 16.6000003814697, -1234, -1234, -1234, 
    12.3999996185303)), row.names = c(NA, 6L), class = "data.frame")

Here is my code:

library(ggplot2)
library(latex2exp)

data <- read.csv('C:\\Users\\Owner\\Desktop\\Stage_2022\\BH_M-sigma_compilation\\Data\\BHcompilation_updated.csv')

methods = c('reverb' = 'black', 'gas' = 'blue', 'stars' = 'red', 'maser' = 'green', 'omitted' = 'grey')
upplims = c('yes' = 6, 'no' = 20)
sizes = c(2, 1)

ggplot(data, aes(SIG, MBH)) +
  geom_point(aes(colour = TYPE, shape = factor(UPPERLIMIT), size = factor(UPPERLIMIT))) +
  scale_color_manual(values = methods) +
  scale_shape_manual(values = upplims) +
  scale_size_manual(values = sizes) + 
  labs(x = 'velocity dispersion (log km/s)', y = TeX(r'($M_{BH}$ log($M_\odot$) )')) +
  theme_bw()

And here is the error I keep encountering:

Warning message:
Removed 308 rows containing missing values (geom_point).

It then plots an empty graph. I looked online and it said that this could be caused by empty data. My data frame contains NaN's, but not on every row and not in the data I am trying to plot. It could also be caused by forcing x and y limits which is also not the case here. Thus I am out of ideas and don't understand what is causing the error.

Thank you very much.

Os GS
  • 69
  • 6
  • Does it work w/o the scale_* lines? I suspect there's a mismatch between, for instance, the `factor` variables going into shape and size and the character/numeric values in your "values". – Jon Spring May 25 '22 at 16:15
  • 3
    It will always be easier to help if you can make your question reproducible. (And when I do that with my questions, it often helps me realize something important.) One simple way would be to include the output of `dput(head(data))` in the body of your question, so that we can load up an exact copy of some of your data. – Jon Spring May 25 '22 at 16:17
  • Images are not a good way for posting data (or code). See [this Meta](https://meta.stackoverflow.com/a/285557/8245406) and a [relevant xkcd](https://xkcd.com/2116/). Can you post sample data in `dput` format? Please [edit the question](https://stackoverflow.com/posts/72381018/edit) with the code you've tried and with the output of `dput(data)`. Or, if it is too big with the output of `dput(head(data, 20))`. – Rui Barradas May 25 '22 at 16:19
  • @JonSpring Indeed, it works without the scale_* lines. Is there another way to obtain the effect desired with the scale_* lines? Sorry I'm fairly new to R and don't know ggplot2 very well yet – Os GS May 25 '22 at 17:38
  • Does this answer help? https://stackoverflow.com/a/26219443/6851825 – Jon Spring May 25 '22 at 17:58
  • 1
    In your example rows the `UPPERLIMIT` value is zero, but you only define shapes for `6` and `20`... – Jon Spring May 25 '22 at 18:00

0 Answers0