0

I am trying to run and plot a model pretty much in the same way the following link does but I get a completely different output : https://rpubs.com/phle/r_tutorial_regression_discontinuity_design

This is my dataframe:

structure(list(Date = structure(c(15365, 15359, 15363, 15367, 
15379, 15380, 15380, 15387, 15390, 15393, 15381, 15397, 15394, 
15410, 15412, 15407, 15403, 15406, 15424, 15415, 15417, 15420, 
15424, 15425, 15425, 15460, 15436, 15444, 15449, 15456, 15459, 
15460, 15483, 15464, 15466, 15484, 15499, 15492, 15492, 15493, 
15495, 15496, 15496, 15498, 15513, 15514, 15500, 15502, 15503, 
15504, 15506, 15507, 15507, 15508, 15509, 15509, 15510, 15512, 
15550, 15540, 15541, 15544, 15570, 15559, 15562, 15568, 15578, 
15603, 15584, 15591, 15603, 15605, 15612, 15623, 15614, 15635, 
15624, 15630, 15632, 15635, 15639, 15651, 15647, 15648, 15673, 
15658, 15653, 15654, 15657, 15667, 15659, 15665, 15666, 15667, 
15669, 15673, 15678, 15675, 15688, 15679, 15681, 15684, 15703, 
15694, 15696, 15702, 15724, 15709, 15714, 15717), class = "Date"), 
    Treatment = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0", 
    "1"), class = "factor"), Y = c(3.93182563272433, 4.52178857704904, 
    4.12713438504509, 3.85014760171006, 2.30258509299405, 2.89037175789616, 
    4.34380542185368, 3.93182563272433, 4.21950770517611, 2.99573227355399, 
    3.09104245335832, 4.04305126783455, 4.45434729625351, 4.04305126783455, 
    4.07753744390572, 4.00733318523247, 3.61091791264422, 3.2188758248682, 
    4.15888308335967, 4.81218435537242, 4.21950770517611, 4.40671924726425, 
    4.21950770517611, 2.77258872223978, 3.78418963391826, 4.20469261939097, 
    4.39444915467244, 4.18965474202643, 4.51085950651685, 3.63758615972639, 
    4.48863636973214, 2.77258872223978, 0, 4.04305126783455, 
    4.06044301054642, 1.09861228866811, 3.98898404656427, 1.09861228866811, 
    2.07944154167984, 1.38629436111989, 2.63905732961526, 3.09104245335832, 
    3.13549421592915, 4.72738781871234, 3.78418963391826, 4.17438726989564, 
    3.40119738166216, 3.76120011569356, 3.2188758248682, 2.89037175789616, 
    3.63758615972639, 3.3322045101752, 4.26267987704132, 3.78418963391826, 
    3.13549421592915, 1.79175946922805, 3.85014760171006, 3.78418963391826, 
    4.76217393479776, 4.78749174278205, 4.07753744390572, 4.30406509320417, 
    4.63472898822964, 4.59511985013459, 4.48863636973214, 3.61091791264422, 
    4.56434819146784, 3.76120011569356, 3.71357206670431, 4.44265125649032, 
    1.94591014905531, 3.97029191355212, 4.47733681447821, 3.68887945411394, 
    4.55387689160054, 4.20469261939097, 4.45434729625351, 4.35670882668959, 
    4.02535169073515, 4.24849524204936, 4.44265125649032, 3.43398720448515, 
    4.26267987704132, 2.39789527279837, 2.77258872223978, 4.0943445622221, 
    3.89182029811063, 4.35670882668959, 2.19722457733622, 4.02535169073515, 
    3.36729582998647, 3.78418963391826, 4.61512051684126, 1.79175946922805, 
    3.04452243772342, 3.93182563272433, 3.17805383034795, 4.18965474202643, 
    4.55387689160054, 1.6094379124341, 4.27666611901606, 3.71357206670431, 
    3.49650756146648, 4.17438726989564, 3.29583686600433, 2.99573227355399, 
    4.52178857704904, 4.20469261939097, 4.47733681447821, 4.60517018598809
    )), row.names = c(288L, 292L, 293L, 296L, 297L, 298L, 299L, 
300L, 301L, 302L, 303L, 304L, 305L, 307L, 308L, 309L, 310L, 311L, 
312L, 313L, 314L, 315L, 316L, 318L, 319L, 320L, 321L, 322L, 323L, 
324L, 325L, 326L, 327L, 329L, 330L, 331L, 332L, 333L, 334L, 335L, 
336L, 337L, 338L, 340L, 341L, 342L, 343L, 344L, 345L, 346L, 347L, 
348L, 349L, 351L, 352L, 353L, 354L, 355L, 356L, 357L, 358L, 359L, 
360L, 362L, 363L, 364L, 365L, 366L, 367L, 368L, 369L, 370L, 371L, 
373L, 374L, 375L, 376L, 377L, 378L, 379L, 380L, 381L, 382L, 384L, 
385L, 386L, 387L, 388L, 389L, 390L, 391L, 392L, 393L, 395L, 396L, 
397L, 398L, 399L, 400L, 401L, 402L, 403L, 404L, 407L, 408L, 409L, 
410L, 411L, 412L, 413L), class = "data.frame")

I run the following regression

mod1 <- lm(Y ~ Treatment + I(Date - as.Date("2012-07-15")) + Treatment*I(Date - as.Date("2012-07-15")) , data = fakedata)

and get the following output:

> summary(mod1)

Call:
lm(formula = Y ~ Treatment + I(Date - as.Date("2012-07-15")) + 
    Treatment * I(Date - as.Date("2012-07-15")), data = fakedata)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.3272 -0.3419  0.2500  0.5483  1.4873 

Coefficients:
                                            Estimate Std. Error
(Intercept)                                 3.019447   0.227294
Treatment1                                  1.275264   0.359427
I(Date - as.Date("2012-07-15"))            -0.005807   0.002263
Treatment1:I(Date - as.Date("2012-07-15"))  0.001757   0.003305
                                           t value Pr(>|t|)    
(Intercept)                                 13.284  < 2e-16 ***
Treatment1                                   3.548  0.00058 ***
I(Date - as.Date("2012-07-15"))             -2.566  0.01170 *  
Treatment1:I(Date - as.Date("2012-07-15"))   0.532  0.59604    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8682 on 106 degrees of freedom
Multiple R-squared:  0.1154,    Adjusted R-squared:  0.09038 
F-statistic:  4.61 on 3 and 106 DF,  p-value: 0.004509

As you see coefficient Treatment1:I(Date - as.Date("2012-07-15")) is positive (0.001757). So the second local regression's slope should be positive.

Then I try to plot the same regression using library(ggplot2):

fakedata  %>%
  select(Date, Y) %>%
  mutate(Treatment = as.factor(ifelse(Date >= as.Date("2012-07-15"), 1, 0))) %>%
  ggplot(aes(x = Date, y = Y, color = Treatment)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  scale_color_brewer(palette = "Accent") +
  geom_vline(xintercept = as.Date("2012-07-15"), color = "red",
             size = 1, linetype = "dashed") +
  labs(y = "Y",
       x = "Date")

This is the output I get: Output

As you see, the second local regression is downward sloping. But it definitely should not be the case. Can you tell me why?

  • Without some additional data we probably can't help you, please consider our "house rules" [how-to-make-a-great-r-reproducible-example](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example), cheers! – jay.sf Jul 03 '22 at 16:01

1 Answers1

0

I don't think there is a contradiction here. Presumably there is a negative coefficient for the + I(Date - as.Date("2012-02-08")) term (which applies to both sections), and there is a smaller positive coefficient for the treated condition. Thus the slope for the treated portion is a little less negative, since it reflects the sum of both coefficients.

Here's a reproducible example:

mtcars2 <- data.frame(Y = mtcars$disp,
                      Treatment = as.factor(mtcars$vs),
                      X = -mtcars$qsec)

Like your data, X (like your time term) has a negative slope, here -21.45. And Treatment1:X has a smaller positive slope:

lm(Y ~ Treatment + X + Treatment*X, mtcars2)

#Call:
#lm(formula = Y ~ Treatment + X + Treatment * X, data = mtcars2)
#
#Coefficients:
# (Intercept)    Treatment1             X  Treatment1:X  
#      -51.00          5.91        -21.45         12.27 

Graphically, this means the Treated portion has a less negative slope than the untreated portion, since treated Y varies with X from both the X term and the Treatment1:X term.

ggplot(mtcars2, aes(X, Y, color = Treatment)) +
  geom_point() +
  geom_smooth(method = "lm")

enter image description here

Jon Spring
  • 55,165
  • 4
  • 35
  • 53