0

I'm struggling to get clustered standard errors to work with `ivreg` in R. The clustering is based on a variable with 255 unique values.

I've been trying to translate this Stata code to R:

ivreg2 X (Y = A?)  iv1 iv2 iv3 if year==`year', 
  cluster(clustervariable)

Currently I'm stuck at:

ivreg(x ~ Y | A + iv1  + iv2 + iv3, subset = (year == 1), 
  cluster = clustervariable)

Yet somehow the `cluster` argument just does not work.

Hopefully someone can help me out.

I'm using `district_id` as my cluster variable.

`dput()` output for 10 rows of the data:

    structure(list(year = structure(c(1986, 1986, 1986, 1986, 1986, 1986, 1986, 1986, 1986, 1986), format.stata = "%10.0g"), AGS_shp = structure(c("01001000", "01002000", "01003000", "01004000", "01051001", "01051002", "01051003", "01051004", "01051005", "01051006"), label = "municipality id", format.stata = "%12s"), district_id = structure(c(870, 803, 742, 757, 768, 768, 768, 
768, 784, 768), label = "district id", format.stata = "%9.0g"), 
POINT_X = structure(c(528292.375, 573715.3125, 613565.1875, 
564411.75, 518185.3125, 515892, 512847.25, 510285.4375, 511789.125, 
502602.40625), label = "X-Koord. Meter (ETRS89)", format.stata = "%9.0g"), 
POINT_Y = structure(c(6071490, 6020275.5, 5970738, 5992986, 
5998579.5, 6001090.5, 5977218, 5994236.5, 6008978, 5985803.5
), label = "Y-Koord. Meter (ETRS89)", format.stata = "%9.0g"), 
dist = structure(c(19500.8359375, 36689.5625, 38394.0703125, 
10221.001953125, -2788.59765625, -5973.6005859375, -16922.044921875, 
-10602.9267578125, -9339.908203125, -19388.501953125), label = "distance to ZRG border", format.stata = "%9.0g"), 
instrument_dist = structure(c(-9, -36, -36, -7, 27, 27, 27, 
27, 14, 27), label = "M_d, min. distance that includes majority share of district", format.stata = "%9.0g"), 
Treat = structure(c(1, 1, 1, 1, 0, 0, 0, 0, 0, 0), label = "ZRG Treatment=1", format.stata = "%10.0g"), 
empl_dens = structure(c(6.47233724594116, 6.80416679382324, 
5.90950536727905, 6.00565242767334, 3.61452412605286, 0.262787610292435, 
1.37793660163879, 1.10464441776276, 0.160169258713722, 1.16168344020844
), label = "log employment per km^2", format.stata = "%9.0g"), 
btax_area = structure(c(4.72647666931152, 4.72262144088745, 
4.06436109542847, 3.86139726638794, 1.44978511333466, -1.93443703651428, 
1.16171360015869, 0.293714225292206, NA, -1.182865858078), label = "log business tax base per km^2", format.stata = "%9.0g"), 
gdp_cap = structure(c(2.94030094146729, 2.92247271537781, 
2.95752573013306, 2.92384767532349, 2.77792525291443, 2.72932410240173, 
2.79197883605957, 2.71332430839539, 2.70004200935364, 2.86503338813782
), label = "log income per capita", format.stata = "%9.0g"), 
pop_dens = structure(c(7.32683563232422, 7.64434671401978, 
6.88798666000366, 6.99128293991089, 5.31554889678955, 3.30731010437012, 
4.25432205200195, 4.22798776626587, 3.538893699646, 3.52020859718323
), label = "log population per km^2", format.stata = "%9.0g"), 
gdp_area = structure(c(9.29452610015869, 9.56402587890625, 
8.85938262939453, 8.94584655761719, 6.96987533569336, 4.93802165985107, 
6.04171752929688, 5.83287191390991, 5.1288948059082, 5.26964664459229
), label = "log income per km^2", format.stata = "%9.0g"), 
L_1 = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), label = "Land==01", format.stata = "%8.0g"), 
L_2 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Land==03", format.stata = "%8.0g"), 
L_3 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Land==05", format.stata = "%8.0g"), 
L_4 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Land==06", format.stata = "%8.0g"), 
L_5 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Land==09", format.stata = "%8.0g"), 
Border_TreatControl = structure(c(0, 0, 0, 0, NA, 0, 0, 0, 
0, 0), label = "municipality borders ZRG boundary", format.stata = "%9.0g"), 
ptax_rate = structure(c(NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), label = "logit property tax rate", format.stata = "%10.0gc"), 
btax_rate = structure(c(NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), label = "logit business tax rate", format.stata = "%10.0gc"), 
distb = structure(c(22837.744140625, 47420.34375, 53039.19140625, 
25665.048828125, -12221.42578125, -13654.2373046875, -31139.49609375, 
-21222.0078125, -18305.630859375, -32118.302734375), label = "distance to border point", format.stata = "%9.0g"), 
elig = c(1, 1, 1, 1, 0, 0, 0, 0, 0, 0), instrument_abs = structure(c(9, 
36, 36, 7, 27, 27, 27, 27, 14, 27), label = "M_d, min. distance that includes majority share of district", format.stata = "%9.0g")), row.names = c(NA, 

-10L), class = c("tbl_df", "tbl", "data.frame"))

  • Can you post sample data? Please edit the question with the output of `dput(df)`. Or, if the data set `df` is too big with the output of `dput(head(df, 20))`. – Rui Barradas Mar 17 '23 at 17:56
  • Thanks for your help Rui! :) The sample data is the data used in the paper "The Persistent Effects of Place-Based Policy: Evidence from the West-German Zonenrandgebiet" by Maximilian Ehrlich and Tobias Seidel. Their original data can be found here: https://www.openicpsr.org/openicpsr/project/114698/version/V1/view The Stata code was the code they originally used, so I suppose that the dataset cannot be too large? Sorry I am not too experienced with R. If you could use more info from me, please feel free to let me know! – ThankfulStudent Mar 17 '23 at 17:58
  • I added the dput! – ThankfulStudent Mar 17 '23 at 19:33

0 Answers