# Load data
library(tidyverse)
library(haven)
library(modelsummary)
library(gt)
<- read_dta('https://ditraglia.com/data/mlda.dta') mlda
Tutorial #4: Minimum Legal Drinking Age
This problem is based on data from Carpenter & Dobkin (2009; AEJ Applied), who study the causal effect of alcohol consumption on mortality in US youths. In 1984, the US Federal Government introduced legislation that compelled states to introduce a minimum legal drinking age (MLDA) of 21. Before then, different states had different minimum ages at which it was legal to purchase alcohol: some had an MLDA of 18, others 19, and others 20. Carter & Dobkin (2009) do not rely upon variation in states’ MLDAs before 1984. (If you’re interested to know why, see the introduction of their paper.) Instead they take a regression discontinuity approach. Since 1984, a US resident’s birthday has created a sharp change in ease of access to alcohol. The day before your 21st birthday, you cannot buy alcohol legally; on the day itself and forever after you can. If we view the treatment as being able to buy alcohol legally, this is a sharp RD design. The outcome of interest is all-cause mortality. If legal access to alcohol causes an increase in mortality, we should see a “jump” in mortality rates just after people turn 21. For more background, see the paper.
Because access to the underlying individual mortality data is restricted, here we will work with group averages. The data you will need to complete the following is contained in a file called mlda.dta
, available from the data directory of my website: https://ditraglia.com/data/
. The dataset contains multiple columns, but you’ll only need two of them. The first is agecell
. This variable contains age in months, stored as a whole number of years plus a decimal. (It’s a bit inelegant, I agree!) The second is all
, which gives all-cause mortality rates per 100,000 individuals. These variables were constructed as follows. Underlying both of them is individual data on mortality. These individual data were grouped into fifty “bins” based on age in months. The average age in the bin was stored in agecell
and the mortality rate in the bin was stored in all
. (I provide this explanation only in case you’re curious: you won’t need to worry about it below!)
- Use a linear RD model to estimate the causal effect of legal access to alcohol on death rates. Plot your results and carry out appropriate present appropriate statistical inference. Discuss your findings.
- Repeat 1 using a quadratic rather than linear specification. Compare and contrast your findings.
- RD analysis is fundamentally local in nature: the mortality rates of individuals far from the cutoff should not inform us about the causal effect for 21 year olds. Check the sensitivity of your results from parts 1 and 2 by restricting your sample to ages between 20 and 22, inclusive. Discuss your findings.
Solutions
Table of Regression Results
# Center age around the cutoff and create treatment indicator
<- mlda |>
mlda mutate(age = agecell - 21,
over21 = if_else(agecell >= 21, 1, 0))
# Linear RD model
<- all ~ age * over21
linear_model <- lm(linear_model, mlda)
linear_RD
# Quadratic RD Model
<- all ~ (age + I(age^2)) * over21
quadratic_model <- lm(quadratic_model, mlda)
quadratic_RD
# Sensitivity Analysis: what changes if we restrict to ages 20-22?
<- mlda |>
mlda_subset filter(agecell >= 20 & agecell <= 22)
<- lm(linear_model, mlda_subset)
linear_RD_local <- lm(quadratic_model, mlda_subset)
quadratic_RD_local
<- list(linear_RD, quadratic_RD, linear_RD_local, quadratic_RD_local)
results <- modelsummary(results,
RD_table fmt = 2,
output = 'gt',
stars = TRUE,
gof_omit = 'Log.Lik|R2|R2 Adj.|AIC|BIC|F|RMSE',
title = 'Sharp RD Results - MLDA Example')
|>
RD_table tab_spanner(label = 'Ages 19-23', columns = 2:3) |>
tab_spanner(label = 'Ages 20-22', columns = 4:5)
Ages 19-23 | Ages 20-22 | |||
---|---|---|---|---|
(1) | (2) | (3) | (4) | |
(Intercept) | 93.62*** | 93.07*** | 92.52*** | 94.34*** |
(0.93) | (1.40) | (1.37) | (2.05) | |
age | 0.83 | -0.83 | -1.61 | 9.40 |
(0.82) | (3.29) | (2.41) | (9.62) | |
over21 | 7.66*** | 9.55*** | 9.75*** | 9.61** |
(1.32) | (1.99) | (1.94) | (2.89) | |
age × over21 | -3.60** | -6.02 | -3.29 | -24.45+ |
(1.16) | (4.65) | (3.40) | (13.60) | |
I(age^2) | -0.84 | 11.16 | ||
(1.62) | (9.45) | |||
I(age^2) × over21 | 2.90 | -0.87 | ||
(2.28) | (13.37) | |||
Num.Obs. | 48 | 48 | 24 | 24 |
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 |
Base R Versions of Plots
# Function for making an RD plot
<- function(reg, dat, inc = 0.01) {
make_RD_plot plot(all ~ agecell, dat, xlab = 'Age',
ylab = 'Mortality Rate (per 100,000)',
pch = 20, col = 'blue')
abline(v = 21, lty = 2)
<- min(dat$agecell)
x_min <- max(dat$agecell)
x_max <- seq(x_min, 21 - inc, inc)
x_before <- seq(21, x_max, inc)
x_after <- predict(reg, data.frame(age = x_before - 21,
y_before over21 = 1 * (x_before >= 21)))
<- predict(reg, data.frame(age = x_after - 21,
y_after over21 = 1 * (x_after >= 21)))
points(x_before, y_before, type = 'l', lwd = 2)
points(x_after, y_after, type = 'l', lwd = 2)
}
par(mfrow = c(2, 2))
make_RD_plot(linear_RD, mlda)
make_RD_plot(quadratic_RD, mlda)
make_RD_plot(linear_RD_local, mlda_subset)
make_RD_plot(quadratic_RD_local, mlda_subset)
par(mfrow = c(1, 1))
ggplot2
Versions of the Plots
library(gridExtra)
<- ggplot(mlda, aes(x = age, y = all, color = factor(over21))) +
linear_plot geom_point() +
geom_smooth(method = 'lm', formula = y ~ x) +
xlab('Age') +
ylab('Mortality Rate (per 100,000)') +
theme(legend.position = 'none') # Get rid of the legend!
<- ggplot(mlda, aes(x = age, y = all, color = factor(over21))) +
quadratic_plot geom_point() +
geom_smooth(method = 'lm', formula = y ~ x + I(x^2)) +
xlab('Age') +
ylab('Mortality Rate (per 100,000)') +
theme(legend.position = 'none') # Get rid of the legend!
<- ggplot(mlda_subset, aes(x = age, y = all,
linear_plot_local color = factor(over21))) +
geom_point() +
geom_smooth(method = 'lm', formula = y ~ x) +
xlab('Age') +
ylab('Mortality Rate (per 100,000)') +
theme(legend.position = 'none') # Get rid of the legend!
<- ggplot(mlda_subset, aes(x = age, y = all,
quadratic_plot_local color = factor(over21))) +
geom_point() +
geom_smooth(method = 'lm', formula = y ~ x + I(x^2)) +
xlab('Age') +
ylab('Mortality Rate (per 100,000)') +
theme(legend.position = 'none') # Get rid of the legend!
grid.arrange(linear_plot, quadratic_plot,
ncol = 2) linear_plot_local, quadratic_plot_local,