OLS: Interpretation and hypothesis testing
MACS 33001
University of Chicago
Cigarette consumption
Cigarette tax
Southern states
Estimating linear regression models in R
# Fit an OLS regression of cigarette consumption on the per-pack cigarette
# tax and the Southern-state indicator, then print the coefficient table and
# overall fit statistics.
# Note: the fitted-model object shares its name with the `south` predictor;
# inside the formula, `south` is looked up in `states` first.
south <- lm(
  formula = cigarettes ~ cig_tax + south,
  data = states
)
summary(object = south)
##
## Call:
## lm(formula = cigarettes ~ cig_tax + south, data = states)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.407 -1.689 -0.389 0.912 8.839
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.643 1.065 7.17 4.4e-09 ***
## cig_tax -0.876 0.726 -1.21 0.2338
## south 2.730 0.998 2.74 0.0087 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.76 on 47 degrees of freedom
## Multiple R-squared: 0.275, Adjusted R-squared: 0.244
## F-statistic: 8.92 on 2 and 47 DF, p-value: 0.000519
lm() objects
str(south)
## List of 12
## $ coefficients : Named num [1:3] 7.643 -0.876 2.73
## ..- attr(*, "names")= chr [1:3] "(Intercept)" "cig_tax" "south"
## $ residuals : Named num [1:50] 0.569 8.839 -1.898 2.529 -0.998 ...
## ..- attr(*, "label")= chr "Packs bimonthly per adult pop"
## ..- attr(*, "format.stata")= chr "%10.0g"
## ..- attr(*, "names")= chr [1:50] "1" "2" "3" "4" ...
## $ effects : Named num [1:50] -53.57 -8.88 -7.55 1.97 -2.82 ...
## ..- attr(*, "label")= chr "Packs bimonthly per adult pop"
## ..- attr(*, "format.stata")= chr "%10.0g"
## ..- attr(*, "names")= chr [1:50] "(Intercept)" "cig_tax" "south" "" ...
## $ rank : int 3
## $ fitted.values: Named num [1:50] 9.47 10.11 10.06 9.89 7.26 ...
## ..- attr(*, "label")= chr "Packs bimonthly per adult pop"
## ..- attr(*, "format.stata")= chr "%10.0g"
## ..- attr(*, "names")= chr [1:50] "1" "2" "3" "4" ...
## $ assign : int [1:3] 0 1 2
## $ qr :List of 5
## ..$ qr : num [1:50, 1:3] -7.071 0.141 0.141 0.141 0.141 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:50] "1" "2" "3" "4" ...
## .. .. ..$ : chr [1:3] "(Intercept)" "cig_tax" "south"
## .. ..- attr(*, "assign")= int [1:3] 0 1 2
## ..$ qraux: num [1:3] 1.14 1.17 1.1
## ..$ pivot: int [1:3] 1 2 3
## ..$ tol : num 1e-07
## ..$ rank : int 3
## ..- attr(*, "class")= chr "qr"
## $ df.residual : int 47
## $ xlevels : Named list()
## $ call : language lm(formula = cigarettes ~ cig_tax + south, data = states)
## $ terms :Classes 'terms', 'formula' language cigarettes ~ cig_tax + south
## .. ..- attr(*, "variables")= language list(cigarettes, cig_tax, south)
## .. ..- attr(*, "factors")= int [1:3, 1:2] 0 1 0 0 0 1
## .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. ..$ : chr [1:3] "cigarettes" "cig_tax" "south"
## .. .. .. ..$ : chr [1:2] "cig_tax" "south"
## .. ..- attr(*, "term.labels")= chr [1:2] "cig_tax" "south"
## .. ..- attr(*, "order")= int [1:2] 1 1
## .. ..- attr(*, "intercept")= int 1
## .. ..- attr(*, "response")= int 1
## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. ..- attr(*, "predvars")= language list(cigarettes, cig_tax, south)
## .. ..- attr(*, "dataClasses")= Named chr [1:3] "numeric" "numeric" "numeric"
## .. .. ..- attr(*, "names")= chr [1:3] "cigarettes" "cig_tax" "south"
## $ model :'data.frame': 50 obs. of 3 variables:
## ..$ cigarettes: num [1:50] 10.04 18.95 8.16 12.42 6.26 ...
## .. ..- attr(*, "label")= chr "Packs bimonthly per adult pop"
## .. ..- attr(*, "format.stata")= chr "%10.0g"
## ..$ cig_tax : num [1:50] 1.03 0.3 0.36 0.55 0.44 2 0.17 0.695 0.18 0.91 ...
## .. ..- attr(*, "label")= chr "Cigarette tax per pack"
## .. ..- attr(*, "format.stata")= chr "%10.0g"
## ..$ south : 'labelled' num [1:50] 1 1 1 1 0 0 0 0 1 0 ...
## .. ..- attr(*, "labels")= Named num [1:2] 0 1
## .. .. ..- attr(*, "names")= chr [1:2] "Nonsouth" "South"
## .. ..- attr(*, "label")= chr "Southern state?"
## .. ..- attr(*, "format.stata")= chr "%8.0g"
## ..- attr(*, "terms")=Classes 'terms', 'formula' language cigarettes ~ cig_tax + south
## .. .. ..- attr(*, "variables")= language list(cigarettes, cig_tax, south)
## .. .. ..- attr(*, "factors")= int [1:3, 1:2] 0 1 0 0 0 1
## .. .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. .. ..$ : chr [1:3] "cigarettes" "cig_tax" "south"
## .. .. .. .. ..$ : chr [1:2] "cig_tax" "south"
## .. .. ..- attr(*, "term.labels")= chr [1:2] "cig_tax" "south"
## .. .. ..- attr(*, "order")= int [1:2] 1 1
## .. .. ..- attr(*, "intercept")= int 1
## .. .. ..- attr(*, "response")= int 1
## .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. .. ..- attr(*, "predvars")= language list(cigarettes, cig_tax, south)
## .. .. ..- attr(*, "dataClasses")= Named chr [1:3] "numeric" "numeric" "numeric"
## .. .. .. ..- attr(*, "names")= chr [1:3] "cigarettes" "cig_tax" "south"
## - attr(*, "class")= chr "lm"
tidy()
tidy(south)
## # A tibble: 3 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 7.64 1.07 7.17 0.00000000445
## 2 cig_tax -0.876 0.726 -1.21 0.234
## 3 south 2.73 0.998 2.74 0.00874
str(tidy(south))
## Classes 'tbl_df', 'tbl' and 'data.frame': 3 obs. of 5 variables:
## $ term : chr "(Intercept)" "cig_tax" "south"
## $ estimate : num 7.643 -0.876 2.73
## $ std.error: num 1.065 0.726 0.998
## $ statistic: num 7.17 -1.21 2.74
## $ p.value : num 4.45e-09 2.34e-01 8.74e-03
augment()
augment(south)
## # A tibble: 50 x 10
## cigarettes cig_tax south .fitted .se.fit .resid .hat .sigma .cooksd
## * <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 10.0 1.03 1 9.47 0.768 0.569 0.0775 2.79 1.29e-3
## 2 19.0 0.3 1 10.1 0.716 8.84 0.0674 2.44 2.65e-1
## 3 8.16 0.36 1 10.1 0.706 -1.90 0.0654 2.77 1.18e-2
## 4 12.4 0.55 1 9.89 0.690 2.53 0.0625 2.76 1.99e-2
## 5 6.26 0.44 0 7.26 0.792 -0.998 0.0824 2.78 4.27e-3
## 6 6.22 2 0 5.89 0.687 0.329 0.0620 2.79 3.35e-4
## 7 9.68 0.17 0 7.49 0.956 2.19 0.120 2.77 3.25e-2
## 8 3.58 0.695 0 7.03 0.653 -3.45 0.0560 2.74 3.28e-2
## 9 9.35 0.18 1 10.2 0.744 -0.866 0.0728 2.79 2.78e-3
## 10 4.92 0.91 0 6.85 0.557 -1.93 0.0407 2.77 7.19e-3
## # ... with 40 more rows, and 1 more variable: .std.resid <dbl>
glance()
glance(south)
## # A tibble: 1 x 11
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## * <dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.275 0.244 2.76 8.92 5.19e-4 3 -120. 248. 256.
## # ... with 2 more variables: deviance <dbl>, df.residual <int>