OLS: Interpretation and hypothesis testing

MACS 33001 University of Chicago

Cigarette consumption

Cigarette tax

Linear regression of cigarette consumption

## 
## Call:
## lm(formula = cigarettes ~ cig_tax, data = states)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.739 -2.095 -0.211  0.986  9.857 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    9.681      0.812   11.93  5.8e-16 ***
## cig_tax       -1.960      0.649   -3.02    0.004 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.94 on 48 degrees of freedom
## Multiple R-squared:  0.16,   Adjusted R-squared:  0.142 
## F-statistic: 9.13 on 1 and 48 DF,  p-value: 0.00403

Southern states

Linear regression of cigarette consumption

## 
## Call:
## lm(formula = cigarettes ~ cig_tax + south, data = states)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.407 -1.689 -0.389  0.912  8.839 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    7.643      1.065    7.17  4.4e-09 ***
## cig_tax       -0.876      0.726   -1.21   0.2338    
## south          2.730      0.998    2.74   0.0087 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.76 on 47 degrees of freedom
## Multiple R-squared:  0.275,  Adjusted R-squared:  0.244 
## F-statistic: 8.92 on 2 and 47 DF,  p-value: 0.000519

Estimating linear regression models in R

# Regress cigarette consumption on the cigarette tax and the southern-state
# indicator. Inside the formula, `south` refers to the column of `states`
# (formula variables are looked up in `data` first), even though the fitted
# model is also stored under the name `south`.
south <- lm(
  formula = cigarettes ~ cig_tax + south,
  data = states
)

# Print the coefficient table and overall fit statistics for the model.
summary(south)
## 
## Call:
## lm(formula = cigarettes ~ cig_tax + south, data = states)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.407 -1.689 -0.389  0.912  8.839 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    7.643      1.065    7.17  4.4e-09 ***
## cig_tax       -0.876      0.726   -1.21   0.2338    
## south          2.730      0.998    2.74   0.0087 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.76 on 47 degrees of freedom
## Multiple R-squared:  0.275,  Adjusted R-squared:  0.244 
## F-statistic: 8.92 on 2 and 47 DF,  p-value: 0.000519

lm() objects

## List of 12
##  $ coefficients : Named num [1:3] 7.643 -0.876 2.73
##   ..- attr(*, "names")= chr [1:3] "(Intercept)" "cig_tax" "south"
##  $ residuals    : Named num [1:50] 0.569 8.839 -1.898 2.529 -0.998 ...
##   ..- attr(*, "label")= chr "Packs bimonthly per adult pop"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##   ..- attr(*, "names")= chr [1:50] "1" "2" "3" "4" ...
##  $ effects      : Named num [1:50] -53.57 -8.88 -7.55 1.97 -2.82 ...
##   ..- attr(*, "label")= chr "Packs bimonthly per adult pop"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##   ..- attr(*, "names")= chr [1:50] "(Intercept)" "cig_tax" "south" "" ...
##  $ rank         : int 3
##  $ fitted.values: Named num [1:50] 9.47 10.11 10.06 9.89 7.26 ...
##   ..- attr(*, "label")= chr "Packs bimonthly per adult pop"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##   ..- attr(*, "names")= chr [1:50] "1" "2" "3" "4" ...
##  $ assign       : int [1:3] 0 1 2
##  $ qr           :List of 5
##   ..$ qr   : num [1:50, 1:3] -7.071 0.141 0.141 0.141 0.141 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:50] "1" "2" "3" "4" ...
##   .. .. ..$ : chr [1:3] "(Intercept)" "cig_tax" "south"
##   .. ..- attr(*, "assign")= int [1:3] 0 1 2
##   ..$ qraux: num [1:3] 1.14 1.17 1.1
##   ..$ pivot: int [1:3] 1 2 3
##   ..$ tol  : num 1e-07
##   ..$ rank : int 3
##   ..- attr(*, "class")= chr "qr"
##  $ df.residual  : int 47
##  $ xlevels      : Named list()
##  $ call         : language lm(formula = cigarettes ~ cig_tax + south, data = states)
##  $ terms        :Classes 'terms', 'formula'  language cigarettes ~ cig_tax + south
##   .. ..- attr(*, "variables")= language list(cigarettes, cig_tax, south)
##   .. ..- attr(*, "factors")= int [1:3, 1:2] 0 1 0 0 0 1
##   .. .. ..- attr(*, "dimnames")=List of 2
##   .. .. .. ..$ : chr [1:3] "cigarettes" "cig_tax" "south"
##   .. .. .. ..$ : chr [1:2] "cig_tax" "south"
##   .. ..- attr(*, "term.labels")= chr [1:2] "cig_tax" "south"
##   .. ..- attr(*, "order")= int [1:2] 1 1
##   .. ..- attr(*, "intercept")= int 1
##   .. ..- attr(*, "response")= int 1
##   .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
##   .. ..- attr(*, "predvars")= language list(cigarettes, cig_tax, south)
##   .. ..- attr(*, "dataClasses")= Named chr [1:3] "numeric" "numeric" "numeric"
##   .. .. ..- attr(*, "names")= chr [1:3] "cigarettes" "cig_tax" "south"
##  $ model        :'data.frame':   50 obs. of  3 variables:
##   ..$ cigarettes: num [1:50] 10.04 18.95 8.16 12.42 6.26 ...
##   .. ..- attr(*, "label")= chr "Packs bimonthly per adult pop"
##   .. ..- attr(*, "format.stata")= chr "%10.0g"
##   ..$ cig_tax   : num [1:50] 1.03 0.3 0.36 0.55 0.44 2 0.17 0.695 0.18 0.91 ...
##   .. ..- attr(*, "label")= chr "Cigarette tax per pack"
##   .. ..- attr(*, "format.stata")= chr "%10.0g"
##   ..$ south     : 'labelled' num [1:50] 1 1 1 1 0 0 0 0 1 0 ...
##   .. ..- attr(*, "labels")= Named num [1:2] 0 1
##   .. .. ..- attr(*, "names")= chr [1:2] "Nonsouth" "South"
##   .. ..- attr(*, "label")= chr "Southern state?"
##   .. ..- attr(*, "format.stata")= chr "%8.0g"
##   ..- attr(*, "terms")=Classes 'terms', 'formula'  language cigarettes ~ cig_tax + south
##   .. .. ..- attr(*, "variables")= language list(cigarettes, cig_tax, south)
##   .. .. ..- attr(*, "factors")= int [1:3, 1:2] 0 1 0 0 0 1
##   .. .. .. ..- attr(*, "dimnames")=List of 2
##   .. .. .. .. ..$ : chr [1:3] "cigarettes" "cig_tax" "south"
##   .. .. .. .. ..$ : chr [1:2] "cig_tax" "south"
##   .. .. ..- attr(*, "term.labels")= chr [1:2] "cig_tax" "south"
##   .. .. ..- attr(*, "order")= int [1:2] 1 1
##   .. .. ..- attr(*, "intercept")= int 1
##   .. .. ..- attr(*, "response")= int 1
##   .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
##   .. .. ..- attr(*, "predvars")= language list(cigarettes, cig_tax, south)
##   .. .. ..- attr(*, "dataClasses")= Named chr [1:3] "numeric" "numeric" "numeric"
##   .. .. .. ..- attr(*, "names")= chr [1:3] "cigarettes" "cig_tax" "south"
##  - attr(*, "class")= chr "lm"

tidy()

## # A tibble: 3 x 5
##   term        estimate std.error statistic       p.value
##   <chr>          <dbl>     <dbl>     <dbl>         <dbl>
## 1 (Intercept)    7.64      1.07       7.17 0.00000000445
## 2 cig_tax       -0.876     0.726     -1.21 0.234        
## 3 south          2.73      0.998      2.74 0.00874
## Classes 'tbl_df', 'tbl' and 'data.frame':    3 obs. of  5 variables:
##  $ term     : chr  "(Intercept)" "cig_tax" "south"
##  $ estimate : num  7.643 -0.876 2.73
##  $ std.error: num  1.065 0.726 0.998
##  $ statistic: num  7.17 -1.21 2.74
##  $ p.value  : num  4.45e-09 2.34e-01 8.74e-03

augment()

## # A tibble: 50 x 10
##    cigarettes cig_tax south .fitted .se.fit .resid   .hat .sigma .cooksd
##  *      <dbl>   <dbl> <dbl>   <dbl>   <dbl>  <dbl>  <dbl>  <dbl>   <dbl>
##  1      10.0    1.03  1        9.47   0.768  0.569 0.0775   2.79 1.29e-3
##  2      19.0    0.3   1       10.1    0.716  8.84  0.0674   2.44 2.65e-1
##  3       8.16   0.36  1       10.1    0.706 -1.90  0.0654   2.77 1.18e-2
##  4      12.4    0.55  1        9.89   0.690  2.53  0.0625   2.76 1.99e-2
##  5       6.26   0.44  0        7.26   0.792 -0.998 0.0824   2.78 4.27e-3
##  6       6.22   2     0        5.89   0.687  0.329 0.0620   2.79 3.35e-4
##  7       9.68   0.17  0        7.49   0.956  2.19  0.120    2.77 3.25e-2
##  8       3.58   0.695 0        7.03   0.653 -3.45  0.0560   2.74 3.28e-2
##  9       9.35   0.18  1       10.2    0.744 -0.866 0.0728   2.79 2.78e-3
## 10       4.92   0.91  0        6.85   0.557 -1.93  0.0407   2.77 7.19e-3
## # ... with 40 more rows, and 1 more variable: .std.resid <dbl>

glance()

## # A tibble: 1 x 11
##   r.squared adj.r.squared sigma statistic p.value    df logLik   AIC   BIC
## *     <dbl>         <dbl> <dbl>     <dbl>   <dbl> <int>  <dbl> <dbl> <dbl>
## 1     0.275         0.244  2.76      8.92 5.19e-4     3  -120.  248.  256.
## # ... with 2 more variables: deviance <dbl>, df.residual <int>