forked from idrisr/Regression
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ch4_ex.R
70 lines (61 loc) · 2.95 KB
/
ch4_ex.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# ***************************************************************************
# Chapter 4 Multiple Regression Analysis
# Book: Regression Basics, Leo Kahane, 1st edition
# ***************************************************************************
# 4.1 ***********************************************************************
abor_orig <- read.table('data/abort.txt', header=TRUE, sep='\t')
m <- lm(ABORTION ~ EDUC + RELIGION + PRICE + PICKET + INCOME, data=abor_orig)
m1 <- lm(ABORTION ~ RELIGION + PRICE + PICKET + INCOME, data=abor_orig)
# Model with EDUC slightly better
summary(m)$adj.r.squared > summary(m1)$adj.r.squared
# EDUC not a significant covariate
summary(m)$coefficients['EDUC', 'Pr(>|t|)'] < 0.05
# 4.2 ***********************************************************************
# Is the the coefficient for Growth statistically different from 1.0 at α = .1?
alpha <- 0.1
h <- 1
orig_elec <- read.table('data/Elect.txt', header=TRUE, sep='\t')
m <- lm(Votes ~ Growth + Inflation, data=orig_elec)
ms <- summary(m)
se <- ms$coefficients['Growth', 'Std. Error']
b <- ms$coefficients['Growth', 'Estimate']
df <- dim(orig_elec)[1] - dim(ms$coefficients)[1]
t <- (b-h) / se
# qt assumes one-sided test, so therefore divide alpha by 2.
# to reject H₀: |t| >= t_alpha, ie t must be more extreme than t_alpha
t_alpha <- qt(1-alpha/2, df)
# not statistically different than 1 ∴ fail to reject H₀
print('Reject H₀?')
abs(t) >= t_alpha
# 4.3 ***********************************************************************
# Predict the salary of a MLB player who has 10 years of experience, slugging
# average of 380 and a fielding percentage of 97.5
orig_bb <- read.table('data/bball.txt', header=TRUE, sep='\t')
m <- lm(SALARY ~ YEARS + SLUGGING + FIELDING, data=orig_bb)
ms <- summary(m)
sal_predict <- predict(m, data.frame('YEARS'=10, 'SLUGGING'=380,
'FIELDING'=97.5))
sal_predict2 <- m$coefficients['YEARS']*10 + m$coefficients['SLUGGING']*380 +
m$coefficients['FIELDING']*97.5 + m$coefficients['(Intercept)']
print('Predicted Salary:')
print(sal_predict * 10e6)
# 4.4 ***********************************************************************
# Back to the model about wages from problem 3.1
# WAGE ~ EDUC + EXPERIENCE
orig_wage <- read.table('data/wage.csv', header=TRUE, sep='\t')
m <- lm(WAGE ~ EDUC + EXPER, data=orig_wage)
ms <- summary(m)
# EDUC more important, and stat sig
# EXPER not stat sig
# 4.5 ***********************************************************************
# SUV Price example continuation from problem 2.3
orig_suv <- read.table('data/SUV.txt', header=TRUE, sep='\t')
m <- lm(MSRP ~ HORSE + LITERS, data=orig_suv)
ms <- summary(m)
# horsepower makes a difference with 95% confidence
# engine size does not make a difference with 95% confidence
m1 <- lm(MSRP ~ HORSE, data=orig_suv)
m1s <- summary(m1)
ms$adj.r.squared > m1s$adj.r.squared
ms$adj.r.squared - m1s$adj.r.squared
# adding engine size improves adj R² by .015