Multiple Regression Analysis with Qualitative Information (Chapter 7)

Begin with our standard initial procedure: set your working directory, clear the workspace of any objects left over from earlier work, load the necessary libraries and download the data files needed for the estimations into the working directory.

# Session setup.
# `...` is a placeholder from the original post, not runnable as written:
# replace it with the path to your own working directory.
setwd(...)
# Remove every object that ls() lists in the current environment.
rm(list=ls())
# Attach the 'foreign' package; the read.dta() calls below come from it.
library('foreign')

# Download every data set used in this chapter into the working directory.
# All files live under the same base URL and are saved under their own
# name, so one loop replaces eleven near-identical calls.
# mode = "wb" writes the files in binary mode (the .dta files are binary).
wooldridge_url <- "http://fmwww.bc.edu/ec-p/data/wooldridge/"
dta_files <- paste0(
  c(
    "wage1", "gpa1", "jtrain", "hprice1", "beauty", "lawsch85",
    "mlb1", "gpa3", "mroz", "crime1", "fertil2"
  ),
  ".dta"
)
for (dta_file in dta_files) {
  download.file(paste0(wooldridge_url, dta_file), dta_file, mode = "wb")
}

Estimating models with dummy variables, which capture qualitative information, works in R just as it does with quantitative variables: you simply add the dummy as an independent variable to the model formula.

# Example 7.1
# Read the wage data; wage1 is reused by several later examples.
wage1 <- read.dta("wage1.dta")

# Wage on the female dummy plus education, experience and tenure.
lm.7.1.1 <- lm(
  wage ~ female + educ + exper + tenure,
  data = wage1
)
summary(lm.7.1.1)

# Wage on the female dummy alone.
lm.7.1.2 <- lm(
  wage ~ female,
  data = wage1
)
summary(lm.7.1.2)

# Example 7.2
# Read the GPA data and regress college GPA on the PC dummy,
# high-school GPA and the ACT score.
gpa1 <- read.dta("gpa1.dta")

lm.7.2 <- lm(
  colGPA ~ PC + hsGPA + ACT,
  data = gpa1
)
summary(lm.7.2)

# Example 7.3
jtrain <- read.dta("jtrain.dta")

# Only the observations from 1988 are used, so the sample has to be
# restricted first: logical row indexing selects the rows for which
# jtrain$d88 == 1. Note that jtrain is overwritten by the subset.
jtrain <- jtrain[jtrain$d88 == 1, ]

# Then estimate the usual regression on the reduced sample.
lm.7.3 <- lm(
  hrsemp ~ grant + lsales + lemploy,
  data = jtrain
)
summary(lm.7.3)

# Example 7.4
# Log house price on log lot size, log square footage, bedrooms and
# the colonial dummy.
hprice1 <- read.dta("hprice1.dta")

lm.7.4 <- lm(
  lprice ~ llotsize + lsqrft + bdrms + colonial,
  data = hprice1
)
summary(lm.7.4)

# Example 7.5
# Log wage equation with the female dummy (uses wage1 from Example 7.1).
lm.7.5 <- lm(
  lwage ~ female + educ + exper + expersq + tenure + tenursq,
  data = wage1
)
summary(lm.7.5)

# For the more accurate estimate take the coefficient on female, pass it
# to exp() and subtract 1. The coefficient is looked up by name via
# coef() instead of by position in the summary table, so the result does
# not depend on the order of the regressors or on partial matching of
# `$coeff`.
exp(coef(lm.7.5)[["female"]]) - 1

# For the percentage by which a man's wage exceeds a comparable woman's
# wage, put a minus sign in front of the coefficient.
exp(-coef(lm.7.5)[["female"]]) - 1

The central command for creating dummy variables is as.numeric(). A condition such as x == 1 yields a logical vector (TRUE/FALSE); wrapping it in as.numeric() converts it into a vector that is one where the condition stated in the parentheses is true and zero otherwise.

# Example 7.6
# Generate the subgroup dummies from the female and married indicators.
# They are created in the workspace (not inside wage1); lm() still finds
# them because the formulas below are evaluated from the workspace.
marrmale <- as.numeric(wage1$female == 0 & wage1$married == 1)
marrfem  <- as.numeric(wage1$female == 1 & wage1$married == 1)
singfem  <- as.numeric(wage1$female == 1 & wage1$married == 0)
# Same idea, written as a product of two 0/1 indicators.
singmale <- as.numeric(wage1$female == 0) * as.numeric(wage1$married == 0)

# Regression that leaves out singmale.
lm.7.6.1 <- lm(
  lwage ~ marrmale + marrfem + singfem +
    educ + exper + expersq + tenure + tenursq,
  data = wage1
)
summary(lm.7.6.1)

# Regression that leaves out marrfem instead.
lm.7.6.2 <- lm(
  lwage ~ marrmale + singmale + singfem +
    educ + exper + expersq + tenure + tenursq,
  data = wage1
)
summary(lm.7.6.2)



# Example 7.7
beauty <- read.dta("beauty.dta")

lm.7.7 <- lm(
  lwage ~ belavg + abvavg + educ + exper + expersq +
    service + married + black,
  data = beauty
)
summary(lm.7.7)
  # Unfortunately, the results are not the same as in the book, or as in
  # the paper by Hamermesh and Biddle (1994).

In the next example the conditions used inside as.numeric() are a bit more sophisticated, but they follow the same logic as above.

# Example 7.8
lawsch85 <- read.dta("lawsch85.dta")

# Generate the rank-range dummies. Most of them are not strictly needed,
# but they show how it is done. The last one (r61.100) must be created,
# since it is not contained in the downloaded data.
top10   <- as.numeric(lawsch85$rank <= 10)
r11.25  <- as.numeric(lawsch85$rank >= 11 & lawsch85$rank <= 25)
r26.40  <- as.numeric(lawsch85$rank >= 26 & lawsch85$rank <= 40)
r41.60  <- as.numeric(lawsch85$rank >= 41 & lawsch85$rank <= 60)
r61.100 <- as.numeric(lawsch85$rank >= 61 & lawsch85$rank <= 100)

# The formula uses the dummies generated above (dotted names). It used
# to name r11_25, r26_40 and r41_60, which are not the vectors created
# here, so the hand-built dummies were never used.
# NOTE(review): lm() looks names up in `data` before the workspace, so if
# lawsch85 ships its own top10 column that column is the one used here.
lm.7.8 <- lm(
  lsalary ~ top10 + r11.25 + r26.40 + r41.60 + r61.100 +
    LSAT + GPA + llibvol + lcost,
  data = lawsch85
)
summary(lm.7.8)

# Equation 7.14
# Generate the interaction term between the female and married dummies
# (uses wage1 from Example 7.1).
femmarr <- wage1$female * wage1$married

lm.e7.14 <- lm(
  lwage ~ female + married + femmarr +
    educ + exper + expersq + tenure + tenursq,
  data = wage1
)
summary(lm.e7.14)

# Example 7.9
 # No data found

# Example 7.10
# Log wage equation in which the return to education may differ by
# gender: the interaction is female x educ. (The earlier version reused
# the female x married term from Equation 7.14, which is why its results
# and F statistic did not match the book.)
femeduc <- wage1$female * wage1$educ
lm.7.10 <- lm(
  lwage ~ female + educ + femeduc +
    exper + expersq + tenure + tenursq,
  data = wage1
)
summary(lm.7.10)

# Joint test for d0 and d1, the coefficients on female and femeduc:
# the restricted model drops both terms, and anova() compares the two
# fits with an F test.
lm.7.10.t <- lm(
  lwage ~ educ + exper + expersq + tenure + tenursq,
  data = wage1
)
# NOTE(review): the book reports an F statistic of roughly 34 for this
# test -- confirm against the output after re-running.
anova(lm.7.10, lm.7.10.t)

# Example 7.11
# Log salary on the performance variables, the black and hispan dummies
# and the blckpb / hispph terms from the data set.
mlb1 <- read.dta("mlb1.dta")

lm.7.11 <- lm(
  lsalary ~ years + gamesyr + bavg + hrunsyr + rbisyr + runsyr +
    fldperc + allstar + black + hispan + blckpb + hispph,
  data = mlb1
)
summary(lm.7.11)

# Equation 7.22
gpa3 <- read.dta("gpa3.dta")

# To save time and space everything is done inside with(): the rows with
# term == 2 are selected, and the interaction terms and the model exist
# only inside the braces. The value of the last expression (the model
# summary) is what with() returns.
with(gpa3[gpa3$term == 2, ], {
  # Interactions of the female dummy with each regressor
  femsat    <- female * sat
  femhsperc <- female * hsperc
  femtothrs <- female * tothrs

  lm.e7.22 <- lm(
    cumgpa ~ female + sat + femsat + hsperc + femhsperc +
      tothrs + femtothrs
  )
  summary(lm.e7.22)
})

# Equation 7.29
# Regression with the inlf variable as the dependent variable.
mroz <- read.dta("mroz.dta")

lm.e7.29 <- lm(
  inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6,
  data = mroz
)
summary(lm.e7.29)

# Example 7.12
crime1 <- read.dta("crime1.dta")

# Generate arr86: one if narr86 is positive, zero otherwise.
arr86 <- as.numeric(crime1$narr86 > 0)

lm.7.12 <- lm(
  arr86 ~ pcnv + avgsen + tottime + ptime86 + qemp86,
  data = crime1
)
summary(lm.7.12)

  # Same model with the black and hispan dummies added.
  # NOTE(review): the original label here read "Equation 7.23", which is
  # out of sequence with the surrounding equations (7.29 ... 7.33) --
  # presumably 7.32 was meant; confirm against your edition. The object
  # name lm.e7.23 is kept unchanged.
lm.e7.23 <- lm(
  arr86 ~ pcnv + avgsen + tottime + ptime86 + qemp86 + black + hispan,
  data = crime1
)
summary(lm.e7.23)

# Equation 7.33
# The data argument selects the rows with d88 == 1, so this call also
# works on the unfiltered jtrain data.
lm.e7.33 <- lm(
  lscrap ~ grant + lsales + lemploy,
  data = jtrain[jtrain$d88 == 1, ]
)
summary(lm.e7.33)

# Equation 7.35 and 7.37
fertil2 <- read.dta("fertil2.dta")

# 7.35: children on age and education.
lm.e7.35 <- lm(
  children ~ age + educ,
  data = fertil2
)
summary(lm.e7.35)

# 7.37: the same model with the electric dummy added.
lm.e7.37 <- lm(
  children ~ age + educ + electric,
  data = fertil2
)
summary(lm.e7.37)
Advertisements

One comment

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s