Sheperd (2013): The Gravity Model of International Trade: A User Guide

library(foreign)
library(ggvis)
rm(list=ls())

dat<-read.dta("services_dataset.dta")

dat<-cbind(dat,"ltrade"=log(dat[,"trade"]))
dat<-cbind(dat,"ldist"=log(dat[,"dist"]))
dat<-cbind(dat,"lgdpexp"=log(dat[,"gdp_exp"]))
dat<-cbind(dat,"lgdpimp"=log(dat[,"gdp_imp"]))

dat[dat[,"ltrade"]==-Inf,"ltrade"]<-NA

# Table 1
round(cor(dat[dat[,"sector"]=="SER",c("ltrade","lgdpexp","lgdpimp","ldist")],use="complete.obs"),4)

# Figure 1
dat<-cbind(dat,"lgdpboth"=log(dat[,"lgdpexp"]*dat[,"lgdpimp"]))
dat2<-dat[is.na(dat[,"lgdpboth"])==FALSE & is.na(dat[,"ltrade"])==FALSE & is.na(dat[,"ldist"])==FALSE,]

dat2[dat2[,"sector"]=="SER",] %>% 
  ggvis(~lgdpboth,~ltrade) %>% 
  layer_points() %>% layer_model_predictions(model="lm",stroke:="red")

# Figure 2
dat2[dat2[,"sector"]=="SER",] %>% 
  ggvis(~ldist,~ltrade) %>% 
  layer_points() %>% layer_model_predictions(model="lm",stroke:="red")

# Table 2
lm.t2<-lm(ltrade ~ lgdpexp + lgdpimp + ldist + contig + comlang_off + colony + comcol,
          data=dat,subset=(dat[,"sector"]=="SER"))
summary(lm.t2)

# Table 5
lm.t5<-lm(ltrade ~ etcr_exp + etcr_imp + lgdpexp + lgdpimp + ldist + contig + 
           comlang_off + colony + comcol,
          data=dat,subset=(dat[,"sector"]=="SER"))
summary(lm.t5)


# Table 6
for (i in levels(factor(dat[,"exp"]))){
  dat<-cbind(dat,i=as.numeric(dat[,"exp"]==i))
}
names(dat)[31:248]<-paste("exp",levels(factor(dat[,"exp"])),sep=".")

for (i in levels(factor(dat[,"imp"]))){
  dat<-cbind(dat,i=as.numeric(dat[,"imp"]==i))
}
names(dat)[249:466]<-paste("imp",levels(factor(dat[,"imp"])),sep=".")

t6.y<-as.numeric(dat[dat[,"sector"]=="SER",26])
t6.x<-as.matrix(dat[dat[,"sector"]=="SER",c(27,6,7,9,10,32:248,250:466)])

lm.t6<-lm(t6.y ~ t6.x)
summary(lm.t6)$coef[1:9,]
summary(lm.t6)

# Table 7
t7.y<-as.numeric(dat[dat[,"sector"]=="SER",26])
t7.x<-as.matrix(dat[dat[,"sector"]=="SER",c(27,6,7,9,10,31:466)])

lm.t7<-lm(t7.y ~ -1 + t7.x)
summary(lm.t7)$coef[1:9,]

# Table 8
dat<-cbind(dat,"etcrboth"=dat[,"etcr_exp"]*dat[,"etcr_imp"])
t8.y<-as.numeric(dat[dat[,"sector"]=="SER",26])
t8.x<-as.matrix(dat[dat[,"sector"]=="SER",c(467,27,6,7,9,10,32:248,250:466)])

lm.t8<-lm(t8.y ~ t8.x)
summary(lm.t8)$coef[1:9,]
summary(lm.t8)

# Table 9
t9.y<-as.numeric(dat[dat[,"sector"]=="SER",26])
t9.x<-as.matrix(dat[dat[,"sector"]=="SER",c(27,32:248,250:466)])

lm.t9<-lm(t9.y ~ t9.x)
summary(lm.t9)$coef[1:9,]
summary(lm.t9)

# Table 10
# Calculate means of ldistance for exporters
levelexp<-levels(factor(dat[,"exp"]))  # List of exporters
meansexp<-data.frame() # Create frame for each sector and exporter
for (i in levels(factor(dat[,"sector"]))){
  meansexp<-rbind(meansexp,data.frame("sector"=rep(i,length(levelexp)),
                                "exp"=levelexp))
}
meansexp<-cbind(meansexp,"l.dist"=NA) # Add column for values

for (i in 1:nrow(meansexp)){ # Calculate mean values
  meansexp[i,3]<-mean(dat[dat[,"sector"]==meansexp[i,"sector"] & dat[,"exp"]==meansexp[i,"exp"],"ldist"],na.rm=TRUE)
}

# Calculate means of ldistance for importers
levelimp<-levels(factor(dat[,"imp"]))  # List of exporters
meansimp<-data.frame() # Create frame for each sector and exporter
for (i in levels(factor(dat[,"sector"]))){
  meansimp<-rbind(meansimp,data.frame("sector"=rep(i,length(levelimp)),
                                "imp"=levelimp))
}
meansimp<-cbind(meansimp,"l.dist"=NA) # Add column for values

for (i in 1:nrow(meansimp)){ # Calculate mean values
  meansimp[i,3]<-mean(dat[dat[,"sector"]==meansimp[i,"sector"] & dat[,"imp"]==meansimp[i,"imp"],"ldist"],na.rm=TRUE)
}

temp1<-c()
for (i in 1:nrow(dat)){
  temp1<-append(temp1,meansexp[meansexp[,"sector"]==dat[i,"sector"] & meansexp[,"exp"]==dat[i,"exp"],"l.dist"])
}

temp2<-c()
for (i in 1:nrow(dat)){
  temp2<-append(temp2,meansimp[meansimp[,"sector"]==dat[i,"sector"] & meansimp[,"imp"]==dat[i,"imp"],"l.dist"])
}

temp3<-c()
for(i in 1:nrow(dat)){
  temp3<-append(temp3,sum(dat[dat[,"sector"]==dat[i,"sector"],"ldist"],na.rm=TRUE))
}

dat<-cbind(dat,"ldist.star"=dat[,"ldist"]-temp1-temp2+(1/(218^2))*temp3)

lm.t10<-lm(ltrade ~ ldist.star + lgdpexp + lgdpimp,
           data=dat[dat[,"sector"]=="SER",])
summary(lm.t10)

# Table 11a
library(AER)

dat<-cbind(dat,"llatexp"=log(abs(dat[,"lat_exp"])))
dat<-cbind(dat,"llatimp"=log(abs(dat[,"lat_imp"])))

iv.t11a<-ivreg(etcr_exp ~ lgdpexp + lgdpimp + ldist +
                 contig + comlang_off + colony + llatexp + llatimp,
               data=dat,
               subset=is.na(dat[,"etcr_imp"])==FALSE & is.na(dat[,"ltrade"])==FALSE)
summary(iv.t11a)

# Table 11b
iv.t11b<-ivreg(etcr_imp ~ lgdpexp + lgdpimp + ldist +
                 contig + comlang_off + colony + llatexp + llatimp,
               data=dat,
               subset=is.na(dat[,"etcr_exp"])==FALSE & is.na(dat[,"ltrade"])==FALSE)
summary(iv.t11b)

# Table 11c
iv.t11c<-ivreg(ltrade ~ etcr_exp + etcr_imp  + lgdpexp + lgdpimp + ldist +
                 contig + comlang_off + colony |
                 llatexp + llatimp + lgdpexp + lgdpimp + ldist +
                 contig + comlang_off + colony,
               data=dat)
summary(iv.t11c)

# Table 12
t12.y<-as.numeric(dat[dat[,"sector"]=="SER","trade"])
t12.x<-as.matrix(dat[dat[,"sector"]=="SER",c(27,6,7,9,10,32:248,250:466)])

p.t12<-glm(t12.y ~ t12.x, family="quasipoisson")
summary(p.t12)$coef[1:9,]
summary(p.t12)

# Table 13
library(sampleSelection) #install.packages('sampleSelection')

exporters2<-as.numeric(as.matrix(read.csv("exporters.csv")))

t13.y<-as.numeric(dat[dat[,"sector"]=="SER","ltrade"])
t13.ys<-as.numeric(is.na(as.numeric(dat[dat[,"sector"]=="SER","ltrade"]))==FALSE)

dat3<-cbind(dat,"entcostboth"=dat[,"ent_cost_imp"]*dat[,"ent_cost_exp"])
#t13.xs<-as.matrix(dat3[dat3[,"sector"]=="SER",c(471,27,6,7,9,10,32:248,250:466)])
t13.xs<-as.matrix(dat3[dat3[,"sector"]=="SER",c(27,6,7,9,10,471,(30+exporters2),(248+exporters2))])
t13.x<-as.matrix(dat3[dat3[,"sector"]=="SER",c(27,6,7,9,10,(30+exporters2),(248+exporters2))])

lm.t13o<-lm(t13.y ~ t13.x)
nonsingo<-as.vector(is.na(coef(lm.t13o))==FALSE)[-1]

lm.t13s<-lm(t13.ys ~ t13.xs)  # Trying to omit singularities
nonsings<-as.vector(is.na(coef(lm.t13s))==FALSE)[-1]

summary(probit(t13.ys ~ t13.xs))

# Couldn't fully reproduce the Heckman estimator

3 comments

  1. thanks for your reply and offer. I am somewhat confused, as the data from the paper looks different than what is used in the book (also after running the do files). Do I need to get the other variables from CEPII or or is there a dataset that comes with the book or have I missed something?
    Do you think you could share the data you use with me? In case, my address is name.familyname at gmail com, with no capital letters. Thanks!

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s