Sheperd (2013): The Gravity Model of International Trade: A User Guide

A more recent version of the user guide – Sheperd (2016) – and the accompanying dataset can be downloaded from UNESCAP. I have reproduced a great part of it here.

library(foreign)
library(ggvis)
rm(list=ls())

dat<-read.dta("services_dataset.dta")

dat<-cbind(dat,"ltrade"=log(dat[,"trade"]))
dat<-cbind(dat,"ldist"=log(dat[,"dist"]))
dat<-cbind(dat,"lgdpexp"=log(dat[,"gdp_exp"]))
dat<-cbind(dat,"lgdpimp"=log(dat[,"gdp_imp"]))

dat[dat[,"ltrade"]==-Inf,"ltrade"]<-NA

# Table 1
round(cor(dat[dat[,"sector"]=="SER",c("ltrade","lgdpexp","lgdpimp","ldist")],use="complete.obs"),4)

# Figure 1
dat<-cbind(dat,"lgdpboth"=log(dat[,"lgdpexp"]*dat[,"lgdpimp"]))
dat2<-dat[is.na(dat[,"lgdpboth"])==FALSE & is.na(dat[,"ltrade"])==FALSE & is.na(dat[,"ldist"])==FALSE,]

dat2[dat2[,"sector"]=="SER",] %>% 
  ggvis(~lgdpboth,~ltrade) %>% 
  layer_points() %>% layer_model_predictions(model="lm",stroke:="red")

# Figure 2
dat2[dat2[,"sector"]=="SER",] %>% 
  ggvis(~ldist,~ltrade) %>% 
  layer_points() %>% layer_model_predictions(model="lm",stroke:="red")

# Table 2
lm.t2<-lm(ltrade ~ lgdpexp + lgdpimp + ldist + contig + comlang_off + colony + comcol,
          data=dat,subset=(dat[,"sector"]=="SER"))
summary(lm.t2)

# Table 5
lm.t5<-lm(ltrade ~ etcr_exp + etcr_imp + lgdpexp + lgdpimp + ldist + contig + 
           comlang_off + colony + comcol,
          data=dat,subset=(dat[,"sector"]=="SER"))
summary(lm.t5)


# Table 6
for (i in levels(factor(dat[,"exp"]))){
  dat<-cbind(dat,i=as.numeric(dat[,"exp"]==i))
}
names(dat)[31:248]<-paste("exp",levels(factor(dat[,"exp"])),sep=".")

for (i in levels(factor(dat[,"imp"]))){
  dat<-cbind(dat,i=as.numeric(dat[,"imp"]==i))
}
names(dat)[249:466]<-paste("imp",levels(factor(dat[,"imp"])),sep=".")

t6.y<-as.numeric(dat[dat[,"sector"]=="SER",26])
t6.x<-as.matrix(dat[dat[,"sector"]=="SER",c(27,6,7,9,10,32:248,250:466)])

lm.t6<-lm(t6.y ~ t6.x)
summary(lm.t6)$coef[1:9,]
summary(lm.t6)

# Table 7
t7.y<-as.numeric(dat[dat[,"sector"]=="SER",26])
t7.x<-as.matrix(dat[dat[,"sector"]=="SER",c(27,6,7,9,10,31:466)])

lm.t7<-lm(t7.y ~ -1 + t7.x)
summary(lm.t7)$coef[1:9,]

# Table 8
dat<-cbind(dat,"etcrboth"=dat[,"etcr_exp"]*dat[,"etcr_imp"])
t8.y<-as.numeric(dat[dat[,"sector"]=="SER",26])
t8.x<-as.matrix(dat[dat[,"sector"]=="SER",c(467,27,6,7,9,10,32:248,250:466)])

lm.t8<-lm(t8.y ~ t8.x)
summary(lm.t8)$coef[1:9,]
summary(lm.t8)

# Table 9
t9.y<-as.numeric(dat[dat[,"sector"]=="SER",26])
t9.x<-as.matrix(dat[dat[,"sector"]=="SER",c(27,32:248,250:466)])

lm.t9<-lm(t9.y ~ t9.x)
summary(lm.t9)$coef[1:9,]
summary(lm.t9)

# Table 10
# Calculate means of ldistance for exporters
levelexp<-levels(factor(dat[,"exp"]))  # List of exporters
meansexp<-data.frame() # Create frame for each sector and exporter
for (i in levels(factor(dat[,"sector"]))){
  meansexp<-rbind(meansexp,data.frame("sector"=rep(i,length(levelexp)),
                                "exp"=levelexp))
}
meansexp<-cbind(meansexp,"l.dist"=NA) # Add column for values

for (i in 1:nrow(meansexp)){ # Calculate mean values
  meansexp[i,3]<-mean(dat[dat[,"sector"]==meansexp[i,"sector"] & dat[,"exp"]==meansexp[i,"exp"],"ldist"],na.rm=TRUE)
}

# Calculate means of ldistance for importers
levelimp<-levels(factor(dat[,"imp"]))  # List of exporters
meansimp<-data.frame() # Create frame for each sector and exporter
for (i in levels(factor(dat[,"sector"]))){
  meansimp<-rbind(meansimp,data.frame("sector"=rep(i,length(levelimp)),
                                "imp"=levelimp))
}
meansimp<-cbind(meansimp,"l.dist"=NA) # Add column for values

for (i in 1:nrow(meansimp)){ # Calculate mean values
  meansimp[i,3]<-mean(dat[dat[,"sector"]==meansimp[i,"sector"] & dat[,"imp"]==meansimp[i,"imp"],"ldist"],na.rm=TRUE)
}

temp1<-c()
for (i in 1:nrow(dat)){
  temp1<-append(temp1,meansexp[meansexp[,"sector"]==dat[i,"sector"] & meansexp[,"exp"]==dat[i,"exp"],"l.dist"])
}

temp2<-c()
for (i in 1:nrow(dat)){
  temp2<-append(temp2,meansimp[meansimp[,"sector"]==dat[i,"sector"] & meansimp[,"imp"]==dat[i,"imp"],"l.dist"])
}

temp3<-c()
for(i in 1:nrow(dat)){
  temp3<-append(temp3,sum(dat[dat[,"sector"]==dat[i,"sector"],"ldist"],na.rm=TRUE))
}

dat<-cbind(dat,"ldist.star"=dat[,"ldist"]-temp1-temp2+(1/(218^2))*temp3)

lm.t10<-lm(ltrade ~ ldist.star + lgdpexp + lgdpimp,
           data=dat[dat[,"sector"]=="SER",])
summary(lm.t10)

# Table 11a
library(AER)

dat<-cbind(dat,"llatexp"=log(abs(dat[,"lat_exp"])))
dat<-cbind(dat,"llatimp"=log(abs(dat[,"lat_imp"])))

iv.t11a<-ivreg(etcr_exp ~ lgdpexp + lgdpimp + ldist +
                 contig + comlang_off + colony + llatexp + llatimp,
               data=dat,
               subset=is.na(dat[,"etcr_imp"])==FALSE & is.na(dat[,"ltrade"])==FALSE)
summary(iv.t11a)

# Table 11b
iv.t11b<-ivreg(etcr_imp ~ lgdpexp + lgdpimp + ldist +
                 contig + comlang_off + colony + llatexp + llatimp,
               data=dat,
               subset=is.na(dat[,"etcr_exp"])==FALSE & is.na(dat[,"ltrade"])==FALSE)
summary(iv.t11b)

# Table 11c
iv.t11c<-ivreg(ltrade ~ etcr_exp + etcr_imp  + lgdpexp + lgdpimp + ldist +
                 contig + comlang_off + colony |
                 llatexp + llatimp + lgdpexp + lgdpimp + ldist +
                 contig + comlang_off + colony,
               data=dat)
summary(iv.t11c)

# Table 12
t12.y<-as.numeric(dat[dat[,"sector"]=="SER","trade"])
t12.x<-as.matrix(dat[dat[,"sector"]=="SER",c(27,6,7,9,10,32:248,250:466)])

p.t12<-glm(t12.y ~ t12.x, family="quasipoisson")
summary(p.t12)$coef[1:9,]
summary(p.t12)

# Table 13
library(sampleSelection) #install.packages('sampleSelection')

exporters2<-as.numeric(as.matrix(read.csv("exporters.csv")))

t13.y<-as.numeric(dat[dat[,"sector"]=="SER","ltrade"])
t13.ys<-as.numeric(is.na(as.numeric(dat[dat[,"sector"]=="SER","ltrade"]))==FALSE)

dat3<-cbind(dat,"entcostboth"=dat[,"ent_cost_imp"]*dat[,"ent_cost_exp"])
#t13.xs<-as.matrix(dat3[dat3[,"sector"]=="SER",c(471,27,6,7,9,10,32:248,250:466)])
t13.xs<-as.matrix(dat3[dat3[,"sector"]=="SER",c(27,6,7,9,10,471,(30+exporters2),(248+exporters2))])
t13.x<-as.matrix(dat3[dat3[,"sector"]=="SER",c(27,6,7,9,10,(30+exporters2),(248+exporters2))])

lm.t13o<-lm(t13.y ~ t13.x)
nonsingo<-as.vector(is.na(coef(lm.t13o))==FALSE)[-1]

lm.t13s<-lm(t13.ys ~ t13.xs)  # Trying to omit singularities
nonsings<-as.vector(is.na(coef(lm.t13s))==FALSE)[-1]

summary(probit(t13.ys ~ t13.xs))

# Couldn't fully reproduce the Heckman estimator
Advertisements

5 comments

  1. thanks for your reply and offer. I am somewhat confused, as the data from the paper looks different than what is used in the book (also after running the do files). Do I need to get the other variables from CEPII or or is there a dataset that comes with the book or have I missed something?
    Do you think you could share the data you use with me? In case, my address is name.familyname at gmail com, with no capital letters. Thanks!

  2. Hey there, thanks for the great website! I would like to to the coding in R, yet I am not quite sure though how to reach the correct dataset. Could you provide some guidance?

    Thanks a lot!
    Nicolas

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s