# This script runs the analyses underlying Section 3.1 in the paper

# Set working directory
# NOTE(review): hard-coded, machine-specific root. Every relative path used by
# this script ("code/", "temp/", "output/") is resolved against it, so it must
# be adjusted before running on another machine.
setwd("D:/")

# Loading functions and trade data
# NOTE(review): the rest of the script operates on an object named `tt` that is
# not created here explicitly -- presumably it is restored by load(); confirm.
source("code/effSpread_functions.R")
load("temp/pseudotrades")

# Required packages
# library() is used rather than require(): require() only warns and returns
# FALSE when a package is missing, letting the script run on and fail later
# with an unrelated error; library() stops immediately with a clear message.
library(data.table)   # The script uses data.table, which is much faster than data.frames.
library(multiwayvcov) # Used for clustered std errors in probit models
library(lmtest)       # Used for z test in probit models

##################################
# 2. LIQUIDITY DEMAND ELASTICITY #
##################################

# Buyer-initiated trade indicator: a copy of column D in which every -1 is
# recoded to 0; all other values of D are carried over unchanged
# (presumably D is +1 for buys and -1 for sells -- confirm against the data)
tt$buyInit <- replace(tt$D, tt$D == -1, 0)

# Fig 1a: Liquidity demand elasticity across estimators

# Summarises one bin of trades.
#   rows: index (row numbers or logical mask) into the global trade table `tt`
# Returns an unnamed numeric vector of length two:
#   [1] mean of tt$buyInit over `rows` (fraction of buyer-initiated trades)
#   [2] sum of Volume * Price over `rows`, divided by 1e6
buyInitProb <- function(rows) {
  fracBuy   <- mean(tt$buyInit[rows])
  tradedVal <- sum(tt$Volume[rows] * tt$Price[rows]) / 1e6
  c(fracBuy, tradedVal)
}

# x-axis layout shared by the figures below
bpsLimit <- 2.1 # Half-width of the x-axis interval
step     <- 0.2 # Width of each x-axis bin

# Deviation of each fundamental value estimate from the midpoint, expressed as
# a log difference scaled by 10000 (i.e. in basis points)
tt$wmdDiff <- with(tt, 10000 * (log(wmd) - log(mid))) # estimate: column wmd
tt$micDiff <- with(tt, 10000 * (log(mic) - log(mid))) # estimate: column mic

# Bin trades by their deviation from the midpoint and summarise each bin with
# buyInitProb(). Bin edges run from -bpsLimit to bpsLimit in increments of
# `step`; each bin is labelled by its centre (left edge + step / 2). The outer
# -100 / 100 edges only pad the lookup vectors so that the index returned by
# findInterval() lines up with the vector of labels.

# Weighted midpoint
subset <- abs(tt$wmdDiff) < bpsLimit
mean(subset) # Fraction of observations spanned by the x-axis interval, reported in footnote 7
wmdBuckets <- aggregate(
  x   = seq_len(nrow(tt))[subset],
  by  = list(
    c(-100, seq(-bpsLimit, bpsLimit, step) + step / 2, 100)[
      findInterval(tt$wmdDiff[subset], c(-100, seq(-bpsLimit, bpsLimit, step), 100))
    ]
  ),
  FUN = buyInitProb
)
# The aggregated column is a two-column matrix; label its rows with the rounded
# bin centres and its columns with the two statistics
dimnames(wmdBuckets[, 2]) <- list(round(wmdBuckets[, 1], 1), c("%buyInit", "volume"))

# Micro-price
subset <- abs(tt$micDiff) < bpsLimit
mean(subset)
micBuckets <- aggregate(
  x   = seq_len(nrow(tt))[subset],
  by  = list(
    c(-100, seq(-bpsLimit, bpsLimit, step) + step / 2, 100)[
      findInterval(tt$micDiff[subset], c(-100, seq(-bpsLimit, bpsLimit, step), 100))
    ]
  ),
  FUN = buyInitProb
)
dimnames(micBuckets[, 2]) <- list(round(micBuckets[, 1], 1), c("%buyInit", "volume"))

# Plot Figure 1a: fraction of buyer-initiated trades per bin, one line per
# fundamental value estimator, written to output/Fig1a.pdf
pdf("output/Fig1a.pdf")
plot(
  x    = wmdBuckets[[1]],
  y    = wmdBuckets[[2]][, 1],
  type = "l",
  ylim = c(0, 1),
  xlab = "Fundamental value deviation from the midpoint (bps)",
  ylab = "Fraction buyer-initated trades"
)
lines(x = micBuckets[[1]], y = micBuckets[[2]][, 1], col = "red")
legend("topleft", c("Weighted midpoint", "Micro-price"), fill = c("black", "red"))
dev.off()

# Fig 1b: Liquidity demand elasticity across spread levels
# Trades are binned by the weighted-midpoint deviation (bps) and, in addition,
# by spread level: round(spr, 2) falling in [0, .011) -> "1 cents",
# [.011, .021) -> "2 cents", [.021, 100) -> ">2 cents".
# Note: despite its name, micBuckets_sprLevels is built from wmdDiff here.
subset <- abs(tt$wmdDiff) < bpsLimit
mean(subset)
micBuckets_sprLevels <- aggregate(
  x   = seq_len(nrow(tt))[subset],
  by  = list(
    # Bin centre (left edge + step / 2), rounded to 4 decimals
    round(c(-100, seq(-bpsLimit, bpsLimit, step) + step / 2, 100), 4)[
      findInterval(tt$wmdDiff[subset], c(-100, seq(-bpsLimit, bpsLimit, step), 100))
    ],
    # Spread-level label
    paste(c(1, 2, ">2"), "cents")[
      findInterval(round(tt$spr[subset], 2), c(0, .011, .021, 100))
    ]
  ),
  FUN = buyInitProb
)

pdf("output/Fig1b.pdf")
# local() keeps the loop helpers out of the global environment
local({
  # Spread-level label (as stored in column 2) -> line colour
  lineCols <- c("1 cents" = "black", "2 cents" = "blue", ">2 cents" = "red")
  for (lvl in names(lineCols)) {
    inLevel <- micBuckets_sprLevels[, 2] == lvl
    xs <- micBuckets_sprLevels[inLevel, 1]      # bin centres
    ys <- micBuckets_sprLevels[inLevel, 3][, 1] # fraction buyer-initiated
    if (lvl == names(lineCols)[1]) {
      # First series sets up the axes
      plot(x = xs, y = ys, type = "l", col = lineCols[[lvl]], ylim = c(0, 1),
           xlab = "Fundamental value deviation from the midpoint (bps)",
           ylab = "Fraction buyer-initated trades")
    } else {
      # Remaining series are overlaid. No ylim here: it is not a graphical
      # parameter for points()/lines() and only triggers a warning.
      lines(x = xs, y = ys, col = lineCols[[lvl]])
    }
  }
})
legend("topleft", c("1 cent", "2 cents", ">2 cents"), fill = c("black", "blue", "red"))
dev.off()

# Fig 1c: Liquidity demand elasticity across spread levels (fundamental value deviation measured in USD cents)
tt$wmdDiffUSD <- 100 * (tt$wmd - tt$mid)
# NOTE(review): the sample filter below is applied to the bps deviation
# (wmdDiff) while the bins are formed on the USD-cent deviation (wmdDiffUSD).
# This may be a copy-paste leftover from Fig 1b; it is kept as is because
# changing it would alter the figure and the printed fraction -- confirm.
subset <- abs(tt$wmdDiff) < bpsLimit
mean(subset)
micBuckets_sprLevels <- aggregate(
  x   = seq_len(nrow(tt))[subset],
  by  = list(
    # Bin centre (left edge + step / 2), rounded to 4 decimals. Values outside
    # (-bpsLimit, bpsLimit) fall into the outer -100 / 100 bins, which lie
    # outside the plotted x-range.
    round(c(-100, seq(-bpsLimit, bpsLimit, step) + step / 2, 100), 4)[
      findInterval(tt$wmdDiffUSD[subset], c(-100, seq(-bpsLimit, bpsLimit, step), 100))
    ],
    # Spread-level label: "1 cents", "2 cents" or ">2 cents"
    paste(c(1, 2, ">2"), "cents")[
      findInterval(round(tt$spr[subset], 2), c(0, .011, .021, 100))
    ]
  ),
  FUN = buyInitProb
)

pdf("output/Fig1c.pdf")
# local() keeps the loop helpers out of the global environment
local({
  # Spread-level label (as stored in column 2) -> line colour
  lineCols <- c("1 cents" = "black", "2 cents" = "blue", ">2 cents" = "red")
  for (lvl in names(lineCols)) {
    inLevel <- micBuckets_sprLevels[, 2] == lvl
    xs <- micBuckets_sprLevels[inLevel, 1]      # bin centres
    ys <- micBuckets_sprLevels[inLevel, 3][, 1] # fraction buyer-initiated
    if (lvl == names(lineCols)[1]) {
      # First series sets up the axes
      plot(x = xs, y = ys, type = "l", col = lineCols[[lvl]], ylim = c(0, 1),
           xlim = c(-bpsLimit, bpsLimit),
           xlab = "Fundamental value deviation from the midpoint (USD cents)",
           ylab = "Fraction buyer-initated trades")
    } else {
      # Remaining series are overlaid. No ylim here: it is not a graphical
      # parameter for points()/lines() and only triggers a warning.
      lines(x = xs, y = ys, col = lineCols[[lvl]])
    }
  }
})
# The legend describes the three spread-level lines actually drawn (it
# previously carried the estimator labels and two colours from Fig 1a)
legend("topleft", c("1 cent", "2 cents", ">2 cents"), fill = c("black", "blue", "red"))
dev.off()

# Probit models (Eq.7): buyInit regressed on one deviation measure at a time.
# The coeftest() calls are kept at top level so their results are auto-printed
# when the script is run.

# Weighted midpoint
regr <- glm(
  formula = buyInit ~ wmdDiff,
  family  = binomial(link = "probit"),
  data    = tt,
  maxit   = 100
)
# Covariance matrix with standard errors clustered by stock, date, and trading venue
vcov_year_formula <- cluster.vcov(regr, ~ Stock + Date + Venue)
coeftest(regr, vcov_year_formula)

# Micro-price
regr <- glm(
  formula = buyInit ~ micDiff,
  family  = binomial(link = "probit"),
  data    = tt,
  maxit   = 100
)
# Covariance matrix with standard errors clustered by stock, date, and trading venue
vcov_year_formula <- cluster.vcov(regr, ~ Stock + Date + Venue)
coeftest(regr, vcov_year_formula)

# Clean-up
# Drop the helper columns this section added to the trade table
tt[, c("buyInit", "wmdDiff", "micDiff", "wmdDiffUSD") := NULL]
# Remove the section's temporary objects from the workspace
rm(list = c(
  "bpsLimit", "step", "regr",
  "wmdBuckets", "micBuckets", "micBuckets_sprLevels",
  "vcov_year_formula", "subset"
))
