# This script runs the analyses underlying Section 5 in the paper

# Set working directory.
# All relative paths used below ("code/", "temp/", "output/") are resolved against it.
setwd("D:/")

# Loading functions and trade and quote data
# NOTE(review): the two load() calls presumably provide the objects `tt` (trades)
# and `qq` (quotes) that the rest of the script operates on — confirm.
source("code/effSpread_functions.R")
load("temp/pseudotrades")
load("temp/pseudoquotes")

# Required packages
# library() is used rather than require(): a missing package then stops the
# script here instead of returning FALSE and failing at first use further down.
library(data.table)   # The script uses data.table, which is much faster than data.frames.
library(reldist)      # Used for the weighted quantile function, wtd.quantile

#################################################
# SECTION 5.2: LIQUIDITY VARIANCE DECOMPOSITION #
#################################################

# One-way quoted spreads (OQS): ask price minus a reference price (midpoint,
# weighted midpoint, or micro-price), scaled by 20000 and divided by the midpoint
qq[, `:=`(oqs_mid = 20000 * (Ask.Price - mid) / mid,
          oqs_wmd = 20000 * (Ask.Price - wmd) / mid,
          oqs_mic = 20000 * (Ask.Price - mic) / mid),
   by = Stock]

# Variances and covariances, per stock:
#  - variance of each OQS measure,
#  - variance of each estimator's deviation from the midpoint OQS (the "bias"),
#  - twice the covariance between the midpoint OQS and that deviation,
#  - the average midpoint (price).
oqsVariances <- qq[, .(
  oqsMidVar     = var(oqs_mid),
  oqsWmdVar     = var(oqs_wmd),
  oqsMicVar     = var(oqs_mic),
  oqsWmdBiasVar = var(oqs_wmd - oqs_mid),
  oqsMicBiasVar = var(oqs_mic - oqs_mid),
  oqsWmdBiasCov = 2 * cov(oqs_mid, oqs_wmd - oqs_mid),
  oqsMicBiasCov = 2 * cov(oqs_mid, oqs_mic - oqs_mid),
  price         = mean(mid)
), by = Stock]

# Adding trading volume (volDollar), joined on the first column of each table
load(file = "temp/liqStock")
oqsVariances <- merge(
  as.data.frame(oqsVariances),
  liqStock[, c("Stock", "volDollar")],
  by = 1
)

# Variance decomposition
# Liquidity variance decomposition (reported in text)
# Each row holds the volDollar-weighted cross-stock averages for one estimator:
# estimator variance, midpoint variance, bias variance, bias covariance.
oqsVarDecomp <- with(oqsVariances, rbind(
  c(weighted.mean(oqsWmdVar,     w = volDollar),
    weighted.mean(oqsMidVar,     w = volDollar),
    weighted.mean(oqsWmdBiasVar, w = volDollar),
    weighted.mean(oqsWmdBiasCov, w = volDollar)),
  c(weighted.mean(oqsMicVar,     w = volDollar),
    weighted.mean(oqsMidVar,     w = volDollar),
    weighted.mean(oqsMicBiasVar, w = volDollar),
    weighted.mean(oqsMicBiasCov, w = volDollar))
))

# Transpose so that estimators are columns, and append the share of the
# estimator variance not captured by the midpoint variance (1 - midpoint / estimator)
oqsVarDecomp <- rbind(
  t(oqsVarDecomp),
  1 - oqsVarDecomp[, 2] / oqsVarDecomp[, 1]
)
dimnames(oqsVarDecomp) <- list(
  c("Estimator OQS variance", "Midpoint OQS variance", "Nominal bias variance",
    "Nominal bias covariance", "Overlooked variance"),
  c("Weighted midpoint", "Micro-price")
)
round(oqsVarDecomp, 4)

###########################################
# TABLE 4: SIMULATION OF LIQUIDITY TIMING #
###########################################

# Obtaining the threshold for submitting market orders
# Time-weighted spreads (ask-side)
# dur is the gap to the next quote within the same stock-day; the last quote of
# the day is held until 16*3600 - 5*60.
# NOTE(review): this corresponds to 15:55 if Time is in seconds after midnight — confirm.
qq[, dur := c(diff(Time), (16 * 3600 - 5 * 60) - tail(Time, 1)), by = .(Stock, Date)]
threshold <- 0.25 # quantile used for triggering a market order
twqs <- qq[, .(
  qs_mid_mean = weighted.mean(Ask.Price - mid, w = dur),
  limMid = round(wtd.quantile(Ask.Price - mid, q = threshold, na.rm = TRUE, weight = dur), 5),
  limWmd = round(wtd.quantile(Ask.Price - wmd, q = threshold, na.rm = TRUE, weight = dur), 5),
  limMic = round(wtd.quantile(Ask.Price - mic, q = threshold, na.rm = TRUE, weight = dur), 5)
), by = .(Stock, Date)]

# Per-stock average ask price and midpoint
vwap <- qq[, .(AP = mean(Ask.Price, na.rm = TRUE), MP = mean(mid, na.rm = TRUE)), by = Stock]

# Setting the thresholds for the next-day trading strategies
# Thresholds estimated on one date are applied on the following date (Date + 1),
# so the thresholds of the last date are dropped here.
# NOTE(review): Date + 1 presumes Date is a consecutive day index — confirm.
thresholds <- twqs[Date != max(Date)]
thresholds[, tradeDate := Date + 1]

# Assigning quotes and trades to 10-minute periods
# Breakpoints run from 9.5*3600 + 5*60 to 16*3600 - 5*60 in steps of 600
TenMinIntervals <- seq(from = 9.5 * 3600 + 5 * 60, to = 16 * 3600 - 5 * 60, by = 10 * 60)
qq[, int10min := findInterval(Time, TenMinIntervals)]
tt[, int10min := findInterval(Time, TenMinIntervals)]

# Passive trading strategy
# Determining the execution time for the passive order strategy
# Take the bid price (BP) and bid size (BV) of the first quote in each 10-minute
# period and merge them into the trade file. The merge is an inner join, so
# trades in stock-date-periods without a quote are dropped.
BestBid <- qq[
  ,
  .(BP = first(Bid.Price), BV = first(Bid.Size)),
  by = c("Stock", "Date", "int10min")
]
tt <- merge(tt, BestBid, by = c("Stock", "Date", "int10min"))

# Cumulative trade volume (Volume / 100) at the best bid price or lower,
# accumulated within each period; all other trades keep cumVol = NA
tt[Price <= BP, cumVol := cumsum(Volume / 100), by = c("Stock", "Date", "int10min")]

# Identify the trade that would lead to execution of the hypothetical bid order:
# the first trade in the period whose cumulative volume reaches BV + 1.
# bidT is the time elapsed since the start of the period.
# "bid" is henceforth the prefix used for variables related to the passive strategy
bidTrades <- tt[
  !is.na(cumVol) & cumVol >= (BV + 1),
  .(
    bidT   = first(Time) - TenMinIntervals[first(int10min)],
    bidV   = first(BV),
    bidP   = first(BP),
    bidMid = first(mid),
    bidWmd = first(wmd),
    bidMic = first(mic)
  ),
  by = c("Stock", "Date", "int10min")
]

# Aggressive trading strategies
# The following prefixes are henceforth used for the four strategies involving market orders
# "mid" submits a market order when ask minus midpoint is at or below its threshold
# "wmd" submits a market order when ask minus weighted midpoint is at or below its threshold
# "mic" submits a market order when ask minus micro-price is at or below its threshold
# "rdm" submits a market order at the first quote of each period
# The thresholds are attached to the quotes of their trade date (inner join, so
# quote dates without a threshold are dropped).
qq <- merge(
  qq,
  thresholds[, c("Stock", "tradeDate", "limMid", "limWmd", "limMic")],
  by.x = c("Stock", "Date"),
  by.y = c("Stock", "tradeDate")
)

# For each strategy: the first qualifying quote per stock-date-period, with the
# time elapsed since the period start (T), the ask price paid (P), and the
# midpoint, weighted midpoint and micro-price at that quote.
midTrades <- qq[
  (Ask.Price - mid) <= limMid,
  .(
    midT   = first(Time) - TenMinIntervals[first(int10min)],
    midP   = first(Ask.Price),
    midMid = first(mid),
    midWmd = first(wmd),
    midMic = first(mic)
  ),
  by = c("Stock", "Date", "int10min")
]

wmdTrades <- qq[
  (Ask.Price - wmd) <= limWmd,
  .(
    wmdT   = first(Time) - TenMinIntervals[first(int10min)],
    wmdP   = first(Ask.Price),
    wmdMid = first(mid),
    wmdWmd = first(wmd),
    wmdMic = first(mic)
  ),
  by = c("Stock", "Date", "int10min")
]

micTrades <- qq[
  (Ask.Price - mic) <= limMic,
  .(
    micT   = first(Time) - TenMinIntervals[first(int10min)],
    micP   = first(Ask.Price),
    micMid = first(mid),
    micWmd = first(wmd),
    micMic = first(mic)
  ),
  by = c("Stock", "Date", "int10min")
]

rdmTrades <- qq[
  ,
  .(
    rdmT   = first(Time) - TenMinIntervals[first(int10min)],
    rdmP   = first(Ask.Price),
    rdmMid = first(mid),
    rdmWmd = first(wmd),
    rdmMic = first(mic)
  ),
  by = c("Stock", "Date", "int10min")
]

# Merging trade data across strategies
# Full outer joins on stock-date-period, so a period is kept if any strategy
# has a row for it. Passive trades on Date 1 are excluded before merging.
allTrades <- Reduce(
  function(merged, nextStrategy) {
    merge(merged, nextStrategy, all = TRUE, by = c("Stock", "Date", "int10min"))
  },
  list(bidTrades[Date != 1], midTrades, wmdTrades, micTrades, rdmTrades)
)

# Setting execution delay for missed trades to 600 seconds
# (applied to the bid, mid, wmd and mic strategies; rdmT is left as is)
allTrades$bidT <- replace(allTrades$bidT, is.na(allTrades$bidT), 60 * 10)
allTrades$midT <- replace(allTrades$midT, is.na(allTrades$midT), 60 * 10)
allTrades$wmdT <- replace(allTrades$wmdT, is.na(allTrades$wmdT), 60 * 10)
allTrades$micT <- replace(allTrades$micT, is.na(allTrades$micT), 60 * 10)

# COMBINED STRATEGIES TRADE PROPERTIES
# The following combines the results of the passive and aggressive trading strategies
# (i.e., considering the choice between limit and market orders).
# For each aggressive strategy, the trade price (PP) and reference value (VV)
# default to those of the passive strategy. In every period where the market
# order is triggered before the limit order executes (aggressive delay < passive
# delay), they are replaced by the values holding at the time of the market order.
# NOTE(review): the VV columns are filled from the weighted-midpoint columns
# (*Wmd), not from the micro-price columns (*Mic) — confirm this is intended.

# Midpoint strategy
allTrades[, `:=`(midPP = bidP,       # Passive trade price
                 midVV = bidWmd)]    # Weighted midpoint at time of passive trade
allTrades[midT < bidT,
          `:=`(midPP = midP,         # Aggressive trade price
               midVV = midWmd)]      # Weighted midpoint at time of aggressive trade

# Weighted midpoint strategy
allTrades[, `:=`(wmdPP = bidP,       # Passive trade price
                 wmdVV = bidWmd)]    # Weighted midpoint at time of passive trade
allTrades[wmdT < bidT,
          `:=`(wmdPP = wmdP,         # Aggressive trade price
               wmdVV = wmdWmd)]      # Weighted midpoint at time of aggressive trade

# Micro-price strategy
allTrades[, `:=`(micPP = bidP,       # Passive trade price
                 micVV = bidWmd)]    # Weighted midpoint at time of passive trade
allTrades[micT < bidT,
          `:=`(micPP = micP,         # Aggressive trade price
               micVV = micWmd)]      # Weighted midpoint at time of aggressive trade

# TRADING PERFORMANCE
# Dollar volume and average trade price per stock-date; USDvolSum is used as
# the weight in the tests further down
USDvolWeights <- tt[, .(
  USDvolSum    = sum(Price * Volume, na.rm = TRUE),
  AveragePrice = mean(Price, na.rm = TRUE)
), by = .(Stock, Date)]

# Micro-price effective spread
# Per stock-date mean of 20000 * (trade price - micro-price) / midpoint.
# NOTE(review): midPP / wmdPP / micPP may hold the passive price (bidP) when the
# limit order executes first, while the *Mic and *Mid columns used here are those
# recorded at the market-order trigger — confirm this pairing is intended.
EffSpr_perf <- allTrades[, .(
  bidES = mean(20000 * (bidP  - bidMic) / bidMid, na.rm = TRUE),
  rdmES = mean(20000 * (rdmP  - rdmMic) / rdmMid, na.rm = TRUE),
  midES = mean(20000 * (midPP - midMic) / midMid, na.rm = TRUE),
  wmdES = mean(20000 * (wmdPP - wmdMic) / wmdMid, na.rm = TRUE),
  micES = mean(20000 * (micPP - micMic) / micMid, na.rm = TRUE)
), by = .(Stock, Date)]
EffSpr_perf <- merge(EffSpr_perf, USDvolWeights, by = c("Stock", "Date"), all.x = TRUE)

# Execution shortfall
# 10000 * log difference between the average trade price of each strategy and a
# common benchmark: the average rdmMid over the periods where the passive order
# executed (bidP not missing).
ExShf_perf <- allTrades[, {
  logBenchmark <- log(mean(rdmMid[!is.na(bidP)]))
  list(
    bidExShf = 10000 * (log(mean(bidP,  na.rm = TRUE)) - logBenchmark),
    rdmExShf = 10000 * (log(mean(rdmP,  na.rm = TRUE)) - logBenchmark),
    midExShf = 10000 * (log(mean(midPP, na.rm = TRUE)) - logBenchmark),
    wmdExShf = 10000 * (log(mean(wmdPP, na.rm = TRUE)) - logBenchmark),
    micExShf = 10000 * (log(mean(micPP, na.rm = TRUE)) - logBenchmark)
  )
}, by = .(Stock, Date)]

ExShf_perf <- merge(ExShf_perf, USDvolWeights, by = c("Stock", "Date"), all.x = TRUE)

# Execution delays (TT) and missed trades (M), i.e. opportunity costs
# TT averages the delay over periods with a delay below 600;
# M is the share of periods whose delay equals 600.
Time_perf <- allTrades[, .(
  bidTT = mean(bidT[bidT < 600], na.rm = TRUE),
  rdmTT = mean(rdmT[rdmT < 600], na.rm = TRUE),
  midTT = mean(midT[midT < 600], na.rm = TRUE),
  wmdTT = mean(wmdT[wmdT < 600], na.rm = TRUE),
  micTT = mean(micT[micT < 600], na.rm = TRUE),

  bidM  = mean(bidT == 600, na.rm = TRUE),
  rdmM  = mean(rdmT == 600, na.rm = TRUE),
  midM  = mean(midT == 600, na.rm = TRUE),
  wmdM  = mean(wmdT == 600, na.rm = TRUE),
  micM  = mean(micT == 600, na.rm = TRUE)
), by = .(Stock, Date)]

Time_perf <- merge(Time_perf, USDvolWeights, by = c("Stock", "Date"), all.x = TRUE)

# Tests for difference between each strategy and the midpoint strategy
# Each table stacks one weighted.means.2var() result per strategy (wmd, mic, rdm,
# bid), always compared against the midpoint strategy, weighted by USDvolSum and
# clustered by Stock and Date, then rounded to 3 decimals.
# NOTE(review): weighted.means.2var is presumably defined in
# code/effSpread_functions.R; its output layout is not visible here.
EffSpr <- round(do.call(rbind, lapply(
  c("wmdES", "micES", "rdmES", "bidES"),
  function(strategyVar) {
    weighted.means.2var(dt = EffSpr_perf, var1 = strategyVar, var2 = "midES",
                        weight.var = "USDvolSum", cluster = c("Stock", "Date"))
  }
)), 3)
rownames(EffSpr) <- c("wmd", "mic", "rdm", "bid")

ExChf <- round(do.call(rbind, lapply(
  c("wmdExShf", "micExShf", "rdmExShf", "bidExShf"),
  function(strategyVar) {
    weighted.means.2var(dt = ExShf_perf, var1 = strategyVar, var2 = "midExShf",
                        weight.var = "USDvolSum", cluster = c("Stock", "Date"))
  }
)), 3)
rownames(ExChf) <- c("wmd", "mic", "rdm", "bid")

OppCost <- round(do.call(rbind, lapply(
  c("wmdTT", "micTT", "rdmTT", "bidTT"),
  function(strategyVar) {
    weighted.means.2var(dt = Time_perf, var1 = strategyVar, var2 = "midTT",
                        weight.var = "USDvolSum", cluster = c("Stock", "Date"))
  }
)), 3)
rownames(OppCost) <- c("wmd", "mic", "rdm", "bid")

# Output to Table 4
# Rows follow the strategy order mid, wmd, mic, rdm, bid; every column below
# must be filled in that same order.
Performance <- matrix(
  nrow = 5, ncol = 10,
  dimnames = list(
    c("Midpoint OQS", "Weighted midpoint OQS", "Micro-price OQS",
      "Immediate market orders", "Limit orders only"),
    c("EffSpr", "EffSprvsMid", "t", "p",
      "ExShf",  "ExShfvsMid",  "t", "p",
      "T", "missed")
  )
)

# NOTE(review): the indexing below takes column 1 of each test table as the
# strategy's own value, column 2 as the midpoint-strategy value, and columns
# 3, 5, 6 as the difference, t and p — inferred from the target column names;
# confirm against weighted.means.2var.
Performance[, "EffSpr"] <- c(EffSpr[1, 2], EffSpr[, 1])
Performance[2:5, 2:4]   <- EffSpr[, c(3, 5:6)]
Performance[, "ExShf"]  <- c(ExChf[1, 2], ExChf[, 1])
Performance[2:5, 6:8]   <- ExChf[, c(3, 5:6)]
Performance[, "T"]      <- c(OppCost[1, 2], OppCost[, 1])

# Share of missed trades in percent (unweighted mean across stock-dates).
# The columns are selected in row order (mid, wmd, mic, rdm, bid): the assignment
# is positional and ignores names, so listing micM before wmdM would swap the
# weighted-midpoint and micro-price rows.
Performance[, "missed"] <- round(
  100 * colMeans(Time_perf[, c("midM", "wmdM", "micM", "rdmM", "bidM"), with = FALSE]),
  2
)

capture.output(round(Performance, 2), file = "output/Table4.txt")

# Percentage difference in execution shortfall, reported in paper
round(Performance[2:3, "ExShfvsMid"] / Performance[1, "ExShf"], 2)
