# This script runs the analyses underlying Section 4 in the paper

# Set working directory
# NOTE(review): hard-coded, machine-specific root. Every relative path used by
# this script (code/, temp/, output/) is resolved against it - adjust per machine.
setwd("D:/");

# Loading functions and trade data
# The sourced file is expected to define the helpers called further down
# (spreadBias, liqQuintileFunction, venuerank), and the loaded image is expected
# to provide the trade table `tt` - neither is defined in this script itself.
source("code/effSpread_functions.R");
load("temp/pseudotrades");

# Required packages
# library() is used rather than require(): require() merely warns and returns
# FALSE when a package is missing, which lets the script run on and fail later
# with an unrelated error. library() stops right here instead.
library(data.table);   # The script uses data.table, which is much faster than data.frames.
library(lfe);          # Used for regressions with fixed effects and clustered standard errors
library(stargazer);    # Used for regression result reporting

################################
# SECTION 4.1: HFT PERFORMANCE #
################################

# The HFT data set is matched to Tick History trades and quotes and then processed as above.
# To illustrate the HFT regressions of Table 2, I add fictional HFT flags to the data set used above.

# Randomly generating the HFT flags of the proprietary HFT data set
# Every trade independently receives a taker flag and a maker flag, each drawn
# with equal probability from "H" and "N" (presumably HFT / non-HFT).
# NOTE(review): no seed is set in this script, so the flags - and everything
# estimated from them - differ between runs unless the RNG is seeded elsewhere.
tt$Taker = sample(c("H", "N"), replace = TRUE, size = nrow(tt));
tt$Maker = sample(c("H", "N"), replace = TRUE, size = nrow(tt));
# Printed explicitly so the cross-tabulation is also shown when the script is
# run through source(), where bare top-level expressions are not auto-printed.
print(table(tt$Taker, tt$Maker));

# Regression analysis as in Carrion 2013
# Trade-size dummies: Large is volume >= 1000, Medium is 500 <= volume < 1000;
# trades below 500 form the omitted base category.
tt$Large  = tt$Volume >= 1000;
tt$Medium = tt$Volume < 1000 & tt$Volume >= 500;
# Buy flags trades with D equal to 1 (D is presumably the trade direction
# indicator - confirm against the data preparation script).
tt$Buy    = tt$D == 1;

# All six regressions share one specification and differ only in the outcome
# column (espr_mid, espr_wmd, espr_mic - presumably the effective spread against
# the midpoint, weighted midpoint and micro-price) and in whose flag defines HFT.
# felm() reads the formula in four parts separated by "|":
#   outcome ~ covariates | fixed effects | instruments | cluster variables
# so each model has stock and date fixed effects, no instruments (0) and
# standard errors clustered by stock and date. psdef = TRUE is passed on to lfe
# (see the felm documentation on multiway clustering).
# The outcome column is overwritten before each fit so that all taker models
# (and all maker models) carry the same dependent-variable name.

# Active side regressions
tt$takerSpr    = tt$espr_mid;
tt$HFT         = tt$Taker == "H";
regr_mid_taker = felm(takerSpr ~ HFT + HFT:Medium + HFT:Large + HFT:Buy + Medium + Large + Buy |
                        Stock + Date | 0 | Stock + Date,
                      data = tt, psdef = TRUE);
tt$takerSpr    = tt$espr_wmd;
regr_wmd_taker = felm(takerSpr ~ HFT + HFT:Medium + HFT:Large + HFT:Buy + Medium + Large + Buy |
                        Stock + Date | 0 | Stock + Date,
                      data = tt, psdef = TRUE);
tt$takerSpr    = tt$espr_mic;
regr_mic_taker = felm(takerSpr ~ HFT + HFT:Medium + HFT:Large + HFT:Buy + Medium + Large + Buy |
                        Stock + Date | 0 | Stock + Date,
                      data = tt, psdef = TRUE);

# Passive side regressions
# HFT is redefined from the maker flag here; the taker models above are already
# fitted, so they are unaffected.
tt$makerSpr    = tt$espr_mid;
tt$HFT         = tt$Maker == "H";
regr_mid_maker = felm(makerSpr ~ HFT + HFT:Medium + HFT:Large + HFT:Buy + Medium + Large + Buy |
                        Stock + Date | 0 | Stock + Date,
                      data = tt, psdef = TRUE);
tt$makerSpr    = tt$espr_wmd;
regr_wmd_maker = felm(makerSpr ~ HFT + HFT:Medium + HFT:Large + HFT:Buy + Medium + Large + Buy |
                        Stock + Date | 0 | Stock + Date,
                      data = tt, psdef = TRUE);
tt$makerSpr    = tt$espr_mic;
regr_mic_maker = felm(makerSpr ~ HFT + HFT:Medium + HFT:Large + HFT:Buy + Medium + Large + Buy |
                        Stock + Date | 0 | Stock + Date,
                      data = tt, psdef = TRUE);

# Regression results as in Table 2
# Columns 1-3 are the taker (active side) models, columns 4-6 the maker
# (passive side) models, each in the order mid / wmd / mic.
# NOTE(review): the comment above refers to Table 2 but the file is written as
# output/Table4.txt - confirm which table number is intended. Path left unchanged.
stargazer(regr_mid_taker, regr_wmd_taker, regr_mic_taker, regr_mid_maker, regr_wmd_maker, regr_mic_maker,
  align = TRUE,
  # Labels are applied positionally: main effects first, then interactions.
  # This presumably matches the coefficient order of the fitted models - verify
  # against the printed table if the formula is changed.
  covariate.labels = c("$HFT$", "$MEDIUM$", "$LARGE$", "$BUY$", "$HFT \\times MEDIUM$", "$HFT \\times LARGE$", "$HFT \\times BUY$"),
  omit.stat = c("ll", "aic", "ser", "adj.rsq"),
  no.space = TRUE,
  # Three significance levels are declared, but the first one (cutoff 1) uses a
  # blank symbol, so only the 0.10 ("*") and 0.05 ("**") levels are visible.
  star.cutoffs = c(1, 0.10, 0.05),
  star.char = c(" ", "*", "**"),
  omit.table.layout = "n",
  header = FALSE,
  digits = 2,
  type = "text",
  out = "output/Table4.txt"
)

# Clean-up
# Drop the helper columns added to tt for this section and the fitted models.
tt[,c("makerSpr", "takerSpr", "HFT", "Large", "Medium", "Buy", "Maker", "Taker") := NULL];
rm(regr_mic_maker, regr_mic_taker, regr_mid_maker, regr_mid_taker, regr_wmd_maker, regr_wmd_taker);

#####################################
# SECTION 4.2: LIQUIDITY PORTFOLIOS #
#####################################

# Stock-date liquidity
# spreadBias() (defined outside this script) is called once per stock-date
# group with the row indices of tt belonging to that group. aggregate() stores
# its multi-value result as a single matrix column, which the cbind() below
# spreads out into ordinary columns next to the two grouping keys.
liqStockDate = aggregate(seq_len(nrow(tt)), by = list(tt$Stock, tt$Date), spreadBias);
liqStockDate = cbind(as.data.frame(liqStockDate[,1]), liqStockDate[,2], liqStockDate[,3]);
dimnames(liqStockDate)[[2]] = c("Stock","Date","midSpr","wmdSpr","micSpr",
                                "wmd_nomBias","wmd_bias","wmd_biasL","wmd_biasU",
                                "mic_nomBias","mic_bias","mic_biasL","mic_biasU",
                                "qSpr","medianQSpr","averagePrice","volTrades","volDollar");

# One row per estimator, one column per value from -4 to 4 (presumably the
# difference in liquidity quintile relative to the midpoint estimator - confirm
# against liqQuintileFunction).
liqQuintilesDiff_output = matrix(nrow = 2, ncol = 9, dimnames = list(c("Weighted midpoint", "Micro-price"), -4:4));

# Weighted midpoint
# liqQuintileFunction() is applied per date; its per-date results are summed
# over dates and normalised so that each output row sums to one.
liqQuintilesDiff = aggregate(seq_len(nrow(liqStockDate)), by = list(liqStockDate$Date), liqQuintileFunction, estimator = "wmdSpr");
liqQuintilesDiff_output[1,] = colSums(liqQuintilesDiff$x) / sum(liqQuintilesDiff$x);

# Micro-price
liqQuintilesDiff = aggregate(seq_len(nrow(liqStockDate)), by = list(liqStockDate$Date), liqQuintileFunction, estimator = "micSpr");
liqQuintilesDiff_output[2,] = colSums(liqQuintilesDiff$x) / sum(liqQuintilesDiff$x);

# Printed explicitly: the object is removed right below, and a bare top-level
# expression is not auto-printed when the script is run through source().
print(round(liqQuintilesDiff_output,3));

# Clean-up
rm(liqStockDate, liqQuintilesDiff, liqQuintilesDiff_output);

###############################################################
# TABLE 3: VENUE RANKING DIFFERENCES ACROSS EFFSPR ESTIMATORS #
###############################################################

# Stock-venue-date liquidity: spreadBias() (defined outside this script) is
# called once per stock-venue-date group with the row indices of tt in that
# group; the matrix column returned by aggregate() is spread out by cbind().
liqStockVenueDate = aggregate(seq_len(nrow(tt)), by = list(tt$Stock, tt$Venue, tt$Date), spreadBias);
liqStockVenueDate = cbind(as.data.frame(liqStockVenueDate[,1]), liqStockVenueDate[,2], liqStockVenueDate[,3], liqStockVenueDate[,4]);
dimnames(liqStockVenueDate)[[2]] = c("Stock","Venue","Date","midSpr","wmdSpr","micSpr",
                                     "wmd_nomBias","wmd_bias","wmd_biasL","wmd_biasU",
                                     "mic_nomBias","mic_bias","mic_biasL","mic_biasU",
                                     "qSpr","medianQSpr","averagePrice","volTrades","volDollar");

# This application follows Holden & Jacobsen 2014, Table VII
# venuerank() (defined outside this script) is called once per distinct
# stock-date pair. NOTE(review): apply() coerces the data frame to a character
# matrix first, so venuerank receives Stock and Date as strings.
rankings = apply(unique(liqStockVenueDate[,c("Stock", "Date")]), 1, venuerank);
rankings = do.call(rbind, rankings);

# Venues with at least one stock-venue-date observation, and the number of
# ranking rows available for each of them (the denominator of the shares below).
venueObs = table(liqStockVenueDate$Venue);
eligibleVenues = names(venueObs[venueObs > 0]);
rankedObs = as.vector(table(rankings$venue)[eligibleVenues]);

# For each estimator: share of a venue's rankings falling on each rank
# difference relative to the midpoint ranking (rows = rank difference,
# columns = venue after the transpose). drop = FALSE keeps the table
# two-dimensional when only one venue or one rank difference occurs; without
# it the result would lose its dimnames and end up transposed.
rankDiff = list(wmdSpr = t(as.matrix(table(rankings$venue, rankings$wmdRank - rankings$midRank)[eligibleVenues, , drop = FALSE]) / rankedObs),
                micSpr = t(as.matrix(table(rankings$venue, rankings$micRank - rankings$midRank)[eligibleVenues, , drop = FALSE]) / rankedObs));

# Output
# Shares are reported in percent; the last column is the mean across the
# venues listed in exchSort. The venue codes are hard-coded and must all be
# present among the eligible venues.
# Table 3(a)
exchSort = c("a", "b", "c", "d");
capture.output(round(100 * cbind(rankDiff$wmdSpr[, exchSort, drop = FALSE], rowMeans(rankDiff$wmdSpr[, exchSort, drop = FALSE])),1),
               file = "output/Table3a.txt");

# Table 3(b)
capture.output(round(100 * cbind(rankDiff$micSpr[, exchSort, drop = FALSE], rowMeans(rankDiff$micSpr[, exchSort, drop = FALSE])),1),
               file = "output/Table3b.txt");

# Clean-up
rm(liqStockVenueDate, exchSort, rankings, rankDiff, eligibleVenues, venueObs, rankedObs);
