Overall Statistics
Total Trades
82
Average Win
0.69%
Average Loss
-0.52%
Compounding Annual Return
15.955%
Drawdown
2.300%
Expectancy
0.470
Net Profit
10.395%
Sharpe Ratio
1.858
Probabilistic Sharpe Ratio
84.880%
Loss Rate
37%
Win Rate
63%
Profit-Loss Ratio
1.32
Alpha
0.109
Beta
-0.021
Annual Standard Deviation
0.059
Annual Variance
0.003
Information Ratio
0.791
Tracking Error
0.187
Treynor Ratio
-5.185
Total Fees
$1430.14
Estimated Strategy Capacity
$230000.00
Lowest Capacity Asset
HYMC XEZF85YZ9C2T
"""
Multi-Entry ML Liquidation Strategy using optimal take profit as a target
Last changes:
v0.4: Stable version with P&L target
v0.3: Adjusted extension formula
v0.2: Extension for take profit instead of profit and loss
v0.1: First working version

@version: 0.4
@creation date: 8/9/2022
"""

from AlgorithmImports import *

import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from io import StringIO
from sklearn.neural_network import MLPRegressor

import indicators as idx

# Google Drive CSV listing At-The-Market (ATM) offerings, one row per ticker.
TICKERS_CSV = "https://drive.google.com/uc?export=download&id=1did0Sk3F9Sn5Il_nUX252jOB_n0UFqat"
DATE_COL = "Agreement Start Date"  # CSV column parsed as the ATM agreement start date
SYMBOL_COL = "ticker"  # CSV column holding the ticker symbol
# How minute bars are collapsed into a single OHLCV bar (used by agg_bars).
AGG_OPS = {"open": "first", "close": "last",
           "high": "max", "low": "min", "volume": "sum"}
SECS_PER_DAY = 24 * 60 * 60  # for converting epoch-second deltas into days


class LiquidationBasicML(QCAlgorithm):
    """Intraday short strategy on stocks with active At-The-Market (ATM) offerings.

    Each morning the algorithm screens ATM-offering tickers that gapped up by
    at least ``min_gap``, builds intraday + fundamental features for them, and
    asks an MLPRegressor to predict the achievable short return (entry to low
    of day). If the prediction exceeds ``target_gain`` it enters a short with a
    paired take-profit limit; all positions are closed intraday.

    Daily schedule (set in Initialize): train_model 00:00, update_gaplist
    09:31, trade 09:45, exit_trades 10:30, liquidate 15:55.
    """

    def Initialize(self):
        """Configure backtest window, parameters, universe and daily schedules."""
        self.SetStartDate(2021, 10, 1)
        self.SetEndDate(2022, 6, 1)
        self.benchmark = self.GetParameter("benchmark", "SPY")
        self.capital = self.GetParameter("capital", 80000)
        self.min_gap = self.GetParameter("min_gap", 0.15)  # minimum open gap vs prior close to shortlist a symbol
        self.target_gain = self.GetParameter("target_gain", 0.01)  # minimum return to label the p&l as a buy

        self.SetCash(self.capital)
        self.atm = self.get_atm()  # (symbol, time)-indexed ATM data; also subscribes every ticker
        self.atm_start = self.atm.index.get_level_values("time").min()
        self.AddEquity(self.benchmark, Resolution.Minute)
        self.SetBenchmark(self.benchmark)

        # Sentinel dates far in the past force a full rebuild on the first run.
        self.last_update = datetime(2000, 1, 1)
        self.last_training = datetime(2000, 1, 1)
        self.gaplist, self.features, self.targets = None, None, None  # incrementally accumulated state
        self.model = None  # MLPRegressor, created lazily in train_model

        self.pos_size = 0.1  # TODO: How to define an edge with regressors?
        self.UniverseSettings.DataNormalizationMode = DataNormalizationMode.Raw

        every_day = self.DateRules.EveryDay(self.benchmark)
        at = self.TimeRules.At
        self.Train(every_day, at(0, 0), self.train_model)
        self.Schedule.On(every_day, at(9, 31), self.update_gaplist)
        self.Schedule.On(every_day, at(9, 45), self.trade)  # Can trade up or down
        self.Schedule.On(every_day, at(10, 30), self.exit_trades)
        self.Schedule.On(every_day, at(15, 55), self.liquidate)

    def train_model(self):
        """(Re)fit the regressor on the accumulated feature/target history.

        Skips when no features exist yet, or when the last fit is at most
        30 days old while a position size is active.
        """
        days_since_training = (self.Time - self.last_training).days
        if self.features is None \
                or (days_since_training <= 30 and self.pos_size > 0): return

        if self.model is None:
            # warm_start keeps the learned weights across periodic refits.
            self.model = MLPRegressor(hidden_layer_sizes=(32, 32),
                                      warm_start=True, 
                                      early_stopping=True)
        idx = self.features.index.intersection(self.targets.index)  # Removing features without matching targets (NOTE: locally shadows the `indicators` module alias `idx`)
        self.features = self.features.loc[idx]
        self.targets = self.targets.loc[idx]
        self.model.fit(self.features, self.targets)
        self.last_training = self.Time
        self.print(f"Training Points: {len(self.features)} Edge: {self.pos_size:.1%}")
        self.Plot("ML", "Edge", self.pos_size)

    def trade(self):
        """Predict today's short return per symbol and enter qualifying shorts.

        Entry is a limit at the current price paired with a take-profit limit
        at price * (1 - predicted_return); both legs use the same |quantity|.
        """
        self.update_features()
        x_pred = self.features.query("time == @self.Time.date()")  # TODO: Review indexing
        if self.model is None or len(x_pred) == 0: return

        x_pred = x_pred.groupby("symbol").head(1)  # Get last calculated feature
        x_pred.index = x_pred.index.droplevel("time")
        y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
        for sym, pred in y_pred.items():
            price = self.Portfolio[sym].Price
            qty = self.CalculateOrderQuantity(sym, -self.pos_size)  # negative target -> short quantity
            if pred >= self.target_gain:
                take_profit = price * (1 - pred)  # cover below entry by the predicted move
                self.LimitOrder(sym, qty, price)
                self.LimitOrder(sym, -qty, take_profit)
                self.print(f"Trading {pred:.1%} of {sym}")

    def exit_trades(self):
        """Cancel resting orders, close open positions, then label today's targets."""
        self.Transactions.CancelOpenOrders()
        for sym in self.ActiveSecurities.Keys:
            qty = self.Portfolio[sym].Quantity
            # Exit via a limit at the current price (marketable) rather than a market order.
            if qty != 0: self.LimitOrder(sym, -qty, self.Portfolio[sym].Price)
        self.update_targets()

    def liquidate(self):
        """End-of-day safety net: cancel all open orders and flatten the book."""
        self.Transactions.CancelOpenOrders()
        self.Liquidate()

    def update_features(self):
        """Build and store feature rows for every new (symbol, day) gap entry.

        Features combine pre-market and opening-range price action, VWAP
        divergence, ATM offering metadata and a fundamental cash-runway ratio.
        Rows with any NaN are dropped before being prepended to self.features.
        """
        new_features = self.gaplist.query("time > @self.last_update")
        now = self.Time
        entry_hr, entry_mn = now.hour, now.minute
        # NOTE(review): instantiating QuantBook inside a running algorithm is
        # unusual; it is used here only for point-in-time fundamentals - confirm supported.
        qb = QuantBook()
        minute_bars, fcf_bars, cash_bars = [], [], []
        for symbol, day in new_features.index:
            minute_bars += [self.History([symbol],
                                         day.replace(hour=7, minute=1),
                                         day.replace(hour=entry_hr, minute=entry_mn),
                                         Resolution.Minute)]
            fcf_bars += [qb.GetFundamental([self.Symbol(symbol)],
                                           "FinancialStatements.CashFlowStatement.FreeCashFlow",
                                           day, day)]
            cash_bars += [qb.GetFundamental([self.Symbol(symbol)],
                                            "FinancialStatements.BalanceSheet.CashAndCashEquivalents",
                                            day, day)]
        try:
            minute_bars = pd.concat(minute_bars)
            pm_bar = agg_bars(minute_bars, "07:01", "09:30")  # pre-market session bar
            opening_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")  # open -> entry time
            fcf_bars = pd.concat(fcf_bars)
            fcf_bars = fcf_bars.unstack().dropna().apply(lambda x: x.ThreeMonths)
            cash_bars = pd.concat(cash_bars)
            cash_bars = cash_bars.unstack().dropna().apply(lambda x: x.ThreeMonths)
        except (KeyError, ValueError) as e:
            # Typically raised on empty history / empty concat - skip this update.
            self.print(f"Update Features Error: {e}")
            return

        new_features = new_features.join(opening_bar.add_prefix("opening_"))
        new_features["opening_range"] = opening_bar.eval("(close-low)/(high-low)")  # where the close sits in the range
        new_features["pm_volume_usd"] = pm_bar.eval("close * volume")

        # Most recent ATM offering known as of now, per symbol.
        last_atm = self.atm.query("time <= @self.Time").groupby("symbol").last()
        new_features = new_features.join(last_atm[["atm_date", "atm_size", "atm_offer"]])
        # atm_date is stored as epoch seconds (see get_atm), so this delta is in seconds.
        seconds_since_atm = (self.Time.timestamp() - new_features["atm_date"])
        new_features["atm_days"] = seconds_since_atm / SECS_PER_DAY

        opening_bars = idx.filter_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
        divergence = opening_bars["close"] / idx.intra_vwap(opening_bars) - 1  # distance from intraday VWAP
        grouper = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
        new_features["max_divergence"] = divergence.groupby(grouper).max()
        new_features["min_divergence"] = divergence.groupby(grouper).min()
        new_features["runway_cash"] = cash_bars / fcf_bars  # CashAndCashEquivalents / FreeCashFlow
        # Positive because this runs before the 10:30 exit (timedelta.seconds would wrap otherwise).
        new_features["seconds_to_exit"] = (now.replace(hour=10, minute=30) - now).seconds
        new_features.eval("pm_volume_atm = pm_volume_usd / atm_size", inplace=True)

        # Prepend new rows; on duplicate (symbol, time) keys keep the newest row.
        self.features = pd.concat([new_features.dropna(), self.features])
        self.features = self.features[~self.features.index.duplicated(keep='first')]

        # Running count of gap days per (symbol, ATM offering) - presumably
        # relies on row order within each group; verify after the prepend above.
        self.features["gap_days"] = self.features.groupby(["symbol", "atm_date"]).cumcount()
        self.print(f"Stored {len(new_features)} new features, total: {len(self.features)}")
        self.Log(new_features.to_string())

    def update_targets(self):
        """Label the most recent feature rows with the realized short return.

        Target = 1 - low/open, i.e. the best-case return of a short entered at
        the open and covered at the low of the aggregated window.
        """
        new_features = self.features.query("time > @self.last_update")
        exit_hr, exit_mn = self.Time.hour, self.Time.minute
        # NOTE(review): only the final minute before exit is requested here,
        # yet agg_bars filters from 09:30 - confirm the one-minute window is intended.
        minute_bars = [self.History([symbol],
                                    day.replace(hour=exit_hr, minute=exit_mn)-timedelta(minutes=1),
                                    day.replace(hour=exit_hr, minute=exit_mn),
                                    Resolution.Minute)
                       for symbol, day in new_features.index]
        try:
            minute_bars = pd.concat(minute_bars)
            target_bar = agg_bars(minute_bars, "09:30", f"{exit_hr}:{exit_mn}")
        except (KeyError, ValueError) as e:
            self.print(f"Update Targets Error: {e}")
            return

        new_targets = target_bar.eval("1 - low/open")  # Return of a short with exit on LOD
        self.targets = pd.concat([new_targets.dropna(), self.targets])
        self.targets = self.targets[~self.targets.index.duplicated(keep='first')]
        self.last_update = self.Time
        self.print(f"Stored {len(new_targets)} new targets, total: {len(self.targets)}")

    def update_gaplist(self):
        """Append today's qualifying gappers (open/prev_close - 1 >= min_gap).

        Only symbols whose ATM offering is at most one year old are screened.
        """
        last_update = self.atm_start if self.gaplist is None \
            else self.gaplist.index.get_level_values("time").max()
        last_valid_atm = last_update - timedelta(365)  # ATMs older than one year are considered stale
        valid_atm = self.atm.query("(time >= @last_valid_atm) and (time <= @self.Time)")
        tickers = valid_atm.index.get_level_values("symbol").unique().tolist()
        day_bars = self.History(tickers, last_update, self.Time, Resolution.Daily)
        # Shift timestamps back one day so each bar is indexed on its trading
        # day (LEAN stamps daily bars at the following midnight).
        shifted_time_idx = day_bars.index.levels[1].shift(-1, freq="D")
        day_bars.index = day_bars.index.set_levels(shifted_time_idx, level=1)
        today_start = self.Time.replace(hour=9, minute=30)
        if self.Time > today_start:  # adding manually the last day bar if missing
            last_day_bars = self.History(tickers, today_start, self.Time,
                                         Resolution.Minute)
            last_day_bar = agg_bars(last_day_bars, "09:31", "16:00")
            day_bars = pd.concat([day_bars, last_day_bar])

        yesterday_close = day_bars["close"].groupby("symbol").shift(1)
        gaps = day_bars["open"] / yesterday_close - 1  # overnight gap
        new_gaplist = gaps[gaps >= self.min_gap].to_frame("gap")
        self.gaplist = pd.concat([new_gaplist, self.gaplist])

    def get_atm(self):
        """Download the ATM offerings CSV and subscribe all of its tickers.

        Returns a DataFrame indexed by (symbol, time) with at least the
        columns atm_offer (bool), atm_size and atm_date (epoch seconds).
        """
        csv = StringIO(self.Download(TICKERS_CSV))
        atms = pd.read_csv(csv, parse_dates=[DATE_COL],
                           dayfirst=True, thousands=",")
        atms.eval("atm_offer = OfferingType == 'ATM'", inplace=True)
        atms = atms.rename(columns={DATE_COL: "time", SYMBOL_COL: "symbol",
                                    "Total ATM Capacity": "atm_size"})
        # Subscribe with extended hours so pre-market bars exist for features.
        [self.AddEquity(s, Resolution.Minute, extendedMarketHours=True)
         for s in atms["symbol"].unique()]
        # Store the security ID string so the index survives ticker renames.
        atms["symbol"] = atms["symbol"].apply(lambda x: str(self.Symbol(x).ID))
        atms["atm_date"] = atms["time"].apply(lambda x: x.timestamp())
        return atms.set_index(["symbol", "time"])

    def print(self, msg):
        """Debug-log msg prefixed with the current algorithm time."""
        self.Debug(f"{self.Time} {msg}")


def agg_bars(minute_bars, start_time, end_time):
    """Collapse minute bars within [start_time, end_time] into one daily
    OHLCV bar per symbol, applying the column rules defined in AGG_OPS."""
    window = idx.filter_bars(minute_bars, start_time, end_time)
    by_symbol_and_day = [pd.Grouper(level="symbol"),
                         pd.Grouper(level="time", freq="1D")]
    return window.groupby(by_symbol_and_day).agg(AGG_OPS)