Overall Statistics
Total Trades
6134
Average Win
0.26%
Average Loss
-0.21%
Compounding Annual Return
98.871%
Drawdown
16.500%
Expectancy
0.108
Net Profit
98.871%
Sharpe Ratio
2.818
Probabilistic Sharpe Ratio
92.332%
Loss Rate
49%
Win Rate
51%
Profit-Loss Ratio
1.19
Alpha
0.704
Beta
-0.256
Annual Standard Deviation
0.232
Annual Variance
0.054
Information Ratio
1.702
Tracking Error
0.267
Treynor Ratio
-2.546
Total Fees
$15815.07
Estimated Strategy Capacity
$24000000.00
Lowest Capacity Asset
CDW VHRARJ4RLSV9
"""
Library of indicators
@version: 0.11
"""

import pandas as pd


def filter_bars(bars, start, end):
    """Keep only the rows whose "time" index level lies in a time-of-day window.

    Args:
        bars: pandas object whose index has a level named "time".
        start: Start of the window, forwarded to ``indexer_between_time``.
        end: End of the window, forwarded to ``indexer_between_time``.

    Returns:
        The rows of ``bars`` selected positionally for the matching times.
    """
    timestamps = bars.index.get_level_values("time")
    row_positions = timestamps.indexer_between_time(start, end)
    return bars.iloc[row_positions]


def rename(bars, name):
    """Label ``bars`` with ``name``.

    A Series is renamed to ``name``; any other object gets ``name`` followed
    by an underscore prepended to its labels via ``add_prefix``.
    """
    if isinstance(bars, pd.Series):
        return bars.rename(name)
    return bars.add_prefix(f"{name}_")


def get_daygrouper():
    """Return the group keys for one group per symbol and per day.

    Returns:
        A two-element list: a grouper on the "symbol" index level and a
        grouper on the "time" index level with a ``"1D"`` frequency.
    """
    symbol_key = pd.Grouper(level="symbol")
    day_key = pd.Grouper(level="time", freq="1D")
    return [symbol_key, day_key]

# Daily indicators
def roll_max(bars, window, groupby="symbol"):
    """Rolling maximum computed independently inside each group.

    Args:
        bars: Series or DataFrame to roll over.
        window: Rolling window passed to ``rolling``.
        groupby: Grouping key passed to ``groupby`` (default: "symbol").

    Returns:
        Rolling maximum with ``min_periods=1``, indexed like ``bars``.
    """
    # group_keys=False keeps the output index identical to the input index.
    # Without it, pandas >= 2.0 prepends the group key as an extra index
    # level, which breaks index-aligned arithmetic with the original bars.
    groups = bars.groupby(groupby, group_keys=False)
    return groups.apply(lambda x: x.rolling(window, min_periods=1).max())


def roll_min(bars, window, groupby="symbol"):
    """Rolling minimum computed independently inside each group.

    Args:
        bars: Series or DataFrame to roll over.
        window: Rolling window passed to ``rolling``.
        groupby: Grouping key passed to ``groupby`` (default: "symbol").

    Returns:
        Rolling minimum indexed like ``bars``. ``min_periods`` is left at the
        pandas default, so incomplete windows yield NaN.
    """
    # group_keys=False keeps the output index identical to the input index.
    # Without it, pandas >= 2.0 prepends the group key as an extra index
    # level, which breaks index-aligned arithmetic with the original bars.
    groups = bars.groupby(groupby, group_keys=False)
    return groups.apply(lambda x: x.rolling(window).min())


def roll_average(bars, window, groupby="symbol", mean_type="arit"):
    """Rolling or exponentially weighted mean computed inside each group.

    Args:
        bars: Series or DataFrame to average.
        window: Rolling window, or the EWM span when ``mean_type == "exp"``.
        groupby: Grouping key passed to ``groupby`` (default: "symbol").
        mean_type: ``"exp"`` selects ``ewm(span=window).mean()``; any other
            value selects the arithmetic ``rolling(window).mean()``.

    Returns:
        The per-group average, indexed like ``bars``.
    """
    if mean_type == "exp":
        mean_func = lambda x: x.ewm(span=window).mean()
    else:
        mean_func = lambda x: x.rolling(window).mean()
    # group_keys=False keeps the output index identical to the input index.
    # Without it, pandas >= 2.0 prepends the group key as an extra index
    # level, which breaks index-aligned arithmetic with the original bars.
    return bars.groupby(groupby, group_keys=False).apply(mean_func)


def roll_range(bars, window):
    """Position of the rolling average close inside the rolling high-low range.

    Args:
        bars: DataFrame with "high", "low" and "close" columns.
        window: Window forwarded to the rolling helpers.

    Returns:
        ``(avg_close - min_low) / (max_high - min_low)`` over the window.
    """
    highest = roll_max(bars["high"], window).squeeze()
    lowest = roll_min(bars["low"], window).squeeze()
    mean_close = roll_average(bars["close"], window).squeeze()
    span = highest - lowest
    return (mean_close - lowest) / span


def roll_change(bars, window):
    """Percentage change over ``window`` periods, computed per "symbol" group."""
    by_symbol = bars.groupby("symbol")
    return by_symbol.pct_change(periods=window)


def position_range(bars, window):
    """Position of the open inside the rolling range of the preceding bars.

    Args:
        bars: DataFrame with "open", "high" and "low" columns.
        window: Window forwarded to the rolling helpers.

    Returns:
        ``(open - min_low) / (max_high - min_low)``, where the extremes are
        taken over bars shifted by one row per symbol.
    """
    # Shift by one row per symbol so the current bar is excluded from the range.
    previous = bars.groupby("symbol").shift(1)
    highest = roll_max(previous["high"], window).squeeze()
    lowest = roll_min(previous["low"], window).squeeze()
    span = highest - lowest
    return (bars["open"] - lowest) / span


def gap(bars):
    """Relative difference between the open and the previous row's close, per symbol.

    Args:
        bars: DataFrame with "open" and "close" columns.

    Returns:
        ``open / previous_close - 1``; the first row of each symbol is NaN.
    """
    # Shift by one row per symbol so each row sees the preceding bar.
    previous = bars.groupby("symbol").shift(1)
    ratio = bars["open"] / previous["close"]
    return ratio - 1


def extension(bars, window):
    """Distance of the high from the rolling maximum high, scaled by the rolling range.

    Args:
        bars: DataFrame with "high" and "low" columns.
        window: Window forwarded to the rolling helpers.

    Returns:
        ``(high - max_high) / (max_high - min_low)`` over the window.
    """
    max_high = roll_max(bars["high"], window).squeeze()
    # The range floor is the rolling MINIMUM of the lows. The previous code
    # used roll_max here, which made the denominator high-max minus low-max.
    min_low = roll_min(bars["low"], window).squeeze()
    return (bars["high"] - max_high) / (max_high - min_low)


def retracement(bars, window):
    """Distance of the low from the rolling maximum high, scaled by the rolling range.

    Args:
        bars: DataFrame with "high" and "low" columns.
        window: Window forwarded to the rolling helpers.

    Returns:
        ``(max_high - low) / (max_high - min_low)`` over the window.
    """
    max_high = roll_max(bars["high"], window).squeeze()
    # The range floor is the rolling MINIMUM of the lows. The previous code
    # used roll_max here, which made the denominator high-max minus low-max.
    min_low = roll_min(bars["low"], window).squeeze()
    return (max_high - bars["low"]) / (max_high - min_low)


def gap_extension(bars):
    """Previous high relative to the open, scaled by the gap from the previous close.

    Args:
        bars: DataFrame with "open", "high" and "close" columns.

    Returns:
        ``(previous_high - open) / (open - previous_close)`` per symbol.
    """
    # Shift by one row per symbol so each row sees the preceding bar.
    previous = bars.groupby("symbol").shift(1)
    distance_to_prior_high = previous["high"] - bars["open"]
    gap_size = bars["open"] - previous["close"]
    return distance_to_prior_high / gap_size


def day_range(bars):
    """Position of the open inside each bar's own high-low range.

    Args:
        bars: DataFrame with "open", "high" and "low" columns.

    Returns:
        ``(open - low) / (high - low)`` evaluated row by row.
    """
    expression = "(open-low)/(high-low)"
    return bars.eval(expression)


def gap_retracement(bars):
    """Open relative to the previous low, scaled by the gap from the previous close.

    Args:
        bars: DataFrame with "open", "low" and "close" columns.

    Returns:
        ``(open - previous_low) / (open - previous_close)`` per symbol.
    """
    # Shift by one row per symbol so each row sees the preceding bar.
    previous = bars.groupby("symbol").shift(1)
    distance_to_prior_low = bars["open"] - previous["low"]
    gap_size = bars["open"] - previous["close"]
    return distance_to_prior_low / gap_size


def roll_vwap(bars, window):
    """Rolling volume-weighted average price per symbol.

    The price of each bar is the mean of its high, low and close.

    Args:
        bars: DataFrame with "high", "low", "close" and "volume" columns.
        window: Rolling window passed to ``rolling``.

    Returns:
        Rolling sum of price * volume divided by the rolling sum of volume
        (both with ``min_periods=1``), indexed like ``bars``.
    """
    def rolling_sum(values):
        # group_keys=False keeps the output index identical to the input
        # index. Without it, pandas >= 2.0 prepends the group key as an extra
        # index level and the result no longer aligns with ``bars``.
        groups = values.groupby("symbol", group_keys=False)
        return groups.apply(lambda x: x.rolling(window, min_periods=1).sum())

    price_volume = bars[["high", "low", "close"]].mean(axis=1) * bars["volume"]
    return rolling_sum(price_volume) / rolling_sum(bars["volume"])


def shift(bars, shift):
    """Shift ``bars`` by ``shift`` rows inside each "symbol" group."""
    by_symbol = bars.groupby("symbol")
    return by_symbol.shift(periods=shift)


def divergence(num_bars, den_bars):
    """Relative difference of ``num_bars`` with respect to ``den_bars``.

    Returns:
        ``num_bars / den_bars - 1``.
    """
    ratio = num_bars / den_bars
    return ratio - 1


# Intra day indicators

def intra_change(bars):
    """Relative change between the first and last value of each symbol/day group.

    Returns:
        ``last / first - 1`` for every group produced by ``get_daygrouper``.
    """
    by_day = bars.groupby(get_daygrouper())
    last_values = by_day.last()
    first_values = by_day.first()
    return last_values / first_values - 1


def intra_vwap(bars):
    """Running intraday volume-weighted average price.

    The price of each bar is ``(high + low + close) / 3``.

    Args:
        bars: DataFrame with "high", "low", "close" and "volume" columns and
            "symbol" / "time" index levels.

    Returns:
        Cumulative price * volume divided by cumulative volume, indexed like
        ``bars``. The accumulation restarts for every symbol and every day.
    """
    # Accumulate per symbol AND per day. Grouping by symbol alone (the previous
    # behaviour) carried the sums across days whenever ``bars`` spanned more
    # than one session. Single-day input gives the same result as before.
    price_volume = bars.eval("(high + low + close)/3 * volume")
    price_volume = price_volume.groupby(get_daygrouper()).cumsum()
    volume = bars["volume"].groupby(get_daygrouper()).cumsum()
    return price_volume/volume


def intra_average(bars):
    """Mean of ``bars`` for each symbol/day group.

    Returns:
        One aggregated value (or row) per group from ``get_daygrouper``.
    """
    # pandas GroupBy objects expose ``mean``; there is no ``average`` method,
    # so the previous call raised AttributeError.
    return bars.groupby(get_daygrouper()).mean()


def intra_max(bars):
    """Maximum of ``bars`` for each symbol/day group from ``get_daygrouper``."""
    by_day = bars.groupby(get_daygrouper())
    return by_day.max()


def intra_min(bars):
    """Minimum of ``bars`` for each symbol/day group from ``get_daygrouper``."""
    by_day = bars.groupby(get_daygrouper())
    return by_day.min()


def intra_gapext(daily_bars, intra_bars):
    """Gap extension: intraday high above the daily open, scaled by the gap.

    Args:
        daily_bars: DataFrame with "open" and "close" columns.
        intra_bars: DataFrame with a "high" column.

    Returns:
        ``(intraday_max_high - open) / (open - previous_close)``, where the
        previous close is the daily close shifted by one row per symbol.
    """
    session_high = intra_max(intra_bars["high"])
    extension_above_open = session_high - daily_bars["open"]
    prior_close = daily_bars["close"].groupby("symbol").shift(1)
    gap_size = daily_bars["open"] - prior_close
    return extension_above_open.divide(gap_size, axis="index")


def intra_highext(daily_bars, intra_bars):
    """Total high extension: daily high above the intraday high, scaled by the intraday range.

    Args:
        daily_bars: DataFrame with a "high" column.
        intra_bars: DataFrame with "high" and "low" columns.

    Returns:
        ``(daily_high - intraday_high) / (intraday_high - intraday_low)``.
    """
    session_high = intra_max(intra_bars["high"])
    session_low = intra_min(intra_bars["low"])
    overshoot = daily_bars["high"] - session_high
    session_span = session_high - session_low
    return overshoot.divide(session_span, axis="index")


def intra_retrace(bars):
    """Retrace: move of the high from the first to the last bar of each symbol/day group.

    Args:
        bars: DataFrame with "high" and "low" columns.

    Returns:
        ``(last_high - first_high) / (first_high - first_low)`` per group.
    """
    by_day = bars.groupby(get_daygrouper())
    opening = by_day.first()
    closing = by_day.last()
    opening_span = opening["high"] - opening["low"]
    return (closing["high"] - opening["high"]) / opening_span


def intra_divup(bars):
    """Divergence up: distance of each high above the VWAP, relative to the VWAP.

    Returns:
        ``(high - vwap) / vwap`` with the VWAP taken from ``intra_vwap``.
    """
    reference = intra_vwap(bars)
    distance_above = bars["high"] - reference
    return distance_above / reference


def intra_divdown(bars):
    """Divergence down: distance of each low below the VWAP, relative to the VWAP.

    Returns:
        ``(vwap - low) / vwap`` with the VWAP taken from ``intra_vwap``.
    """
    reference = intra_vwap(bars)
    distance_below = reference - bars["low"]
    return distance_below / reference


def intra_position_range(bars):
    """Position range: where the last close sits inside each symbol/day high-low range.

    Args:
        bars: DataFrame with "close", "high" and "low" columns.

    Returns:
        ``(last_close - min_low) / (max_high - min_low)`` per group.
    """
    by_day = bars.groupby(get_daygrouper())
    last_close = by_day["close"].last()
    session_low = by_day["low"].min()
    session_high = by_day["high"].max()
    return (last_close - session_low) / (session_high - session_low)


def intra_relvolume(daily_bars, intra_bars, avg_days=10):
    """Intraday volume relative to the trailing average daily volume.

    Args:
        daily_bars: DataFrame with a "volume" column.
        intra_bars: DataFrame with a "volume" column.
        avg_days: Window of the daily volume average (default: 10).

    Returns:
        Per symbol/day volume sum of ``intra_bars`` divided by the rolling
        average of the daily volume, shifted by one row per symbol.
    """
    session_volume = intra_bars.groupby(get_daygrouper())["volume"].sum()
    trailing_average = roll_average(daily_bars["volume"], avg_days)
    # Shift one row later so each day is compared with the average up to the previous day.
    prior_average = shift(trailing_average, 1)
    return session_volume / prior_average.squeeze()


def intra_volume_hod(bars):
    """Cumulative volume looked up at an ``idxmax`` label of each symbol/day group.

    NOTE(review): the name suggests "volume at the high of the day", but the
    column whose maximum is located depends on the column order of ``bars``
    (see below) — confirm the intended input layout with the caller.
    """
    grouper = bars.groupby(get_daygrouper())
    # For every group take idxmax() and keep the element at position 1 of the
    # result. Presumably this is the label of the maximum of the second column
    # when ``bars`` is a DataFrame — TODO confirm.
    index = grouper.apply(lambda x: x.idxmax()[1])
    # Per-group running volume, selected at those labels, then reduced to the
    # last selected value of each symbol/day group.
    return grouper["volume"].cumsum()[index].groupby(get_daygrouper()).last()
"""
Big Bertha Strategy with Machine Learning
Done
- Custom precision scoring
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Double barrier target with SL

@version: 0.19
@creation date: 05/07/2022
"""

from AlgorithmImports import *

import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier

import indicators as idx
from timeseriescv import TimeSeriesSplitGroups

# Make pandas treat +/-inf as missing values, so dropna() also removes rows
# holding infinities (e.g. from divisions by zero in the feature formulas).
# NOTE(review): this option is deprecated in recent pandas releases — confirm
# it is still available in the deployed pandas version.
pd.set_option('mode.use_inf_as_na', True)
# Group keys: one group per symbol and per day of the "time" index level.
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
# Column-wise aggregation that collapses several bars into one OHLCV bar.
AGG_OPS = {"open": "first", "close": "last", "high": "max",
           "low": "min", "volume": "sum"}


class BigBerthaML(QCAlgorithm):
    """Intraday strategy that trades a classifier's prediction after the first 5-minute bar.

    Every trading day at 09:35 the algorithm stores features built from the
    09:31-09:35 bar (prefixed ``bb_``), the 07:01-09:30 dollar volume and the
    gap to the previous daily close, then trades the model prediction. At
    15:55 it liquidates and stores a label in {-1, 0, +1} for each symbol.
    A ``GradientBoostingClassifier`` is retrained at the start of every week
    and its cross-validated score scales the position size.
    """

    def Initialize(self):
        """Read the parameters, configure the backtest and register the scheduled events."""
        # Numeric/boolean parameters arrive as strings and are parsed as literals.
        self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
        self.retracement_sl = literal_eval(self.GetParameter("retracement_sl"))
        self.kelly_frac = literal_eval(self.GetParameter("kelly_frac"))
        self.capital = literal_eval(self.GetParameter("capital"))
        self.use_sl = literal_eval(self.GetParameter("use_sl"))
        # Kept as a string: calc_exit_target checks it for "long" / "short".
        self.exposure = self.GetParameter("exposure")
        self.benchmark = self.GetParameter("benchmark")

        self.SetStartDate(2021, 1, 1)
        self.SetEndDate(2022, 1, 1)
        self.SetCash(self.capital)
        self.UniverseSettings.Resolution = Resolution.Minute
        self.UniverseSettings.ExtendedMarketHours = True
        self.AddUniverse(self.coarse_filter)
        self.AddEquity(self.benchmark, Resolution.Minute)
        self.SetBenchmark(self.benchmark)

        # score == 0 means "no usable model yet"; trade() does nothing until it changes.
        self.score = 0
        self.cv = TimeSeriesSplitGroups(n_splits=10)
        self.features, self.targets = None, None
        self.model = GradientBoostingClassifier(n_iter_no_change=3)

        at = self.TimeRules.At
        every_day = self.DateRules.EveryDay(self.benchmark)
        self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
        self.Schedule.On(every_day, at(9, 35), self.store_features)
        self.Schedule.On(every_day, at(9, 35), self.trade)
        self.Schedule.On(every_day, at(15, 55), self.stop_trading)
        self.Schedule.On(every_day, at(15, 55), self.store_targets)

    def coarse_filter(self, coarse):
        """Select symbols with fundamental data and dollar volume above ``min_usd_volume``."""
        return [x.Symbol for x in coarse if
                x.HasFundamentalData and
                x.DollarVolume > self.min_usd_volume]

    def train_model(self):
        """Cross-validate and refit the model on the stored features and targets.

        Does nothing until both features and targets exist and at least 20
        distinct days are available. Updates ``self.score`` with the mean
        balanced accuracy across the CV folds.
        """
        if self.features is None or self.targets is None: return

        # Features (dropna at 09:35) and targets (computed at 15:55) are stored
        # independently, so train only on rows present in both and make sure
        # X and y are in the same order.
        common = self.features.index.intersection(self.targets.index)
        features = self.features.loc[common]
        targets = self.targets.loc[common]

        training_days = features.index.get_level_values("time").unique()
        if len(training_days) < 20: return

        time_groups = targets.index.get_level_values("time")
        cv_scores = cross_val_score(self.model, X=features, y=targets,
                                    cv=self.cv, groups=time_groups,
                                    scoring="balanced_accuracy")
        # Failed folds produce NaN scores; count them as 0. ``nan`` must be
        # passed by keyword: the second positional argument is ``copy``.
        self.score = np.mean(np.nan_to_num(cv_scores, nan=0.0))
        self.model.fit(features, targets)
        self.print(f"Training: {targets.value_counts()} Score:{self.score:.1%}")
        self.Plot("ML", "Score", self.score)

    def trade(self):
        """Open positions for today's predictions, sized by the model edge.

        The edge rescales the CV score so that a chance-level score maps to 0;
        it is multiplied by ``kelly_frac`` and clipped to [0, 1]. Total absolute
        exposure is capped at 1. When ``use_sl`` is set, a stop-limit order is
        placed for each position.
        """
        if self.score == 0: return

        n_classes = len(self.model.classes_)
        edge = (n_classes * self.score - 1) / (n_classes-1)
        x_pred = self.features.query("time == @self.Time.date()")
        if x_pred.empty: return  # Nothing stored today; predict() rejects empty input

        x_pred.index = x_pred.index.droplevel("time")
        y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
        # np.clip works for plain Python floats as well as numpy scalars.
        positions = y_pred * np.clip(edge * self.kelly_frac, 0, 1)
        gross_exposure = positions.abs().sum()
        if gross_exposure > 1: positions /= gross_exposure  # Ensuring no leverage is used
        self.print(f"Trading: {y_pred.value_counts()}")
        for symbol, pos in positions[positions != 0].items():
            qty = self.CalculateOrderQuantity(symbol, pos)
            self.MarketOrder(symbol, qty)
            if self.use_sl:
                features = x_pred.loc[symbol]
                # Stop distance is a fraction of the first 5-minute bar range,
                # measured from its high (longs) or its low (shorts).
                window = (features.bb_high - features.bb_low) * self.retracement_sl
                stop_loss = features.bb_high - window if pos > 0 \
                    else features.bb_low + window   # TODO: Refactor
                self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)

    def stop_trading(self):
        """Cancel all open orders and liquidate every position."""
        self.Transactions.CancelOpenOrders()
        self.Liquidate()

    def store_features(self):
        """Build today's feature rows and prepend them to ``self.features``.

        Features per symbol: the aggregated 09:31-09:35 bar (``bb_*`` columns),
        its relative size, the position of its close and open inside its range,
        the 07:01-09:30 close * volume and the gap of the bar's open to the
        previous daily close. Rows containing missing values are dropped.
        """
        start = self.Time.replace(hour=7, minute=1, second=0)
        tickers = list(self.ActiveSecurities.Keys)
        minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
        pm_bar = agg_bars(minute_bars, "07:01", "09:30")
        min5_bar = agg_bars(minute_bars, "09:31", "09:35")

        new_features = min5_bar.add_prefix("bb_")
        new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
        new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
        new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
        new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
        yesterday_bar = self.History(tickers, 1, Resolution.Daily)
        yesterday_close = yesterday_bar["close"].droplevel("time")
        new_features["gap"] = min5_bar["open"] / yesterday_close - 1
        # pd.concat silently skips None, so this also covers the very first call.
        self.features = pd.concat([new_features.dropna(), self.features])
        self.print(f"Stored new features, total: {len(self.features)}")

    def store_targets(self):
        """Label today's feature rows and prepend the labels to ``self.targets``.

        For each symbol with features today, the 09:36-15:55 bars are
        aggregated, joined with the 09:31-09:35 bar and labelled by
        ``calc_exit_target``.
        """
        if self.features is None: return  # No features were ever stored

        last_features = self.features.query("time == @self.Time.date()")
        if last_features.empty: return  # Nothing to label today

        tickers = list(last_features.index.get_level_values("symbol"))
        start = self.Time.replace(hour=9, minute=31, second=0)
        end = self.Time.replace(hour=15, minute=55, second=0)
        minute_bars = self.History(tickers, start, end, Resolution.Minute)
        min5_bar = agg_bars(minute_bars, "09:31", "09:35")
        trading_bar = agg_bars(minute_bars, "09:36", "15:55")
        trading_bar = trading_bar.join(min5_bar.add_prefix("bb_"))
        new_targets = trading_bar.apply(self.calc_exit_target, axis=1)
        # pd.concat silently skips None, so this also covers the very first call.
        self.targets = pd.concat([new_targets.dropna(), self.targets])
        self.print(f"Stored new targets, total: {len(self.targets)}")

    def calc_exit_target(self, price_bar):
        """Label one symbol/day row as long (+1), short (-1) or no trade (0).

        Args:
            price_bar: Row with the trading-window ``open``, ``close``, ``high``
                and ``low`` plus the first-bar ``bb_high`` and ``bb_low``.

        Returns:
            +1 if the close is more than 1% above the open, "long" is in the
            exposure and (when ``use_sl``) the low stayed above the long stop.
            -1 if the close is more than 1% below the open, "short" is in the
            exposure and (when ``use_sl``) the high stayed below the short
            stop. 0 otherwise.
        """
        window = (price_bar.bb_high - price_bar.bb_low)
        if price_bar.close > price_bar.open * 1.01 and "long" in self.exposure:  # long trade
            if self.use_sl:
                stop_loss = price_bar.bb_high - window * self.retracement_sl
                return +1 if price_bar.low > stop_loss else 0  # 1 if profitable long and not touching the SL
            else:
                return +1
        elif price_bar.close < price_bar.open * 0.99 and "short" in self.exposure:  # short trade
            if self.use_sl:
                stop_loss = price_bar.bb_low + window * self.retracement_sl
                return -1 if price_bar.high < stop_loss else 0  # -1 if profitable short and not touching the SL
            else:
                return -1
        else:
            return 0

    def print(self, msg):
        """Send ``msg`` to the debug log, prefixed with the current algorithm time."""
        self.Debug(f"{self.Time} {msg}")


def agg_bars(minute_bars, start_time, end_time):
    """Collapse the bars inside a time-of-day window into one bar per symbol and day.

    Args:
        minute_bars: Bars with "symbol" and "time" index levels.
        start_time: Start of the time-of-day window.
        end_time: End of the time-of-day window.

    Returns:
        One row per ``GROUPER`` group, aggregated according to ``AGG_OPS``.
    """
    window_bars = idx.filter_bars(minute_bars, start_time, end_time)
    per_symbol_day = window_bars.groupby(GROUPER)
    return per_symbol_day.agg(AGG_OPS)
import math

import numpy as np
from math import factorial
from itertools import combinations
from sklearn.model_selection._split import _BaseKFold, indexable


class TimeSeriesSplitGroups(_BaseKFold):
    """Expanding-window cross-validator that splits on whole groups.

    The sorted unique group labels are cut into ``n_splits + 1`` folds. Each
    split tests on one block of ``test_size`` consecutive groups and trains on
    every group that precedes it, minus ``purge_groups`` groups directly before
    the test block. Splits are yielded from the latest test block backwards.
    """

    def __init__(self, n_splits=5, purge_groups=0):
        """
        Args:
            n_splits: Number of train/test splits to generate.
            purge_groups: Number of groups dropped from the end of each
                training set, immediately before its test block.
        """
        super().__init__(n_splits, shuffle=False, random_state=None)
        self.purge_groups = purge_groups

    def split(self, X, y=None, groups=None):
        """Yield ``(train_indices, test_indices)`` positional index arrays.

        Args:
            X: Samples.
            y: Targets (unused by the split itself).
            groups: Group label of every sample; required.

        Raises:
            ValueError: If ``groups`` is None, or if there are fewer distinct
                groups than ``n_splits + 1 + purge_groups``.
        """
        if groups is None:
            # Fail with a clear message instead of the generic group-count
            # error that np.unique(None) would otherwise trigger below.
            raise ValueError("The 'groups' parameter should not be None.")
        X, y, groups = indexable(X, y, groups)
        # One extra fold: the earliest fold is only ever used for training.
        n_folds = self.n_splits + 1
        group_list = np.unique(groups)
        n_groups = len(group_list)
        if n_folds + self.purge_groups > n_groups:
            raise ValueError((f"Cannot have number of folds plus purged groups "
                              f"={n_folds+self.purge_groups} greater than the "
                              f"number of groups: {n_groups}."))
        test_size = (n_groups-self.purge_groups) // n_folds
        # Test blocks are anchored at the end of the group list.
        test_starts = [n_groups-test_size*c for c in range(1, n_folds)]
        for tstart in test_starts:
            train_idx = np.isin(groups, group_list[:tstart - self.purge_groups])
            test_idx = np.isin(groups, group_list[tstart:tstart + test_size])
            yield (np.nonzero(train_idx)[0], np.nonzero(test_idx)[0])


class CombinatorialPurgedCV(_BaseKFold):
    """Combinatorial cross-validator over groups with purge and embargo.

    The sorted unique group labels are cut into ``n`` consecutive folds. Every
    combination of ``k`` folds is used once as the test set; the remaining
    folds form the training set, trimmed next to test folds.
    """

    def __init__(self, n=4, k=2, purge=0, embargo=0):
        """
        Args:
            n: Number of folds the groups are cut into.
            k: Number of folds used for testing in each split.
            purge: Groups removed (together with ``embargo``) from the end of
                a training fold that is directly followed by a test fold.
            embargo: Groups removed from the start of a training fold that
                directly follows a test fold.
        """
        self.n = n
        self.k = k
        self.purge = purge
        self.embargo = embargo
        # Exact integer binomial coefficient; float division loses precision
        # once the factorials exceed 2**53.
        n_splits = factorial(n) // (factorial(k) * factorial(n - k))
        super().__init__(n_splits, shuffle=False, random_state=None)

    def split(self, X, y=None, groups=None):
        """Yield ``(train_indices, test_indices)`` positional index arrays.

        Args:
            X: Samples.
            y: Targets (unused by the split itself).
            groups: Group label of every sample; required.

        Raises:
            ValueError: If ``groups`` is None, or if there are fewer distinct
                groups than the number of splits.
        """
        if groups is None:
            # Fail with a clear message instead of the generic group-count
            # error that np.unique(None) would otherwise trigger below.
            raise ValueError("The 'groups' parameter should not be None.")
        X, y, groups = indexable(X, y, groups)
        unique_groups = list(np.unique(groups))
        required_folds = self.n_splits
        if required_folds > len(unique_groups):
            raise ValueError((f"Required folds ={required_folds} greater than "
                              f"the number of groups: {len(unique_groups)}."))
        fold_size = int(math.ceil(len(unique_groups) / self.n))
        test_folds = combinations(range(self.n), self.k)
        for test_fold in test_folds:
            train_groups, test_groups = [], []
            for c in range(self.n):
                start = c * fold_size
                stop = min((c + 1) * fold_size, len(unique_groups))  # To avoid going out of bound
                if c in test_fold:
                    test_groups += unique_groups[start:stop]
                else:
                    # Naive fold sizing, should be distributed before train/test split
                    if c-1 in test_fold: start += self.embargo
                    if c+1 in test_fold: stop -= (self.purge+self.embargo)
                    # A purge/embargo larger than the fold can push ``stop``
                    # below zero; a negative slice bound would wrap around and
                    # leak later (test) groups into the training set.
                    stop = max(start, stop)
                    train_groups += unique_groups[start:stop]
            train_idx = np.nonzero(np.isin(groups, train_groups))[0]
            test_idx = np.nonzero(np.isin(groups, test_groups))[0]
            yield train_idx, test_idx