"""
Machine Learning Algorithm with the option to use meta-labeling for position sizing
@email: info@beawai.com
@creation date: 10/01/2023
"""
from AlgorithmImports import *
import copy
import random
import pandas as pd
# Ask pandas to treat +/-inf as missing values, so that the dropna() calls
# below also discard rows containing infinities (e.g. from a division by zero).
# NOTE(review): this option is deprecated in recent pandas releases (2.1+) --
# confirm the pandas version of the runtime before upgrading.
pd.set_option('mode.use_inf_as_na', True)
from sklearn.exceptions import NotFittedError
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
class MLCryptoAlgo(QCAlgorithm):
    """
    Daily long-only strategy driven by a gradient-boosting classifier.

    A main model predicts the direction (side) of the next return. When
    ``use_meta`` is enabled, a second "meta" model predicts whether the main
    model is right and its probability is used as the position size.
    """

    def Initialize(self):
        """ Configure the backtest, the models and the scheduled events """
        self.SetStartDate(2008, 1, 1)
        self.SetCash(100000)

        # Parameters may come back as strings depending on how they are
        # supplied, so coerce them explicitly ("0" would otherwise be truthy).
        self.lookback = int(self.GetParameter("lookback", 21))
        # Whether to use the meta-model for position sizing
        self.use_meta = bool(int(self.GetParameter("use_meta", 1)))
        self.seed = int(self.GetParameter("seed", 42))
        random.seed(self.seed)

        self.training_len = 252 * 100  # number of daily bars requested for training
        self.resolution = Resolution.Daily
        self.ticker = "SPY"
        self.AddEquity(self.ticker, self.resolution)

        # random_state makes the internal early-stopping validation split
        # (enabled by n_iter_no_change) reproducible; random.seed() alone does
        # not affect scikit-learn.
        # Model to predict direction (side)
        self.main_model = GradientBoostingClassifier(n_iter_no_change=1,
                                                     random_state=self.seed)
        # Meta-model to predict position (size)
        self.meta_model = GradientBoostingClassifier(n_iter_no_change=1,
                                                     random_state=self.seed)

        self.Train(self.DateRules.WeekStart(), self.TimeRules.At(9, 0), self.train)
        every_day = self.DateRules.EveryDay(self.ticker)
        at_market_open = self.TimeRules.AfterMarketOpen(self.ticker, 0)
        self.Schedule.On(every_day, at_market_open, self.trade)

    def train(self):
        """
        Fit the main model and, when enabled, the meta-model.

        With meta-labeling the history is split chronologically in two halves:
        the first half trains the main model and the second half (unseen by
        the main model) trains the meta-model.
        """
        x, y = self.get_main_data(self.training_len, include_y=True)

        # The chronological split needs at least two rows, a plain fit one.
        min_rows = 2 if self.use_meta else 1
        if len(x) < min_rows:
            self.Debug(f"{self.Time} Not enough data to train ({len(x)} rows)")
            return

        # Train models
        if self.use_meta:
            x_main, x_meta, y_main, y_meta = train_test_split(
                x, y, test_size=0.5, shuffle=False)
            self.main_model.fit(x_main, y_main)
            self.Debug(f"{self.Time} Main model score {self.main_model.score(x_main, y_main)}")
            x_meta, y_meta = self.get_meta_data(x_meta, y_meta)
            self.meta_model.fit(x_meta, y_meta)
            self.Debug(f"{self.Time} Meta model score {self.meta_model.score(x_meta, y_meta)}")
        else:
            self.main_model.fit(x, y)
            self.Debug(f"{self.Time} Main model score {self.main_model.score(x, y)}")

    def trade(self):
        """
        Predict on the latest datapoint and rebalance the position.

        Does nothing (besides cancelling open orders) when there is no usable
        datapoint or when the models have not been fitted yet.
        """
        self.Transactions.CancelOpenOrders()

        # getting last datapoint for prediction
        x_main = self.get_main_data(self.lookback + 1, include_y=False).tail(1)
        if x_main.empty:
            self.Debug(f"{self.Time} No datapoint available for prediction")
            return

        try:
            y_main_proba = self.main_model.predict_proba(x_main)[:, 1]
            if self.use_meta:
                x_meta = self.get_meta_data(x_main)
                y_meta = self.meta_model.predict_proba(x_meta)[:, 1]
        except NotFittedError as e:
            self.Debug(str(e))
            return

        if self.use_meta:  # using meta-model to decide the position size
            y_main = y_main_proba > 0.5  # Buy signal from main model
            # Combining Buy signal with position size
            y_pred = float((y_meta * y_main)[0])
            self.Debug(f"{self.Time} Predictions: Main {y_main} - Meta {y_meta}")
        else:  # using the main model probability for the position size
            y_pred = float(y_main_proba[0])
            self.Debug(f"{self.Time} Predictions: Main {y_main_proba}")

        # SetHoldings receives a plain float in both branches
        self.SetHoldings(self.ticker, y_pred)

    def get_main_data(self, datapoints, include_y=True):
        """
        Get features and target for the main model

        :param datapoints: number of history bars to request
        :param include_y: when True also return the binary target
        :return: ``x`` or ``(x, y)``; both are empty when no usable history
                 is available
        """
        tickers = list(self.ActiveSecurities.Keys)
        data = self.History(tickers, datapoints, self.resolution)
        if data.empty:
            x = pd.DataFrame()
            return (x, pd.Series(dtype=bool)) if include_y else x

        features = data["open"].pct_change().to_frame("returns")
        features["range"] = data.eval("(high-low)/close")
        features["volume_usd"] = data.eval("volume * close")
        # Turn infinities into NaN explicitly so that dropna() removes them
        # without relying on a global pandas option.
        features = features.replace([float("inf"), float("-inf")], float("nan"))

        # Each row holds the current features plus `lookback - 1` lagged copies
        x = pd.concat([features.shift(s) for s in range(self.lookback)],
                      axis=1).dropna()
        if not include_y:
            return x

        # Target: sign of the next row's open-to-open return.
        # NOTE(review): row t also carries high/low/close/volume of bar t, and
        # the target ends at the open of bar t+1 -- presumably the price at
        # which `trade` fills. Confirm this is not a look-ahead.
        y = features["returns"].shift(-1).reindex(x.index).dropna()
        # the target of the main model is binary (up/down)
        return x.loc[y.index], y > 0

    def get_meta_data(self, x_main, y_main_true=None):
        """
        Get features and target for the meta-model
        Using the same features as the main model plus the main model
        predicted probability and whether it was right as a target

        :param x_main: features of the main model (left unmodified)
        :param y_main_true: true labels of the main model; when omitted only
                            the meta features are returned
        :return: ``x_meta`` or ``(x_meta, y_meta)``
        """
        x_meta = x_main.copy()
        # Adding main model prediction to meta features
        x_meta["proba"] = self.main_model.predict_proba(x_main)[:, 1]
        if y_main_true is None:
            return x_meta

        # The meta target is True where the main model predicted the right side
        y_main_pred = self.main_model.predict(x_main)
        return (x_meta, y_main_pred == y_main_true)