Overall Statistics

Total Trades                  1636
Average Win                   0.53%
Average Loss                  -0.34%
Compounding Annual Return     72.110%
Drawdown                      15.300%
Expectancy                    0.326
Net Profit                    138.407%
Sharpe Ratio                  2.254
Probabilistic Sharpe Ratio    94.717%
Loss Rate                     48%
Win Rate                      52%
Profit-Loss Ratio             1.57
Alpha                         0.489
Beta                          0.022
Annual Standard Deviation     0.217
Annual Variance               0.047
Information Ratio             1.834
Tracking Error                0.274
Treynor Ratio                 21.878
Total Fees                    $24274.43
Estimated Strategy Capacity   $4400000.00
Lowest Capacity Asset         RBLX XMP3AJ4KU3C5
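The expectancy line can be sanity-checked from the win/loss columns: expectancy here is the average profit per trade measured in units of the average loss, E = WinRate × (AvgWin / |AvgLoss|) − LossRate. A quick check (small differences are display rounding):

# Sanity check on the reported statistics (values copied from the table above)
win_rate, loss_rate = 0.52, 0.48
avg_win, avg_loss = 0.0053, 0.0034          # 0.53% average win, 0.34% average loss
payoff = avg_win / avg_loss                 # ~1.56, vs the reported Profit-Loss Ratio 1.57
expectancy = win_rate * payoff - loss_rate  # ~0.33, vs the reported Expectancy 0.326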
""" Big Bertha Strategy with Machine Learning Last changes: v0.42a: Improved positions calculation for DAS Integration v0.41a: Added use_kelly parameter and 9am training for easier live trading v0.40a: Improvements to DAS Integration and logging v0.39a: Minor improvements and changes to DAS Integration v0.38a: DAS Trader integration v0.37a: Long Short with probability threshold v0.36a: Long Short version (market exposure parameter) v0.35a: CV and training improvements v0.34a: Storing features instead of model v0.33a: Improved email notification v0.32a: Improved logging v0.31a: Minor improvements and library implementation v0.30a: New cv metric based on Kelly criterion v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance) v0.29: Parameter for entry/exit time v0.28: Gridsearch pipeline with multiple models v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.42a @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import sklearn import pandas as pd pd.set_option('mode.use_inf_as_na', True) from sklearn.model_selection import train_test_split from sklearn.ensemble import GradientBoostingClassifier import utils as utl class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.capital = self.GetParameter("capital", 800000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.min_proba = self.GetParameter("min_proba", 0.1) # Minimum differential probability self.use_kelly = self.GetParameter("use_kelly", 1) # Whether to use the kelly criterion for sizing self.SetStartDate(2021, 6, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.kelly = 0 self.model = GradientBoostingClassifier(n_iter_no_change=3) self.last_training = datetime(2000, 1, 1, 0, 0, 0) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(every_day, at(9, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) self.Schedule.On(every_day, at(16, 0), self.save_data) self.x, self.y = None, None if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data"))) self.print(f"Loaded data {self.x.shape} {self.y.shape}") def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if (self.Time - self.last_training).days < 7 \ or self.x is None or self.y is None: return self.clean_data() days = self.x.index.get_level_values("time") if len(days.unique()) <= 21: return y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # Using +1/-1 labels to calculate the Kelly score for both weight = self.y # TODO: How to use log with -100% returns? 
model_temp = sklearn.base.clone(self.model) x_train, x_test, y_train, y_test, w_train, w_test = train_test_split( self.x, y_bin, weight, train_size=0.5, shuffle=False) model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns y_pred = model_temp.predict(x_test) score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) \ if self.use_kelly else 1 # Kelly calculated with normal returns self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight))) self.kelly = np.nan_to_num(score).clip(0, 1) self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n" f"Kelly: {self.kelly:.1%}\n") self.Plot("ML", "Score", self.kelly) def enter_trades(self): self.calc_features() x_pred = self.x.query("time == @self.Time.date()") if self.kelly <= 0 or len(x_pred) == 0: return x_pred.index = x_pred.index.droplevel("time") y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1], index=x_pred.index) self.print(f"Predictions: {y_pred.to_string()}") for email in ["fbaldisserri@gmail.com", "hbrewer27@gmail.com"]: self.Notify.Email(email, "Big Bertha Predictions", y_pred.to_string()) proba_desc = y_pred.sort_values(ascending=False) proba_asc = y_pred.sort_values(ascending=True) pairs = dict(proba=proba_desc.values-proba_asc.values, long_sym=proba_desc.index.get_level_values("symbol"), short_sym=proba_asc.index.get_level_values("symbol")) pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba") pairs.eval("pos = proba * @self.kelly", inplace=True) if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum() self.print(f"Positions {pairs}") for _, row in pairs.iterrows(): self.SetHoldings(row["long_sym"], row["pos"]/2) self.SetHoldings(row["short_sym"], -row["pos"]/2) if self.LiveMode: order_value = self.Portfolio.TotalPortfolioValue*row["pos"]/2 long_qty = int(order_value/self.Securities[row["long_sym"]].Price) utl.das_send_order(self, row["long_sym"].split()[0], quantity=long_qty) short_qty = -int(order_value/self.Securities[row["short_sym"]].Price) utl.das_send_order(self, row["short_sym"].split()[0], quantity=short_qty) def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() if self.LiveMode: utl.das_liquidate(self) self.calc_targets() def calc_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_x = bertha_bar.add_prefix("bb_") new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_x["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_x["gap"] = bertha_bar["open"] / yesterday_close - 1 self.x = pd.concat([new_x.dropna(), self.x]) self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}") def calc_targets(self): last_x = self.x.query("time == @self.Time.date()") if len(last_x) == 0: return tickers = list(last_x.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = 
self.History(tickers, last_minute - timedelta(minutes=1), last_minute, Resolution.Minute) if len(minute_bars) == 0: return trading_bar = minute_bars.droplevel("time").join(last_x) new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return self.y = pd.concat([new_y, self.y]) self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}") def clean_data(self): idx = self.x.index.intersection(self.y.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days self.x, self.y = self.x.loc[idx], self.y.loc[idx] def save_data(self): mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True) self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb") self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y))) def print(self, msg): self.Debug(f"{self.Time} {msg}")
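The `utils` module imported above is not included in the post. Below is a minimal sketch of the helpers it calls, with signatures inferred from the call sites (`agg_bars`, `kelly_score`); the DAS Trader functions are left as logging stubs since that integration is specific to the author's live setup. Treat all of this as an assumption, not the original code:

# utils.py -- hypothetical reconstruction, inferred from the call sites above
import numpy as np
import pandas as pd

GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
           "high": "max", "low": "min", "volume": "sum"}


def agg_bars(minute_bars, start, end):
    """Aggregate the minutes between start and end ("HH:MM") into one OHLCV bar per symbol/day."""
    times = minute_bars.index.get_level_values("time").strftime("%H:%M")
    selected = minute_bars[(times >= start) & (times <= end)]
    return selected.groupby(GROUPER).agg(AGG_OPS)


def kelly_score(y_true, y_pred, sample_weight=None):
    """Kelly fraction f* = p - (1 - p) / b implied by the directional calls.

    Assumed semantics: p is the hit rate of the +1/-1 direction predictions and
    b the payoff ratio, i.e. the average |return| on hits over the average
    |return| on misses (sample_weight carries the absolute returns).
    """
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    w = np.ones(len(y_true)) if sample_weight is None else np.asarray(sample_weight)
    hits = y_true == y_pred
    p = hits.mean()
    b = w[hits].mean() / w[~hits].mean()  # payoff ratio; NaN if no misses, caller nan_to_nums it
    return p - (1 - p) / b


def das_send_order(algo, ticker, quantity):
    """Stub: forward the order to DAS Trader in live mode (integration not shown in the post)."""
    algo.Debug(f"DAS order: {ticker} {quantity:+d}")


def das_liquidate(algo):
    """Stub: flatten all DAS Trader positions (integration not shown in the post)."""
    algo.Debug("DAS liquidate")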
""" Big Bertha Strategy with Machine Learning @version: 0.1 @creation date: 05/07/2022 First prototype """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.ensemble import GradientBoostingClassifier from sklearn.model_selection import train_test_split import indicators as idx pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.SetStartDate(2015, 1, 1) self.SetCash(100000) self.UniverseSettings.ExtendedMarketHours = True self.model = GradientBoostingClassifier(warm_start=True, n_iter_no_change=3) self.test_acc = 0 self.train_days = timedelta(91) # Trainining on the last quarter self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume")) self.max_symbols = literal_eval(self.GetParameter("max_symbols")) self.benchmark = self.GetParameter("benchmark") self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Daily) self.SetBenchmark(self.benchmark) self.Train(self.DateRules.MonthStart(), self.TimeRules.At(0, 0), self.train_model) self.Schedule.On(self.DateRules.EveryDay(self.benchmark), self.TimeRules.At(9, 35), self.trade) self.Schedule.On(self.DateRules.EveryDay(self.benchmark), self.TimeRules.At(15, 55), self.stop_trading) def coarse_filter(self, coarse): symbols = [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] return np.random.choice(symbols, size=self.max_symbols, replace=False).tolist() def train_model(self): x, y = self.get_data(self.Time - self.train_days, self.Time) x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5, shuffle=True) self.model.fit(x_train, (y_train > 0).astype(float), sample_weight=abs(y_train)) # TODO: Use log returns for training weights self.test_acc = self.model.score(x_test, (y_test > 0).astype(float), sample_weight=abs(y_test)) self.Debug(f"Training Points: {len(x_train)} Test Accuracy: {self.test_acc:.1%}") self.Plot("ML", "Test Score", self.test_acc) def trade(self): if self.test_acc > 0.5: x_pred = self.get_data(self.Time-timedelta(10), self.Time, with_target=False) y_proba = pd.Series(self.model.predict_proba(x_pred)[:,1], index=x_pred.index).groupby("symbol").last() positions = y_proba.apply(lambda x: x if x > 0.5 else 0) # TODO: Implement shorting if positions.sum() > 1: positions /= positions.sum() # Max portfolio size 100% self.Debug(f"Trading\n{y_proba}\nPos: {positions}") self.Plot("ML", "Prediction", y_proba.mean()) for symbol in y_proba.index: self.SetHoldings(symbol, positions[symbol]) def stop_trading(self): self.Debug("End of day") self.Transactions.CancelOpenOrders() self.Liquidate() def get_data(self, start, end, with_target=True): tickers = [t for t in list(self.ActiveSecurities.Keys) if str(t) not in self.benchmark] minute_bars = self.History(tickers, start, end, Resolution.Minute) day_bars = idx.filter_bars(minute_bars, "09:30", "16:30") day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS) pm_bars = idx.filter_bars(minute_bars, "00:00", "09:30") pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS) min5_bars = idx.filter_bars(minute_bars, "09:30", "09:35") min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS) features = pd.DataFrame() features["big_bertha"] = min5_bar.eval("(high-low)/open") features["range"] = min5_bar.eval("(low+close)/(high-low)") yesterday_close = 
day_bar["close"].groupby("symbol").shift(1) features["gap"] = day_bar["open"]/yesterday_close features["pm_volume_usd"] = pm_bar.eval("close*volume") features.dropna(inplace=True) if with_target: trade_day_bars = idx.filter_bars(minute_bars, "09:30", "15:55") trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS) target = trade_day_bar.eval("close/open-1").dropna() index = target.index.intersection(features.index) return features.loc[index], target.loc[index] else: return features
""" Big Bertha Strategy with Machine Learning @version: 0.2 @creation date: 05/07/2022 First prototype """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.ensemble import GradientBoostingClassifier from sklearn.model_selection import train_test_split import indicators as idx pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.SetStartDate(2015, 1, 1) self.UniverseSettings.ExtendedMarketHours = True self.model = GradientBoostingClassifier(warm_start=True, n_iter_no_change=1) self.test_acc = 0 self.train_days = timedelta(30) # Training on the last quarter self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume")) self.max_symbols = literal_eval(self.GetParameter("max_symbols")) self.benchmark = self.GetParameter("benchmark") self.capital = literal_eval(self.GetParameter("capital")) self.SetSecurityInitializer(lambda x: x.SetMarketPrice(self.GetLastKnownPrice(x))) self.SetCash(self.capital) self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Daily) self.SetBenchmark(self.benchmark) self.Train(self.DateRules.MonthStart(), self.TimeRules.At(0, 0), self.train_model) self.Schedule.On(self.DateRules.EveryDay(self.benchmark), self.TimeRules.At(9, 35), self.trade) self.Schedule.On(self.DateRules.EveryDay(self.benchmark), self.TimeRules.At(15, 55), self.stop_trading) def coarse_filter(self, coarse): symbols = [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] return np.random.choice(symbols, size=self.max_symbols, replace=False).tolist() def train_model(self): x, y = self.get_data(self.Time - self.train_days, self.Time) x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5, shuffle=True) self.model.fit(x_train, (y_train > 0).astype(float), sample_weight=abs(y_train)) self.test_acc = self.model.score(x_test, (y_test > 0).astype(float), sample_weight=abs(y_test)) self.Debug(f"Training Points: {len(x_train)} Test Accuracy: {self.test_acc:.1%}") self.Plot("ML", "Test Score", self.test_acc) def trade(self): if self.test_acc > 0.5: x_pred = self.get_data(self.Time-timedelta(5), self.Time, with_target=False) y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1], index=x_pred.index).groupby("symbol").last() positions = y_proba.apply(lambda x: -1 if x < 0.25 else +1 if x > 0.75 else 0) # To short, 50% is position 0 if positions.abs().sum() > 1: positions /= positions.abs().sum() # Max portfolio size 100% self.Debug(f"Trading\n{y_proba}\nPos: {positions}") self.Plot("ML", "Prediction", y_proba.mean()) for symbol in positions.index: self.SetHoldings(symbol, positions[symbol]) def stop_trading(self): self.Debug("End of day") self.Transactions.CancelOpenOrders() self.Liquidate() def get_data(self, start, end, with_target=True): tickers = [t for t in list(self.ActiveSecurities.Keys) if str(t) not in self.benchmark] minute_bars = self.History(tickers, start, end, Resolution.Minute) day_bars = idx.filter_bars(minute_bars, "09:30", "16:30") day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS) pm_bars = idx.filter_bars(minute_bars, "00:00", "09:30") pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS) min5_bars = idx.filter_bars(day_bars, "09:30", "09:35") min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS) features = pd.DataFrame() 
features["big_bertha"] = min5_bar.eval("(high-low)/open") features["close_range"] = min5_bar.eval("(close-low)/(high-low)") features["open_range"] = min5_bar.eval("(open-low)/(high-low)") yesterday_close = day_bar["close"].groupby("symbol").shift(1) features["gap"] = day_bar["open"]/yesterday_close features["pm_volume_usd"] = pm_bar.eval("close*volume") features.dropna(inplace=True) if with_target: trade_day_bars = idx.filter_bars(day_bars, "09:35", "15:55") trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS) target = trade_day_bar.eval("close/open-1").apply(np.log1p) index = target.dropna().index.intersection(features.index) return features.loc[index], target.loc[index] else: return features
""" Big Bertha Strategy with Machine Learning @version: 0.3 @creation date: 05/07/2022 First prototype """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.SetStartDate(2015, 1, 1) self.UniverseSettings.ExtendedMarketHours = True self.model = GradientBoostingClassifier(warm_start=True, n_iter_no_change=3) self.test_acc = 0 self.train_days = timedelta(30) # Training on the last quarter self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume")) self.max_symbols = literal_eval(self.GetParameter("max_symbols")) self.min_gap = literal_eval(self.GetParameter("min_gap")) self.benchmark = self.GetParameter("benchmark") self.capital = literal_eval(self.GetParameter("capital")) self.SetSecurityInitializer(lambda x: x.SetMarketPrice(self.GetLastKnownPrice(x))) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Daily) self.SetBenchmark(self.benchmark) self.Train(self.DateRules.MonthStart(), self.TimeRules.At(0, 0), self.train_model) self.Schedule.On(self.DateRules.EveryDay(self.benchmark), self.TimeRules.At(9, 35), self.trade) self.Schedule.On(self.DateRules.EveryDay(self.benchmark), self.TimeRules.At(15, 55), self.stop_trading) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): x, y = self.get_data(self.Time - self.train_days, self.Time, min_gap=self.min_gap) fit_params = dict(sample_weight=abs(y)) cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float), scoring="accuracy", fit_params=fit_params) self.test_acc = np.mean(cv_scores) self.model.fit(x, (y > 0).astype(float), **fit_params) self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}") self.Plot("ML", "Test Accuracy", self.test_acc) def trade(self): if self.test_acc <= 0.5: return x_pred = self.get_data(self.Time-timedelta(5), self.Time, with_target=False, min_gap=self.min_gap) if x_pred is None: return today = self.Time.replace(hour=0, minute=0, second=0) x_pred.query("time == @today", inplace=True) if len(x_pred) == 0: return y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1], index=x_pred.index) positions = y_proba.apply(lambda x: -1 if x < 0.4 else +1 if x > 0.6 else 0) # To short, 50% is position 0 if positions.abs().sum() > 1: positions /= positions.abs().sum() # Max portfolio size 100% self.Debug(f"Trading\n{y_proba}\nPos: {positions}") self.Plot("ML", "Prediction", y_proba.mean()) for symbol in positions.index: self.SetHoldings(symbol, positions[symbol]) def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def get_data(self, start, end, with_target=True, min_gap=0): tickers = [t for t in list(self.ActiveSecurities.Keys) if str(t) not in self.benchmark] daily_bars = self.History(tickers, start, end, Resolution.Daily) features = pd.DataFrame() features["gap"] = idx.gap(daily_bars).dropna() features.query("abs(gap) >= @min_gap", inplace=True) if len(features) == 0: return None, None if with_target else None 
minute_bars = pd.concat([self.History([s], d, d + timedelta(1), Resolution.Minute) for s, d in features.index]) pm_bars = idx.filter_bars(minute_bars, "00:00", "09:30") pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS) min5_bars = idx.filter_bars(minute_bars, "09:30", "09:35") min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS) features["big_bertha"] = min5_bar.eval("(high-low)/open") features["close_range"] = min5_bar.eval("(close-low)/(high-low)") features["open_range"] = min5_bar.eval("(open-low)/(high-low)") features["pm_volume_usd"] = pm_bar.eval("close*volume") features.dropna(inplace=True) if not with_target: return features trade_day_bars = idx.filter_bars(minute_bars, "09:35", "15:55") trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS) target = trade_day_bar.eval("close/open-1").apply(np.log1p) index = target.dropna().index.intersection(features.index) return features.loc[index], target.loc[index]
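v0.3 is the only version that calls `idx.gap` on daily bars. A plausible one-liner, consistent with the `abs(gap) >= min_gap` filter reading as a percentage move (open over previous close, minus one) and with how v0.42a computes the gap inline; again an inference, not the original:

# indicators.py (continued) -- hypothetical gap indicator, inferred from usage
def gap(daily_bars):
    """Overnight gap per symbol: today's open over yesterday's close, minus 1."""
    prev_close = daily_bars["close"].groupby("symbol").shift(1)
    return daily_bars["open"] / prev_close - 1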
""" Big Bertha Strategy with Machine Learning @version: 0.4 @creation date: 05/07/2022 First prototype """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.max_symbols = literal_eval(self.GetParameter("max_symbols")) self.capital = literal_eval(self.GetParameter("capital")) self.SetStartDate(2020, 1, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.test_acc = 0 self.train_days = timedelta(30) # Training on the last quarter self.model = GradientBoostingClassifier(warm_start=True, n_iter_no_change=3) self.Train(self.DateRules.MonthStart(), self.TimeRules.At(0, 0), self.train_model) self.Schedule.On(self.DateRules.EveryDay(), self.TimeRules.At(9, 35), self.trade) self.Schedule.On(self.DateRules.EveryDay(), self.TimeRules.At(15, 55), self.stop_trading) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData] def train_model(self): x, y = self.get_data(self.Time - self.train_days, self.Time) fit_params = dict(sample_weight=abs(y)) cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float), scoring="accuracy", fit_params=fit_params) self.test_acc = np.mean(cv_scores) self.model.fit(x, (y > 0).astype(float), **fit_params) self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}") self.Plot("ML", "Test Accuracy", self.test_acc) def trade(self): model_confidence = max(self.test_acc-0.5, 0)*2 # 100% if accuracy 100%, 0% if below 50% x_pred = self.get_data(self.Time-timedelta(5), self.Time, with_target=False) y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1], index=x_pred.index).groupby("symbol").last() positions = (y_proba - 0.5) * 2 * model_confidence # TODO: Fix risk management if positions.abs().sum() > 1: positions /= positions.abs().sum() # Max portfolio size 100% self.Plot("ML", "Prediction", y_proba.mean()) for symbol, position in positions[abs(positions) > 0.01].items(): self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}") self.SetHoldings(symbol, position) def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def get_data(self, start, end, with_target=True): # TODO: Add daily dataset update tickers = np.random.choice(list(self.ActiveSecurities.Keys), size=self.max_symbols, replace=False).tolist() minute_bars = self.History(tickers, start, end, Resolution.Minute) day_bars = idx.filter_bars(minute_bars, "09:30", "16:30") day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS) pm_bars = idx.filter_bars(minute_bars, "00:00", "09:30") pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS) min5_bars = idx.filter_bars(day_bars, "09:30", "09:35") min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS) features = pd.DataFrame() features["big_bertha"] = min5_bar.eval("(high-low)/open") features["close_range"] = min5_bar.eval("(close-low)/(high-low)") features["open_range"] = min5_bar.eval("(open-low)/(high-low)") features["pm_volume_usd"] = pm_bar.eval("close*volume") yesterday_close = day_bar["close"].groupby("symbol").shift(1) features["gap"] = 
day_bar["open"] / yesterday_close features = features.dropna().query("abs(gap) >= 0.02") if with_target: trade_day_bars = idx.filter_bars(day_bars, "09:35", "15:55") trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS) target = trade_day_bar.eval("close/open-1").apply(np.log1p) index = target.dropna().index.intersection(features.index) return features.loc[index], target.loc[index] else: return features
""" Big Bertha Strategy with Machine Learning @version: 0.6 @creation date: 05/07/2022 First prototype """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.max_symbols = literal_eval(self.GetParameter("max_symbols")) self.capital = literal_eval(self.GetParameter("capital")) self.SetStartDate(2020, 1, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.benchmark = "SPY" self.AddEquity(self.benchmark, Resolution.Daily) self.SetBenchmark(self.benchmark) self.history = pd.DataFrame() self.test_acc = 0 self.train_days = timedelta(30) # Training on the last quarter self.model = GradientBoostingClassifier(warm_start=True, n_iter_no_change=3) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.update_history) self.Schedule.On(every_day, at(9, 35), self.trade) self.Schedule.On(every_day, at(15, 55), self.stop_trading) self.Schedule.On(every_day, at(16, 0), self.update_history) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData] def train_model(self): if len(self.history) > 10000: x, y = self.get_data(self.Time - self.train_days, self.Time) fit_params = dict(sample_weight=abs(y)) cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float), scoring="accuracy", fit_params=fit_params) self.test_acc = np.mean(cv_scores) self.model.fit(x, (y > 0).astype(float), **fit_params) self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}") self.Plot("ML", "Test Accuracy", self.test_acc) def trade(self): if len(self.history) > 10000: model_confidence = max(self.test_acc-0.5, 0)*2 # 100% if accuracy 100%, 0% if below 50% x_pred = self.get_data(self.Time-timedelta(5), self.Time, with_target=False) y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1], index=x_pred.index).groupby("symbol").last() positions = (y_proba - 0.5) * 2 * model_confidence # TODO: Fix risk management if positions.abs().sum() > 1: positions /= positions.abs().sum() # Max portfolio size 100% self.Plot("ML", "Prediction", y_proba.mean()) for symbol, position in positions[abs(positions) > 0.01].items(): self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}") self.SetHoldings(symbol, position) def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def update_history(self): tickers = list(self.ActiveSecurities.Keys) new_data = self.History(tickers, 5, Resolution.Minute) self.history = pd.concat([self.history, new_data]) def get_data(self, start, end, with_target=True): # TODO: Add daily dataset update time_index = self.history.index.get_level_values("time") time_filter = (time_index >= start) & (time_index <= end) minute_bars = self.history.loc[time_filter] day_bars = idx.filter_bars(minute_bars, "09:30", "16:30") day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS) pm_bars = idx.filter_bars(minute_bars, "00:00", "09:30") pm_bar = 
pm_bars.groupby(GROUPER).agg(AGG_OPS) min5_bars = idx.filter_bars(day_bars, "09:30", "09:35") min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS) features = pd.DataFrame() features["big_bertha"] = min5_bar.eval("(high-low)/open") features["close_range"] = min5_bar.eval("(close-low)/(high-low)") features["open_range"] = min5_bar.eval("(open-low)/(high-low)") features["pm_volume_usd"] = pm_bar.eval("close*volume") yesterday_close = day_bar["close"].groupby("symbol").shift(1) features["gap"] = day_bar["open"] / yesterday_close features = features.dropna().query("abs(gap) >= 0.02") if with_target: trade_day_bars = idx.filter_bars(day_bars, "09:35", "15:55") trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS) target = trade_day_bar.eval("close/open-1").apply(np.log1p) index = target.dropna().index.intersection(features.index) return features.loc[index], target.loc[index] else: return features
""" Big Bertha Strategy with Machine Learning @version: 0.6 @creation date: 05/07/2022 First prototype """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.max_symbols = literal_eval(self.GetParameter("max_symbols")) self.capital = literal_eval(self.GetParameter("capital")) self.SetStartDate(2020, 1, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.test_acc = 0 self.train_days = timedelta(30) # Training on the last quarter self.model = GradientBoostingClassifier(warm_start=True, n_iter_no_change=3) self.Train(self.DateRules.MonthStart(), self.TimeRules.At(0, 0), self.train_model) self.Schedule.On(self.DateRules.EveryDay(), self.TimeRules.At(9, 35), self.trade) self.Schedule.On(self.DateRules.EveryDay(), self.TimeRules.At(15, 55), self.stop_trading) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData] def train_model(self): x, y = self.get_data(self.Time - self.train_days, self.Time) fit_params = dict(sample_weight=abs(y)) cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float), scoring="accuracy", fit_params=fit_params) self.test_acc = np.mean(cv_scores) self.model.fit(x, (y > 0).astype(float), **fit_params) self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}") self.Plot("ML", "Test Accuracy", self.test_acc) def trade(self): model_confidence = max(self.test_acc-0.5, 0)*2 # 100% if accuracy 100%, 0% if below 50% x_pred = self.get_data(self.Time-timedelta(5), self.Time, with_target=False) y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1], index=x_pred.index).groupby("symbol").last() positions = (y_proba - 0.5) * 2 * model_confidence # TODO: Fix risk management if positions.abs().sum() > 1: positions /= positions.abs().sum() # Max portfolio size 100% self.Plot("ML", "Prediction", y_proba.mean()) for symbol, position in positions[abs(positions) > 0.01].items(): self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}") self.SetHoldings(symbol, position) def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def get_data(self, start, end, with_target=True): tickers = np.random.choice(list(self.ActiveSecurities.Keys), size=self.max_symbols, replace=False).tolist() minute_bars = self.History(tickers, start, end, Resolution.Minute) day_bars = idx.filter_bars(minute_bars, "09:30", "16:30") day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS) pm_bars = idx.filter_bars(minute_bars, "00:00", "09:30") pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS) min5_bars = idx.filter_bars(day_bars, "09:30", "09:35") min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS) features = pd.DataFrame() features["big_bertha"] = min5_bar.eval("(high-low)/open") features["close_range"] = min5_bar.eval("(close-low)/(high-low)") features["open_range"] = min5_bar.eval("(open-low)/(high-low)") features["pm_volume_usd"] = pm_bar.eval("close*volume") yesterday_close = day_bar["close"].groupby("symbol").shift(1) features["gap"] = day_bar["open"] / yesterday_close features 
= features.dropna().query("abs(gap) >= 0.02") if with_target: trade_day_bars = idx.filter_bars(day_bars, "09:35", "15:55") trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS) target = trade_day_bar.eval("close/open-1").apply(np.log1p) index = target.dropna().index.intersection(features.index) return features.loc[index], target.loc[index] else: return features
""" Big Bertha Strategy with Machine Learning - Implementing offline data storage to avoid symbols limitation @version: 0.7 @creation date: 05/07/2022 First prototype """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume")) self.capital = literal_eval(self.GetParameter("capital")) self.benchmark = self.GetParameter("benchmark") self.SetStartDate(2020, 1, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.test_acc = 0 self.features, self.targets = None, None self.train_days = 63 # Training on the last quarter self.model = GradientBoostingClassifier(warm_start=True, n_iter_no_change=3) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.update_data) self.Schedule.On(every_day, at(9, 35), self.trade) self.Schedule.On(every_day, at(15, 55), self.stop_trading) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.get_dataset_days() < 21: return self.Debug(f"{self.Time} Training") x, y = self.get_data(self.train_days) fit_params = dict(sample_weight=abs(y)) cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float), scoring="accuracy", fit_params=fit_params) self.test_acc = np.mean(cv_scores) self.model.fit(x, (y > 0).astype(float), **fit_params) self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}") self.Plot("ML", "Test Accuracy", self.test_acc) def trade(self): model_confidence = max(self.test_acc-0.5, 0)*2 # 100% if accuracy 100%, 0% if below 50% if model_confidence <= 0: return self.Debug(f"{self.Time} Trading") x_pred = self.get_data(1, with_target=False) x_pred.query("time == @self.Time.date()") y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1], index=x_pred.index).groupby("symbol").last() positions = (y_proba - 0.5) * 2 * model_confidence # TODO: Fix risk management if positions.abs().sum() > 1: positions /= positions.abs().sum() # Max portfolio size 100% self.Plot("ML", "Prediction", y_proba.mean()) for symbol, position in positions[abs(positions) > 0.01].items(): self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}") self.SetHoldings(symbol, position) def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def update_data(self): trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7), self.Time - timedelta(1)) last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday, trade_days))[-1].Date start = last_day.replace(hour=9, minute=30, second=0) end = self.Time.replace(hour=9, minute=35, second=0) tickers = [ticker for ticker in list(self.ActiveSecurities.Keys) if str(ticker) not in self.benchmark] minute_bars = self.History(tickers, start, end, 
Resolution.Minute) features = self.calculate_features(minute_bars).dropna() self.features = pd.concat([self.features, features], copy=False) targets = self.calculate_targets(minute_bars).dropna() self.targets = pd.concat([self.targets, targets], copy=False) memory = self.features.memory_usage(deep=True).sum() memory += self.targets.memory_usage(deep=True) self.Debug(f"{self.Time} Data updated: {len(tickers)} tickers {memory/10**6:.1f} MB") def calculate_features(self, minute_bars): day_bars = idx.filter_bars(minute_bars, "09:31", "16:30") day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS) pm_bars = idx.filter_bars(minute_bars, "00:01", "09:30") pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS) min5_bars = idx.filter_bars(day_bars, "09:31", "09:35") min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS) features = pd.DataFrame() features["big_bertha"] = min5_bar.eval("(high-low)/open") features["close_range"] = min5_bar.eval("(close-low)/(high-low)") features["open_range"] = min5_bar.eval("(open-low)/(high-low)") features["pm_volume_usd"] = pm_bar.eval("close*volume") yesterday_close = day_bar["close"].groupby("symbol").shift(1) features["gap"] = day_bar["open"] / yesterday_close return features def calculate_targets(self, minute_bars): trade_day_bars = idx.filter_bars(minute_bars, "09:36", "15:55") trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS) return trade_day_bar.eval("close/open-1").apply(np.log1p) def get_data(self, n_points, with_target=True): if with_target: common_index = self.targets.index.intersection(self.features.index) y = self.targets.loc[common_index].groupby("symbol").tail(n_points) x = self.features.loc[y.index] return x, y else: return self.features.groupby("symbol").tail(n_points) def get_dataset_days(self): return len(self.features.index.get_level_values("time").unique()) \ if self.features is not None else 0
""" Big Bertha Strategy with Machine Learning - Implementing offline data storage to avoid symbols limitation @version: 0.8 @creation date: 05/07/2022 First prototype """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume")) self.capital = literal_eval(self.GetParameter("capital")) self.benchmark = self.GetParameter("benchmark") self.SetStartDate(2020, 9, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.test_acc = 0 self.features, self.targets = None, None self.train_days = 63 # Training on the last quarter self.model = GradientBoostingClassifier(warm_start=True, n_iter_no_change=3) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.update_data) self.Schedule.On(every_day, at(9, 35), self.trade) self.Schedule.On(every_day, at(15, 55), self.stop_trading) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.get_dataset_days() < 21: return # At least a month of data to train the model self.Debug(f"{self.Time} Training") x, y = self.get_train_data() fit_params = dict(sample_weight=abs(y)) cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float), scoring="accuracy", fit_params=fit_params) self.test_acc = np.mean(cv_scores) self.model.fit(x, (y > 0).astype(float), **fit_params) self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}") self.Plot("ML", "Test Accuracy", self.test_acc) def trade(self): model_confidence = max(self.test_acc-0.5, 0)*2 # 100% if accuracy 100%, 0% if below 50% if model_confidence <= 0: return self.Debug(f"{self.Time} Trading") x_pred = self.get_pred_data() y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1], index=x_pred.index).groupby("symbol").last() trades = y_proba[(y_proba < 0.3) | (y_proba > 0.7)] positions = (trades - 0.5) * 2 * model_confidence # TODO: Fix risk management # Max portfolio size 100% including shorts for symbol, position in positions.items(): self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}") self.SetHoldings(symbol, position) def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def update_data(self): trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7), self.Time - timedelta(1)) last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday, trade_days))[-1].Date start = last_day.replace(hour=9, minute=30, second=0) end = self.Time.replace(hour=9, minute=35, second=0) tickers = [ticker for ticker in list(self.ActiveSecurities.Keys) if str(ticker) not in self.benchmark] minute_bars = self.History(tickers, start, end, Resolution.Minute) features = self.calculate_features(minute_bars).dropna() 
self.features = pd.concat([self.features, features]).drop_duplicates() targets = self.calculate_targets(minute_bars).dropna() self.targets = pd.concat([self.targets, targets]).drop_duplicates() memory = self.features.memory_usage(deep=True).sum() memory += self.targets.memory_usage(deep=True) self.Debug(f"{self.Time} Data updated: {len(tickers)} tickers {memory/10**6:.1f} MB") def calculate_features(self, minute_bars): day_bars = idx.filter_bars(minute_bars, "09:31", "16:30") day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS) pm_bars = idx.filter_bars(minute_bars, "00:01", "09:30") pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS) min5_bars = idx.filter_bars(day_bars, "09:31", "09:35") min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS) features = pd.DataFrame() features["big_bertha"] = min5_bar.eval("(high-low)/open") features["close_range"] = min5_bar.eval("(close-low)/(high-low)") features["open_range"] = min5_bar.eval("(open-low)/(high-low)") features["pm_volume_usd"] = pm_bar.eval("close*volume") yesterday_close = day_bar["close"].groupby("symbol").shift(1) features["gap"] = day_bar["open"] / yesterday_close return features def calculate_targets(self, minute_bars): trade_day_bars = idx.filter_bars(minute_bars, "09:36", "15:55") trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS) return trade_day_bar.eval("close/open-1").apply(np.log1p) def get_train_data(self): common_index = self.targets.index.intersection(self.features.index) y = self.targets.loc[common_index].groupby("symbol").tail(self.train_days) x = self.features.loc[y.index] return x, y def get_pred_data(self): return self.features.query("time == @self.Time.date()") def get_dataset_days(self): return len(self.features.index.get_level_values("time").unique()) \ if self.features is not None else 0
""" Big Bertha Strategy with Machine Learning - Offline data storage to avoid symbols limitation - Trade execution on high probability trades - Risk management with stop loss @version: 0.9 @creation date: 05/07/2022 """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume")) self.capital = literal_eval(self.GetParameter("capital")) self.benchmark = self.GetParameter("benchmark") self.SetStartDate(2020, 10, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.test_acc = 0 self.features, self.targets = None, None self.train_days = 252 # Training on the last year of data self.model = GradientBoostingClassifier(warm_start=True, n_iter_no_change=3) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.update_data) self.Schedule.On(every_day, at(9, 35), self.trade) self.Schedule.On(every_day, at(15, 55), self.stop_trading) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.get_dataset_days() < 21: return # At least a month of data to train the model self.Debug(f"{self.Time} Training") x, y = self.get_train_data() fit_params = dict(sample_weight=abs(y)) cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float), scoring="accuracy", fit_params=fit_params) self.test_acc = np.mean(cv_scores) self.model.fit(x, (y > 0).astype(float), **fit_params) self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}") self.Plot("ML", "Test Accuracy", self.test_acc) def trade(self): model_confidence = max(self.test_acc-0.5, 0)*2 # 100% if accuracy 100%, 0% if below 50% if model_confidence <= 0: return self.Debug(f"{self.Time} Trading") x_pred = self.get_pred_data() y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1], index=x_pred.index).groupby("symbol").last() trades = y_proba[(y_proba < 0.25) | (y_proba > 0.75)] positions = (trades - 0.5) * 2 * model_confidence # TODO: Fix risk management Max portfolio size 100% including shorts for symbol, position in positions.items(): self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}") qty = self.CalculateOrderQuantity(symbol, position) limit_price = self.Securities[symbol].Price self.LimitOrder(symbol, qty, limit_price) stop_price = 0 if qty > 0 else 2 * limit_price self.StopMarketOrder(symbol, -qty, stop_price) def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def update_data(self): trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7), self.Time - timedelta(1)) last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday, trade_days))[-1].Date start = last_day.replace(hour=9, minute=30, second=0) end = self.Time.replace(hour=9, 
minute=35, second=0) tickers = [ticker for ticker in list(self.ActiveSecurities.Keys) if str(ticker) not in self.benchmark] minute_bars = self.History(tickers, start, end, Resolution.Minute) features = self.calculate_features(minute_bars).dropna() self.features = pd.concat([self.features, features]).drop_duplicates() targets = self.calculate_targets(minute_bars).dropna() self.targets = pd.concat([self.targets, targets]).drop_duplicates() memory = self.features.memory_usage(deep=True).sum() memory += self.targets.memory_usage(deep=True) self.Debug(f"{self.Time} Data updated: {len(tickers)} tickers {memory/10**6:.1f} MB") def calculate_features(self, minute_bars): day_bars = idx.filter_bars(minute_bars, "09:31", "16:00") day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS) pm_bars = idx.filter_bars(minute_bars, "00:01", "09:30") pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS) min5_bars = idx.filter_bars(day_bars, "09:31", "09:35") min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS) features = pd.DataFrame() features["big_bertha"] = min5_bar.eval("(high-low)/open") features["close_range"] = min5_bar.eval("(close-low)/(high-low)") features["open_range"] = min5_bar.eval("(open-low)/(high-low)") features["pm_volume_usd"] = pm_bar.eval("close*volume") yesterday_close = day_bar["close"].groupby("symbol").shift(1) features["gap"] = day_bar["open"] / yesterday_close return features def calculate_targets(self, minute_bars): trade_day_bars = idx.filter_bars(minute_bars, "09:36", "15:55") trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS) return trade_day_bar.eval("close/open-1").apply(np.log1p) def get_train_data(self): common_index = self.targets.index.intersection(self.features.index) y = self.targets.loc[common_index].groupby("symbol").tail(self.train_days) x = self.features.loc[y.index] return x, y def get_pred_data(self): return self.features.query("time == @self.Time.date()")
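The v0.9 stop prices above are placeholders: a sell stop at 0 for longs and a buy stop at twice the price for shorts effectively never fire. A sketch of what a real percentage stop could look like in that spot; `stop_pct` is an illustrative parameter that does not exist in the original strategy:

# Hypothetical percentage stop for the v0.9 entry block (stop_pct is assumed, not original)
stop_pct = 0.02  # e.g. exit if the trade moves 2% against the position
qty = self.CalculateOrderQuantity(symbol, position)
limit_price = self.Securities[symbol].Price
self.LimitOrder(symbol, qty, limit_price)
if qty > 0:
    stop_price = limit_price * (1 - stop_pct)  # sell stop below entry for longs
else:
    stop_price = limit_price * (1 + stop_pct)  # buy stop above entry for shorts
self.StopMarketOrder(symbol, -qty, stop_price)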
""" Big Bertha Strategy with Machine Learning Done - Offline data storage to avoid symbols limitation - Trade execution on high probability trades Todo - Risk management with stop loss @version: 0.9 @creation date: 05/07/2022 """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume")) self.capital = literal_eval(self.GetParameter("capital")) self.benchmark = self.GetParameter("benchmark") self.SetStartDate(2020, 1, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.confidence = 0 self.features, self.targets = None, None self.train_days = 252 # Training on the last year of data self.model = GradientBoostingClassifier(warm_start=True, n_iter_no_change=3) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.update_data) self.Schedule.On(every_day, at(9, 35), self.trade) self.Schedule.On(every_day, at(15, 55), self.stop_trading) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None: return self.Debug(f"{self.Time} Training") x, y = self.get_train_data() fit_params = dict(sample_weight=abs(y)) cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float), scoring="accuracy", fit_params=fit_params) self.confidence = max(np.mean(cv_scores) - 0.5, 0) * 2 # 100% if accuracy 100%, 0% if below 50% self.model.fit(x, (y > 0).astype(float), **fit_params) self.Debug(f"{self.Time} Points:{len(x)} Confidence:{self.confidence:.1%}") self.Plot("ML", "Confidence", self.confidence) def trade(self): if self.confidence <= 0: return self.Debug(f"{self.Time} Trading") x_pred = self.get_pred_data() y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1], index=x_pred.index).groupby("symbol").last() trades = y_proba[(y_proba < 0.25) | (y_proba > 0.75)] positions = (trades - 0.5) * 2 * self.confidence # TODO: Fix risk management Max portfolio size 100% including shorts for symbol, position in positions.items(): self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}") self.SetHoldings(symbol, position) def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def update_data(self): trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7), self.Time - timedelta(1)) last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday, trade_days))[-1].Date start = last_day.replace(hour=9, minute=30, second=0) end = self.Time.replace(hour=9, minute=35, second=0) tickers = [ticker for ticker in list(self.ActiveSecurities.Keys) if str(ticker) not in self.benchmark] minute_bars = self.History(tickers, start, end, Resolution.Minute) features = self.calculate_features(minute_bars).dropna() self.features = 
pd.concat([self.features, features]).drop_duplicates() targets = self.calculate_targets(minute_bars).dropna() self.targets = pd.concat([self.targets, targets]).drop_duplicates() memory = self.features.memory_usage(deep=True).sum() memory += self.targets.memory_usage(deep=True) self.Debug(f"{self.Time} Data updated: {len(tickers)} tickers {memory/10**6:.1f} MB") def calculate_features(self, minute_bars): day_bars = idx.filter_bars(minute_bars, "09:31", "16:00") day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS) pm_bars = idx.filter_bars(minute_bars, "00:01", "09:30") pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS) min5_bars = idx.filter_bars(day_bars, "09:31", "09:35") min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS) features = pd.DataFrame() features["big_bertha"] = min5_bar.eval("(high-low)/open") features["close_range"] = min5_bar.eval("(close-low)/(high-low)") features["open_range"] = min5_bar.eval("(open-low)/(high-low)") features["pm_volume_usd"] = pm_bar.eval("close*volume") yesterday_close = day_bar["close"].groupby("symbol").shift(1) features["gap"] = day_bar["open"] / yesterday_close return features def calculate_targets(self, minute_bars): trade_day_bars = idx.filter_bars(minute_bars, "09:36", "15:55") trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS) return trade_day_bar.eval("close/open-1").apply(np.log1p) def get_train_data(self): common_index = self.targets.index.intersection(self.features.index) y = self.targets.loc[common_index].groupby("symbol").tail(self.train_days) x = self.features.loc[y.index] return x, y def get_pred_data(self): return self.features.query("time == @self.Time.date()") def get_dataset_days(self): return len(self.features.index.get_level_values("time").unique()) \ if self.features is not None else 0
""" Big Bertha Strategy with Machine Learning Done - New Features (bb volume and open) - Offline data storage to avoid symbols limitation - Trade execution on high probability trades Todo - Risk management with stop loss @version: 0.10 @creation date: 05/07/2022 """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume")) self.capital = literal_eval(self.GetParameter("capital")) self.benchmark = self.GetParameter("benchmark") self.SetStartDate(2020, 1, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.confidence = 0 self.features, self.targets = None, None self.train_days = 252 # Training on the last year of data self.model = GradientBoostingClassifier(warm_start=True, n_iter_no_change=3) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.update_data) self.Schedule.On(every_day, at(9, 35), self.trade) self.Schedule.On(every_day, at(15, 55), self.stop_trading) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None: return self.Debug(f"{self.Time} Training") x, y = self.get_train_data() fit_params = dict(sample_weight=abs(y)) cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float), scoring="accuracy", fit_params=fit_params) self.confidence = max(np.mean(cv_scores) - 0.5, 0) * 2 # 100% if accuracy 100%, 0% if below 50% self.model.fit(x, (y > 0).astype(float), **fit_params) self.Debug(f"{self.Time} Points:{len(x)} Confidence:{self.confidence:.1%}") self.Plot("ML", "Confidence", self.confidence) def trade(self): if self.confidence <= 0: return self.Debug(f"{self.Time} Trading") x_pred = self.get_pred_data() y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1], index=x_pred.index).groupby("symbol").last() self.Debug(f"Predictions: {len(trades)} - Proba {min(trades):.0%}-{max(trades):.0%}") trades = y_proba[(y_proba < 0.25) | (y_proba > 0.75)] positions = (trades - 0.5) * 2 * self.confidence # TODO: Fix risk management Max portfolio size 100% including shorts for symbol, position in positions.items(): self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}") self.SetHoldings(symbol, position) def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def update_data(self): trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7), self.Time - timedelta(1)) last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday, trade_days))[-1].Date start = last_day.replace(hour=9, minute=30, second=0) end = self.Time.replace(hour=9, minute=35, second=0) tickers = [ticker for ticker in list(self.ActiveSecurities.Keys) if str(ticker) not in self.benchmark] minute_bars = 
self.History(tickers, start, end, Resolution.Minute) features = self.calculate_features(minute_bars).dropna() self.features = pd.concat([self.features, features]).drop_duplicates() targets = self.calculate_targets(minute_bars).dropna() self.targets = pd.concat([self.targets, targets]).drop_duplicates() self.Debug(f"{self.Time} Data updated: {len(self.features)} datapoints") def calculate_features(self, minute_bars): day_bars = idx.filter_bars(minute_bars, "09:31", "16:00") day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS) pm_bars = idx.filter_bars(minute_bars, "00:01", "09:30") pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS) min5_bars = idx.filter_bars(day_bars, "09:31", "09:35") min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS) features = pd.DataFrame() features["big_bertha_size"] = min5_bar.eval("(high-low)/open") features["big_bertha_volume"] = min5_bar["volume"] features["big_bertha_open"] = min5_bar["open"] features["close_range"] = min5_bar.eval("(close-low)/(high-low)") features["open_range"] = min5_bar.eval("(open-low)/(high-low)") features["pm_volume_usd"] = pm_bar.eval("close*volume") yesterday_close = day_bar["close"].groupby("symbol").shift(1) features["gap"] = day_bar["open"] / yesterday_close return features def calculate_targets(self, minute_bars): trade_day_bars = idx.filter_bars(minute_bars, "09:36", "15:55") trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS) return trade_day_bar.eval("close/open-1").apply(np.log1p) def get_train_data(self): common_index = self.targets.index.intersection(self.features.index) y = self.targets.loc[common_index].groupby("symbol").tail(self.train_days) x = self.features.loc[y.index] return x, y def get_pred_data(self): return self.features.query("time == @self.Time.date()") def get_dataset_days(self): return len(self.features.index.get_level_values("time").unique()) \ if self.features is not None else 0
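v0.10 turns cross-validated accuracy into a confidence scalar (zero at or below coin-flip accuracy, one at perfect accuracy) and scales each probability-based position by it. A worked sketch of just that arithmetic, with made-up probabilities and CV scores:

import numpy as np
import pandas as pd

def confidence_from_cv(cv_scores):
    # 0 at or below coin-flip accuracy, 1 at perfect accuracy
    return max(np.mean(cv_scores) - 0.5, 0) * 2

conf = confidence_from_cv([0.58, 0.62, 0.60])          # -> 0.20
y_proba = pd.Series({"AAA": 0.85, "BBB": 0.15, "CCC": 0.55})
trades = y_proba[(y_proba < 0.25) | (y_proba > 0.75)]  # high-conviction names only
positions = (trades - 0.5) * 2 * conf                  # AAA +14%, BBB -14%
print(positions)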
""" Big Bertha Strategy with Machine Learning Done - New Features (bb volume and open) - Offline data storage to avoid symbols limitation - Trade execution on high probability trades Todo - Risk management with stop loss @version: 0.11 @creation date: 05/07/2022 """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume")) self.capital = literal_eval(self.GetParameter("capital")) self.benchmark = self.GetParameter("benchmark") self.SetStartDate(2021, 1, 1) self.SetEndDate(2022, 1, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.accuracy = None self.features, self.targets = None, None self.model = GradientBoostingClassifier(n_iter_no_change=3) # TODO: Evaluate Grid search for different parameters at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.store_data) self.Schedule.On(every_day, at(9, 35), self.trade) self.Schedule.On(every_day, at(15, 55), self.stop_trading) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None or self.targets is None: return self.Debug(f"{self.Time} Training") x, y = self.get_train_data() fit_params = dict(sample_weight=abs(y)) cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float), cv=10, fit_params=fit_params) self.accuracy = np.mean(cv_scores) self.model.fit(x, (y > 0).astype(float)) self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.accuracy:.1%}") self.Plot("ML", "Accuracy", self.accuracy) def trade(self): if self.accuracy is None: return self.Debug(f"{self.Time} Trading") x_pred = self.get_pred_data() y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1], index=x_pred.index).groupby("symbol").last() self.Debug(f"Predictions: {len(y_proba)} - Proba {min(y_proba):.0%}-{max(y_proba):.0%}") positions = (y_proba[(y_proba <= 0.4)|(y_proba >= 0.6)] - 0.5) * self.accuracy # Model and trade confidence if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used [self.SetHoldings(symbol, pos) for symbol, pos in positions.items()] def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def store_data(self): trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7), self.Time - timedelta(1)) last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday, trade_days))[-1].Date start = last_day.replace(hour=9, minute=30, second=0) end = self.Time.replace(hour=9, minute=35, second=0) tickers = list(filter(lambda x: str(x) not in self.benchmark, self.ActiveSecurities.Keys)) minute_bars = self.History(tickers, start, end, Resolution.Minute) self.add_features(minute_bars) self.add_targets(minute_bars) 
self.Debug(f"{self.Time} Data updated: {len(self.features)} datapoints") def add_features(self, minute_bars): day_bar = self.agg_bars(minute_bars, "09:31", "16:00") pm_bar = self.agg_bars(minute_bars, "00:01", "09:30") min5_bar = self.agg_bars(minute_bars, "09:31", "09:35") features = min5_bar.add_prefix("bb_") features["bb_size"] = min5_bar.eval("(high-low)/open") features["bb_close_range"] = min5_bar.eval("(close-low)/(high-low)") features["bb_open_range"] = min5_bar.eval("(open-low)/(high-low)") features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_close = day_bar["close"].groupby("symbol").shift(1) features["gap"] = day_bar["open"] / yesterday_close-1 features.dropna(inplace=True) if self.features is not None: new_idx = features.index.difference(self.features.index) # Removing potential duplicates self.features = pd.concat([self.features, features.loc[new_idx]]) else: self.features = features def add_targets(self, minute_bars): trading_bar = self.agg_bars(minute_bars, "09:36", "15:55") targets = trading_bar.eval("close/open-1").dropna() if self.targets is not None: new_idx = targets.index.difference(self.targets.index) # Removing potential duplicates self.targets = pd.concat([self.targets, targets.loc[new_idx]]) else: self.targets = targets def get_train_data(self): train_idx = self.targets.index.intersection(self.features.index) return self.features.loc[train_idx], self.targets.loc[train_idx] def get_pred_data(self): return self.features.query("time == @self.Time.date()") def agg_bars(self, minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
""" Big Bertha Strategy with Machine Learning Done - New Features (bb volume and open) - Offline data storage to avoid symbols limitation - Trade execution on high probability trades Todo - Risk management with stop loss @version: 0.12 @creation date: 05/07/2022 """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} RETR_PCT = 0.7 class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume")) self.capital = literal_eval(self.GetParameter("capital")) self.benchmark = self.GetParameter("benchmark") self.SetStartDate(2021, 1, 1) self.SetEndDate(2022, 1, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.accuracy = 0 self.features, self.targets = None, None self.model = GradientBoostingClassifier(n_iter_no_change=3) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.store_data) self.Schedule.On(every_day, at(9, 35), self.trade) self.Schedule.On(every_day, at(15, 55), self.stop_trading) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None or self.targets is None: return self.Debug(f"{self.Time} Training") x, y = self.get_train_data() cv_scores = cross_val_score(self.model, X=x, y=y, cv=10) self.accuracy = np.mean(cv_scores) self.model.fit(x, y) self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.accuracy:.1%}") self.Plot("ML", "Accuracy", self.accuracy) def trade(self): if self.accuracy <= 0.5: return self.Debug(f"{self.Time} Trading") x_pred = self.get_pred_data() y_proba = pd.DataFrame(self.model.predict_proba(x_pred), index=x_pred.index, columns=self.model.classes_).groupby("symbol").last() actions = y_proba.idxmax(axis=1) positions = actions.apply(lambda x: 0.01 if x=="long" else -0.01 if x=="short" else 0) self.Debug(f"Predictions: {len(y_proba)} - Proba {y_proba}") for symbol, pos in positions.items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) feats = x_pred.loc[symbol].iloc[0] # TODO: Refactor window = (feats.bb_high - feats.bb_low) * RETR_PCT stop_loss = feats.bb_high - window if pos > 0 \ else feats.bb_low + window # TODO: Refactor self.StopMarketOrder(symbol, -qty, stop_loss) def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def store_data(self): trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7), self.Time - timedelta(1)) last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday, trade_days))[-1].Date start = last_day.replace(hour=9, minute=30, second=0) end = self.Time.replace(hour=9, minute=35, second=0) tickers = list(filter(lambda x: str(x) not in self.benchmark, self.ActiveSecurities.Keys)) minute_bars = self.History(tickers, start, end, Resolution.Minute) new_features = 
self.calc_features(minute_bars).dropna() if self.features is not None: new_idx = new_features.index.difference(self.features.index) # Removing potential duplicates self.features = pd.concat([self.features, new_features.loc[new_idx]]) else: self.features = new_features new_targets = self.calc_targets(minute_bars).dropna() if self.targets is not None: new_idx = new_targets.index.difference(self.targets.index) # Removing potential duplicates self.targets = pd.concat([self.targets, new_targets.loc[new_idx]]) else: self.targets = new_targets self.Debug(f"{self.Time} Data updated: {len(self.features)} datapoints") def calc_features(self, minute_bars): day_bar = agg_bars(minute_bars, "09:31", "16:00") pm_bar = agg_bars(minute_bars, "00:01", "09:30") min5_bar = agg_bars(minute_bars, "09:31", "09:35") features = min5_bar.add_prefix("bb_") features["bb_size"] = min5_bar.eval("(high-low)/open") features["bb_close_range"] = min5_bar.eval("(close-low)/(high-low)") features["bb_open_range"] = min5_bar.eval("(open-low)/(high-low)") features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_close = day_bar["close"].groupby("symbol").shift(1) features["gap"] = day_bar["open"] / yesterday_close-1 return features def calc_targets(self, minute_bars): trading_bar = agg_bars(minute_bars, "09:36", "15:55") min5_bar = agg_bars(minute_bars, "09:31", "09:35") trading_bar = trading_bar.join(min5_bar.add_prefix("bb_")) return trading_bar.apply(calc_exit_price, axis=1) def get_train_data(self): train_idx = self.targets.index.intersection(self.features.index) return self.features.loc[train_idx], self.targets.loc[train_idx] def get_pred_data(self): return self.features.query("time == @self.Time.date()") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS) def calc_exit_price(row, retr_pct=RETR_PCT): window = (row.bb_high-row.bb_low)*retr_pct if row.close > row.open: # long trade stop_loss = row.bb_high - window target = "long" if row.low > stop_loss else "pass" # 1 if profitable long and not touching the SL else: # short trade stop_loss = row.bb_low + window target = "short" if row.high < stop_loss else "pass" # -1 if profitable short and not touching the SL return target
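v0.12's calc_exit_price is a pure function of one joined daily row, so it can be exercised off-platform. A sketch with a synthetic bar (SimpleNamespace standing in for the joined row with its bb_-prefixed 5-minute fields):

from types import SimpleNamespace

RETR_PCT = 0.7

def calc_exit_price(row, retr_pct=RETR_PCT):
    window = (row.bb_high - row.bb_low) * retr_pct
    if row.close > row.open:  # up day -> long trade
        stop_loss = row.bb_high - window
        return "long" if row.low > stop_loss else "pass"
    else:                     # down day -> short trade
        stop_loss = row.bb_low + window
        return "short" if row.high < stop_loss else "pass"

# Up day whose low never retraces 70% of the opening bar -> profitable long
bar = SimpleNamespace(bb_high=10.5, bb_low=10.0, open=10.4, close=11.0, low=10.2, high=11.1)
print(calc_exit_price(bar))  # "long": stop at 10.5 - 0.35 = 10.15, day low 10.2 stays above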
""" Big Bertha Strategy with Machine Learning Done - New Features (bb volume and open) - Offline data storage to avoid symbols limitation - Trade execution on high probability trades Todo - Risk management with stop loss @version: 0.13 @creation date: 05/07/2022 """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} RETR_PCT = 0.7 EXT_PCT = 0.39 class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume")) self.capital = literal_eval(self.GetParameter("capital")) self.benchmark = self.GetParameter("benchmark") self.SetStartDate(2021, 1, 1) self.SetEndDate(2022, 1, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.accuracy = None self.features, self.targets = None, None self.model = GradientBoostingClassifier(n_iter_no_change=3) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.store_data) self.Schedule.On(every_day, at(9, 35), self.trade) self.Schedule.On(every_day, at(15, 55), self.stop_trading) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None or self.targets is None: return self.Debug(f"{self.Time} Training") x, y = self.get_train_data() cv_scores = cross_val_score(self.model, X=x, y=y, cv=10) self.accuracy = np.mean(cv_scores) self.model.fit(x, y) self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.accuracy:.1%}") self.Plot("ML", "Accuracy", self.accuracy) def trade(self): if self.accuracy is None: return self.Debug(f"{self.Time} Trading") x_pred = self.get_pred_data() y_proba = pd.DataFrame(self.model.predict_proba(x_pred), index=x_pred.index, columns=self.model.classes_).groupby("symbol").last() actions = y_proba.idxmax(axis=1) positions = actions.apply(lambda x: 0.01 if x=="long" else -0.01 if x=="short" else 0) self.Debug(f"Predictions: {len(y_proba)} - Proba {y_proba}") for symbol, pos in positions.items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) feats = x_pred.loc[symbol].iloc[0] # TODO: Refactor window = (feats.bb_high - feats.bb_low) stop_loss = feats.bb_high - window * RETR_PCT if pos > 0 \ else feats.bb_low + window * RETR_PCT # TODO: Refactor take_profit = feats.bb_high + window * EXT_PCT if pos > 0 \ else feats.bb_low - window * EXT_PCT self.StopMarketOrder(symbol, -qty, stop_loss) # TODO: Need to cancel TP when SL and viceversa self.LimitMarketOrder(symbol, -qty, take_profit) # TODO: Need to cancel TP when SL and viceversa def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def store_data(self): trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7), self.Time - timedelta(1)) last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday, trade_days))[-1].Date start = 
last_day.replace(hour=9, minute=30, second=0) end = self.Time.replace(hour=9, minute=35, second=0) tickers = list(filter(lambda x: str(x) not in self.benchmark, self.ActiveSecurities.Keys)) minute_bars = self.History(tickers, start, end, Resolution.Minute) new_features = self.calc_features(minute_bars).dropna() if self.features is not None: new_idx = new_features.index.difference(self.features.index) # Removing potential duplicates self.features = pd.concat([self.features, new_features.loc[new_idx]]) else: self.features = new_features new_targets = self.calc_targets(minute_bars).dropna() if self.targets is not None: new_idx = new_targets.index.difference(self.targets.index) # Removing potential duplicates self.targets = pd.concat([self.targets, new_targets.loc[new_idx]]) else: self.targets = new_targets self.Debug(f"{self.Time} Data updated: {len(self.features)} datapoints") def calc_features(self, minute_bars): day_bar = agg_bars(minute_bars, "09:31", "16:00") pm_bar = agg_bars(minute_bars, "00:01", "09:30") min5_bar = agg_bars(minute_bars, "09:31", "09:35") features = min5_bar.add_prefix("bb_") features["bb_size"] = min5_bar.eval("(high-low)/open") features["bb_close_range"] = min5_bar.eval("(close-low)/(high-low)") features["bb_open_range"] = min5_bar.eval("(open-low)/(high-low)") features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_close = day_bar["close"].groupby("symbol").shift(1) features["gap"] = day_bar["open"] / yesterday_close-1 return features def calc_targets(self, minute_bars): trading_bar = agg_bars(minute_bars, "09:36", "15:55") min5_bar = agg_bars(minute_bars, "09:31", "09:35") trading_bar = trading_bar.join(min5_bar.add_prefix("bb_")) return trading_bar.apply(calc_exit_target, axis=1) def get_train_data(self): train_idx = self.targets.index.intersection(self.features.index) return self.features.loc[train_idx], self.targets.loc[train_idx] def get_pred_data(self): return self.features.query("time == @self.Time.date()") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS) def calc_exit_target(price_bar, retr_pct=RETR_PCT, ext_pct=EXT_PCT): window = (price_bar.bb_high - price_bar.bb_low) if price_bar.close > price_bar.open: # long trade # take_profit = data_bar.bb_high + window * ext_pct TODO: Not used yet, would need to compare TP/SL timing stop_loss = price_bar.bb_high - window * retr_pct profitable_long = (price_bar.low > stop_loss) \ and (price_bar.close > price_bar.open) target = "long" if profitable_long else "pass" # 1 if profitable long and not touching the SL else: # short trade # take_profit = data_bar.bb_low - window * ext_pct TODO: Not used yet, would need to compare TP/SL timing stop_loss = price_bar.bb_low + window * retr_pct profitable_short = (price_bar.high < stop_loss) \ and (price_bar.close < price_bar.open) target = "short" if profitable_short else "pass" # -1 if profitable short and not touching the SL return target
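v0.13 prices both barriers off the 5-minute Big Bertha range: the stop retraces RETR_PCT of the range back into the bar, the take-profit extends EXT_PCT beyond it. The arithmetic with assumed prices:

RETR_PCT, EXT_PCT = 0.7, 0.39
bb_high, bb_low = 10.5, 10.0                # 5-minute Big Bertha bar (assumed)
window = bb_high - bb_low                   # 0.50

long_stop = bb_high - window * RETR_PCT     # 10.15, retracement back into the bar
long_target = bb_high + window * EXT_PCT    # 10.695, extension beyond the bar
short_stop = bb_low + window * RETR_PCT     # 10.35
short_target = bb_low - window * EXT_PCT    # 9.805
print(long_stop, long_target, short_stop, short_target)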
""" Big Bertha Strategy with Machine Learning Done - New Features (bb volume and open) - Offline data storage to avoid symbols limitation - Trade execution on high probability trades - Triple barrier target with TP and SL @version: 0.14 @creation date: 05/07/2022 """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} RETRACEMENT_SL = 0.7 EXTENSION_TP = 0.39 class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume")) self.capital = literal_eval(self.GetParameter("capital")) self.benchmark = self.GetParameter("benchmark") self.SetStartDate(2021, 1, 1) self.SetEndDate(2022, 1, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.accuracy = None self.features, self.targets = pd.DataFrame(), pd.DataFrame() self.model = GradientBoostingClassifier(n_iter_no_change=3) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.store_data) self.Schedule.On(every_day, at(9, 35), self.trade) self.Schedule.On(every_day, at(15, 55), self.stop_trading) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if len(self.features) == 0: return # No training data available self.Debug(f"{self.Time} Training") x, y = self.get_train_data() cv_scores = cross_val_score(self.model, X=x, y=y, cv=10, scoring="balanced_accuracy") self.accuracy = np.mean(cv_scores) self.model.fit(x, y) self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.accuracy:.1%}") self.Plot("ML", "Accuracy", self.accuracy) def trade(self): if self.accuracy is None: return self.Debug(f"{self.Time} Trading") x_pred = self.get_pred_data().droplevel("time") y_proba = pd.DataFrame(self.model.predict_proba(x_pred), index=x_pred.index, columns=self.model.classes_) positions_func = lambda x: x[1] if x[1] > 0.5 \ else -x[-1] if x[-1] > 0.5 else 0 positions = y_proba.apply(positions_func, axis=1) * self.accuracy if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.Debug(f"Predictions: {len(y_proba)} - Proba {y_proba}") for symbol, pos in positions.items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) features = x_pred.loc[symbol] window = (features.bb_high - features.bb_low) stop_loss = features.bb_high - window * RETRACEMENT_SL if pos > 0 \ else features.bb_low + window * RETRACEMENT_SL # TODO: Refactor #take_profit = features.bb_high + window * EXTENSION_TP if pos > 0 \ # else features.bb_low - window * EXTENSION_TP self.StopMarketOrder(symbol, -qty, stop_loss) # TODO: Need to cancel TP when SL and viceversa #self.LimitOrder(symbol, -qty, take_profit) # TODO: Need to cancel TP when SL and viceversa def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def store_data(self): 
trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7), self.Time - timedelta(1)) last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday, trade_days))[-1].Date start = last_day.replace(hour=9, minute=30, second=0) end = self.Time.replace(hour=9, minute=35, second=0) tickers = list(filter(lambda x: str(x) not in self.benchmark, self.ActiveSecurities.Keys)) minute_bars = self.History(tickers, start, end, Resolution.Minute) new_features = self.calc_features(minute_bars).dropna() new_idx = new_features.index.difference(self.features.index) # Removing potential duplicates self.features = pd.concat([self.features, new_features.loc[new_idx]]) new_targets = self.calc_targets(minute_bars).dropna() new_idx = new_targets.index.difference(self.targets.index) # Removing potential duplicates self.targets = pd.concat([self.targets, new_targets.loc[new_idx]]) self.Debug(f"{self.Time} Data updated: {len(self.features)} datapoints") def calc_features(self, minute_bars): day_bar = agg_bars(minute_bars, "09:31", "16:00") pm_bar = agg_bars(minute_bars, "00:01", "09:30") min5_bar = agg_bars(minute_bars, "09:31", "09:35") features = min5_bar.add_prefix("bb_") features["bb_size"] = min5_bar.eval("(high-low)/open") features["bb_close_range"] = min5_bar.eval("(close-low)/(high-low)") features["bb_open_range"] = min5_bar.eval("(open-low)/(high-low)") features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_close = day_bar["close"].groupby("symbol").shift(1) features["gap"] = day_bar["open"] / yesterday_close-1 return features def calc_targets(self, minute_bars): trading_bar = agg_bars(minute_bars, "09:36", "15:55") min5_bar = agg_bars(minute_bars, "09:31", "09:35") trading_bar = trading_bar.join(min5_bar.add_prefix("bb_")) return trading_bar.apply(calc_exit_target, axis=1) def get_train_data(self): train_idx = self.targets.index.intersection(self.features.index) return self.features.loc[train_idx], self.targets.loc[train_idx] def get_pred_data(self): return self.features.query("time == @self.Time.date()") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS) def calc_exit_target(price_bar, retr_pct=RETRACEMENT_SL, ext_pct=EXTENSION_TP): window = (price_bar.bb_high - price_bar.bb_low) if price_bar.close > price_bar.open: # long trade # take_profit = data_bar.bb_high + window * ext_pct TODO: Not used yet, would need to compare TP/SL timing stop_loss = price_bar.bb_high - window * retr_pct target = 1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL else: # short trade # take_profit = data_bar.bb_low - window * ext_pct TODO: Not used yet, would need to compare TP/SL timing stop_loss = price_bar.bb_low + window * retr_pct target = -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL return target
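v0.14 refits on numeric -1/0/+1 labels and sizes positions from predict_proba columns via positions_func: take the +1-class probability if it clears 50%, otherwise short the -1-class probability if that clears 50%, otherwise stay flat. Exercised on a toy probability frame (note that x[1] and x[-1] are label-based lookups into the class columns, not positional indices):

import pandas as pd

# predict_proba output with the fitted classes (-1, 0, +1) as columns
proba = pd.DataFrame({-1: [0.10, 0.70], 0: [0.20, 0.20], 1: [0.70, 0.10]},
                     index=["AAA", "BBB"])
positions_func = lambda x: x[1] if x[1] > 0.5 else -x[-1] if x[-1] > 0.5 else 0
accuracy = 0.6
positions = proba.apply(positions_func, axis=1) * accuracy
print(positions)  # AAA +0.42 (long), BBB -0.42 (short)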
""" Big Bertha Strategy with Machine Learning Done - New Features (bb volume and open) - Offline data storage to avoid symbols limitation - Trade execution on high probability trades - Double barrier target with SL - Implemented Pipeline with clustering/dimensionality reduction TODO: - Implement Triple Barrier with TP @version: 0.15 @creation date: 05/07/2022 """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.cluster import KMeans from sklearn.pipeline import Pipeline from sklearn.preprocessing import MinMaxScaler from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} RETRACEMENT_SL = 0.7 class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume")) self.capital = literal_eval(self.GetParameter("capital")) self.benchmark = self.GetParameter("benchmark") self.SetStartDate(2021, 1, 1) self.SetEndDate(2022, 1, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.score = 0 self.features, self.targets = pd.DataFrame(), pd.Series() self.pipe = Pipeline([#("scaling", MinMaxScaler()), #("clustering", KMeans(n_clusters=2)), ("model", GradientBoostingClassifier(n_iter_no_change=3, n_estimators=100))]) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.store_features) self.Schedule.On(every_day, at(9, 35), self.trade) self.Schedule.On(every_day, at(15, 55), self.stop_trading) self.Schedule.On(every_day, at(15, 55), self.store_targets) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if len(self.features) == 0: return self.clean_data() training_days = self.features.index.get_level_values("time").unique() if len(training_days) < 20: return # Minimum of month of training cv_scores = cross_val_score(self.pipe, X=self.features, y=self.targets, cv=10, scoring="balanced_accuracy") self.score = np.mean(cv_scores) self.pipe.fit(self.features, self.targets) self.print(f"CV Sharpe {self.score / np.std(cv_scores):.1f}") self.print(f"Training: {self.targets.value_counts()} Score:{self.score:.1%}") self.Plot("ML", "Score", self.score) def trade(self): if self.score <= 1/3: return x_pred = self.features.query("time == @self.Time.date()").droplevel("time") y_pred = pd.Series(self.pipe.predict(x_pred), index=x_pred.index) positions_map = {"long": 0.1, "short": -0.1, "pass": 0} positions = y_pred.apply(lambda x: positions_map[x]) if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) features = x_pred.loc[symbol] window = (features.bb_high - features.bb_low) * RETRACEMENT_SL stop_loss = features.bb_high - window if pos > 0 \ else features.bb_low + window # 
TODO: Refactor self.StopMarketOrder(symbol, -qty, stop_loss) def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def store_features(self): last_day = self.get_last_day(self.Time) start = last_day.replace(hour=9, minute=30, second=0) tickers = self.get_active_tickers() minute_bars = self.History(tickers, start, self.Time, Resolution.Minute) day_bar = agg_bars(minute_bars, "09:31", "16:00") pm_bar = agg_bars(minute_bars, "00:01", "09:30") min5_bar = agg_bars(minute_bars, "09:31", "09:35") new_features = min5_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_close = day_bar["close"].groupby("symbol").shift(1) new_features["gap"] = day_bar["open"] / yesterday_close - 1 self.features = pd.concat([self.features, new_features.dropna()]) def store_targets(self): last_features = self.features.groupby("symbol").last() tickers = list(last_features.index) start = self.Time.replace(hour=9, minute=31, second=0) end = self.Time.replace(hour=15, minute=54, second=0) minute_bars = self.History(tickers, start, end, Resolution.Minute) trading_bar = agg_bars(minute_bars, "09:36", "15:54") min5_bar = agg_bars(minute_bars, "09:31", "09:35") trading_bar = trading_bar.join(min5_bar.add_prefix("bb_")) new_targets = trading_bar.apply(calc_exit_target, axis=1) self.targets = pd.concat([self.targets, new_targets.dropna()]) def clean_data(self): self.features = self.features[~self.features.index.duplicated(keep='first')] self.targets = self.targets[~self.targets.index.duplicated(keep='first')] common_idx = self.targets.index.intersection(self.features.index) self.features = self.features.loc[common_idx] self.targets = self.targets.loc[common_idx] def get_active_tickers(self): return list(filter(lambda x: str(x) not in self.benchmark, self.ActiveSecurities.Keys)) def get_last_day(self, date): start, end = date - timedelta(7), date - timedelta(1) calendar_days = self.TradingCalendar.GetTradingDays(start, end) return list(filter(lambda p: p.BusinessDay and not p.PublicHoliday, calendar_days))[-1].Date def print(self, msg): self.Debug(f"{self.Time} {msg}") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS) def calc_exit_target(price_bar, retr_pct=RETRACEMENT_SL): window = (price_bar.bb_high - price_bar.bb_low) if price_bar.close > price_bar.open: # long trade stop_loss = price_bar.bb_high - window * retr_pct target = "long" if price_bar.low > stop_loss else "pass" # 1 if profitable long and not touching the SL else: # short trade stop_loss = price_bar.bb_low + window * retr_pct target = "short" if price_bar.high < stop_loss else "pass" # -1 if profitable short and not touching the SL return target
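clean_data in v0.15 first drops duplicated (symbol, time) rows, then keeps only the index entries where a feature row and a target label both exist, so X and y stay aligned one-to-one. A minimal reproduction on toy data:

import pandas as pd

mi = pd.MultiIndex.from_tuples(
    [("AAA", "2022-01-03"), ("AAA", "2022-01-03"), ("BBB", "2022-01-03")],
    names=["symbol", "time"])
features = pd.DataFrame({"gap": [0.01, 0.01, -0.02]}, index=mi)
targets = pd.Series(["long", "pass"], index=mi[[0, 2]])  # no label for the duplicate row

features = features[~features.index.duplicated(keep="first")]
targets = targets[~targets.index.duplicated(keep="first")]
common = targets.index.intersection(features.index)
features, targets = features.loc[common], targets.loc[common]
print(len(features), len(targets))  # 2 2 -- aligned one-to-one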
""" Big Bertha Strategy with Machine Learning Done - New Features (bb volume and open) - Offline data storage to avoid symbols limitation - Trade execution on high probability trades - Double barrier target with SL TODO: Implement Triple Barrier with TP @version: 0.16 @creation date: 05/07/2022 """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx from timeseriescv import TimeSeriesSplitGroups pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} RETRACEMENT_SL = 1.0 class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume")) self.capital = literal_eval(self.GetParameter("capital")) self.benchmark = self.GetParameter("benchmark") self.SetStartDate(2021, 1, 1) self.SetEndDate(2022, 1, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.score = 0 self.cv = TimeSeriesSplitGroups(n_splits=10) self.features, self.targets = None, None self.model = GradientBoostingClassifier(n_iter_no_change=3) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.store_features) self.Schedule.On(every_day, at(9, 35), self.trade) self.Schedule.On(every_day, at(15, 55), self.stop_trading) self.Schedule.On(every_day, at(15, 55), self.store_targets) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None: return training_days = self.features.index.get_level_values("time").unique() if len(training_days) < 20: return time_groups = self.targets.index.get_level_values("time") cv_scores = cross_val_score(self.model, X=self.features, y=self.targets, cv=self.cv, groups=time_groups, scoring="balanced_accuracy") self.score = np.nanmean(cv_scores) self.model.fit(self.features, self.targets) self.print(f"Training score:{self.score:.1%}") self.Plot("ML", "Score", self.score) def trade(self): kelly_size = (3 * self.score - 1) / 2 # calculating the edge like binary Kelly if kelly_size <= 0: return x_pred = self.features.groupby("symbol").last() y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index) positions = y_pred * kelly_size * 0.5 # Using 50% Kelly if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) features = x_pred.loc[symbol] window = (features.bb_high - features.bb_low) * RETRACEMENT_SL stop_loss = features.bb_high - window if pos > 0 \ else features.bb_low + window # TODO: Refactor self.StopMarketOrder(symbol, -qty, stop_loss) def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0) tickers = list(self.ActiveSecurities.Keys) minute_bars = 
self.History(tickers, start, self.Time, Resolution.Minute) pm_bar = agg_bars(minute_bars, "07:01", "09:30") min5_bar = agg_bars(minute_bars, "09:31", "09:35") new_features = min5_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_features["gap"] = min5_bar["open"] / yesterday_close - 1 self.features = pd.concat([new_features.dropna(), self.features]) self.print(f"Stored new features, total: {len(self.features)}") def store_targets(self): last_features = self.features.query("time == @self.Time.date()") tickers = list(last_features.index.get_level_values("symbol")) start = self.Time.replace(hour=9, minute=31, second=0) end = self.Time.replace(hour=15, minute=55, second=0) minute_bars = self.History(tickers, start, end, Resolution.Minute) min5_bar = agg_bars(minute_bars, "09:31", "09:35") trading_bar = agg_bars(minute_bars, "09:36", "15:55") trading_bar = trading_bar.join(min5_bar.add_prefix("bb_")) new_targets = trading_bar.apply(calc_exit_target, axis=1) self.targets = pd.concat([new_targets.dropna(), self.targets]) self.print(f"Stored new targets, total: {len(self.targets)}") def print(self, msg): self.Debug(f"{self.Time} {msg}") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS) def calc_exit_target(price_bar, retr_pct=RETRACEMENT_SL): window = (price_bar.bb_high - price_bar.bb_low) if price_bar.close > price_bar.open: # long trade stop_loss = price_bar.bb_high - window * retr_pct target = +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL else: # short trade stop_loss = price_bar.bb_low + window * retr_pct target = -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL return target
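v0.16 converts the cross-validated balanced accuracy over its three classes (-1, 0, +1) into a Kelly-style edge: (3 * score - 1) / 2 is zero at the 1/3 random baseline and one at a perfect score, and v0.19 later generalizes this to (n * score - 1) / (n - 1) for n classes. The mapping in isolation:

def kelly_edge(score, n_classes):
    # 0 at the 1/n_classes random baseline, 1 at a perfect score
    return (n_classes * score - 1) / (n_classes - 1)

print(kelly_edge(1/3, 3))  # 0.0 -- no better than chance on three classes
print(kelly_edge(0.5, 3))  # 0.25
print(kelly_edge(0.6, 2))  # 0.2 -- binary case reduces to 2*score - 1
position_scale = kelly_edge(0.5, 3) * 0.5  # v0.16 trades at half Kelly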
""" Big Bertha Strategy with Machine Learning Done - Custom precision scoring - New Features (bb volume and open) - Offline data storage to avoid symbols limitation - Trade execution on high probability trades - Double barrier target with SL TODO: Implement Triple Barrier with TP @version: 0.17 @creation date: 05/07/2022 """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.metrics import make_scorer from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx from timeseriescv import TimeSeriesSplitGroups pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} def multi_precision(y_true, y_pred): non_zero_pred = y_pred!=0 matches = y_true[non_zero_pred] == y_pred [non_zero_pred] return np.mean(matches) if len(matches)>0 else 0 class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume")) self.retracement_sl = literal_eval(self.GetParameter("retracement_sl")) self.capital = literal_eval(self.GetParameter("capital")) self.benchmark = self.GetParameter("benchmark") self.SetStartDate(2021, 1, 1) self.SetEndDate(2022, 1, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.score = 0 self.scoring = make_scorer(multi_precision) self.cv = TimeSeriesSplitGroups(n_splits=10) self.features, self.targets = None, None self.model = GradientBoostingClassifier(n_iter_no_change=3) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.store_features) self.Schedule.On(every_day, at(9, 35), self.trade) self.Schedule.On(every_day, at(15, 55), self.stop_trading) self.Schedule.On(every_day, at(15, 55), self.store_targets) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None: return training_days = self.features.index.get_level_values("time").unique() if len(training_days) < 20: return time_groups = self.targets.index.get_level_values("time") cv_scores = cross_val_score(self.model, X=self.features, y=self.targets, cv=self.cv, groups=time_groups, scoring=self.scoring) self.score = np.mean(cv_scores) self.model.fit(self.features, self.targets) self.print(f"Training: {self.targets.value_counts()} Score:{self.score:.1%}") self.Plot("ML", "Score", self.score) def trade(self): if self.score == 0: return x_pred = self.features.groupby("symbol").last() y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index) positions = y_pred * self.score * 0.1 if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) features = x_pred.loc[symbol] window = (features.bb_high - features.bb_low) * self.retracement_sl stop_loss = features.bb_high - window if pos > 0 \ else features.bb_low + window # TODO: Refactor 
self.StopMarketOrder(symbol, -qty, stop_loss) def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0) tickers = list(self.ActiveSecurities.Keys) minute_bars = self.History(tickers, start, self.Time, Resolution.Minute) pm_bar = agg_bars(minute_bars, "07:01", "09:30") min5_bar = agg_bars(minute_bars, "09:31", "09:35") new_features = min5_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_features["gap"] = min5_bar["open"] / yesterday_close - 1 self.features = pd.concat([new_features.dropna(), self.features]) self.print(f"Stored new features, total: {len(self.features)}") def store_targets(self): last_features = self.features.query("time == @self.Time.date()") tickers = list(last_features.index.get_level_values("symbol")) start = self.Time.replace(hour=9, minute=31, second=0) end = self.Time.replace(hour=15, minute=55, second=0) minute_bars = self.History(tickers, start, end, Resolution.Minute) min5_bar = agg_bars(minute_bars, "09:31", "09:35") trading_bar = agg_bars(minute_bars, "09:36", "15:55") trading_bar = trading_bar.join(min5_bar.add_prefix("bb_")) new_targets = trading_bar.apply(self.calc_exit_target, axis=1) self.targets = pd.concat([new_targets.dropna(), self.targets]) self.print(f"Stored new targets, total: {len(self.targets)}") def calc_exit_target(self, price_bar): window = (price_bar.bb_high - price_bar.bb_low) if price_bar.close > price_bar.open: # long trade stop_loss = price_bar.bb_high - window * self.retracement_sl target = +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL else: # short trade stop_loss = price_bar.bb_low + window * self.retracement_sl target = -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL return target def print(self, msg): self.Debug(f"{self.Time} {msg}") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
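multi_precision in v0.17 scores only the non-zero predictions, i.e. the calls the strategy would actually trade, which makes it closer to realized precision than plain accuracy. Exercised standalone on toy arrays:

import numpy as np
from sklearn.metrics import make_scorer

def multi_precision(y_true, y_pred):
    non_zero_pred = y_pred != 0
    matches = y_true[non_zero_pred] == y_pred[non_zero_pred]
    return np.mean(matches) if len(matches) > 0 else 0

y_true = np.array([+1, -1, +1, 0, 0])
y_pred = np.array([+1, +1, +1, 0, -1])  # four trades signalled, two of them right
print(multi_precision(y_true, y_pred))  # 0.5
scorer = make_scorer(multi_precision)   # plugs into cross_val_score(scoring=scorer)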
""" Big Bertha Strategy with Machine Learning Done - Custom precision scoring - New Features (bb volume and open) - Offline data storage to avoid symbols limitation - Trade execution on high probability trades - Double barrier target with SL @version: 0.18 @creation date: 05/07/2022 """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx from timeseriescv import TimeSeriesSplitGroups pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume")) self.retracement_sl = literal_eval(self.GetParameter("retracement_sl")) self.kelly_frac = literal_eval(self.GetParameter("kelly_frac")) self.capital = literal_eval(self.GetParameter("capital")) self.benchmark = self.GetParameter("benchmark") self.SetStartDate(2021, 1, 1) self.SetEndDate(2022, 1, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.score = 0 self.cv = TimeSeriesSplitGroups(n_splits=10) self.features, self.targets = None, None self.model = GradientBoostingClassifier(n_iter_no_change=3) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.store_features) self.Schedule.On(every_day, at(9, 35), self.trade) self.Schedule.On(every_day, at(15, 55), self.stop_trading) self.Schedule.On(every_day, at(15, 55), self.store_targets) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None: return training_days = self.features.index.get_level_values("time").unique() if len(training_days) < 20: return time_groups = self.targets.index.get_level_values("time") fit_params = dict(sample_weight=abs(np.log1p(self.targets))) y_binary = (self.targets > 0).astype(float) cv_scores = cross_val_score(self.model, X=self.features, y=y_binary, cv=self.cv, groups=time_groups, fit_params=fit_params) self.score = np.mean(cv_scores) self.model.fit(self.features, y_binary, **fit_params) self.print(f"Training: {y_binary.value_counts()} Score:{self.score:.1%}") self.Plot("ML", "Score", self.score) def trade(self): edge = self.score - (1 - self.score) if edge <= 0: return x_pred = self.features.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index) positions = y_pred * edge * self.kelly_frac if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions.items(): self.SetHoldings(symbol, pos) def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0) tickers = list(self.ActiveSecurities.Keys) minute_bars = self.History(tickers, start, self.Time, Resolution.Minute) pm_bar = 
agg_bars(minute_bars, "07:01", "09:30") min5_bar = agg_bars(minute_bars, "09:31", "09:35") new_features = min5_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_features["gap"] = min5_bar["open"] / yesterday_close - 1 self.features = pd.concat([new_features.dropna(), self.features]) self.print(f"Stored new features, total: {len(self.features)}") def store_targets(self): last_features = self.features.query("time == @self.Time.date()") tickers = list(last_features.index.get_level_values("symbol")) start = self.Time.replace(hour=9, minute=36, second=0) end = self.Time.replace(hour=15, minute=55, second=0) minute_bars = self.History(tickers, start, end, Resolution.Minute) trading_bar = agg_bars(minute_bars, "09:36", "15:55") new_targets = trading_bar.eval("close/open - 1") self.targets = pd.concat([new_targets.dropna(), self.targets]) self.print(f"Stored new targets, total: {len(self.targets)}") def print(self, msg): self.Debug(f"{self.Time} {msg}") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
""" Big Bertha Strategy with Machine Learning Done - Custom precision scoring - New Features (bb volume and open) - Offline data storage to avoid symbols limitation - Trade execution on high probability trades - Double barrier target with SL @version: 0.18 @creation date: 05/07/2022 """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx from timeseriescv import TimeSeriesSplitGroups pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume")) self.retracement_sl = literal_eval(self.GetParameter("retracement_sl")) self.kelly_frac = literal_eval(self.GetParameter("kelly_frac")) self.capital = literal_eval(self.GetParameter("capital")) self.benchmark = self.GetParameter("benchmark") self.SetStartDate(2021, 1, 1) self.SetEndDate(2022, 1, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.score = 0 self.cv = TimeSeriesSplitGroups(n_splits=10) self.features, self.targets = None, None self.model = GradientBoostingClassifier(n_iter_no_change=3) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.store_features) self.Schedule.On(every_day, at(9, 35), self.trade) self.Schedule.On(every_day, at(15, 55), self.stop_trading) self.Schedule.On(every_day, at(15, 55), self.store_targets) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None: return training_days = self.features.index.get_level_values("time").unique() if len(training_days) < 20: return time_groups = self.targets.index.get_level_values("time") cv_scores = cross_val_score(self.model, X=self.features, y=self.targets, cv=self.cv, groups=time_groups, scoring="balanced_accuracy") self.score = np.mean(cv_scores) self.model.fit(self.features, self.targets) self.print(f"Training: {self.targets.value_counts()} Score:{self.score:.1%}") self.Plot("ML", "Score", self.score) def trade(self): edge = (3 * self.score - 1) / 2 if edge <= 0: return x_pred = self.features.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index) positions = y_pred * edge * self.kelly_frac if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) features = x_pred.loc[symbol] window = (features.bb_high - features.bb_low) * self.retracement_sl stop_loss = features.bb_high - window if pos > 0 \ else features.bb_low + window # TODO: Refactor self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss) def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def 
store_features(self): start = self.Time.replace(hour=7, minute=1, second=0) tickers = list(self.ActiveSecurities.Keys) minute_bars = self.History(tickers, start, self.Time, Resolution.Minute) pm_bar = agg_bars(minute_bars, "07:01", "09:30") min5_bar = agg_bars(minute_bars, "09:31", "09:35") new_features = min5_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_features["gap"] = min5_bar["open"] / yesterday_close - 1 self.features = pd.concat([new_features.dropna(), self.features]) self.print(f"Stored new features, total: {len(self.features)}") def store_targets(self): last_features = self.features.query("time == @self.Time.date()") tickers = list(last_features.index.get_level_values("symbol")) start = self.Time.replace(hour=9, minute=31, second=0) end = self.Time.replace(hour=15, minute=55, second=0) minute_bars = self.History(tickers, start, end, Resolution.Minute) min5_bar = agg_bars(minute_bars, "09:31", "09:35") trading_bar = agg_bars(minute_bars, "09:36", "15:55") trading_bar = trading_bar.join(min5_bar.add_prefix("bb_")) new_targets = trading_bar.apply(self.calc_exit_target, axis=1) self.targets = pd.concat([new_targets.dropna(), self.targets]) self.print(f"Stored new targets, total: {len(self.targets)}") def calc_exit_target(self, price_bar): window = (price_bar.bb_high - price_bar.bb_low) if price_bar.close > price_bar.open: # long trade stop_loss = price_bar.bb_high - window * self.retracement_sl target = +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL else: # short trade stop_loss = price_bar.bb_low + window * self.retracement_sl target = -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL return target def print(self, msg): self.Debug(f"{self.Time} {msg}") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
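Both v0.18 variants bracket each entry with a protective stop derived from the Big Bertha bar: longs stop at a retracement below the bar high, shorts at a retracement above the bar low. The price selection with assumed values:

retracement_sl = 0.7                          # assumed parameter value
bb_high, bb_low = 21.0, 20.0                  # Big Bertha bar of the traded symbol
window = (bb_high - bb_low) * retracement_sl  # 0.70

def stop_price(pos):
    # longs stop below the bar high, shorts stop above the bar low
    return bb_high - window if pos > 0 else bb_low + window

print(stop_price(+0.05))  # 20.30 protects a long
print(stop_price(-0.05))  # 20.70 protects a short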
""" Big Bertha Strategy with Machine Learning Done - Custom precision scoring - New Features (bb volume and open) - Offline data storage to avoid symbols limitation - Trade execution on high probability trades - Double barrier target with SL @version: 0.19 @creation date: 05/07/2022 """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx from timeseriescv import TimeSeriesSplitGroups pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume")) self.retracement_sl = literal_eval(self.GetParameter("retracement_sl")) self.kelly_frac = literal_eval(self.GetParameter("kelly_frac")) self.capital = literal_eval(self.GetParameter("capital")) self.use_sl = literal_eval(self.GetParameter("use_sl")) self.target_gain = literal_eval(self.GetParameter("target_gain")) self.strategy = self.GetParameter("strategy") self.benchmark = self.GetParameter("benchmark") self.SetStartDate(2021, 1, 1) self.SetEndDate(2022, 1, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.edge = 0 self.cv = TimeSeriesSplitGroups(n_splits=10) self.features, self.targets = None, None self.model = GradientBoostingClassifier(n_iter_no_change=10) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.store_features) self.Schedule.On(every_day, at(9, 35), self.trade) self.Schedule.On(every_day, at(15, 55), self.stop_trading) self.Schedule.On(every_day, at(15, 55), self.store_targets) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None: return training_days = self.features.index.get_level_values("time").unique() if len(training_days) <= 10: return time_groups = self.targets.index.get_level_values("time") cv_scores = cross_val_score(self.model, X=self.features, y=self.targets, cv=self.cv, groups=time_groups, scoring="balanced_accuracy") self.model.fit(self.features, self.targets) score = np.mean(np.nan_to_num(cv_scores, 0)) n_classes = len(self.model.classes_) self.edge = (n_classes * score - 1) / (n_classes - 1) self.print(f"Training: {self.targets.value_counts()} Edge:{self.edge:.1%}") self.Plot("ML", "Edge", self.edge) def trade(self): if self.edge <= 0: return x_pred = self.features.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index) positions = y_pred * (self.edge * self.kelly_frac).clip(0, 1) if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) if self.use_sl: features = x_pred.loc[symbol] window = (features.bb_high - 
features.bb_low) * self.retracement_sl stop_loss = features.bb_high - window if pos > 0 \ else features.bb_low + window # TODO: Refactor self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss) def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0) tickers = list(self.ActiveSecurities.Keys) minute_bars = self.History(tickers, start, self.Time, Resolution.Minute) pm_bar = agg_bars(minute_bars, "07:01", "09:30") min5_bar = agg_bars(minute_bars, "09:31", "09:35") new_features = min5_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_features["gap"] = min5_bar["open"] / yesterday_close - 1 self.features = pd.concat([new_features.dropna(), self.features]) self.print(f"Stored new features, total: {len(self.features)}") def store_targets(self): last_features = self.features.query("time == @self.Time.date()") tickers = list(last_features.index.get_level_values("symbol")) start = self.Time.replace(hour=9, minute=31, second=0) end = self.Time.replace(hour=15, minute=55, second=0) minute_bars = self.History(tickers, start, end, Resolution.Minute) min5_bar = agg_bars(minute_bars, "09:31", "09:35") trading_bar = agg_bars(minute_bars, "09:36", "15:55") trading_bar = trading_bar.join(min5_bar.add_prefix("bb_")) new_targets = trading_bar.apply(self.calc_exit_target, axis=1) self.targets = pd.concat([new_targets.dropna(), self.targets]) self.print(f"Stored new targets, total: {len(self.targets)}") def calc_exit_target(self, price_bar): window = (price_bar.bb_high - price_bar.bb_low) if price_bar.close >= price_bar.open * (1+self.target_gain) \ and "long" in self.strategy: # long trade if self.use_sl: stop_loss = price_bar.bb_high - window * self.retracement_sl return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL else: return +1 elif price_bar.close <= price_bar.open * (1 - self.target_gain) \ and "short" in self.strategy: # short trade if self.use_sl: stop_loss = price_bar.bb_low + window * self.retracement_sl return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL else: return -1 else: return 0 def print(self, msg): self.Debug(f"{self.Time} {msg}") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
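# Worked example (mine, not from the source) of the Kelly edge formula used
# in train_model(): edge = (n*score - 1)/(n - 1) maps chance-level balanced
# accuracy (1/n) to 0 and a perfect score to 1.
def kelly_edge(score, n_classes):
    return (n_classes * score - 1) / (n_classes - 1)

print(kelly_edge(0.5, 2))   # 0.0   -> coin-flip classifier, no edge
print(kelly_edge(0.40, 3))  # ~0.10 -> risk ~10% of capital before kelly_frac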
""" Big Bertha Strategy with Machine Learning Done - Custom precision scoring - New Features (bb volume and open) - Offline data storage to avoid symbols limitation - Trade execution on high probability trades - Double barrier target with SL @version: 0.20 @creation date: 05/07/2022 """ from AlgorithmImports import * import numpy as np import pandas as pd from ast import literal_eval from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx from timeseriescv import TimeSeriesSplitGroups pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume")) self.retracement_sl = literal_eval(self.GetParameter("retracement_sl")) self.target_gain = literal_eval(self.GetParameter("target_gain")) self.kelly_frac = literal_eval(self.GetParameter("kelly_frac")) self.capital = literal_eval(self.GetParameter("capital")) self.use_sl = literal_eval(self.GetParameter("use_sl")) self.strategy = self.GetParameter("strategy") self.benchmark = self.GetParameter("benchmark") self.SetStartDate(2021, 1, 1) self.SetEndDate(2022, 1, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.edge = 0 self.cv = TimeSeriesSplitGroups(n_splits=10) self.features, self.targets = None, None self.model = GradientBoostingClassifier(n_iter_no_change=10) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.store_features) self.Schedule.On(every_day, at(9, 35), self.trade) self.Schedule.On(every_day, at(15, 55), self.stop_trading) self.Schedule.On(every_day, at(15, 55), self.store_targets) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None: return training_days = self.features.index.get_level_values("time").unique() if len(training_days) <= 10: return time_groups = self.targets.index.get_level_values("time") cv_scores = cross_val_score(self.model, X=self.features, y=self.targets, cv=self.cv, groups=time_groups, scoring="balanced_accuracy") self.model.fit(self.features, self.targets) score = np.mean(np.nan_to_num(cv_scores, 0)) n_classes = len(self.model.classes_) self.edge = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes self.print(f"Training: {self.targets.value_counts()} Edge:{self.edge:.1%}") self.Plot("ML", "Edge", self.edge) def trade(self): if self.edge <= 0: return x_pred = self.features.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index) positions = y_pred * (self.edge * self.kelly_frac).clip(0, 1) if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) if self.use_sl: features = 
x_pred.loc[symbol] window = (features.bb_high - features.bb_low) * self.retracement_sl stop_loss = features.bb_high - window if pos > 0 \ else features.bb_low + window # TODO: Simplify SL? self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss) def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0) tickers = list(self.ActiveSecurities.Keys) minute_bars = self.History(tickers, start, self.Time, Resolution.Minute) pm_bar = agg_bars(minute_bars, "07:01", "09:30") min5_bar = agg_bars(minute_bars, "09:31", "09:35") new_features = min5_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_features["gap"] = min5_bar["open"] / yesterday_close - 1 self.features = pd.concat([new_features.dropna(), self.features]) self.print(f"Stored new features, total: {len(self.features)}") def store_targets(self): # TODO: Run it only before training last_features = self.features.query("time == @self.Time.date()") tickers = list(last_features.index.get_level_values("symbol")) start = self.Time.replace(hour=9, minute=36, second=0) end = self.Time.replace(hour=15, minute=55, second=0) minute_bars = self.History(tickers, start, end, Resolution.Minute) trading_bar = agg_bars(minute_bars, "09:36", "15:55") trading_bar = trading_bar.join(last_features) new_targets = trading_bar.apply(self.calc_exit_target, axis=1) self.targets = pd.concat([new_targets.dropna(), self.targets]) self.print(f"Stored new targets, total: {len(self.targets)}") def calc_exit_target(self, price_bar): # TODO: Simplify SL? window = (price_bar.bb_high - price_bar.bb_low) if price_bar.close >= price_bar.open * (1+self.target_gain) \ and "long" in self.strategy: # long trade if self.use_sl: stop_loss = price_bar.bb_high - window * self.retracement_sl return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL else: return +1 elif price_bar.close <= price_bar.open * (1 - self.target_gain) \ and "short" in self.strategy: # short trade if self.use_sl: stop_loss = price_bar.bb_low + window * self.retracement_sl return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL else: return -1 else: return 0 def print(self, msg): self.Debug(f"{self.Time} {msg}") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
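# The TimeSeriesSplitGroups splitter comes from the project's timeseriescv
# module, which is not shown in this document. A plausible minimal sketch
# (my assumption): an expanding-window split over whole trading days, so one
# day's bars never land in both the train and the test fold.
import numpy as np

class TimeSeriesSplitGroups:
    def __init__(self, n_splits=10):
        self.n_splits = n_splits

    def split(self, X, y=None, groups=None):
        days = np.unique(groups)  # unique days, sorted chronologically
        folds = np.array_split(days, self.n_splits + 1)
        for i in range(1, self.n_splits + 1):
            train_days = np.concatenate(folds[:i])
            yield (np.where(np.isin(groups, train_days))[0],
                   np.where(np.isin(groups, folds[i]))[0])

    def get_n_splits(self, X=None, y=None, groups=None):
        return self.n_splits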
""" Big Bertha Strategy with Machine Learning Done - Custom precision scoring - New Features (bb volume and open) - Offline data storage to avoid symbols limitation - Trade execution on high probability trades - Double barrier target with SL @version: 0.21 @creation date: 05/07/2022 """ from AlgorithmImports import * import numpy as np import pandas as pd from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx from timeseriescv import TimeSeriesSplitGroups pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 0) self.target_gain = self.GetParameter("target_gain", 0.05) self.kelly_frac = self.GetParameter("kelly_frac", 0.25) self.capital = self.GetParameter("capital", 80000) self.use_sl = self.GetParameter("use_sl", 0) self.retracement_sl = self.GetParameter("retracement_sl", 1) self.strategy = self.GetParameter("strategy", "long_short") self.benchmark = self.GetParameter("benchmark", "SPY") self.SetStartDate(2021, 6, 1) self.SetEndDate(2022, 6, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.edge = 0 self.cv = TimeSeriesSplitGroups(n_splits=10) self.features, self.targets = None, None self.model = GradientBoostingClassifier(n_iter_no_change=10) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.store_features) self.Schedule.On(every_day, at(9, 35), self.trade) self.Schedule.On(every_day, at(15, 55), self.stop_trading) self.Schedule.On(every_day, at(15, 55), self.store_targets) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None: return idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets self.features = self.features.loc[idx] self.targets = self.targets.loc[idx] training_days = self.features.index.get_level_values("time") if len(training_days.unique()) <= 10: return # Require more than 10 days of training data cv_scores = cross_val_score(self.model, X=self.features, y=self.targets, cv=self.cv, groups=training_days, scoring="balanced_accuracy") self.model.fit(self.features, self.targets) score = np.mean(cv_scores) n_classes = len(self.model.classes_) self.edge = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes self.print(f"Training: {self.targets.value_counts()} Edge:{self.edge:.1%}") self.Plot("ML", "Edge", self.edge) def trade(self): if self.edge <= 0: return x_pred = self.features.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index) positions = y_pred * self.edge * self.kelly_frac if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = 
self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) if self.use_sl: features = x_pred.loc[symbol] window = (features.bb_high - features.bb_low) * self.retracement_sl stop_loss = features.bb_high - window if pos > 0 \ else features.bb_low + window # TODO: Simplify SL? self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss) def stop_trading(self): self.Transactions.CancelOpenOrders() self.Liquidate() def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0) tickers = list(self.ActiveSecurities.Keys) minute_bars = self.History(tickers, start, self.Time, Resolution.Minute) pm_bar = agg_bars(minute_bars, "07:01", "09:30") bertha_bar = agg_bars(minute_bars, "09:31", "09:35") new_features = bertha_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_features["gap"] = bertha_bar["open"] / yesterday_close - 1 self.features = pd.concat([new_features.dropna(), self.features]) self.print(f"Stored new features, total: {len(self.features)}") def store_targets(self): last_features = self.features.query("time == @self.Time.date()") tickers = list(last_features.index.get_level_values("symbol")) start = self.Time.replace(hour=9, minute=36, second=0) end = self.Time.replace(hour=15, minute=55, second=0) minute_bars = self.History(tickers, start, end, Resolution.Minute) trading_bar = agg_bars(minute_bars, "09:36", "15:55") trading_bar = trading_bar.join(last_features) new_targets = trading_bar.apply(self.calc_exit_target, axis=1) self.targets = pd.concat([new_targets.dropna(), self.targets]) self.print(f"Stored new targets, total: {len(self.targets)}") def calc_exit_target(self, price_bar): window = (price_bar.bb_high - price_bar.bb_low) if price_bar.close >= price_bar.open * (1 + self.target_gain) \ and "long" in self.strategy: # long trade if self.use_sl: stop_loss = price_bar.bb_high - window * self.retracement_sl return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL else: return +1 elif price_bar.close <= price_bar.open * (1 - self.target_gain) \ and "short" in self.strategy: # short trade if self.use_sl: stop_loss = price_bar.bb_low + window * self.retracement_sl return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL else: return -1 else: return 0 def print(self, msg): self.Debug(f"{self.Time} {msg}") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
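# Quick illustration (my numbers) of the gross-exposure cap in trade():
# sixty +1 predictions at 5% each would be 300% gross exposure, so they are
# rescaled to sum to exactly 100%; six predictions (30% gross) pass through.
import pandas as pd

positions = pd.Series(0.05, index=range(60))
if sum(abs(positions)) > 1:
    positions /= sum(abs(positions))
print(positions.iloc[0], positions.abs().sum())  # ~0.0167 per name, 1.0 gross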
""" Big Bertha Strategy with Machine Learning Done - Custom precision scoring - New Features (bb volume and open) - Offline data storage to avoid symbols limitation - Trade execution on high probability trades - Double barrier target with SL @version: 0.22 @creation date: 05/07/2022 """ from AlgorithmImports import * import numpy as np import pandas as pd from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx from timeseriescv import TimeSeriesSplitGroups pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 0) self.target_gain = self.GetParameter("target_gain", 0.05) self.kelly_frac = self.GetParameter("kelly_frac", 0.25) self.capital = self.GetParameter("capital", 80000) self.use_sl = self.GetParameter("use_sl", 0) self.retracement_sl = self.GetParameter("retracement_sl", 1) self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short self.benchmark = self.GetParameter("benchmark", "SPY") self.SetStartDate(2021, 6, 1) self.SetEndDate(2022, 6, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.edge = 0 self.cv = TimeSeriesSplitGroups(n_splits=10) self.features, self.targets = None, None self.model = GradientBoostingClassifier(n_iter_no_change=10) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None: return idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets self.features = self.features.loc[idx] self.targets = self.targets.loc[idx] training_days = self.features.index.get_level_values("time") if len(training_days.unique()) <= 10: return # Require more than 10 days of training data cv_scores = cross_val_score(self.model, X=self.features, y=self.targets, cv=self.cv, groups=training_days, scoring="balanced_accuracy") self.model.fit(self.features, self.targets) score = np.mean(cv_scores) n_classes = len(self.model.classes_) self.edge = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes self.print(f"Training: {self.targets.value_counts()} Edge:{self.edge:.1%}") self.Plot("ML", "Edge", self.edge) def enter_trades(self): self.store_features() if self.edge <= 0: return x_pred = self.features.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index) positions = y_pred * self.edge * self.kelly_frac if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) if 
self.use_sl: features = x_pred.loc[symbol] window = (features.bb_high - features.bb_low) * self.retracement_sl stop_loss = features.bb_high - window if pos > 0 \ else features.bb_low + window self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss) def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() self.store_targets() def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0) tickers = list(self.ActiveSecurities.Keys) minute_bars = self.History(tickers, start, self.Time, Resolution.Minute) pm_bar = agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = self.Time.hour, self.Time.minute bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_features = bertha_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_features["gap"] = bertha_bar["open"] / yesterday_close - 1 self.features = pd.concat([new_features.dropna(), self.features]) self.print(f"Stored new features, total: {len(self.features)}") def store_targets(self): last_features = self.features.query("time == @self.Time.date()") tickers = list(last_features.index.get_level_values("symbol")) now = self.Time minute_bars = self.History(tickers, now - timedelta(minutes=1), now, Resolution.Minute) try: trading_bar = minute_bars.droplevel("time").join(last_features) new_targets = trading_bar.apply(self.calc_exit_target, axis=1) self.targets = pd.concat([new_targets.dropna(), self.targets]) self.print(f"Stored new targets, total: {len(self.targets)}") except (KeyError, ValueError) as e: self.print(e) return def calc_exit_target(self, price_bar): entry_price, exit_price = price_bar.bb_close, price_bar.close window = (price_bar.bb_high - price_bar.bb_low) if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade if self.use_sl: stop_loss = price_bar.bb_high - window * self.retracement_sl return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL else: return +1 elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade if self.use_sl: stop_loss = price_bar.bb_low + window * self.retracement_sl return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL else: return -1 else: return 0 def print(self, msg): self.Debug(f"{self.Time} {msg}") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
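# Numeric check (my example) of the retracement stop attached to entries:
# for a 9:31-9:35 bar with low=9.8 and high=10.4, retracement_sl=0.5 places
# both the long and the short stop half-way back inside the bar's range.
bb_low, bb_high, retracement_sl = 9.8, 10.4, 0.5
window = (bb_high - bb_low) * retracement_sl
print(bb_high - window)  # ~10.1 -> long stop
print(bb_low + window)   # ~10.1 -> short stop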
""" Big Bertha Strategy with Machine Learning Last changes: v0.23: lookback parameter @version: 0.23 @creation date: 05/07/2022 """ from AlgorithmImports import * import numpy as np import pandas as pd from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx from timeseriescv import TimeSeriesSplitGroups pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 0) # Minimum trading volume in previous trading day self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter in the trade self.kelly_frac = self.GetParameter("kelly_frac", 0.25) # Kelly ratio to use for the position sizing self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.use_sl = self.GetParameter("use_sl", 0) # Use or not the Stop Loss (0/1) self.retracement_sl = self.GetParameter("retracement_sl", 1) # Retracement percentage to use for the Stop Loss self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation self.SetStartDate(2021, 6, 1) self.SetEndDate(2022, 6, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.edge = 0 self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits) self.features, self.targets = None, None self.model = GradientBoostingClassifier(n_iter_no_change=10) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None: return idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] self.features = self.features.loc[idx] self.targets = self.targets.loc[idx] training_days = self.features.index.get_level_values("time") if len(training_days.unique()) <= self.cv_splits: return # Require more than 10 days of training data cv_scores = cross_val_score(self.model, X=self.features, y=self.targets, cv=self.cv, groups=training_days, scoring="balanced_accuracy") self.model.fit(self.features, self.targets) score = np.mean(cv_scores) n_classes = len(self.model.classes_) self.edge = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes self.print(f"Training: {self.targets.value_counts()} Edge:{self.edge:.1%}") self.Plot("ML", "Edge", self.edge) def enter_trades(self): self.store_features() if self.edge <= 0: return x_pred = self.features.query("time == @self.Time.date()") x_pred.index = 
x_pred.index.droplevel("time") y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index) positions = y_pred * self.edge * self.kelly_frac if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) if self.use_sl: features = x_pred.loc[symbol] window = (features.bb_high - features.bb_low) * self.retracement_sl stop_loss = features.bb_high - window if pos > 0 \ else features.bb_low + window self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss) def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() self.store_targets() def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0) tickers = list(self.ActiveSecurities.Keys) minute_bars = self.History(tickers, start, self.Time, Resolution.Minute) pm_bar = agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = self.Time.hour, self.Time.minute bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_features = bertha_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_features["gap"] = bertha_bar["open"] / yesterday_close - 1 self.features = pd.concat([new_features.dropna(), self.features]) self.print(f"Stored new features, total: {len(self.features)}") def store_targets(self): last_features = self.features.query("time == @self.Time.date()") tickers = list(last_features.index.get_level_values("symbol")) now = self.Time minute_bars = self.History(tickers, now - timedelta(minutes=1), now, Resolution.Minute) try: trading_bar = minute_bars.droplevel("time").join(last_features) new_targets = trading_bar.apply(self.calc_exit_target, axis=1) self.targets = pd.concat([new_targets.dropna(), self.targets]) self.print(f"Stored new targets, total: {len(self.targets)}") except (KeyError, ValueError) as e: self.print(e) return def calc_exit_target(self, price_bar): entry_price, exit_price = price_bar.bb_close, price_bar.close window = (price_bar.bb_high - price_bar.bb_low) if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade if self.use_sl: stop_loss = price_bar.bb_high - window * self.retracement_sl return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL else: return +1 elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade if self.use_sl: stop_loss = price_bar.bb_low + window * self.retracement_sl return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL else: return -1 else: return 0 def print(self, msg): self.Debug(f"{self.Time} {msg}") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
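# Sketch (mine) of the v0.23 lookback pruning in train_model(): only index
# rows whose "time" level falls inside the trailing window are kept.
import pandas as pd
from datetime import datetime, timedelta

mi = pd.MultiIndex.from_product(
    [["AAA"], pd.date_range("2021-01-01", periods=500)],
    names=["symbol", "time"])
recent = mi[mi.get_level_values("time") > datetime(2022, 6, 1) - timedelta(365)]
print(len(mi), len(recent))  # 500 rows in, only the trailing year survives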
""" Big Bertha Strategy with Machine Learning Last changes: v0.24: Offline model storage v0.23: Lookback parameter @version: 0.24 @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import numpy as np import pandas as pd from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx from timeseriescv import TimeSeriesSplitGroups pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 0) # Minimum trading volume in previous trading day self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter in the trade self.kelly_frac = self.GetParameter("kelly_frac", 0.25) # Kelly ratio to use for the position sizing self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.use_sl = self.GetParameter("use_sl", 0) # Use or not the Stop Loss (0/1) self.retracement_sl = self.GetParameter("retracement_sl", 1) # Retracement percentage to use for the Stop Loss self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation self.store_model = self.GetParameter("store_model", None) # Model name if needs to be stored self.SetStartDate(2021, 6, 1) #self.SetEndDate(2022, 6, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) #self.ObjectStore.Delete(self.store_model) # Deleting existing data if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model): self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model))) else: self.model = GradientBoostingClassifier(n_iter_no_change=10) self.model.edge = 0 self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits) self.features, self.targets = None, None at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None or self.targets is None: return idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] self.features = self.features.loc[idx] self.targets = self.targets.loc[idx] training_days = self.features.index.get_level_values("time") if len(training_days.unique()) <= 21: return # Require more than one month of training data cv_scores = cross_val_score(self.model, X=self.features, y=self.targets, cv=self.cv, groups=training_days, scoring="balanced_accuracy") self.model.fit(self.features, self.targets) if self.store_model is not None: 
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model)) score = np.mean(cv_scores) n_classes = len(self.model.classes_) self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}") self.Plot("ML", "Edge", self.model.kelly) def enter_trades(self): self.store_features() if self.model.kelly <= 0: return x_pred = self.features.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index) positions = y_pred * self.model.kelly * self.kelly_frac if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) if self.use_sl: features = x_pred.loc[symbol] window = (features.bb_high - features.bb_low) * self.retracement_sl stop_loss = features.bb_high - window if pos > 0 \ else features.bb_low + window self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss) def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() self.store_targets() def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0) tickers = list(self.ActiveSecurities.Keys) minute_bars = self.History(tickers, start, self.Time, Resolution.Minute) pm_bar = agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = self.Time.hour, self.Time.minute bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_features = bertha_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_features["gap"] = bertha_bar["open"] / yesterday_close - 1 self.features = pd.concat([new_features.dropna(), self.features]) self.print(f"Stored new features, total: {len(self.features)}") def store_targets(self): last_features = self.features.query("time == @self.Time.date()") tickers = list(last_features.index.get_level_values("symbol")) now = self.Time minute_bars = self.History(tickers, now - timedelta(minutes=1), now, Resolution.Minute) try: trading_bar = minute_bars.droplevel("time").join(last_features) new_targets = trading_bar.apply(self.calc_exit_target, axis=1) self.targets = pd.concat([new_targets.dropna(), self.targets]) self.print(f"Stored new targets, total: {len(self.targets)}") except (KeyError, ValueError) as e: self.print(e) return def calc_exit_target(self, price_bar): entry_price, exit_price = price_bar.bb_close, price_bar.close window = (price_bar.bb_high - price_bar.bb_low) if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade if self.use_sl: stop_loss = price_bar.bb_high - window * self.retracement_sl return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL else: return +1 elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade if self.use_sl: stop_loss = price_bar.bb_low + window * self.retracement_sl return -1 if price_bar.high < stop_loss else 0 # -1 if profitable 
short and not touching the SL else: return -1 else: return 0 def print(self, msg): self.Debug(f"{self.Time} {msg}") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
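# Standalone check (my example) of the persistence introduced in v0.24: a
# custom attribute stapled onto the fitted model (here .kelly) survives the
# pickle round-trip that ObjectStore.SaveBytes/ReadBytes performs, which is
# what lets a restarted deployment trade before its first weekly retraining.
import pickle
from sklearn.dummy import DummyClassifier

model = DummyClassifier().fit([[0], [1]], [0, 1])
model.kelly = 0.12                       # custom attribute, as in the strategy
restored = pickle.loads(pickle.dumps(model))
print(restored.kelly)                    # 0.12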
""" Big Bertha Strategy with Machine Learning Last changes: v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.25 @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import numpy as np import pandas as pd from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx from timeseriescv import TimeSeriesSplitGroups pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 0) # Minimum trading volume in previous trading day self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.use_sl = self.GetParameter("use_sl", 0) # Use or not the Stop Loss (0/1) self.retracement_sl = self.GetParameter("retracement_sl", 1) # Retracement percentage to use for the Stop Loss self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored self.SetStartDate(2021, 6, 1) self.SetEndDate(2022, 9, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) # self.ObjectStore.Delete(self.store_model) # Deleting existing data if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model): self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model))) else: self.model = GradientBoostingClassifier(n_iter_no_change=10) self.model.edge = 0 self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits) self.features, self.targets = None, None at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None or self.targets is None: return idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] self.features = self.features.loc[idx] self.targets = self.targets.loc[idx] training_days = idx.get_level_values("time") if len(training_days.unique()) <= 21: return # Require more than one month of training data cv_scores = cross_val_score(self.model, X=self.features, y=self.targets, cv=self.cv, groups=training_days, scoring="balanced_accuracy") self.model.fit(self.features, self.targets) if self.store_model is not None: self.ObjectStore.SaveBytes(self.store_model, 
pickle.dumps(self.model)) score = np.mean(cv_scores) n_classes = len(self.model.classes_) self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}") self.Plot("ML", "Edge", self.model.kelly) def enter_trades(self): self.store_features() if self.model.kelly <= 0: return x_pred = self.features.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_proba = pd.DataFrame(self.model.predict_proba(x_pred), index=x_pred.index, columns=self.model.classes_) sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Selecting only prob > 50% and scaling to 100% positions = y_proba.idxmax(axis=1) * sizes * self.model.kelly # Sizing based on Kelly and individual probabilty if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_proba.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) if self.use_sl: features = x_pred.loc[symbol] window = (features.bb_high - features.bb_low) * self.retracement_sl stop_loss = features.bb_high - window if pos > 0 \ else features.bb_low + window self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss) def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() self.store_targets() def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0) tickers = list(self.ActiveSecurities.Keys) minute_bars = self.History(tickers, start, self.Time, Resolution.Minute) pm_bar = agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = self.Time.hour, self.Time.minute bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_features = bertha_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_features["gap"] = bertha_bar["open"] / yesterday_close - 1 self.features = pd.concat([new_features.dropna(), self.features]) self.print(f"Stored new features, total: {len(self.features)}") def store_targets(self): last_features = self.features.query("time == @self.Time.date()") self.Log(last_features) tickers = list(last_features.index.get_level_values("symbol")) now = self.Time minute_bars = self.History(tickers, now - timedelta(minutes=1), now, Resolution.Minute) self.Log(minute_bars) try: trading_bar = minute_bars.droplevel("time").join(last_features) new_targets = trading_bar.apply(self.calc_exit_target, axis=1) self.targets = pd.concat([new_targets.dropna(), self.targets]) self.print(f"Stored new targets, total: {len(self.targets)}") except (KeyError, ValueError) as e: self.print(e) return def calc_exit_target(self, price_bar): entry_price, exit_price = price_bar.bb_close, price_bar.close window = (price_bar.bb_high - price_bar.bb_low) if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade if self.use_sl: stop_loss = price_bar.bb_high - window * self.retracement_sl return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL else: return +1 elif exit_price <= entry_price * (1 - 
self.target_gain) and self.strategy <= 0: # short trade if self.use_sl: stop_loss = price_bar.bb_low + window * self.retracement_sl return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL else: return -1 else: return 0 def print(self, msg): self.Debug(f"{self.Time} {msg}") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
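# Worked example (mine) of the v0.25 probability-based sizing: the winning
# class probability is mapped from [0.5, 1.0] onto a [0, 1] position size,
# so barely-confident predictions receive almost no capital.
import pandas as pd

p_max = pd.Series([0.50, 0.65, 0.90])
sizes = (p_max - 0.5).clip(0, 1) * 2
print(sizes.tolist())  # ~[0.0, 0.3, 0.8]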
""" Big Bertha Strategy with Machine Learning Last changes: v0.26: Adding both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.26 @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import numpy as np import pandas as pd from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx from timeseriescv import TimeSeriesSplitGroups pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored self.sl_retr = self.GetParameter("retracement_sl", 0) # Retracement percentage to use for the Stop Loss, disabled if 0 self.tp_ext = self.GetParameter("extension_tp", 0) # Extension percentage to use for the Take Profit, disabled if 0 self.SetStartDate(2021, 6, 1) self.SetEndDate(2022, 9, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) #self.ObjectStore.Delete(self.store_model) # Deleting existing data if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model): self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model))) else: self.model = GradientBoostingClassifier(n_iter_no_change=10) self.model.edge = 0 self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits) self.features, self.targets = None, None at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None or self.targets is None: return idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] self.features = self.features.loc[idx] self.targets = self.targets.loc[idx] training_days = idx.get_level_values("time") if len(training_days.unique()) <= 21: return # Require more than one month of training data cv_scores = cross_val_score(self.model, X=self.features, y=self.targets, cv=self.cv, groups=training_days, scoring="balanced_accuracy") 
self.model.fit(self.features, self.targets) if self.store_model is not None: self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model)) score = np.mean(cv_scores) n_classes = len(self.model.classes_) self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}") self.Plot("ML", "Edge", self.model.kelly) def enter_trades(self): self.store_features() if self.model.kelly <= 0: return x_pred = self.features.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_proba = pd.DataFrame(self.model.predict_proba(x_pred), index=x_pred.index, columns=self.model.classes_) y_pred = y_proba.idxmax(axis=1) sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Selecting only prob > 50% and scaling to 100% positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probabilty if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) features = x_pred.loc[symbol] window = (features.bb_high - features.bb_low) if self.sl_retr > 0: stop_loss = features.bb_high - window * self.sl_retr if pos > 0 \ else features.bb_low + window * self.sl_retr self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss) if self.tp_ext > 0: take_profit = features.bb_low + window * self.tp_ext if pos > 0 \ else features.bb_high - window * self.tp_ext self.LimitOrder(symbol, -qty, take_profit) def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() self.store_targets() def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_features = bertha_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_features["gap"] = bertha_bar["open"] / yesterday_close - 1 self.features = pd.concat([new_features.dropna(), self.features]) self.print(f"Stored new features, total: {len(self.features)}") def store_targets(self): last_features = self.features.query("time == @self.Time.date()") self.Log(last_features) tickers = list(last_features.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) self.Log(f"Target time: {last_minute}") minute_bars = self.History(tickers, last_minute - timedelta(minutes=1), last_minute, Resolution.Minute) self.Log(minute_bars) try: trading_bar = minute_bars.droplevel("time").join(last_features) new_targets = trading_bar.apply(self.calc_target, axis=1) self.targets = pd.concat([new_targets.dropna(), self.targets]) self.print(f"Stored new targets, total: {len(self.targets)}") except (KeyError, 
ValueError) as e: self.print(e) return def calc_target(self, price_bar): entry_price, exit_price = price_bar.bb_close, price_bar.close window = (price_bar.bb_high - price_bar.bb_low) if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade if self.sl_retr > 0: stop_loss = price_bar.bb_high - window * self.sl_retr return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL else: return +1 elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade if self.sl_retr > 0: stop_loss = price_bar.bb_low + window * self.sl_retr return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL else: return -1 else: return 0 def print(self, msg): self.Debug(f"{self.Time} {msg}") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
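# Numeric illustration (my example) of the naive triple-barrier label in
# calc_target(): the day must close target_gain beyond the Big Bertha close
# AND never touch the retracement stop to earn a non-zero label.
target_gain, sl_retr = 0.05, 0.5
bb_low, bb_high, bb_close = 9.8, 10.4, 10.2   # 9:31-9:35 "Big Bertha" bar
day_low, day_close = 10.15, 10.85             # rest of the session
stop_loss = bb_high - (bb_high - bb_low) * sl_retr
label = +1 if day_close >= bb_close * (1 + target_gain) and day_low > stop_loss else 0
print(label)  # +1: closed >5% up without hitting the ~10.1 stop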
""" Big Bertha Strategy with Machine Learning Last changes: v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.28 @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import numpy as np import pandas as pd from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx from timeseriescv import TimeSeriesSplitGroups pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} def catch_errors(func): def wrap(self, *args, **kwargs): try: result = func(self, *args, **kwargs) return result except (KeyError, ValueError) as e: self.print(e) return return wrap class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored self.min_proba = self.GetParameter("min_proba", 0.5) # Threshold probability to trigger a long/short signal self.sl_retr = self.GetParameter("retracement_sl", 0) # Retracement percentage to use for the Stop Loss, disabled if 0 self.tp_ext = self.GetParameter("extension_tp", 0) # Extension percentage to use for the Take Profit, disabled if 0 self.SetStartDate(2021, 6, 1) self.SetEndDate(2022, 9, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) #self.ObjectStore.Delete(self.store_model) # Deleting existing data if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model): self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model))) else: self.model = GradientBoostingClassifier(n_iter_no_change=10) self.model.edge = 0 self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits) self.features, self.targets = None, None at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None or self.targets is None: return idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] self.features = 
self.features.loc[idx] self.targets = self.targets.loc[idx] training_days = idx.get_level_values("time") if len(training_days.unique()) <= 21: return # Require more than one month of training data cv_scores = cross_val_score(self.model, X=self.features, y=self.targets, cv=self.cv, groups=training_days, scoring="balanced_accuracy") self.model.fit(self.features, self.targets) if self.store_model is not None: self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model)) score = np.mean(cv_scores) n_classes = len(self.model.classes_) self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}") self.Plot("ML", "Edge", self.model.kelly) def enter_trades(self): self.store_features() if self.model.kelly <= 0: return x_pred = self.features.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_proba = pd.DataFrame(self.model.predict_proba(x_pred), index=x_pred.index, columns=self.model.classes_) y_pred = y_proba.idxmax(axis=1) scaling = 1 / (1 - self.min_proba) sizes = (y_proba.max(axis=1) - self.min_proba).clip(0, 1) * scaling # Selecting only prob > min_proba and scaling positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probabilty if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) features = x_pred.loc[symbol] window = (features.bb_high - features.bb_low) if self.sl_retr > 0: stop_loss = features.bb_high - window * self.sl_retr if pos > 0 \ else features.bb_low + window * self.sl_retr self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss) if self.tp_ext > 0: take_profit = features.bb_low + window * self.tp_ext if pos > 0 \ else features.bb_high - window * self.tp_ext self.LimitOrder(symbol, -qty, take_profit) def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() self.store_targets() @catch_errors def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_features = bertha_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_features["gap"] = bertha_bar["open"] / yesterday_close - 1 self.features = pd.concat([new_features.dropna(), self.features]) self.print(f"Stored new features, total: {len(self.features)}") @catch_errors def store_targets(self): last_features = self.features.query("time == @self.Time.date()") self.Log(last_features) tickers = list(last_features.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) self.Log(f"Target time: 
{last_minute}") minute_bars = self.History(tickers, last_minute - timedelta(minutes=1), last_minute, Resolution.Minute) self.Log(minute_bars) trading_bar = minute_bars.droplevel("time").join(last_features) new_targets = trading_bar.apply(self.calc_target, axis=1) self.targets = pd.concat([new_targets.dropna(), self.targets]) self.print(f"Stored new targets, total: {len(self.targets)}") def calc_target(self, price_bar): entry_price, exit_price = price_bar.bb_close, price_bar.close window = (price_bar.bb_high - price_bar.bb_low) if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade if self.sl_retr > 0: stop_loss = price_bar.bb_high - window * self.sl_retr return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL else: return +1 elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade if self.sl_retr > 0: stop_loss = price_bar.bb_low + window * self.sl_retr return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL else: return -1 else: return 0 def print(self, msg): self.Debug(f"{self.Time} {msg}") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
""" Big Bertha Strategy with Machine Learning Last changes: V0.28: Gridsearch pipeline with multiple models v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.28 @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import numpy as np import pandas as pd from sklearn.pipeline import Pipeline from sklearn.dummy import DummyClassifier from sklearn.naive_bayes import GaussianNB from sklearn.cluster import DBSCAN, OPTICS, KMeans from sklearn.neighbors import KNeighborsClassifier from sklearn.linear_model import LogisticRegression from sklearn.ensemble import GradientBoostingClassifier from sklearn.preprocessing import MinMaxScaler, StandardScaler from sklearn.model_selection import GridSearchCV, cross_val_score from sklearn.gaussian_process import GaussianProcessClassifier import indicators as idx from timeseriescv import TimeSeriesSplitGroups pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} def catch_errors(func): def wrap(self, *args, **kwargs): try: result = func(self, *args, **kwargs) return result except (KeyError, ValueError) as e: self.print(e) return return wrap class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored self.min_proba = self.GetParameter("min_proba", 0.5) # Threshold probability to trigger a long/short signal self.sl_retr = self.GetParameter("retracement_sl", 0.0) # Retracement percentage to use for the Stop Loss, disabled if 0 self.tp_ext = self.GetParameter("extension_tp", 0.0) # Extension percentage to use for the Take Profit, disabled if 0 self.SetStartDate(2021, 6, 1) self.SetEndDate(2022, 9, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) #self.ObjectStore.Delete(self.store_model) # Deleting existing data if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model): self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model))) else: pipe = Pipeline([("scaling", None), ("clustering", None), ("model", LogisticRegression())]) params = dict(scaling=[None, # MinMaxScaler(), StandardScaler()], clustering=[None, # KMeans(), # OPTICS(), DBSCAN()], model=[DummyClassifier(), # LogisticRegression(), # KNeighborsClassifier(), # GaussianProcessClassifier(), # GaussianNB(), GradientBoostingClassifier(n_iter_no_change=3)]) self.model = 
GridSearchCV(pipe, param_grid=params) self.model.edge = 0 self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits) self.features, self.targets = None, None at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None or self.targets is None: return idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] self.features = self.features.loc[idx] self.targets = self.targets.loc[idx] training_days = idx.get_level_values("time") if len(training_days.unique()) <= 21: return # Require more than one month of training data cv_scores = cross_val_score(self.model, X=self.features, y=self.targets, cv=self.cv, groups=training_days, scoring="balanced_accuracy") self.model.fit(self.features, self.targets) if self.store_model is not None: self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model)) score = np.mean(cv_scores) n_classes = len(self.model.classes_) self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}") self.print(f"Best model: {self.model.best_estimator_}") self.Log(pd.DataFrame(self.model.cv_results_).to_string()) self.Plot("ML", "Edge", self.model.kelly) def enter_trades(self): self.store_features() if self.model.kelly <= 0: return x_pred = self.features.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_proba = pd.DataFrame(self.model.predict_proba(x_pred), index=x_pred.index, columns=self.model.classes_) y_pred = y_proba.idxmax(axis=1) scaling = 1 / (1 - self.min_proba) sizes = (y_proba.max(axis=1) - self.min_proba).clip(0, 1) * scaling # Selecting only prob > min_proba and scaling positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probabilty if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) features = x_pred.loc[symbol] window = (features.bb_high - features.bb_low) if self.sl_retr > 0: stop_loss = features.bb_high - window * self.sl_retr if pos > 0 \ else features.bb_low + window * self.sl_retr self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss) if self.tp_ext > 0: take_profit = features.bb_high + window * self.tp_ext if pos > 0 \ else features.bb_low - window * self.tp_ext self.LimitOrder(symbol, -qty, take_profit) def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() self.store_targets() @catch_errors def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") 
new_features = bertha_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_features["gap"] = bertha_bar["open"] / yesterday_close - 1 self.features = pd.concat([new_features.dropna(), self.features]) self.print(f"Stored new features, total: {len(self.features)}") @catch_errors def store_targets(self): last_features = self.features.query("time == @self.Time.date()") self.Log(last_features) tickers = list(last_features.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) self.Log(f"Target time: {last_minute}") minute_bars = self.History(tickers, last_minute - timedelta(minutes=1), last_minute, Resolution.Minute) self.Log(minute_bars) trading_bar = minute_bars.droplevel("time").join(last_features) new_targets = trading_bar.apply(self.calc_target, axis=1) self.targets = pd.concat([new_targets.dropna(), self.targets]) self.print(f"Stored new targets, total: {len(self.targets)}") def calc_target(self, price_bar): entry_price, exit_price = price_bar.bb_close, price_bar.close window = (price_bar.bb_high - price_bar.bb_low) if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade if self.sl_retr > 0: stop_loss = price_bar.bb_high - window * self.sl_retr return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL else: return +1 elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade if self.sl_retr > 0: stop_loss = price_bar.bb_low + window * self.sl_retr return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL else: return -1 else: return 0 def print(self, msg): self.Debug(f"{self.Time} {msg}") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
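# Side note on the v0.28 grid above (illustration, not part of the strategy):
# GridSearchCV swaps whole pipeline steps when the param-grid keys match the
# step names, so the active grid is 2 scalers x 2 clusterers x 2 models = 8
# candidate pipelines. Caveat: DBSCAN has no transform(), so candidates with
# the DBSCAN "clustering" step would fail unless wrapped in a transformer.
# A self-contained toy run of the step-swapping mechanism:
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.dummy import DummyClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

rng = np.random.default_rng(0)
X, y = rng.random((100, 4)), rng.integers(0, 2, 100)
pipe = Pipeline([("scaling", None), ("model", LogisticRegression())])
grid = GridSearchCV(pipe, param_grid=dict(
    scaling=[None, StandardScaler()],
    model=[DummyClassifier(), LogisticRegression()]))
grid.fit(X, y)
print(grid.best_estimator_)  # the winning scaler/model combination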
""" Big Bertha Strategy with Machine Learning Last changes: v0.29: Parameter for entry/exit time v0.28: Gridsearch pipeline with multiple models v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.29 @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import numpy as np import pandas as pd from sklearn.pipeline import Pipeline from sklearn.dummy import DummyClassifier from sklearn.cluster import DBSCAN from sklearn.linear_model import LogisticRegression from sklearn.ensemble import GradientBoostingClassifier from sklearn.preprocessing import StandardScaler from sklearn.model_selection import GridSearchCV, cross_val_score import indicators as idx from timeseriescv import TimeSeriesSplitGroups pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} def catch_errors(func): def wrap(self, *args, **kwargs): try: result = func(self, *args, **kwargs) return result except (KeyError, ValueError) as e: self.print(e) return return wrap class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored self.min_proba = self.GetParameter("min_proba", 0.5) # Threshold probability to trigger a long/short signal self.sl_retr = self.GetParameter("retracement_sl", 0.0) # Retracement percentage to use for the Stop Loss, disabled if 0 self.tp_ext = self.GetParameter("extension_tp", 0.0) # Extension percentage to use for the Take Profit, disabled if 0 self.entry_mns = self.GetParameter("entry_mn", 5) # Entry time (minutes after 9.30) self.exit_mns = self.GetParameter("exit_mn", 385) # Exit time (minutes after 9.30) self.SetStartDate(2021, 6, 1) self.SetEndDate(2022, 9, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) #self.ObjectStore.Delete(self.store_model) # Deleting existing data if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model): self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model))) else: pipe = Pipeline([("scaling", None), ("clustering", None), ("model", LogisticRegression())]) params = dict(scaling=[None, StandardScaler()], clustering=[None, DBSCAN()], model=[DummyClassifier(), GradientBoostingClassifier(n_iter_no_change=3)]) self.model = GridSearchCV(pipe, param_grid=params) self.model.edge = 0 self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits) 
self.features, self.targets = None, None at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) entry_hr, entry_mn = 9 + self.entry_mns // 60, 30 + self.entry_mns % 60 exit_hr, exit_mn = 9 + self.exit_mns // 60, 30 + self.exit_mns % 60 self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(entry_hr, entry_mn), self.enter_trades) self.Schedule.On(every_day, at(exit_hr, exit_mn), self.exit_trades) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None or self.targets is None: return idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] self.features = self.features.loc[idx] self.targets = self.targets.loc[idx] training_days = idx.get_level_values("time") if len(training_days.unique()) <= 21: return # Require more than one month of training data cv_scores = cross_val_score(self.model, X=self.features, y=self.targets, cv=self.cv, groups=training_days, scoring="balanced_accuracy") self.model.fit(self.features, self.targets) if self.store_model is not None: self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model)) score = np.mean(cv_scores) n_classes = len(self.model.classes_) self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}") self.print(f"Best model: {self.model.best_estimator_}") self.Log(pd.DataFrame(self.model.cv_results_).to_string()) self.Plot("ML", "Edge", self.model.kelly) def enter_trades(self): self.store_features() if self.model.kelly <= 0: return x_pred = self.features.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_proba = pd.DataFrame(self.model.predict_proba(x_pred), index=x_pred.index, columns=self.model.classes_) y_pred = y_proba.idxmax(axis=1) scaling = 1 / (1 - self.min_proba) sizes = (y_proba.max(axis=1) - self.min_proba).clip(0, 1) * scaling # Selecting only prob > min_proba and scaling positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probabilty if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) # TODO: Change to Limit Order? 
features = x_pred.loc[symbol] window = (features.bb_high - features.bb_low) if self.sl_retr > 0: stop_loss = features.bb_high - window * self.sl_retr if pos > 0 \ else features.bb_low + window * self.sl_retr self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss) if self.tp_ext > 0: take_profit = features.bb_high + window * self.tp_ext if pos > 0 \ else features.bb_low - window * self.tp_ext self.LimitOrder(symbol, -qty, take_profit) def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() self.store_targets() @catch_errors def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_features = bertha_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_features["gap"] = bertha_bar["open"] / yesterday_close - 1 self.features = pd.concat([new_features.dropna(), self.features]) self.print(f"Stored new features, total: {len(self.features)}") @catch_errors def store_targets(self): last_features = self.features.query("time == @self.Time.date()") self.Log(last_features) tickers = list(last_features.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) self.Log(f"Target time: {last_minute}") minute_bars = self.History(tickers, last_minute - timedelta(minutes=1), last_minute, Resolution.Minute) self.Log(minute_bars) trading_bar = minute_bars.droplevel("time").join(last_features) new_targets = trading_bar.apply(self.calc_target, axis=1) self.targets = pd.concat([new_targets.dropna(), self.targets]) self.print(f"Stored new targets, total: {len(self.targets)}") def calc_target(self, price_bar): entry_price, exit_price = price_bar.bb_close, price_bar.close window = (price_bar.bb_high - price_bar.bb_low) if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade if self.sl_retr > 0: stop_loss = price_bar.bb_high - window * self.sl_retr return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL else: return +1 elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade if self.sl_retr > 0: stop_loss = price_bar.bb_low + window * self.sl_retr return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL else: return -1 else: return 0 def print(self, msg): self.Debug(f"{self.Time} {msg}") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
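# Sanity check (illustration) for the entry/exit scheduling above: minutes
# past 9:30 must be converted with a carry, which divmod handles; the naive
# `9 + mns // 60, 30 + mns % 60` form produces an invalid minute for any
# offset whose remainder pushes the minute field past 59 (e.g. 40).
def minutes_after_open(mns):
    return divmod(9 * 60 + 30 + mns, 60)

assert minutes_after_open(5) == (9, 35)     # default entry time
assert minutes_after_open(40) == (10, 10)   # naive form would give (9, 70)
assert minutes_after_open(385) == (15, 55)  # default exit time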
""" Big Bertha Strategy with Machine Learning Last changes: v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance) v0.29: Parameter for entry/exit time v0.28: Gridsearch pipeline with multiple models v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.30 @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import numpy as np import pandas as pd from sklearn.model_selection import cross_val_score from sklearn.ensemble import GradientBoostingClassifier import indicators as idx from timeseriescv import TimeSeriesSplitGroups pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} def catch_errors(func): def wrap(self, *args, **kwargs): try: result = func(self, *args, **kwargs) return result except (KeyError, ValueError) as e: self.print(e) return return wrap class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored self.min_proba = self.GetParameter("min_proba", 0.5) # Threshold probability to trigger a long/short signal self.SetStartDate(2021, 6, 1) self.SetEndDate(2022, 9, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) #self.ObjectStore.Delete(self.store_model) # Deleting existing data if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model): self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model))) else: self.model = GradientBoostingClassifier(n_iter_no_change=3) self.model.edge = 0 self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits) self.features, self.targets = None, None at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None or self.targets is None: return idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] self.features = self.features.loc[idx] self.targets = self.targets.loc[idx] days = 
idx.get_level_values("time") if len(days.unique()) <= 21: return # Require more than one month of training data params = dict(sample_weight=abs(self.targets)) # Weighting each sample by its importance targets_bin = self.targets.apply(lambda x: +1 if x > self.target_gain else -1 if x < -self.target_gain else 0) cv_scores = cross_val_score(self.model, X=self.features, y=targets_bin, cv=self.cv, groups=days, fit_params=params, scoring="balanced_accuracy") self.model.fit(self.features, targets_bin, **params) if self.store_model is not None: self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model)) score = np.mean(cv_scores) n_classes = len(self.model.classes_) self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes self.print(f"Training: {targets_bin.value_counts()} Edge:{self.model.kelly:.1%}") self.Plot("ML", "Edge", self.model.kelly) def enter_trades(self): self.store_features() if self.model.kelly <= 0: return x_pred = self.features.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_proba = pd.DataFrame(self.model.predict_proba(x_pred), index=x_pred.index, columns=self.model.classes_) y_pred = y_proba.idxmax(axis=1) scaling = 1 / (1 - self.min_proba) sizes = (y_proba.max(axis=1) - self.min_proba).clip(0, 1) * scaling # Selecting only prob > min_proba and scaling positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probabilty if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) # TODO: Change to Limit Order? def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() self.store_targets() @catch_errors def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_features = bertha_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_features["gap"] = bertha_bar["open"] / yesterday_close - 1 self.features = pd.concat([new_features.dropna(), self.features]) self.print(f"Stored new features, total: {len(self.features)}") @catch_errors def store_targets(self): last_features = self.features.query("time == @self.Time.date()") self.Log(last_features) tickers = list(last_features.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) self.Log(f"Target time: {last_minute}") minute_bars = self.History(tickers, last_minute - timedelta(minutes=1), last_minute, Resolution.Minute) self.Log(minute_bars) trading_bar = minute_bars.droplevel("time").join(last_features) new_targets = trading_bar.eval("close / bb_close - 1") # Calculate the trading 
return self.targets = pd.concat([new_targets.dropna(), self.targets]) self.print(f"Stored new targets, total: {len(self.targets)}") def print(self, msg): self.Debug(f"{self.Time} {msg}") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
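# Worked example (illustration) of the edge formula in train_model above,
# edge = (n_classes * score - 1) / (n_classes - 1): it rescales balanced
# accuracy so a random guesser (score = 1/n_classes) maps to 0 and a perfect
# classifier (score = 1) maps to 1.
def edge(score, n_classes):
    return (n_classes * score - 1) / (n_classes - 1)

assert abs(edge(1 / 3, 3)) < 1e-12  # 3-class coin flip: no edge
assert edge(1.0, 3) == 1.0          # perfect classifier: full edge
print(edge(0.45, 3))                # ~0.175: modest edge over the 1/3 baseline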
""" Big Bertha Strategy with Machine Learning Last changes: v0.30a: New cv metric based on Kelly criterion v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance) v0.29: Parameter for entry/exit time v0.28: Gridsearch pipeline with multiple models v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.30a @creation date: 05/07/2022 """ import numpy as np from AlgorithmImports import * import pickle import sklearn import pandas as pd from sklearn.ensemble import GradientBoostingClassifier import indicators as idx from timeseriescv import TimeSeriesSplitGroups pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} def catch_errors(func): def wrap(self, *args, **kwargs): try: result = func(self, *args, **kwargs) return result except (KeyError, ValueError) as e: self.print(e) return return wrap class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.tgt_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored self.SetStartDate(2021, 6, 1) self.SetEndDate(2022, 9, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) #self.ObjectStore.Delete(self.store_model) # Deleting existing data if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model): self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model))) else: self.model = GradientBoostingClassifier(n_iter_no_change=3) self.model.kelly = 0 self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits) self.x, self.y = None, None at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.x is None or self.y is None: return idx = self.x.index.intersection(self.y.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] self.x, self.y = self.x.loc[idx], self.y.loc[idx] days = idx.get_level_values("time") if len(days.unique()) <= 21: return # Require more than one month of training data y_bin = self.y.apply(lambda x: +1 if x >= self.tgt_gain and self.strategy 
>= 0 else -1 if x <= -self.tgt_gain and self.strategy <= 0 else 0) # Replace cross_val_score with custom loop to use sample weighted metrics scores = [] for train_idx, test_idx in self.cv.split(self.x, groups=days): model_temp = sklearn.base.clone(self.model) x_train, x_test = self.x.iloc[train_idx], self.x.iloc[test_idx] y_train, y_test = y_bin.iloc[train_idx], y_bin.iloc[test_idx] w_train, w_test = self.y.iloc[train_idx], self.y.iloc[test_idx] model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by log returns y_pred = model_temp.predict(x_test) scores += [kelly_pos(y_test, y_pred, sample_weight=abs(w_test))] # Kelly calculated with normal returns self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(self.y))) self.model.kelly = 0 if np.isnan(np.nanmean(scores)) else np.nanmean(scores) if self.store_model is not None: self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model)) self.print(f"Training: {y_bin.value_counts()} Edge:{self.model.kelly:.1%}") self.Plot("ML", "Edge", self.model.kelly) def enter_trades(self): self.store_features() if self.model.kelly <= 0: return x_pred = self.x.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_proba = pd.DataFrame(self.model.predict_proba(x_pred), index=x_pred.index, columns=self.model.classes_) y_pred = y_proba.idxmax(axis=1) sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Selecting only prob > min_proba and scaling positions = y_pred * sizes * self.model.kelly.clip(0, 1) # Sizing based on Kelly and individual probability if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) # TODO: Change to Limit Order? 
def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() self.store_targets() @catch_errors def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_features = bertha_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_features["gap"] = bertha_bar["open"] / yesterday_close - 1 self.x = pd.concat([new_features.dropna(), self.x]) self.print(f"Stored new features, total: {len(self.x)}") @catch_errors def store_targets(self): last_x = self.x.query("time == @self.Time.date()") self.Log(last_x) tickers = list(last_x.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) self.Log(f"Target time: {last_minute}") minute_bars = self.History(tickers, last_minute - timedelta(minutes=1), last_minute, Resolution.Minute) self.Log(minute_bars) trading_bar = minute_bars.droplevel("time").join(last_x) new_y = trading_bar.eval("close / bb_close - 1") # Calculate the trading return self.y = pd.concat([new_y.dropna(), self.y]) self.print(f"Stored new targets, total: {len(self.y)}") def print(self, msg): self.Debug(f"{self.Time} {msg}") self.Log(f"{self.Time} {msg}") def kelly_pos(y_true, y_pred, sample_weight=None): # TODO: differentiate between losses on 0 and on 1/-1 trades = y_pred!=0 wins = y_true[trades]==y_pred[trades] win_rate = wins.mean() loss_rate = 1-win_rate avg_win = sample_weight[trades][wins].mean() avg_loss = sample_weight[trades][~wins].mean() return win_rate/avg_loss - loss_rate/avg_win def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
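# Toy check (illustration) of kelly_pos above: with +1/-1 labels it reduces
# to the classic Kelly fraction f* = p/a - q/b, where p is the hit rate on
# actual trades, b the average winning return and a the average losing return.
import pandas as pd

y_true = pd.Series([1, 1, -1, 0])
y_pred = pd.Series([1, 1, 1, 0])            # two wins, one loss, one no-trade
rets = pd.Series([0.06, 0.08, 0.05, 0.00])  # absolute per-trade returns
f = kelly_pos(y_true, y_pred, sample_weight=rets)
# p = 2/3, avg_win = 0.07, avg_loss = 0.05 -> f = (2/3)/0.05 - (1/3)/0.07 ~ 8.57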
""" Big Bertha Strategy with Machine Learning Last changes: v0.31a: Minor improvements and library implementation v0.30a: New cv metric based on Kelly criterion v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance) v0.29: Parameter for entry/exit time v0.28: Gridsearch pipeline with multiple models v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.31a @creation date: 05/07/2022 """ import numpy as np from AlgorithmImports import * import pickle import sklearn import pandas as pd from sklearn.ensemble import GradientBoostingClassifier import utils as utl import indicators as idx from timeseriescv import TimeSeriesSplitGroups pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.tgt_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored self.SetStartDate(2021, 6, 1) self.SetEndDate(2022, 9, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) # self.ObjectStore.Delete(self.store_model) # Deleting existing data if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model): self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model))) else: self.model = GradientBoostingClassifier(n_iter_no_change=3) self.model.kelly = 0 self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits) self.x, self.y = None, None at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.x is None or self.y is None: return idx = self.x.index.intersection(self.y.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days self.x, self.y = self.x.loc[idx], self.y.loc[idx] days = idx.get_level_values("time") if len(days.unique()) <= self.cv_splits: return # Days required for CV y_bin = self.y.apply(lambda x: +1 if x >= self.tgt_gain and self.strategy >= 0 else -1 if x <= -self.tgt_gain and self.strategy <= 0 else 0) # Replace 
cross_val_score with custom loop to use sample weighted metrics scores = [] for train_idx, test_idx in self.cv.split(self.x, groups=days): model_tmp = sklearn.base.clone(self.model) x_train, x_test = self.x.iloc[train_idx], self.x.iloc[test_idx] y_train, y_test = y_bin.iloc[train_idx], y_bin.iloc[test_idx] w_train, w_test = self.y.iloc[train_idx], self.y.iloc[test_idx] model_tmp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by log returns y_pred = model_tmp.predict(x_test) scores += [ utl.kelly_score(y_test, y_pred, returns=abs(w_test))] # Kelly calculated with normal returns self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(self.y))) self.model.kelly = np.clip(np.nanmean(scores), 0, 1) if self.store_model is not None: self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model)) self.print(f"Training: {y_bin.value_counts()} Kelly:{self.model.kelly:.1%}") self.Plot("ML", "Kelly", self.model.kelly) def enter_trades(self): self.store_features() if self.model.kelly <= 0: return x_pred = self.x.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_proba = pd.DataFrame(self.model.predict_proba(x_pred), index=x_pred.index, columns=self.model.classes_) y_pred = y_proba.idxmax(axis=1) sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Selecting only prob > min_proba and scaling positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) # TODO: Change to Limit Order? def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() self.store_targets() @utl.catch_errors def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_features = bertha_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_features["gap"] = bertha_bar["open"] / yesterday_close - 1 self.x = pd.concat([new_features.dropna(), self.x]) self.print(f"Stored new features, total: {len(self.x)}") @utl.catch_errors def store_targets(self): last_x = self.x.query("time == @self.Time.date()") self.Log(last_x) tickers = list(last_x.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) self.Log(f"Target time: {last_minute}") minute_bars = self.History(tickers, last_minute - timedelta(minutes=1), last_minute, Resolution.Minute) self.Log(minute_bars) trading_bar = minute_bars.droplevel("time").join(last_x) new_y = trading_bar.eval("close / bb_close - 1") # Calculate the trading return self.y = pd.concat([new_y.dropna(), self.y]) 
self.print(f"Stored new targets, total: {len(self.y)}") def print(self, msg): self.Debug(f"{self.Time} {msg}") self.Log(f"{self.Time} {msg}") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
""" Big Bertha Strategy with Machine Learning Last changes: v0.32a: Improved logging v0.31a: Minor improvements and library implementation v0.30a: New cv metric based on Kelly criterion v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance) v0.29: Parameter for entry/exit time v0.28: Gridsearch pipeline with multiple models v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.32a @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import sklearn import pandas as pd pd.set_option('mode.use_inf_as_na', True) from sklearn.ensemble import GradientBoostingClassifier import utils as utl from timeseriescv import TimeSeriesSplitGroups class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.tgt_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored self.SetStartDate(2021, 6, 1) self.SetEndDate(2022, 11, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) # self.ObjectStore.Delete(self.store_model) # Deleting existing data if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model): self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model))) else: self.model = GradientBoostingClassifier(n_iter_no_change=3) self.model.kelly = 0 self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits) self.x, self.y = None, None at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.x is None or self.y is None: return idx = self.x.index.intersection(self.y.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days self.x, self.y = self.x.loc[idx], self.y.loc[idx] days = idx.get_level_values("time") if len(days.unique()) <= self.cv_splits: return # Days required for CV y_bin = self.y.apply(lambda x: +1 if x >= self.tgt_gain and self.strategy >= 0 else -1 if x <= -self.tgt_gain and self.strategy <= 0 else 0) # Replace cross_val_score with custom loop to use sample weighted metrics scores = [] for train_idx, test_idx in self.cv.split(self.x, groups=days): model_tmp = sklearn.base.clone(self.model) x_train, x_test = 
self.x.iloc[train_idx], self.x.iloc[test_idx] y_train, y_test = y_bin.iloc[train_idx], y_bin.iloc[test_idx] w_train, w_test = self.y.iloc[train_idx], self.y.iloc[test_idx] model_tmp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by log returns y_pred = model_tmp.predict(x_test) scores += [utl.kelly_score(y_test, y_pred, returns=abs(w_test))] # Kelly calculated with normal returns self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(self.y))) self.model.kelly = np.clip(np.nanmean(scores), 0, 1) if self.store_model is not None: self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model)) self.print(f"Training: {y_bin.value_counts()} Kelly:{self.model.kelly:.1%}") self.Plot("ML", "Kelly", self.model.kelly) def enter_trades(self): self.store_features() if self.model.kelly <= 0: return x_pred = self.x.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_proba = pd.DataFrame(self.model.predict_proba(x_pred), index=x_pred.index, columns=self.model.classes_) self.print(f"Predictions: {y_proba}") self.Notify.Email("hb_beawai@googlegroups.com", "Big Bertha Predictions", y_proba) y_pred = y_proba.idxmax(axis=1) sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Selecting only prob > min_proba and scaling positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) # TODO: Change to Limit Order? def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() self.store_targets() @utl.catch_errors def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_x = bertha_bar.add_prefix("bb_") new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_x["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_x["gap"] = bertha_bar["open"] / yesterday_close - 1 self.x = pd.concat([new_x.dropna(), self.x]) self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}") @utl.catch_errors def store_targets(self): last_x = self.x.query("time == @self.Time.date()") tickers = list(last_x.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, last_minute - timedelta(minutes=1), last_minute, Resolution.Minute) trading_bar = minute_bars.droplevel("time").join(last_x) new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return self.y = pd.concat([new_y, self.y]) self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}") def print(self, msg): self.Debug(f"{self.Time} {msg}") self.Log(f"{self.Time} {msg}")
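# The persistence pattern used above, isolated for reference (illustration;
# ContainsKey, ReadBytes and SaveBytes are the ObjectStore calls already used
# in this file; the helper names and `key` are placeholders):
import pickle

def load_or_default(algo, key, default_factory):
    if key is not None and algo.ObjectStore.ContainsKey(key):
        return pickle.loads(bytes(algo.ObjectStore.ReadBytes(key)))
    return default_factory()

def persist(algo, key, obj):
    if key is not None:
        algo.ObjectStore.SaveBytes(key, pickle.dumps(obj))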
""" Big Bertha Strategy with Machine Learning Last changes: v0.33a: Improved email notification v0.32a: Improved logging v0.31a: Minor improvements and library implementation v0.30a: New cv metric based on Kelly criterion v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance) v0.29: Parameter for entry/exit time v0.28: Gridsearch pipeline with multiple models v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.33a @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import sklearn import pandas as pd pd.set_option('mode.use_inf_as_na', True) from sklearn.ensemble import GradientBoostingClassifier import utils as utl from timeseriescv import TimeSeriesSplitGroups class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.tgt_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored self.SetStartDate(2021, 6, 1) self.SetEndDate(2022, 11, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) # self.ObjectStore.Delete(self.store_model) # Deleting existing data if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model): self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model))) else: self.model = GradientBoostingClassifier(n_iter_no_change=3) self.model.kelly = 0 self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits) self.x, self.y = None, None at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.x is None or self.y is None: return idx = self.x.index.intersection(self.y.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days self.x, self.y = self.x.loc[idx], self.y.loc[idx] days = idx.get_level_values("time") if len(days.unique()) <= self.cv_splits: return # Days required for CV y_bin = self.y.apply(lambda x: +1 if x >= self.tgt_gain and self.strategy >= 0 else -1 if x <= -self.tgt_gain and self.strategy <= 0 else 0) # Replace cross_val_score with custom loop to use sample weighted metrics scores = [] for train_idx, test_idx in self.cv.split(self.x, groups=days): model_tmp = 
sklearn.base.clone(self.model) x_train, x_test = self.x.iloc[train_idx], self.x.iloc[test_idx] y_train, y_test = y_bin.iloc[train_idx], y_bin.iloc[test_idx] w_train, w_test = self.y.iloc[train_idx], self.y.iloc[test_idx] model_tmp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by log returns y_pred = model_tmp.predict(x_test) scores += [utl.kelly_score(y_test, y_pred, returns=abs(w_test))] # Kelly calculated with normal returns self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(self.y))) self.model.kelly = np.clip(np.nanmean(scores), 0, 1) if self.store_model is not None: self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model)) self.print(f"Training: {y_bin.value_counts()} Kelly:{self.model.kelly:.1%}") self.Plot("ML", "Kelly", self.model.kelly) def enter_trades(self): self.store_features() if self.model.kelly <= 0: return x_pred = self.x.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_proba = pd.DataFrame(self.model.predict_proba(x_pred), index=x_pred.index, columns=self.model.classes_) self.print(f"Predictions: {y_proba.to_string()}") self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_proba.to_html()) self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_proba.to_html()) y_pred = y_proba.idxmax(axis=1) sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Selecting only prob > min_proba and scaling positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) # TODO: Change to Limit Order? 
def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() self.store_targets() @utl.catch_errors def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_x = bertha_bar.add_prefix("bb_") new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_x["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_x["gap"] = bertha_bar["open"] / yesterday_close - 1 self.x = pd.concat([new_x.dropna(), self.x]) self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}") @utl.catch_errors def store_targets(self): last_x = self.x.query("time == @self.Time.date()") tickers = list(last_x.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, last_minute - timedelta(minutes=1), last_minute, Resolution.Minute) trading_bar = minute_bars.droplevel("time").join(last_x) new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return self.y = pd.concat([new_y, self.y]) self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}") def print(self, msg): self.Debug(f"{self.Time} {msg}") self.Log(f"{self.Time} {msg}")
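# Numeric walk-through (illustration) of the sizing in enter_trades above:
# probabilities map to sizes via (p_max - 0.5).clip(0, 1) * 2, are scaled by
# the Kelly estimate, then gross exposure is capped at 1 (no leverage).
import pandas as pd

y_pred = pd.Series({"AAA": 1, "BBB": -1, "CCC": 1})  # predicted direction
p_max = pd.Series({"AAA": 0.80, "BBB": 0.55, "CCC": 0.50})
kelly = 0.5
sizes = (p_max - 0.5).clip(0, 1) * 2  # AAA 0.6, BBB 0.1, CCC 0.0 (filtered out)
positions = y_pred * sizes * kelly    # AAA +0.30, BBB -0.05, CCC 0.00
if sum(abs(positions)) > 1:
    positions /= sum(abs(positions))  # here gross exposure is 0.35, no rescale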
""" Big Bertha Strategy with Machine Learning Last changes: v0.34a: Storing features instead of model v0.33a: Improved email notification v0.32a: Improved logging v0.31a: Minor improvements and library implementation v0.30a: New cv metric based on Kelly criterion v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance) v0.29: Parameter for entry/exit time v0.28: Gridsearch pipeline with multiple models v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.34a @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import sklearn import pandas as pd pd.set_option('mode.use_inf_as_na', True) from sklearn.ensemble import GradientBoostingClassifier import utils as utl from timeseriescv import TimeSeriesSplitGroups class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.tgt_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.cv_splits = self.GetParameter("cv_splits", 4) # Number of splits for model cross validation self.SetStartDate(2021, 6, 1) self.SetEndDate(2022, 11, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.kelly = 0 self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits) self.model = GradientBoostingClassifier(n_iter_no_change=3) self.last_training = datetime(2000, 1, 1, 0, 0, 0) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(every_day, at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) self.x, self.y = None, None if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data"))) self.print(f"Loaded data {self.x.shape} {self.y.shape}") if not self.LiveMode: # Store data during backtest self.Schedule.On(every_day, at(16, 0), self.save_data) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if ((self.Time - self.last_training).days < 30 and (self.kelly > 0)) \ or self.x is None or self.y is None: return self.clean_data() days = self.x.index.get_level_values("time") if len(days.unique()) <= self.cv_splits * 10: return y_bin = self.y.apply(lambda x: +1 if x >= self.tgt_gain and self.strategy >= 0 else -1 if x <= -self.tgt_gain and self.strategy <= 0 else 0) # Replace cross_val_score with custom loop to use sample weighted metrics scores = [] days = self.x.index.get_level_values("time") for train_idx, test_idx in self.cv.split(self.x, groups=days): model_tmp = sklearn.base.clone(self.model) x_train, x_test = self.x.iloc[train_idx], 
self.x.iloc[test_idx] y_train, y_test = y_bin.iloc[train_idx], y_bin.iloc[test_idx] w_train, w_test = self.y.iloc[train_idx], self.y.iloc[test_idx] model_tmp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by log returns y_pred = model_tmp.predict(x_test) scores += [utl.kelly_score(y_test, y_pred, returns=abs(w_test))] # Kelly calculated with normal returns self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(self.y))) self.last_training = self.Time self.kelly = np.clip(np.nanmean(scores), 0, 1) self.print(f"Training: {y_bin.value_counts()} Kelly:{self.kelly:.1%}") self.Plot("ML", "Kelly", self.kelly) def enter_trades(self): self.calc_features() if self.kelly <= 0: return x_pred = self.x.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_proba = pd.DataFrame(self.model.predict_proba(x_pred), index=x_pred.index, columns=self.model.classes_) self.print(f"Predictions: {y_proba.to_string()}") self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_proba.to_html()) self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_proba.to_html()) y_pred = y_proba.idxmax(axis=1) sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Selecting only prob > min_proba and scaling positions = y_pred * sizes * self.kelly # Sizing based on Kelly and individual probability if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) # TODO: Change to Limit Order? def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() self.calc_targets() @utl.catch_errors def calc_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_x = bertha_bar.add_prefix("bb_") new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_x["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_x["gap"] = bertha_bar["open"] / yesterday_close - 1 self.x = pd.concat([new_x.dropna(), self.x]) self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}") @utl.catch_errors def calc_targets(self): last_x = self.x.query("time == @self.Time.date()") tickers = list(last_x.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, last_minute - timedelta(minutes=1), last_minute, Resolution.Minute) trading_bar = minute_bars.droplevel("time").join(last_x) new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return self.y = pd.concat([new_y, self.y]) self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}") def clean_data(self): idx = self.x.index.intersection(self.y.index) # Removing features without matching targets idx = 
idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days self.x, self.y = self.x.loc[idx], self.y.loc[idx] def save_data(self): mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True) self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb") self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y))) def print(self, msg): self.Debug(f"{self.Time} {msg}")
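# v0.34a cross-validates with TimeSeriesSplitGroups from the external
# timeseriescv module, which is not included in this listing. A minimal
# sketch, assuming it behaves like an expanding-window time-series split
# that never places the same trading day (group) in both train and test:

import numpy as np

class TimeSeriesSplitGroups:
    # Hypothetical sketch of timeseriescv.TimeSeriesSplitGroups: fold
    # boundaries fall on whole groups, and every train fold strictly
    # precedes its test fold in time.
    def __init__(self, n_splits=5):
        self.n_splits = n_splits

    def split(self, X, y=None, groups=None):
        days = np.unique(groups)                      # sorted unique trading days
        folds = np.array_split(days, self.n_splits + 1)
        idx = np.arange(len(X))
        for k in range(1, self.n_splits + 1):
            train_days = np.concatenate(folds[:k])    # all days before the test fold
            yield (idx[np.isin(groups, train_days)],
                   idx[np.isin(groups, folds[k])])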
""" Big Bertha Strategy with Machine Learning Last changes: v0.35a: CV and training improvements v0.34a: Storing features instead of model v0.33a: Improved email notification v0.32a: Improved logging v0.31a: Minor improvements and library implementation v0.30a: New cv metric based on Kelly criterion v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance) v0.29: Parameter for entry/exit time v0.28: Gridsearch pipeline with multiple models v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.35a @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import sklearn import pandas as pd pd.set_option('mode.use_inf_as_na', True) from sklearn.model_selection import train_test_split from sklearn.ensemble import GradientBoostingClassifier import utils as utl class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.target_gain = self.GetParameter("target_gain", 0.025) # Minimum target gain to enter the trade self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.SetStartDate(2021, 6, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.kelly = 0 self.model = GradientBoostingClassifier(n_iter_no_change=3) self.last_training = datetime(2000, 1, 1, 0, 0, 0) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(every_day, at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) self.x, self.y = None, None if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data"))) self.print(f"Loaded data {self.x.shape} {self.y.shape}") if not self.LiveMode: # Store data during backtest self.Schedule.On(every_day, at(16, 0), self.save_data) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if (self.Time - self.last_training).days < 7 \ or self.x is None or self.y is None: return self.clean_data() days = self.x.index.get_level_values("time") if len(days.unique()) <= 21: return y_bin = self.y.apply(lambda x: +1 if x >= self.target_gain and self.strategy >= 0 else -1 if x <= -self.target_gain and self.strategy <= 0 else 0) weight = self.y - self.target_gain # TODO: How to use log with -100% returns? 
model_temp = sklearn.base.clone(self.model) x_train, x_test, y_train, y_test, w_train, w_test = train_test_split( self.x, y_bin, weight, train_size=0.5, shuffle=False) model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns y_pred = model_temp.predict(x_test) score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) # Kelly calculated with normal returns self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight))) self.kelly = np.nan_to_num(score).clip(0, 1) self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n" f"Kelly: {self.kelly:.1%}\n") self.Plot("ML", "Score", self.kelly) def enter_trades(self): # TODO: Balance Long/Short Trades? self.calc_features() x_pred = self.x.query("time == @self.Time.date()") if self.kelly <= 0 or len(x_pred) == 0: return x_pred.index = x_pred.index.droplevel("time") y_proba = pd.DataFrame(self.model.predict_proba(x_pred), index=x_pred.index, columns=self.model.classes_) self.print(f"Predictions: {y_proba.to_string()}") self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_proba.to_html()) self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_proba.to_html()) y_pred = y_proba.idxmax(axis=1) sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Selecting only prob > min_proba and scaling positions = y_pred * sizes * self.kelly # Sizing based on Kelly and individual probability if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) # TODO: Change to Limit Order? def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() self.calc_targets() def calc_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_x = bertha_bar.add_prefix("bb_") new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_x["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_x["gap"] = bertha_bar["open"] / yesterday_close - 1 self.x = pd.concat([new_x.dropna(), self.x]) self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}") def calc_targets(self): last_x = self.x.query("time == @self.Time.date()") if len(last_x) == 0: return tickers = list(last_x.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, last_minute - timedelta(minutes=1), last_minute, Resolution.Minute) if len(minute_bars) == 0: return trading_bar = minute_bars.droplevel("time").join(last_x) new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return self.y = pd.concat([new_y, self.y]) self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}") def clean_data(self): idx = 
self.x.index.intersection(self.y.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days self.x, self.y = self.x.loc[idx], self.y.loc[idx] def save_data(self): mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True) self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb") self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y))) def print(self, msg): self.Debug(f"{self.Time} {msg}")
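# utl.kelly_score is referenced in every train_model but never defined in this
# listing, and its keyword even varies between versions (returns= in the CV loop
# above, sample_weight= from v0.35a on). A minimal sketch consistent with the
# "cv metric based on Kelly criterion" in the changelog, assuming the classic
# fraction f* = p - (1 - p) / b; the exact formula used by the author may differ:

import numpy as np

def kelly_score(y_true, y_pred, sample_weight=None):
    # Hypothetical sketch: p is the hit rate on non-flat predictions, b the
    # average-win / average-loss payoff ratio taken from the absolute
    # returns passed as weights.
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    r = np.ones(len(y_true)) if sample_weight is None else np.asarray(sample_weight)
    traded = y_pred != 0                      # flat (0) predictions are skipped
    if not traded.any():
        return np.nan
    hit = y_true[traded] == y_pred[traded]
    if hit.all() or not hit.any():            # payoff ratio undefined
        return np.nan
    p = hit.mean()
    b = r[traded][hit].mean() / r[traded][~hit].mean()
    return p - (1 - p) / b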
""" Big Bertha Strategy with Machine Learning Last changes: v0.36a: Long Short version (market exposure parameter) v0.35a: CV and training improvements v0.34a: Storing features instead of model v0.33a: Improved email notification v0.32a: Improved logging v0.31a: Minor improvements and library implementation v0.30a: New cv metric based on Kelly criterion v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance) v0.29: Parameter for entry/exit time v0.28: Gridsearch pipeline with multiple models v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.36a @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import sklearn import pandas as pd pd.set_option('mode.use_inf_as_na', True) from sklearn.model_selection import train_test_split from sklearn.ensemble import GradientBoostingClassifier import utils as utl class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.exposure = self.GetParameter("exposure", 0.5) # 0 100% short, 1 100% long, 0.5 market neutral self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.n_positions = self.GetParameter("n_positions", 10) # Number of total positions per day self.n_longs = int(self.n_positions * self.exposure) self.n_shorts = self.n_positions - self.n_longs self.SetStartDate(2021, 6, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.kelly = 0 self.model = GradientBoostingClassifier(n_iter_no_change=3) self.last_training = datetime(2000, 1, 1, 0, 0, 0) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(every_day, at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) self.x, self.y = None, None if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data"))) self.print(f"Loaded data {self.x.shape} {self.y.shape}") if not self.LiveMode: # Store data during backtest self.Schedule.On(every_day, at(16, 0), self.save_data) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if (self.Time - self.last_training).days < 7 \ or self.x is None or self.y is None: return self.clean_data() days = self.x.index.get_level_values("time") if len(days.unique()) <= 21: return y_bin = (self.y > 0).astype(float) weight = self.y # TODO: How to use log with -100% returns? 
model_temp = sklearn.base.clone(self.model) x_train, x_test, y_train, y_test, w_train, w_test = train_test_split( self.x, y_bin, weight, train_size=0.5, shuffle=False) model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns y_pred = model_temp.predict(x_test) score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) # Kelly calculated with normal returns self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight))) self.kelly = np.nan_to_num(score).clip(0, 1) self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n" f"Kelly: {self.kelly:.1%}\n") self.Plot("ML", "Score", self.kelly) def enter_trades(self): # TODO: Balance Long/Short Trades? self.calc_features() x_pred = self.x.query("time == @self.Time.date()") if self.kelly <= 0 or len(x_pred) == 0: return x_pred.index = x_pred.index.droplevel("time") y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1], index=x_pred.index).sort_values(ascending=False) self.print(f"Predictions: {y_pred.to_string()}") self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_pred.to_string()) self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_pred.to_string()) longs = y_pred.head(self.n_longs) longs_pos = longs * self.exposure / longs.sum() # Normalizing by confidence (proba UP) shorts = y_pred.tail(self.n_shorts) shorts_pos = -(1 - shorts) * (1 - self.exposure) / (1 - shorts).sum() # Normalizing by confidence (1 - proba UP) positions = pd.concat([longs_pos, shorts_pos]) * self.kelly self.print(f"Positions {positions}") for symbol, pos in positions.items(): self.SetHoldings(symbol, pos) def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() self.calc_targets() def calc_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_x = bertha_bar.add_prefix("bb_") new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_x["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_x["gap"] = bertha_bar["open"] / yesterday_close - 1 self.x = pd.concat([new_x.dropna(), self.x]) self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}") def calc_targets(self): last_x = self.x.query("time == @self.Time.date()") if len(last_x) == 0: return tickers = list(last_x.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, last_minute - timedelta(minutes=1), last_minute, Resolution.Minute) if len(minute_bars) == 0: return trading_bar = minute_bars.droplevel("time").join(last_x) new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return self.y = pd.concat([new_y, self.y]) self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}") def clean_data(self): idx = self.x.index.intersection(self.y.index) # Removing features without matching targets idx = 
idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days self.x, self.y = self.x.loc[idx], self.y.loc[idx] def save_data(self): mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True) self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb") self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y))) def print(self, msg): self.Debug(f"{self.Time} {msg}")
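# The v0.36a sizing normalizes long weights by their summed UP-probability and
# short weights by the summed complements, so gross exposure scales with the
# Kelly fraction and the book is market neutral at exposure = 0.5. A worked
# example with made-up probabilities (illustration only, not strategy output):

import pandas as pd

y_pred = pd.Series({"AAA": 0.70, "BBB": 0.62, "CCC": 0.45, "DDD": 0.35})
exposure, kelly = 0.5, 0.6
n_longs, n_shorts = 2, 2                       # int(4 * 0.5) and the remainder

y_pred = y_pred.sort_values(ascending=False)
longs = y_pred.head(n_longs)                                      # AAA, BBB
longs_pos = longs * exposure / longs.sum()                        # sums to +0.5
shorts = y_pred.tail(n_shorts)                                    # CCC, DDD
shorts_pos = -(1 - shorts) * (1 - exposure) / (1 - shorts).sum()  # sums to -0.5
positions = pd.concat([longs_pos, shorts_pos]) * kelly
print(positions.round(3))  # gross exposure 0.6 (= kelly), net exposure 0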
""" Big Bertha Strategy with Machine Learning Last changes: v0.37a: Long Short with probability threshold v0.36a: Long Short version (market exposure parameter) v0.35a: CV and training improvements v0.34a: Storing features instead of model v0.33a: Improved email notification v0.32a: Improved logging v0.31a: Minor improvements and library implementation v0.30a: New cv metric based on Kelly criterion v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance) v0.29: Parameter for entry/exit time v0.28: Gridsearch pipeline with multiple models v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.37a @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import sklearn import pandas as pd pd.set_option('mode.use_inf_as_na', True) from sklearn.model_selection import train_test_split from sklearn.ensemble import GradientBoostingClassifier import utils as utl class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.min_proba = self.GetParameter("min_proba", 0.2) # Minimum differential probability self.SetStartDate(2021, 6, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.kelly = 0 self.model = GradientBoostingClassifier(n_iter_no_change=3) self.last_training = datetime(2000, 1, 1, 0, 0, 0) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(every_day, at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) self.x, self.y = None, None if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data"))) self.print(f"Loaded data {self.x.shape} {self.y.shape}") if not self.LiveMode: # Store data during backtest self.Schedule.On(every_day, at(16, 0), self.save_data) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if (self.Time - self.last_training).days < 7 \ or self.x is None or self.y is None: return self.clean_data() days = self.x.index.get_level_values("time") if len(days.unique()) <= 21: return y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # Using +1/-1 labels to calculate the Kelly score for both weight = self.y # TODO: How to use log with -100% returns? 
model_temp = sklearn.base.clone(self.model) x_train, x_test, y_train, y_test, w_train, w_test = train_test_split( self.x, y_bin, weight, train_size=0.5, shuffle=False) model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns y_pred = model_temp.predict(x_test) score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) # Kelly calculated with normal returns self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight))) self.kelly = np.nan_to_num(score).clip(0, 1) self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n" f"Kelly: {self.kelly:.1%}\n") self.Plot("ML", "Score", self.kelly) def enter_trades(self): # TODO: Balance Long/Short Trades? self.calc_features() x_pred = self.x.query("time == @self.Time.date()") if self.kelly <= 0 or len(x_pred) == 0: return x_pred.index = x_pred.index.droplevel("time") y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1], index=x_pred.index) self.print(f"Predictions: {y_pred.to_string()}") self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_pred.to_string()) self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_pred.to_string()) proba_desc = y_pred.sort_values(ascending=False) proba_asc = y_pred.sort_values(ascending=True) pairs = dict(proba=proba_desc.values-proba_asc.values, long_sym=proba_desc.index.get_level_values("symbol"), short_sym=proba_asc.index.get_level_values("symbol")) pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba") pairs.eval("pos = proba * @self.kelly", inplace=True) if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum() self.print(f"Positions {pairs}") for _, row in pairs.iterrows(): self.SetHoldings(row["long_sym"], row["pos"]/2) self.SetHoldings(row["short_sym"], -row["pos"]/2) def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() self.calc_targets() def calc_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_x = bertha_bar.add_prefix("bb_") new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_x["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_x["gap"] = bertha_bar["open"] / yesterday_close - 1 self.x = pd.concat([new_x.dropna(), self.x]) self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}") def calc_targets(self): last_x = self.x.query("time == @self.Time.date()") if len(last_x) == 0: return tickers = list(last_x.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, last_minute - timedelta(minutes=1), last_minute, Resolution.Minute) if len(minute_bars) == 0: return trading_bar = minute_bars.droplevel("time").join(last_x) new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return self.y = pd.concat([new_y, self.y]) self.print(f"Stored targets, total/new: 
{len(self.y)}/{len(new_y)}") def clean_data(self): idx = self.x.index.intersection(self.y.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days self.x, self.y = self.x.loc[idx], self.y.loc[idx] def save_data(self): mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True) self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb") self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y))) def print(self, msg): self.Debug(f"{self.Time} {msg}")
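# v0.37a pairs the highest-probability symbol against the lowest, the second
# highest against the second lowest, and so on, keeping only pairs whose
# probability differential clears min_proba. A worked example with made-up
# probabilities (illustration only):

import pandas as pd

y_pred = pd.Series({"AAA": 0.80, "BBB": 0.60, "CCC": 0.45, "DDD": 0.20})
min_proba, kelly = 0.2, 0.5

proba_desc = y_pred.sort_values(ascending=False)
proba_asc = y_pred.sort_values(ascending=True)
pairs = pd.DataFrame({"proba": proba_desc.values - proba_asc.values,
                      "long_sym": proba_desc.index,
                      "short_sym": proba_asc.index})
pairs = pairs.query("proba >= @min_proba")  # keeps AAA/DDD (0.60); BBB/CCC (0.15) is dropped
pairs["pos"] = pairs["proba"] * kelly       # 0.30, then split half long / half short
print(pairs)                                # -> long AAA +0.15, short DDD -0.15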
""" Big Bertha Strategy with Machine Learning Last changes: v0.38a: DAS Trader integration v0.37a: Long Short with probability threshold v0.36a: Long Short version (market exposure parameter) v0.35a: CV and training improvements v0.34a: Storing features instead of model v0.33a: Improved email notification v0.32a: Improved logging v0.31a: Minor improvements and library implementation v0.30a: New cv metric based on Kelly criterion v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance) v0.29: Parameter for entry/exit time v0.28: Gridsearch pipeline with multiple models v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.38a @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import sklearn import pandas as pd pd.set_option('mode.use_inf_as_na', True) from sklearn.model_selection import train_test_split from sklearn.ensemble import GradientBoostingClassifier import utils as utl class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.min_proba = self.GetParameter("min_proba", 0.2) # Minimum differential probability self.SetStartDate(2021, 6, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.kelly = 0 self.model = GradientBoostingClassifier(n_iter_no_change=3) self.last_training = datetime(2000, 1, 1, 0, 0, 0) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(every_day, at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) self.x, self.y = None, None if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data"))) self.print(f"Loaded data {self.x.shape} {self.y.shape}") if not self.LiveMode: # Store data during backtest self.Schedule.On(every_day, at(16, 0), self.save_data) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if (self.Time - self.last_training).days < 7 \ or self.x is None or self.y is None: return self.clean_data() days = self.x.index.get_level_values("time") if len(days.unique()) <= 21: return y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # Using +1/-1 labels to calculate the Kelly score for both weight = self.y # TODO: How to use log with -100% returns? 
model_temp = sklearn.base.clone(self.model) x_train, x_test, y_train, y_test, w_train, w_test = train_test_split( self.x, y_bin, weight, train_size=0.5, shuffle=False) model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns y_pred = model_temp.predict(x_test) score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) # Kelly calculated with normal returns self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight))) self.kelly = np.nan_to_num(score).clip(0, 1) self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n" f"Kelly: {self.kelly:.1%}\n") self.Plot("ML", "Score", self.kelly) def enter_trades(self): # TODO: Balance Long/Short Trades? self.calc_features() x_pred = self.x.query("time == @self.Time.date()") if self.kelly <= 0 or len(x_pred) == 0: return x_pred.index = x_pred.index.droplevel("time") y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1], index=x_pred.index) self.print(f"Predictions: {y_pred.to_string()}") self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_pred.to_string()) self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_pred.to_string()) proba_desc = y_pred.sort_values(ascending=False) proba_asc = y_pred.sort_values(ascending=True) pairs = dict(proba=proba_desc.values-proba_asc.values, long_sym=proba_desc.index.get_level_values("symbol"), short_sym=proba_asc.index.get_level_values("symbol")) pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba") pairs.eval("pos = proba * @self.kelly", inplace=True) if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum() self.print(f"Positions {pairs}") for _, row in pairs.iterrows(): self.SetHoldings(row["long_sym"], row["pos"]/2) self.SetHoldings(row["short_sym"], -row["pos"]/2) if self.LiveMode: utl.das_send_order(self, row["long_sym"], position=row["pos"] / 2) utl.das_send_order(self, row["short_sym"], position=-row["pos"] / 2) def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() if self.LiveMode: utl.das_liquidate(self) self.calc_targets() def calc_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_x = bertha_bar.add_prefix("bb_") new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_x["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_x["gap"] = bertha_bar["open"] / yesterday_close - 1 self.x = pd.concat([new_x.dropna(), self.x]) self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}") def calc_targets(self): last_x = self.x.query("time == @self.Time.date()") if len(last_x) == 0: return tickers = list(last_x.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, last_minute - timedelta(minutes=1), last_minute, Resolution.Minute) if len(minute_bars) == 0: return trading_bar = 
minute_bars.droplevel("time").join(last_x) new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return self.y = pd.concat([new_y, self.y]) self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}") def clean_data(self): idx = self.x.index.intersection(self.y.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days self.x, self.y = self.x.loc[idx], self.y.loc[idx] def save_data(self): mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True) self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb") self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y))) def print(self, msg): self.Debug(f"{self.Time} {msg}")
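# utl.das_send_order and utl.das_liquidate (the v0.38a DAS Trader integration)
# are not part of this listing. A minimal sketch of what the order helper might
# look like, assuming the integration hands orders to a local DAS session over
# a TCP socket; the host, port, and command string below are placeholders, not
# the actual DAS CMD API grammar:

import socket

DAS_HOST, DAS_PORT = "127.0.0.1", 9800   # placeholder endpoint

def das_send_order(algo, ticker, quantity=None, position=None):
    # Hypothetical sketch: translate a target portfolio weight into shares
    # (as the v0.39a+ call sites do) and forward a market order to DAS.
    if quantity is None:
        quantity = algo.CalculateOrderQuantity(ticker, position)
    if quantity == 0:
        return
    side = "B" if quantity > 0 else "S"
    cmd = f"NEWORDER {side} {ticker} {abs(int(quantity))} MKT\n"  # illustrative syntax only
    with socket.create_connection((DAS_HOST, DAS_PORT), timeout=5) as conn:
        conn.sendall(cmd.encode())
    algo.print(f"DAS order: {cmd.strip()}")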
""" Big Bertha Strategy with Machine Learning Last changes: v0.39a: Minor improvements and changes to DAS Integration v0.38a: DAS Trader integration v0.37a: Long Short with probability threshold v0.36a: Long Short version (market exposure parameter) v0.35a: CV and training improvements v0.34a: Storing features instead of model v0.33a: Improved email notification v0.32a: Improved logging v0.31a: Minor improvements and library implementation v0.30a: New cv metric based on Kelly criterion v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance) v0.29: Parameter for entry/exit time v0.28: Gridsearch pipeline with multiple models v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.38a @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import sklearn import pandas as pd pd.set_option('mode.use_inf_as_na', True) from sklearn.model_selection import train_test_split from sklearn.ensemble import GradientBoostingClassifier import utils as utl class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.min_proba = self.GetParameter("min_proba", 0.1) # Minimum differential probability self.SetStartDate(2021, 6, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.kelly = 0 self.model = GradientBoostingClassifier(n_iter_no_change=3) self.last_training = datetime(2000, 1, 1, 0, 0, 0) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(every_day, at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) self.x, self.y = None, None if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data"))) self.print(f"Loaded data {self.x.shape} {self.y.shape}") if not self.LiveMode: # Store data during backtest self.Schedule.On(every_day, at(16, 0), self.save_data) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if (self.Time - self.last_training).days < 7 \ or self.x is None or self.y is None: return self.clean_data() days = self.x.index.get_level_values("time") if len(days.unique()) <= 21: return y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # Using +1/-1 labels to calculate the Kelly score for both weight = self.y # TODO: How to use log with -100% returns? 
model_temp = sklearn.base.clone(self.model) x_train, x_test, y_train, y_test, w_train, w_test = train_test_split( self.x, y_bin, weight, train_size=0.5, shuffle=False) model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns y_pred = model_temp.predict(x_test) score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) # Kelly calculated with normal returns self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight))) self.kelly = np.nan_to_num(score).clip(0, 1) self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n" f"Kelly: {self.kelly:.1%}\n") self.Plot("ML", "Score", self.kelly) def enter_trades(self): # TODO: Balance Long/Short Trades? self.calc_features() x_pred = self.x.query("time == @self.Time.date()") if self.kelly <= 0 or len(x_pred) == 0: return x_pred.index = x_pred.index.droplevel("time") y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1], index=x_pred.index) self.print(f"Predictions: {y_pred.to_string()}") for email in ["fbaldisserri@gmail.com", "hbrewer27@gmail.com"]: self.Notify.Email(email, "Big Bertha Predictions", y_pred.to_string()) proba_desc = y_pred.sort_values(ascending=False) proba_asc = y_pred.sort_values(ascending=True) pairs = dict(proba=proba_desc.values-proba_asc.values, long_sym=proba_desc.index.get_level_values("symbol"), short_sym=proba_asc.index.get_level_values("symbol")) pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba") pairs.eval("pos = proba * @self.kelly", inplace=True) if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum() self.print(f"Positions {pairs}") for _, row in pairs.iterrows(): self.SetHoldings(row["long_sym"], row["pos"]/2) self.SetHoldings(row["short_sym"], -row["pos"]/2) #if self.LiveMode: long_qty = self.CalculateOrderQuantity(row["long_sym"], row["pos"]/2) utl.das_send_order(self, row["long_sym"].split()[0], quantity=long_qty) short_qty = self.CalculateOrderQuantity(row["short_sym"], -row["pos"] / 2) utl.das_send_order(self, row["short_sym"].split()[0], quantity=short_qty) def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() #if self.LiveMode: utl.das_liquidate(self) self.calc_targets() def calc_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_x = bertha_bar.add_prefix("bb_") new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_x["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_x["gap"] = bertha_bar["open"] / yesterday_close - 1 self.x = pd.concat([new_x.dropna(), self.x]) self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}") def calc_targets(self): last_x = self.x.query("time == @self.Time.date()") if len(last_x) == 0: return tickers = list(last_x.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, last_minute - 
timedelta(minutes=1), last_minute, Resolution.Minute) if len(minute_bars) == 0: return trading_bar = minute_bars.droplevel("time").join(last_x) new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return self.y = pd.concat([new_y, self.y]) self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}") def clean_data(self): idx = self.x.index.intersection(self.y.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days self.x, self.y = self.x.loc[idx], self.y.loc[idx] def save_data(self): mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True) self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb") self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y))) def print(self, msg): self.Debug(f"{self.Time} {msg}")
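# utl.agg_bars builds the premarket and "Big Bertha" opening bars from the
# minute history but is not included in this listing. A minimal sketch,
# assuming the usual QuantConnect history frame indexed by (symbol, time)
# and inclusive "HH:MM" boundaries:

import pandas as pd

def agg_bars(minute_bars, start, end):
    # Hypothetical sketch of utils.agg_bars: collapse the minute bars
    # between two intraday times into one OHLCV bar per symbol.
    hhmm = minute_bars.index.get_level_values("time").strftime("%H:%M")
    window = minute_bars[(hhmm >= start) & (hhmm <= end)]
    return window.groupby(level="symbol").agg(
        open=("open", "first"), high=("high", "max"),
        low=("low", "min"), close=("close", "last"),
        volume=("volume", "sum"))

# Note: the call sites pass f"{entry_hr}:{entry_mn}", which is not
# zero-padded ("9:35"); a string comparison like the one above depends on
# that padding, so the real helper may parse times instead.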
""" Big Bertha Strategy with Machine Learning Last changes: v0.40a: Improvements to DAS Integration and logging v0.39a: Minor improvements and changes to DAS Integration v0.38a: DAS Trader integration v0.37a: Long Short with probability threshold v0.36a: Long Short version (market exposure parameter) v0.35a: CV and training improvements v0.34a: Storing features instead of model v0.33a: Improved email notification v0.32a: Improved logging v0.31a: Minor improvements and library implementation v0.30a: New cv metric based on Kelly criterion v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance) v0.29: Parameter for entry/exit time v0.28: Gridsearch pipeline with multiple models v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.40a @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import sklearn import pandas as pd pd.set_option('mode.use_inf_as_na', True) from sklearn.model_selection import train_test_split from sklearn.ensemble import GradientBoostingClassifier import utils as utl class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.min_proba = self.GetParameter("min_proba", 0.1) # Minimum differential probability self.SetStartDate(2021, 6, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.kelly = 0 self.model = GradientBoostingClassifier(n_iter_no_change=3) self.last_training = datetime(2000, 1, 1, 0, 0, 0) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(every_day, at(0, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) self.Schedule.On(every_day, at(16, 0), self.save_data) self.x, self.y = None, None if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data"))) self.print(f"Loaded data {self.x.shape} {self.y.shape}") def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if (self.Time - self.last_training).days < 7 \ or self.x is None or self.y is None: return self.clean_data() days = self.x.index.get_level_values("time") if len(days.unique()) <= 21: return y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # Using +1/-1 labels to calculate the Kelly score for both weight = self.y # TODO: How to use log with -100% returns? 
model_temp = sklearn.base.clone(self.model) x_train, x_test, y_train, y_test, w_train, w_test = train_test_split( self.x, y_bin, weight, train_size=0.5, shuffle=False) model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns y_pred = model_temp.predict(x_test) score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) # Kelly calculated with normal returns self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight))) self.kelly = np.nan_to_num(score).clip(0, 1) self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n" f"Kelly: {self.kelly:.1%}\n") self.Plot("ML", "Score", self.kelly) def enter_trades(self): self.calc_features() x_pred = self.x.query("time == @self.Time.date()") if self.kelly <= 0 or len(x_pred) == 0: return x_pred.index = x_pred.index.droplevel("time") y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1], index=x_pred.index) self.print(f"Predictions: {y_pred.to_string()}") for email in ["fbaldisserri@gmail.com", "hbrewer27@gmail.com"]: self.Notify.Email(email, "Big Bertha Predictions", y_pred.to_string()) proba_desc = y_pred.sort_values(ascending=False) proba_asc = y_pred.sort_values(ascending=True) pairs = dict(proba=proba_desc.values-proba_asc.values, long_sym=proba_desc.index.get_level_values("symbol"), short_sym=proba_asc.index.get_level_values("symbol")) pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba") pairs.eval("pos = proba * @self.kelly", inplace=True) if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum() self.print(f"Positions {pairs}") for _, row in pairs.iterrows(): self.SetHoldings(row["long_sym"], row["pos"]/2) self.SetHoldings(row["short_sym"], -row["pos"]/2) if self.LiveMode: long_qty = self.CalculateOrderQuantity(row["long_sym"], row["pos"]/2) utl.das_send_order(self, row["long_sym"].split()[0], quantity=long_qty) short_qty = self.CalculateOrderQuantity(row["short_sym"], -row["pos"] / 2) utl.das_send_order(self, row["short_sym"].split()[0], quantity=short_qty) def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() if self.LiveMode: utl.das_liquidate(self) self.calc_targets() def calc_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_x = bertha_bar.add_prefix("bb_") new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_x["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_x["gap"] = bertha_bar["open"] / yesterday_close - 1 self.x = pd.concat([new_x.dropna(), self.x]) self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}") def calc_targets(self): last_x = self.x.query("time == @self.Time.date()") if len(last_x) == 0: return tickers = list(last_x.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, last_minute - timedelta(minutes=1), last_minute, Resolution.Minute) 
if len(minute_bars) == 0: return trading_bar = minute_bars.droplevel("time").join(last_x) new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return self.y = pd.concat([new_y, self.y]) self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}") def clean_data(self): idx = self.x.index.intersection(self.y.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days self.x, self.y = self.x.loc[idx], self.y.loc[idx] def save_data(self): mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True) self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb") self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y))) def print(self, msg): self.Debug(f"{self.Time} {msg}")
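# The engineered features reduce each morning to four ratios. A quick worked
# example with made-up bar values (illustration only):

# Hypothetical "Big Bertha" bar aggregated from 09:31 to 09:35
bb_open, bb_high, bb_low, bb_close = 10.0, 11.0, 9.5, 10.8
yesterday_close = 9.8

bb_size = (bb_high - bb_low) / bb_open                     # 0.150: range is 15% of the open
bb_close_range = (bb_close - bb_low) / (bb_high - bb_low)  # 0.867: closed near the high
bb_open_range = (bb_open - bb_low) / (bb_high - bb_low)    # 0.333: opened in the lower third
gap = bb_open / yesterday_close - 1                        # 0.020: ~2% gap up overnight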
""" Big Bertha Strategy with Machine Learning Last changes: v0.41a: Added use_kelly parameter and 9am training for easier live trading v0.40a: Improvements to DAS Integration and logging v0.39a: Minor improvements and changes to DAS Integration v0.38a: DAS Trader integration v0.37a: Long Short with probability threshold v0.36a: Long Short version (market exposure parameter) v0.35a: CV and training improvements v0.34a: Storing features instead of model v0.33a: Improved email notification v0.32a: Improved logging v0.31a: Minor improvements and library implementation v0.30a: New cv metric based on Kelly criterion v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance) v0.29: Parameter for entry/exit time v0.28: Gridsearch pipeline with multiple models v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.40a @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import sklearn import pandas as pd pd.set_option('mode.use_inf_as_na', True) from sklearn.model_selection import train_test_split from sklearn.ensemble import GradientBoostingClassifier import utils as utl class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.min_proba = self.GetParameter("min_proba", 0.1) # Minimum differential probability self.use_kelly = self.GetParameter("use_kelly", 1) # Whether to use the kelly criterion for sizing self.SetStartDate(2021, 6, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.kelly = 0 self.model = GradientBoostingClassifier(n_iter_no_change=3) self.last_training = datetime(2000, 1, 1, 0, 0, 0) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(every_day, at(9, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) self.Schedule.On(every_day, at(16, 0), self.save_data) self.x, self.y = None, None if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data"))) self.print(f"Loaded data {self.x.shape} {self.y.shape}") def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if (self.Time - self.last_training).days < 7 \ or self.x is None or self.y is None: return self.clean_data() days = self.x.index.get_level_values("time") if len(days.unique()) <= 21: return y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # Using +1/-1 labels to calculate the Kelly score for both weight = self.y # TODO: How to use log with -100% returns? 
model_temp = sklearn.base.clone(self.model) x_train, x_test, y_train, y_test, w_train, w_test = train_test_split( self.x, y_bin, weight, train_size=0.5, shuffle=False) model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns y_pred = model_temp.predict(x_test) score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) \ if self.use_kelly else 1 # Kelly calculated with normal returns self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight))) self.kelly = np.nan_to_num(score).clip(0, 1) self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n" f"Kelly: {self.kelly:.1%}\n") self.Plot("ML", "Score", self.kelly) def enter_trades(self): self.calc_features() x_pred = self.x.query("time == @self.Time.date()") if self.kelly <= 0 or len(x_pred) == 0: return x_pred.index = x_pred.index.droplevel("time") y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1], index=x_pred.index) self.print(f"Predictions: {y_pred.to_string()}") for email in ["fbaldisserri@gmail.com", "hbrewer27@gmail.com"]: self.Notify.Email(email, "Big Bertha Predictions", y_pred.to_string()) proba_desc = y_pred.sort_values(ascending=False) proba_asc = y_pred.sort_values(ascending=True) pairs = dict(proba=proba_desc.values-proba_asc.values, long_sym=proba_desc.index.get_level_values("symbol"), short_sym=proba_asc.index.get_level_values("symbol")) pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba") pairs.eval("pos = proba * @self.kelly", inplace=True) if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum() self.print(f"Positions {pairs}") for _, row in pairs.iterrows(): self.SetHoldings(row["long_sym"], row["pos"]/2) self.SetHoldings(row["short_sym"], -row["pos"]/2) if self.LiveMode: long_qty = self.CalculateOrderQuantity(row["long_sym"], row["pos"]/2) utl.das_send_order(self, row["long_sym"].split()[0], quantity=long_qty) short_qty = self.CalculateOrderQuantity(row["short_sym"], -row["pos"] / 2) utl.das_send_order(self, row["short_sym"].split()[0], quantity=short_qty) def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() if self.LiveMode: utl.das_liquidate(self) self.calc_targets() def calc_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_x = bertha_bar.add_prefix("bb_") new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_x["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_x["gap"] = bertha_bar["open"] / yesterday_close - 1 self.x = pd.concat([new_x.dropna(), self.x]) self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}") def calc_targets(self): last_x = self.x.query("time == @self.Time.date()") if len(last_x) == 0: return tickers = list(last_x.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, last_minute - timedelta(minutes=1), 
last_minute, Resolution.Minute) if len(minute_bars) == 0: return trading_bar = minute_bars.droplevel("time").join(last_x) new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return self.y = pd.concat([new_y, self.y]) self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}") def clean_data(self): idx = self.x.index.intersection(self.y.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days self.x, self.y = self.x.loc[idx], self.y.loc[idx] def save_data(self): mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True) self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb") self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y))) def print(self, msg): self.Debug(f"{self.Time} {msg}")
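Note on dependencies: the listing above (and the v0.42a listing that follows) imports a utils module with kelly_score, agg_bars, das_send_order and das_liquidate, which is not reproduced here. Below is a minimal sketch of what utils.kelly_score plausibly computes, inferred only from its call site and the v0.30a changelog entry ("New cv metric based on Kelly criterion"); the signature matches the call, but the body is an assumption, not the author's code.

import numpy as np

def kelly_score(y_true, y_pred, sample_weight=None):
    """Hypothetical reconstruction: Kelly fraction f = W - (1 - W) / R, where
    W is the share of correct direction calls and R is the ratio between the
    average absolute return when right and when wrong."""
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    w = np.ones(len(y_true)) if sample_weight is None else np.asarray(sample_weight, dtype=float)
    hits = y_true == y_pred
    if hits.all():
        return 1.0  # No losing trades observed: cap the Kelly fraction at 1
    if not hits.any():
        return 0.0  # No winning trades observed: bet nothing
    win_rate = hits.mean()                     # W: win rate of direction calls
    payoff = w[hits].mean() / w[~hits].mean()  # R: average win / average loss
    return win_rate - (1 - win_rate) / payoff  # Classic Kelly fraction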
""" Big Bertha Strategy with Machine Learning Last changes: v0.42a: Improved positions calculation for DAS Integration v0.41a: Added use_kelly parameter and 9am training for easier live trading v0.40a: Improvements to DAS Integration and logging v0.39a: Minor improvements and changes to DAS Integration v0.38a: DAS Trader integration v0.37a: Long Short with probability threshold v0.36a: Long Short version (market exposure parameter) v0.35a: CV and training improvements v0.34a: Storing features instead of model v0.33a: Improved email notification v0.32a: Improved logging v0.31a: Minor improvements and library implementation v0.30a: New cv metric based on Kelly criterion v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance) v0.29: Parameter for entry/exit time v0.28: Gridsearch pipeline with multiple models v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.42a @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import sklearn import pandas as pd pd.set_option('mode.use_inf_as_na', True) from sklearn.model_selection import train_test_split from sklearn.ensemble import GradientBoostingClassifier import utils as utl class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.capital = self.GetParameter("capital", 800000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.min_proba = self.GetParameter("min_proba", 0.1) # Minimum differential probability self.use_kelly = self.GetParameter("use_kelly", 1) # Whether to use the kelly criterion for sizing self.SetStartDate(2021, 6, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.kelly = 0 self.model = GradientBoostingClassifier(n_iter_no_change=3) self.last_training = datetime(2000, 1, 1, 0, 0, 0) at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.Train(every_day, at(9, 0), self.train_model) self.Schedule.On(every_day, at(9, 35), self.enter_trades) self.Schedule.On(every_day, at(15, 55), self.exit_trades) self.Schedule.On(every_day, at(16, 0), self.save_data) self.x, self.y = None, None if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data"))) self.print(f"Loaded data {self.x.shape} {self.y.shape}") def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if (self.Time - self.last_training).days < 7 \ or self.x is None or self.y is None: return self.clean_data() days = self.x.index.get_level_values("time") if len(days.unique()) <= 21: return y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # Using +1/-1 labels to calculate the Kelly score for both weight = self.y # TODO: How to use log with -100% returns? 
model_temp = sklearn.base.clone(self.model) x_train, x_test, y_train, y_test, w_train, w_test = train_test_split( self.x, y_bin, weight, train_size=0.5, shuffle=False) model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns y_pred = model_temp.predict(x_test) score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) \ if self.use_kelly else 1 # Kelly calculated with normal returns self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight))) self.kelly = np.nan_to_num(score).clip(0, 1) self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n" f"Kelly: {self.kelly:.1%}\n") self.Plot("ML", "Score", self.kelly) def enter_trades(self): self.calc_features() x_pred = self.x.query("time == @self.Time.date()") if self.kelly <= 0 or len(x_pred) == 0: return x_pred.index = x_pred.index.droplevel("time") y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1], index=x_pred.index) self.print(f"Predictions: {y_pred.to_string()}") for email in ["fbaldisserri@gmail.com", "hbrewer27@gmail.com"]: self.Notify.Email(email, "Big Bertha Predictions", y_pred.to_string()) proba_desc = y_pred.sort_values(ascending=False) proba_asc = y_pred.sort_values(ascending=True) pairs = dict(proba=proba_desc.values-proba_asc.values, long_sym=proba_desc.index.get_level_values("symbol"), short_sym=proba_asc.index.get_level_values("symbol")) pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba") pairs.eval("pos = proba * @self.kelly", inplace=True) if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum() self.print(f"Positions {pairs}") for _, row in pairs.iterrows(): order_value = self.Portfolio.TotalPortfolioValue*row["pos"]/2 self.SetHoldings(row["long_sym"], order_value) self.SetHoldings(row["short_sym"], -order_value) if self.LiveMode: long_qty = int(order_value/self.Securities[row["long_sym"]].Price) utl.das_send_order(self, row["long_sym"].split()[0], quantity=long_qty) short_qty = int(order_value/self.Securities[row["short_sym"]].Price) utl.das_send_order(self, row["short_sym"].split()[0], quantity=short_qty) def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() if self.LiveMode: utl.das_liquidate(self) self.calc_targets() def calc_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_x = bertha_bar.add_prefix("bb_") new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_x["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_x["gap"] = bertha_bar["open"] / yesterday_close - 1 self.x = pd.concat([new_x.dropna(), self.x]) self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}") def calc_targets(self): last_x = self.x.query("time == @self.Time.date()") if len(last_x) == 0: return tickers = list(last_x.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = 
self.History(tickers, last_minute - timedelta(minutes=1), last_minute, Resolution.Minute) if len(minute_bars) == 0: return trading_bar = minute_bars.droplevel("time").join(last_x) new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return self.y = pd.concat([new_y, self.y]) self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}") def clean_data(self): idx = self.x.index.intersection(self.y.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days self.x, self.y = self.x.loc[idx], self.y.loc[idx] def save_data(self): mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True) self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb") self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y))) def print(self, msg): self.Debug(f"{self.Time} {msg}")
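The pairing arithmetic in enter_trades is easier to verify with concrete numbers. A toy run of the same logic outside the algorithm (hypothetical symbols and probabilities; kelly = 0.4, min_proba = 0.1):

import pandas as pd

y_pred = pd.Series({"AAA": 0.8, "BBB": 0.6, "CCC": 0.45, "DDD": 0.2})  # made-up probabilities
kelly, min_proba = 0.4, 0.1

desc = y_pred.sort_values(ascending=False)  # long candidates, best first
asc = y_pred.sort_values(ascending=True)    # short candidates, worst first
pairs = pd.DataFrame({"proba": desc.values - asc.values,
                      "long_sym": desc.index, "short_sym": asc.index})
pairs = pairs.query("proba >= @min_proba")  # also drops the mirrored negative rows
pairs["pos"] = pairs["proba"] * kelly       # AAA/DDD: (0.8-0.2)*0.4 = 0.24; BBB/CCC: 0.06
if pairs["pos"].sum() >= 1:                 # cap gross exposure at 100%
    pairs["pos"] /= pairs["pos"].sum()
print(pairs)  # each pair is then traded half long (pos/2) and half short (-pos/2)

Because the differential probability of each mirrored row is negative, every pair survives the min_proba filter exactly once, so no symbol is traded twice.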
""" Big Bertha Strategy with Machine Learning Last changes: v0.29: Parameter for entry/exit time v0.28: Gridsearch pipeline with multiple models v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.29 @creation date: 05/07/2022 """ from AlgorithmImports import * import pickle import numpy as np import pandas as pd from sklearn.pipeline import Pipeline from sklearn.dummy import DummyClassifier from sklearn.cluster import DBSCAN from sklearn.linear_model import LogisticRegression from sklearn.ensemble import GradientBoostingClassifier from sklearn.preprocessing import StandardScaler from sklearn.model_selection import GridSearchCV, cross_val_score import indicators as idx from timeseriescv import TimeSeriesSplitGroups pd.set_option('mode.use_inf_as_na', True) GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")] AGG_OPS = {"open": "first", "close": "last", "high": "max", "low": "min", "volume": "sum"} def catch_errors(func): def wrap(self, *args, **kwargs): try: result = func(self, *args, **kwargs) return result except (KeyError, ValueError) as e: self.print(e) return return wrap class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored self.min_proba = self.GetParameter("min_proba", 0.5) # Threshold probability to trigger a long/short signal self.entry_mns = self.GetParameter("entry_mn", 5) # Entry time (minutes after 9.30) self.exit_mns = self.GetParameter("exit_mn", 385) # Exit time (minutes after 9.30) self.SetStartDate(2021, 6, 1) self.SetEndDate(2022, 9, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) #self.ObjectStore.Delete(self.store_model) # Deleting existing data if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model): self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model))) else: pipe = Pipeline([("scaling", None), ("clustering", None), ("model", LogisticRegression())]) params = dict(scaling=[None, StandardScaler()], clustering=[None, DBSCAN()], model=[DummyClassifier(), GradientBoostingClassifier(n_iter_no_change=3)]) self.model = GridSearchCV(pipe, param_grid=params) self.model.edge = 0 self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits) self.features, self.targets = None, None at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) entry_hr, entry_mn = 9 + self.entry_mns // 60, 30 + self.entry_mns % 60 exit_hr, exit_mn = 9 + self.exit_mns // 60, 30 + self.exit_mns 
% 60 self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(entry_hr, entry_mn), self.enter_trades) self.Schedule.On(every_day, at(exit_hr, exit_mn), self.exit_trades) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData and x.DollarVolume > self.min_usd_volume] def train_model(self): if self.features is None or self.targets is None: return idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] self.features = self.features.loc[idx] self.targets = self.targets.loc[idx] training_days = idx.get_level_values("time") if len(training_days.unique()) <= 21: return # Require more than one month of training data cv_scores = cross_val_score(self.model, X=self.features, y=self.targets, cv=self.cv, groups=training_days, scoring="balanced_accuracy") self.model.fit(self.features, self.targets) if self.store_model is not None: self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model)) score = np.mean(cv_scores) n_classes = len(self.model.classes_) self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}") self.print(f"Best model: {self.model.best_estimator_}") self.Log(pd.DataFrame(self.model.cv_results_).to_string()) self.Plot("ML", "Edge", self.model.kelly) def enter_trades(self): self.store_features() if self.model.kelly <= 0: return x_pred = self.features.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_proba = pd.DataFrame(self.model.predict_proba(x_pred), index=x_pred.index, columns=self.model.classes_) y_pred = y_proba.idxmax(axis=1) scaling = 1 / (1 - self.min_proba) sizes = (y_proba.max(axis=1) - self.min_proba).clip(0, 1) * scaling # Selecting only prob > min_proba and scaling positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probabilty if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() self.store_targets() @catch_errors def store_features(self): start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0) tickers = list(self.ActiveSecurities.Keys) last_minute = self.Time.replace(second=0, microsecond=0) minute_bars = self.History(tickers, start, last_minute, Resolution.Minute) pm_bar = agg_bars(minute_bars, "07:01", "09:30") entry_hr, entry_mn = last_minute.hour, last_minute.minute bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}") new_features = bertha_bar.add_prefix("bb_") new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_features["pm_volume_usd"] = pm_bar.eval("close * volume") yesterday_bar = self.History(tickers, 1, Resolution.Daily) yesterday_close = yesterday_bar["close"].droplevel("time") new_features["gap"] = bertha_bar["open"] / yesterday_close - 1 self.features = pd.concat([new_features.dropna(), self.features]) self.print(f"Stored new features, total: 
{len(self.features)}") @catch_errors def store_targets(self): last_features = self.features.query("time == @self.Time.date()") self.Log(last_features) tickers = list(last_features.index.get_level_values("symbol")) last_minute = self.Time.replace(second=0, microsecond=0) self.Log(f"Target time: {last_minute}") minute_bars = self.History(tickers, last_minute - timedelta(minutes=1), last_minute, Resolution.Minute) self.Log(minute_bars) trading_bar = minute_bars.droplevel("time").join(last_features) new_targets = trading_bar.apply(self.calc_target, axis=1) self.targets = pd.concat([new_targets.dropna(), self.targets]) self.print(f"Stored new targets, total: {len(self.targets)}") def calc_target(self, price_bar): entry_price, exit_price = price_bar.bb_close, price_bar.close window = (price_bar.bb_high - price_bar.bb_low) if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade if self.sl_retr > 0: stop_loss = price_bar.bb_high - window * self.sl_retr return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL else: return +1 elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade if self.sl_retr > 0: stop_loss = price_bar.bb_low + window * self.sl_retr return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL else: return -1 else: return 0 def print(self, msg): self.Debug(f"{self.Time} {msg}") def agg_bars(minute_bars, start_time, end_time): filtered_bars = idx.filter_bars(minute_bars, start_time, end_time) return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
""" Big Bertha Strategy with Machine Learning Last changes: v0.34a: Backward features creation and no need for pre-trained model v0.33a: Improved email notification v0.32a: Improved logging v0.31a: Minor improvements and library implementation v0.30a: New cv metric based on Kelly criterion v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance) v0.29: Parameter for entry/exit time v0.28: Gridsearch pipeline with multiple models v0.27: Minimum probability parameter v0.26: Added both TP and SL capabilities (naive triple barrier targets) v0.25: Individual probability-based sizing (in addition to general Kelly sizing) v0.24: Offline model storage v0.23: Lookback parameter @version: 0.34a @creation date: 05/07/2022 """ from AlgorithmImports import * import sklearn import pandas as pd pd.set_option('mode.use_inf_as_na', True) from sklearn.ensemble import GradientBoostingClassifier import utils as utl from timeseriescv import TimeSeriesSplitGroups class BigBerthaML(QCAlgorithm): def Initialize(self): self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day self.tgt_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade self.capital = self.GetParameter("capital", 80000) # Starting capital self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored self.entry_mns = self.GetParameter("entry_mn", 5) # Entry time (minutes after 9.30) self.exit_mns = self.GetParameter("exit_mn", 385) # Exit time (minutes after 9.30) self.SetStartDate(2021, 6, 1) self.SetEndDate(2022, 11, 1) self.SetCash(self.capital) self.UniverseSettings.Resolution = Resolution.Minute self.UniverseSettings.ExtendedMarketHours = True self.AddUniverse(self.coarse_filter) self.AddEquity(self.benchmark, Resolution.Minute) self.SetBenchmark(self.benchmark) self.model = GradientBoostingClassifier(n_iter_no_change=3) self.kelly = 0 self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits) self.x, self.y = None, None at = self.TimeRules.At every_day = self.DateRules.EveryDay(self.benchmark) self.entry_hr, self.entry_mn = 9 + self.entry_mns // 60, 30 + self.entry_mns % 60 self.exit_hr, self.exit_mn = 9 + self.exit_mns // 60, 30 + self.exit_mns % 60 self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model) self.Schedule.On(every_day, at(self.entry_hr, self.entry_mn), self.enter_trades) self.Schedule.On(every_day, at(self.exit_hr, self.exit_mn), self.exit_trades) def coarse_filter(self, coarse): return [x.Symbol for x in coarse if x.HasFundamentalData] def train_model(self): if self.x is None or self.y is None: return idx = self.x.index.intersection(self.y.index) # Removing features without matching targets idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days self.x, self.y = self.x.loc[idx], self.y.loc[idx] days = idx.get_level_values("time") if len(days.unique()) <= self.cv_splits: return # Days required for CV y_bin = self.y.apply(lambda x: +1 if x >= self.tgt_gain and self.strategy >= 0 else -1 if x <= -self.tgt_gain and self.strategy <= 0 else 0) # Replace cross_val_score with 
custom loop to use sample weighted metrics scores = [] for train_idx, test_idx in self.cv.split(self.x, groups=days): model_tmp = sklearn.base.clone(self.model) x_train, x_test = self.x.iloc[train_idx], self.x.iloc[test_idx] y_train, y_test = y_bin.iloc[train_idx], y_bin.iloc[test_idx] w_train, w_test = self.y.iloc[train_idx], self.y.iloc[test_idx] model_tmp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by log returns y_pred = model_tmp.predict(x_test) scores += [utl.kelly_score(y_test, y_pred, returns=abs(w_test))] # Kelly calculated with normal returns self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(self.y))) self.kelly = np.clip(np.nanmean(scores), 0, 1) self.print(f"Training: {y_bin.value_counts()} Kelly:{self.kelly:.1%}") self.Plot("ML", "Kelly", self.kelly) def enter_trades(self): self.store_features() if self.kelly <= 0: return x_pred = self.x.query("time == @self.Time.date()") x_pred.index = x_pred.index.droplevel("time") y_proba = pd.DataFrame(self.model.predict_proba(x_pred), index=x_pred.index, columns=self.model.classes_) self.print(f"Predictions: {y_proba.to_string()}") self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_proba.to_html()) self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_proba.to_html()) y_pred = y_proba.idxmax(axis=1) sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Selecting only prob > min_proba and scaling positions = y_pred * sizes * self.kelly # Sizing based on Kelly and individual probability if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used self.print(f"Trading: {y_pred.value_counts()}") for symbol, pos in positions[positions != 0].items(): qty = self.CalculateOrderQuantity(symbol, pos) self.MarketOrder(symbol, qty) # TODO: Change to Limit Order? 
def exit_trades(self): self.Transactions.CancelOpenOrders() self.Liquidate() self.store_targets() @utl.catch_errors def store_features(self): tickers = list(self.ActiveSecurities.Keys) #tickers = list(np.random.choice(tickers, size=100, replace=False)) today = self.Time.replace(hour=0, minute=0, second=0, microsecond=0) start_day = today - timedelta(self.lookback) if self.x is None else \ self.x.index.get_level_values("time").max() day_bars = self.History(tickers, start_day, today, Resolution.Daily) time_idx = day_bars.index.get_level_values("time").shift(-1, freq="D") symbol_idx = day_bars.index.get_level_values("symbol") day_bars.set_index([symbol_idx, time_idx], inplace=True) today_start = today.replace(hour=9, minute=30) today_stop = today.replace(hour=self.entry_hr, minute=self.entry_mn) today_bar = self.History(tickers, today_start, today_stop, Resolution.Minute) day_bars = day_bars.append(utl.agg_bars(today_bar)).groupby("symbol").shift(1) valid_bars = day_bars.query("close * volume >= @self.min_usd_volume") universe = valid_bars.reset_index().groupby("time")["symbol"].apply(list) for day, symbols in universe.items(): start = day.replace(hour=7, minute=1) last_minute = day.replace(hour=self.entry_hr, minute=self.entry_mn) minute_bars = self.History(symbols, start, last_minute, Resolution.Minute) if len(minute_bars) == 0: continue pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30") bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{self.entry_hr}:{self.entry_mn}") # TODO: Check 9:31 filter new_x = bertha_bar.add_prefix("bb_") new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True) new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True) new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True) new_x["pm_volume_usd"] = pm_bar.eval("close * volume") """prev_day_bar = self.History(tickers, 1, Resolution.Daily) prev_day_close = prev_day_bar["close"].droplevel("time") new_x["gap"] = bertha_bar["open"] / prev_day_close - 1""" new_x = new_x.dropna() self.x = pd.concat([new_x, self.x]) self.print(f"Stored features, total: {len(self.x)}") @utl.catch_errors def store_targets(self): last_x = self.x if self.y is None else \ self.x.loc[self.x.index.difference(self.y.index)] universe = last_x.reset_index().groupby("time")["symbol"].apply(list) for day, symbols in universe.items(): entry_time = day.replace(hour=self.entry_hr, minute=self.entry_mn) exit_time = day.replace(hour=self.exit_hr, minute=self.exit_mn) minute_bars = self.History(symbols, entry_time, exit_time, Resolution.Minute) trading_bar = utl.agg_bars(minute_bars) new_y = trading_bar.eval("close / open - 1").dropna() # Calculate the trading return self.y = pd.concat([new_y, self.y]) self.print(f"Stored targets, total: {len(self.y)}") def print(self, msg): self.Debug(f"{self.Time} {msg}")
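A quick hand-check of this version's probability-based sizing (sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2), run outside the algorithm with made-up numbers and kelly = 0.5:

import pandas as pd

y_proba = pd.DataFrame({-1: [0.1, 0.5, 0.7], 0: [0.2, 0.3, 0.2], 1: [0.7, 0.2, 0.1]},
                       index=["AAA", "BBB", "CCC"])  # hypothetical class probabilities
kelly = 0.5
y_pred = y_proba.idxmax(axis=1)                      # AAA -> +1, BBB -> -1, CCC -> -1
sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2   # AAA: 0.4, BBB: 0.0, CCC: 0.4
positions = y_pred * sizes * kelly                   # AAA: +0.2, BBB: 0.0, CCC: -0.2
if positions.abs().sum() > 1:
    positions /= positions.abs().sum()               # cap gross exposure at 100%
print(positions[positions != 0])                     # BBB is skipped: its top probability is only 0.5

A symbol is only traded when its strongest class probability exceeds 0.5, and its weight grows linearly toward 100% (times the Kelly score) as that probability approaches 1.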