Overall Statistics

| Metric | Value | Metric | Value |
|---|---|---|---|
| Total Orders | 796 | Average Win | 0.30% |
| Average Loss | -0.27% | Compounding Annual Return | 22.691% |
| Drawdown | 16.700% | Expectancy | 0.146 |
| Start Equity | 1000000 | End Equity | 1161330.40 |
| Net Profit | 16.133% | Sharpe Ratio | 0.756 |
| Sortino Ratio | 0.832 | Probabilistic Sharpe Ratio | 52.130% |
| Loss Rate | 46% | Win Rate | 54% |
| Profit-Loss Ratio | 1.11 | Alpha | -0.066 |
| Beta | 1.175 | Annual Standard Deviation | 0.144 |
| Annual Variance | 0.021 | Information Ratio | -0.529 |
| Tracking Error | 0.076 | Treynor Ratio | 0.093 |
| Total Fees | $2246.54 | Estimated Strategy Capacity | $4100000.00 |
| Lowest Capacity Asset | SPY 32L2AOK4HRE5I\|SPY R735QTJ8XC9X | Portfolio Turnover | 29.52% |
# region imports from AlgorithmImports import * # endregion # Your New Python File general_setting = { "If_manual_choose_replication": True, # If we manually choose hedge, then we will choose replication method based on replicate_method, Or compare different methods based on some metrix "replicate_method": "lasso", # "lasso" , "elasticNet", "groupSelection" "opt_method": 'Momentum', # "markowitz", "StockRanker", "Momentum" "mom_window": 7, # Calculate the window of previous n days. "mom_diff_weight" : 0.005, "rebalance_interval": "week", # biweek; week; .... "risk_pct": 0.1, "Manual_Hedge": False, # If we manually choose hedge, then we will notice if_hedge button, Or we can use other methods to choose if hedge or not "If_Hedge": True, "hedge_size": 1, # set minimum weight selection "if_preselect": True, "weight_threshold": 0.002, "pct_num_group": 0.05, "if_manual_set_number_lasso": True, "lasso_alpha": 0.3, "num_lasso": 10, # or it can set by calculate the min tracking error "if_manual_set_number_elasticNet": True, "elasticNet_alpha": 0.1, "num_elasticNet": 10, # or it can set by calculate the min tracking error "elasticNet_l1_ratio": 0.5, # Not yet set up TP/SL mechanism "Take_Profit_pct": 0.1, "Stop_Loss_pct": 0.05, # Regime Recognization "regime_method": "GMM", # SMA150, SMA200, VIX, GMM "regime_interval": "week", "regime_lookback_window": 90, # To be constructed "Index": "SPY", "stockranker_feature":['roc'], }
# region imports from AlgorithmImports import * # endregion import random from scipy.stats import norm from scipy.optimize import fsolve import pandas as pd from datetime import datetime import cvxopt as opt from cvxopt import blas, solvers from config import general_setting import matplotlib as mpl import matplotlib.pyplot as plt import seaborn as sns from scipy.stats import pearsonr from scipy import stats import statsmodels.api as sm import statsmodels.formula.api as smf from statsmodels.stats.stattools import durbin_watson from sklearn.linear_model import ElasticNet, Lasso, Ridge, LinearRegression from sklearn.metrics import mean_squared_error from sklearn.preprocessing import PolynomialFeatures import joblib import os import lightgbm as lgb import math from scipy.stats import linregress from hmmlearn.hmm import GaussianHMM import scipy import datetime import json import seaborn as sns import joblib import sklearn from datetime import datetime import numpy as np import pandas as pd import statsmodels.api as sm from scipy import stats from matplotlib import cm,pyplot as plt import random class GBSM: def __init__(self, S, X, T, r, b, sigma, type): self.S = S self.X = X self.T = T self.r = r self.b = b self.sigma = sigma self.type = type self.d1=(np.log(S/X)+(b+sigma*sigma/2)*T)/(sigma*np.sqrt(T)) self.d2 = self.d1 - sigma * np.sqrt(T) self.Delta = self.GBSM_Delta() def GBSM_Delta(self): tmp = np.exp(((self.b-self.r) * self.T)) if self.type == "call": return tmp * norm.cdf(self.d1) else: return tmp * (norm.cdf(self.d1) - 1) class EnhancedIndexing(QCAlgorithm): def initialize(self): self.set_start_date(2024, 1, 1) # self.set_end_date(2016, 1, 10) self.set_cash(1000000) self.spy = self.add_equity('SPY', resolution = Resolution.MINUTE) self.spy_sma150 = self.sma("SPY", 150) self.spy_sma200 = self.sma("SPY", 200) option = self.AddOption('SPY',Resolution.MINUTE) self.option_symbol = option.Symbol # option.SetFilter(lambda universe: universe.include_weeklys()) # 
option.SetFilter(-10, 10, 0, 180) option.set_filter(lambda option_filter_universe: option_filter_universe.standards_only().strikes(-20, 20).expiration(0, 90)) self.contract = None self.set_benchmark("SPY") self.contract_size = None self.strike = None self.has_contract = False self.curr_spy = None # self.set_warm_up(100) # Access to General Setting self.if_manual_rep = general_setting["If_manual_choose_replication"] self.replicate_method = general_setting['replicate_method'] self.opt_method = general_setting["opt_method"] self.rebalance_interval = general_setting["rebalance_interval"] self.manual_Hedge = general_setting["Manual_Hedge"] if self.manual_Hedge: self.if_hedge = general_setting["If_Hedge"] self.hedge_size = general_setting['hedge_size'] self.pct_num_group = general_setting["pct_num_group"] self.lasso_alpha = general_setting['lasso_alpha'] self.if_manual_set_number_lasso = general_setting["if_manual_set_number_lasso"] self.num_lasso = general_setting["num_lasso"] self.elasticNet_alpha = general_setting['elasticNet_alpha'] self.elasticNet_l1_ratio = general_setting['elasticNet_l1_ratio'] self.if_manual_set_number_elasticNet = general_setting["if_manual_set_number_elasticNet"] self.num_elasticNet = general_setting["num_elasticNet"] self.regime_interval = general_setting["regime_interval"] self.regime_method = general_setting["regime_method"] self.regime_lookback_window = general_setting["regime_lookback_window"] self.if_preselect = general_setting['if_preselect'] self.weight_threshold = general_setting['weight_threshold'] self.mom_window = general_setting['mom_window'] self.risk_pct = general_setting['risk_pct'] # Add universe _universe = self.add_universe(self.universe.etf("SPY")) history = self.History(_universe, 360, Resolution.MINUTE) self.debug(history) history = history.droplevel('symbol', axis=0) self.universe_dict = {} self.ticker_list = [] for date, fundamentals in history.items(): ls = [] for fundamental in fundamentals: symbol = fundamental.symbol 
weight = fundamental.weight row = {'ticker': symbol.value, 'symbol': symbol, 'weight': weight} if self.if_preselect: if weight > self.weight_threshold: # if weight > 0.004: self.ticker_list.append(symbol.value) self.universe_dict[symbol.value] = self.add_equity(symbol.value, resolution = Resolution.DAILY) ls.append(row) else: continue else: self.ticker_list.append(symbol.value) self.universe_dict[symbol.value] = self.add_equity(symbol.value, resolution = Resolution.DAILY) ls.append(row) break self.init_df = pd.DataFrame(ls) self.init_df['weight_adj'] = self.init_df['weight']/self.init_df['weight'].sum() self.weight_universe = self.init_df.set_index('ticker')['weight_adj'].to_dict() self.rebalance_signal = False self.selected_stocks = [] self.portfolio_weights = {} self.latent_states_sequence = None self.best_status_sequence = None # Add Options # option = self.AddOption('SPX',Resolution.Daily) # self.option_symbol = option.Symbol # option.SetFilter(timedelta(0), timedelta(150)) # Initial a optimized weight at the begining self.Schedule.On( self.DateRules.today, self.TimeRules.now, self.regime_recognization, ) self.Schedule.On( self.DateRules.today, self.TimeRules.now, self.rebalance_weights, ) # Every Month rebalance if self.rebalance_interval == 'month': self.Schedule.On( self.DateRules.month_start(self.spy.symbol, 0), # Month Start or Month end or add days offset (daysOffset = 0), or dynamically adjust based on weekdays, earnings, FOMC ... 
self.TimeRules.after_market_open(self.spy.symbol, 10), # Before market or after market self.rebalance_weights, ) # Every week rebalance if self.rebalance_interval == 'week': self.Schedule.On( self.DateRules.week_start(self.spy.symbol, 0), # self.date_rules.every(DayOfWeek.THURSDAY), self.TimeRules.after_market_open(self.spy.symbol, 10),# Before market or after market self.rebalance_weights, ) if not self.manual_Hedge: self.Schedule.On( self.DateRules.today, self.TimeRules.now, self.regime_recognization, ) # Every Month recognize regimes if self.rebalance_interval == 'month': self.Schedule.On( self.DateRules.month_start(self.spy.symbol, 0), # Month Start or Month end or add days offset (daysOffset = 0), or dynamically adjust based on weekdays, earnings, FOMC ... self.TimeRules.after_market_open(self.spy.symbol, 5), # Before market or after market self.regime_recognization, ) # Every week recognize regime if self.rebalance_interval == 'week': self.Schedule.On( self.DateRules.week_start(self.spy.symbol, 0), # self.date_rules.every(DayOfWeek.THURSDAY), self.TimeRules.after_market_open(self.spy.symbol, 5),# Before market or after market self.regime_recognization, ) # Manually set event date function # date_rule = FuncDateRule( # name="10th_day_of_the_month", # get_dates_function=lambda start, end: [ # datetime(year, month, 10) # for year in range(start.year, end.year) for month in range(1,12) # ] # ) # Every 2 week rebalance # if self.rebalance_interval == 'biweek': # self.Schedule.On( # self.DateRules.week_start(self.spy.symbol,daysOffset= 0), # Month Start or Month end or add days offset, or dynamically adjust based on weekdays, earnings, FOMC ... 
# self.TimeRules.after_market_open(self.spy.symbol, 10), # self.rebalance_weights, # ) # Cancel schedule # self.schedule.remove(scheduled_event) def regime_recognization(self): random.seed(10) ''' 3 status: Bullish, Bearish, flat method: Machine learning: classification VIX value Other indicators ''' if self.regime_method == 'SMA200': if self.spy_sma200.current.value > self.spy.Close : self.if_hedge = True return else: self.if_hedge = False return if self.regime_method == 'SMA150': if self.spy_sma150.current.value > self.spy.Close : self.if_hedge = True return else: self.if_hedge = False return if self.regime_method == 'GMM': start_time = datetime(self.Time.year-4,self.Time.month, self.Time.day) end_time = self.Time.date data_use = self.history(self.spy.symbol, timedelta(self.regime_lookback_window), Resolution.DAILY).droplevel('symbol', axis=0) data_use = data_use.reset_index() close=data_use['close'] high=data_use['high'][5:] low=data_use['low'][5:] volume=data_use['volume'][5:] money=data_use['volume'][5:] logreturn=(np.log(np.array(close[1:]))-np.log(np.array(close[:-1])))[4:] logreturn5=(np.log(np.array(close[5:]))-np.log(np.array(close[:-5]))) diffreturn = (np.log(np.array(high))-np.log(np.array(low))) closeidx = close[5:] # data_1 = data_use.copy() # data_1['log_close'] = np.log(data_1['close']) # data_1['log_ret'] = data_1['log_close'].diff() # data_1['log_high'] = np.log(data_1['high']) # data_1['log_low'] = np.log(data_1['low']) # data_1['log_ret5'] = data_1['log_close'].diff(5) # data_1['diff_ret'] = data_1['log_high'] - data_1['log_low'] # data_1['closeidx'] = data_1['close'] X = np.column_stack([logreturn,diffreturn,logreturn5]) datelist= pd.to_datetime(data_use.time[5:]) hmm=GaussianHMM(n_components = 3, covariance_type ='diag', n_iter = 6000, random_state = 6).fit(X) try: latent_states_sequence = hmm.predict(X) except: self.if_hedge = True return # len(latent_states_sequence) # # PLOTTING # 
data_use_2=data_use.drop_duplicates(subset=['time'],keep='last') datelist= pd.to_datetime(data_use_2.time[5:]) # # sns.set_style("white") # plt.figure(figsize = (15,8)) for i in range(hmm.n_components): state = (latent_states_sequence ==i) # plt.plot(datelist[state],closeidx[state],'.',label='latent state %d'%i,lw =1) # plt.legend() # plt.grid(1) data=pd.DataFrame({'datelist':datelist,'logreturn':logreturn,'state':latent_states_sequence}).set_index('datelist') # plt.figure(figsize=(15,8)) max_idx = 0 max = -10000 for i in range(hmm.n_components): state = (latent_states_sequence ==i) idx = np.append(0,state[:-1]) data['state %d_return'%i] = data.logreturn.multiply(idx,axis = 0) if np.exp(data['state %d_return'%i].cumsum())[-1] > max: max_idx = i max = np.exp(data['state %d_return'%i].cumsum())[-1] # plt.plot(np.exp(data['state %d_return'%i].cumsum()),label='latent state %d'%i) # plt.legend() # plt.grid(1) # buy = (latent_states_sequence ==0) # buy = np.append(0,buy[:-1]) # sell = (latent_states_sequence == 1) # sell = np.append(0,sell[:-1]) # data['backtest_return'] = data.logreturn.multiply(buy,axis=0)-data.logreturn.multiply(sell,axis=0) # plt.figure(figsize = (15,8)) # plt.plot_date(datelist,np.exp(data['backtest_return'].cumsum()),'-',label='backtest result') # plt.legend() # plt.grid(1) self.latent_states_sequence = latent_states_sequence[-1] self.best_status_sequence = max_idx if latent_states_sequence[-1] == max_idx: self.if_hedge = False return else: self.if_hedge = True return if self.regime_method== 'VIX': ... ''' VIX vs EMA(100) ''' if self.regime_method == 'vote': ... 
return res def get_historical_data(self): def slope(data): return linregress(np.arange(data.shape[0]),data).slope ticker_list = self.ticker_list start_time = datetime(self.Time.year-2,self.Time.month, self.Time.day) # obtain 2 years historical data end_time = self.Time.date daily_dfs = [] fund_ = {} daily_dfs2 = [] for i in ticker_list: try: # daily_data = self.history([self.securities[i].symbol], timedelta(500), Resolution.DAILY).droplevel('symbol', axis=0)[['close']].rename(columns = {'close':i}) # daily_dfs.append(daily_data) daily_data = self.history(self.securities[i].symbol, timedelta(500), Resolution.DAILY).droplevel('symbol', axis=0)[['close']] # test_data = self.history(universe_dict[i].symbol, start_time1, end_time1).droplevel('symbol', axis=0)[['close']] daily_data1 = daily_data.rename(columns= {'close':i}) daily_data2 = daily_data.copy() daily_data2['ticker'] = i daily_data2['slope_20'] = daily_data2['close'].rolling(20,min_periods=20).apply(slope,raw=True) daily_data2['ret'] = daily_data2['close'].shift(5)/daily_data2['close'] - 1 # test_data['ticker'] = i # test_data['slope_20'] = test_data['close'].rolling(20,min_periods=20).apply(slope,raw=True) # test_data['ret'] = test_data['close'].shift(5)/test_data['close'] - 1 # daily_data['label1'] = pd.qcut(daily_data['label'],5, labels= [1,2,3,4,5]) daily_dfs.append(daily_data1) daily_dfs2.append(daily_data2) # test_dfs.append(test_data) except: continue daily_ = pd.concat(daily_dfs, axis = 1, sort = True, join = 'outer') daily_1 = daily_.dropna(axis=1).reset_index() spy_data = self.history([self.spy.symbol], timedelta(500), Resolution.DAILY).droplevel('symbol', axis=0)[['close']].rename(columns = {'close':'spy'}) daily_2 = pd.merge(daily_1, spy_data.reset_index(), on = ['time'], how = 'left') daily2_ = pd.concat(daily_dfs2, axis = 0, sort = True, join = 'outer') daily2_ = daily2_.dropna() if len(self.selected_stocks) > 0: daily2_ = daily2_[daily2_['ticker'].isin(self.selected_stocks)] latest_date = 
daily2_.index[-1] test_df = daily2_[daily2_.index > pd.to_datetime((self.Time.date()-timedelta(30))) ] daily2_ = daily2_[daily2_.index <= pd.to_datetime((self.Time.date()-timedelta(30))) ] daily2_ = daily2_.set_index([daily2_.index, 'ticker']).sort_index() daily2_['label'] = daily2_.groupby(['time'])['ret'].transform(lambda x:pd.qcut(x,[0,0.2,0.4,0.6,0.8,1], labels=[1,2,3,4,5])) test_df = test_df.set_index([test_df.index, 'ticker']).sort_index() test_df['label'] = test_df.groupby(['time'])['ret'].transform(lambda x:pd.qcut(x,[0,0.2,0.4,0.6,0.8,1], labels=[1,2,3,4,5])) test_df = test_df.loc[(latest_date,slice(None)),:] return daily_1, daily_2, daily2_, test_df def get_fundamental_data(self): fund_ = [] a_time = self.Time - timedelta(7) b_time = self.Time for i in self.ticker_list: single_fundamental_history = self.history[Fundamental](i, a_time, b_time) for fundamental in single_fundamental_history: if fundamental.has_fundamental_data: symbol = fundamental.symbol industry1 = fundamental.asset_classification.MorningstarSectorCode market_cap = fundamental.market_cap # self.debug(f"symbol is {symbol}, sector is {industry1}") row = {'ticker': i , 'industry1': industry1, 'market_cap': market_cap} fund_.append(row) else: continue fund_df = pd.DataFrame(fund_) fund_df=fund_df.drop_duplicates(subset = 'ticker',keep = 'last') fund_df = fund_df[fund_df['ticker'].isin([101, 102, 103, 104, 205, 206, 207, 308, 310])] MORNINGSTAR_SECTOR_CODES = { # 0 : 'Other', # xiabiande, meiyou 0 zhegexuanxiang 101: 'Basic Materials', 102: 'Consumer Cyclical', 103: 'Financial Services', 104: 'Real Estate', 205: 'Consumer Defensive', 206: 'Healthcare', 207: 'Utilities', 308: 'Communication Services', 309: 'Energy', 310: 'Industrials', 311: 'Technology' , } fund_df['sector'] = fund_df['industry1'].apply(lambda x:MORNINGSTAR_SECTOR_CODES[x]) return fund_df def replicate_portfolio(self, method = 'lasso'): random.seed(10) daily_1, daily_2, _, _ = self.get_historical_data() fund_df = 
self.get_fundamental_data() # train_df = daily_1[:3019] # test_df = daily_1[3020:] train_df = daily_1.copy() train_df = train_df.drop(columns = {'time'}) # test_df = test_df.drop(columns = {'time'}) train_features = train_df.copy() train_index = daily_2[['spy']] # self.debug(f"train feature length is {train_features} and train_index length is {train_features}") # test_features = test_df.copy() # test_index = spy_data.reset_index()[['spy']][3020:] assets_names = train_df.columns if method == 'lasso': lasso_reg = Lasso(self.lasso_alpha) lasso_reg.fit(train_features, train_index) # Get the model parameters intercept_lasso = lasso_reg.intercept_ coefficients_lasso = lasso_reg.coef_ # print("Lasso Intercept: \n", intercept_lasso) # print("Lasso Coefficients: \n", coefficients_lasso) # Evaluate the model y_pred_lasso = lasso_reg.predict(train_features) # Calculate the mean squared error of the predictions mse_lasso = mean_squared_error(train_index, y_pred_lasso) df_Lasso = pd.DataFrame(coefficients_lasso) df_Lasso.columns = ['weight'] df_Lasso['symbol'] = assets_names df_Lasso['abs_weight'] = abs(df_Lasso['weight']) df_Lasso_select = df_Lasso.sort_values(by=['abs_weight'], ascending= False) df_Lasso_select = df_Lasso_select.head(30) train_lasso_feature = train_features[df_Lasso_select.symbol] train_lasso_feature_T = train_lasso_feature.T.reset_index() train_lasso_feature_T = train_lasso_feature_T.rename(columns = {'index':'symbol'}) d = train_lasso_feature_T.drop(columns=['symbol']) # Calculate the replication index weights = np.array([list(df_Lasso_select['weight'])]) data = np.array(d) result = np.dot(weights, data) result = result[0] # result = weights@data # Calculate the TrackingError df_lasso_TrackingError = pd.DataFrame(train_index) df_lasso_TrackingError['RepIdx'] = result df_lasso_TrackingError['SPY_ret'] = df_lasso_TrackingError['spy'].pct_change() df_lasso_TrackingError['RepIdx_ret'] = df_lasso_TrackingError['RepIdx'].pct_change() df_lasso_TrackingError = 
df_lasso_TrackingError.dropna() df_lasso_TrackingError['TD'] = df_lasso_TrackingError['SPY_ret'] - df_lasso_TrackingError['RepIdx_ret'] df_lasso_TrackingError['TD_squared'] = df_lasso_TrackingError['TD']**2 TE = np.sqrt(sum(df_lasso_TrackingError['TD_squared'])/(len(df_lasso_TrackingError)-1)) num_trial = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100] min_te_num = 10 min_te = 10000 TE_ = [] for i in num_trial: df_Lasso = pd.DataFrame(coefficients_lasso) df_Lasso.columns = ['weight'] df_Lasso['symbol'] = assets_names df_Lasso['abs_weight'] = abs(df_Lasso['weight']) df_Lasso_select = df_Lasso.sort_values(by=['abs_weight'], ascending= False) df_Lasso_select = df_Lasso_select.head(i) train_lasso_feature = train_features[df_Lasso_select.symbol] train_lasso_feature_T = train_lasso_feature.T.reset_index() train_lasso_feature_T = train_lasso_feature_T.rename(columns = {'index':'symbol'}) d = train_lasso_feature_T.drop(columns=['symbol']) weights = np.array([list(df_Lasso_select['weight'])]) data = np.array(d) result = np.dot(weights, data) result = result[0] df_lasso_TrackingError = pd.DataFrame(train_index) df_lasso_TrackingError['RepIdx'] = result df_lasso_TrackingError['SPY_ret'] = df_lasso_TrackingError['spy'].pct_change() df_lasso_TrackingError['RepIdx_ret'] = df_lasso_TrackingError['RepIdx'].pct_change() df_lasso_TrackingError = df_lasso_TrackingError.dropna() df_lasso_TrackingError['TD'] = df_lasso_TrackingError['SPY_ret'] - df_lasso_TrackingError['RepIdx_ret'] df_lasso_TrackingError['TD_squared'] = df_lasso_TrackingError['TD']**2 TE = np.sqrt(sum(df_lasso_TrackingError['TD_squared'])/(len(df_lasso_TrackingError)-1)) if TE < min_te: min_te = TE min_te_num = i TE_.append(TE) if self.if_manual_set_number_lasso: return list(df_Lasso_select.head(self.num_lasso)['symbol']) else: return list(df_Lasso_select.head(min_te_num)['symbol']) if method == 'elasticNet': elastic_reg = ElasticNet( alpha=self.elasticNet_alpha, l1_ratio=self.elasticNet_l1_ratio ) 
elastic_reg.fit(train_features, train_index) # Get the model parameters intercept_elastic = elastic_reg.intercept_ coefficients_elastic = elastic_reg.coef_ # print("Elastic Net Intercept: \n", intercept_elastic) # print("Elastic Net Coefficients: \n", coefficients_elastic) # Evaluate the model y_pred_elastic = elastic_reg.predict(train_features) # Calculate the mean squared error of the predictions mse_elastic = mean_squared_error(train_index, y_pred_elastic) df_Elastic = pd.DataFrame(coefficients_elastic) df_Elastic.columns = ['weight'] df_Elastic['symbol'] = assets_names df_Elastic['abs_weight'] = abs(df_Elastic['weight']) df_Elastic_select = df_Elastic.sort_values(by=['abs_weight'], ascending= False) df_Elastic_select = df_Elastic_select.head(50) train_Elastic_feature = train_features[df_Elastic_select.symbol] train_Elastic_feature_T = train_Elastic_feature.T.reset_index() train_Elastic_feature_T = train_Elastic_feature_T.rename(columns = {'index':'symbol'}) d = train_Elastic_feature_T.drop(columns=['symbol']) # Calculate the replication index weights = np.array([list(df_Elastic_select['weight'])]) data = np.array(d) result = np.dot(weights, data) result = result[0] # result = weights@data # Calculate the TrackingError df_Elastic_TrackingError = pd.DataFrame(train_index) df_Elastic_TrackingError['RepIdx'] = result df_Elastic_TrackingError['SPY_ret'] = df_Elastic_TrackingError['spy'].pct_change() df_Elastic_TrackingError['RepIdx_ret'] = df_Elastic_TrackingError['RepIdx'].pct_change() df_Elastic_TrackingError = df_Elastic_TrackingError.dropna() df_Elastic_TrackingError['TD'] = df_Elastic_TrackingError['SPY_ret'] - df_Elastic_TrackingError['RepIdx_ret'] df_Elastic_TrackingError['TD_squared'] = df_Elastic_TrackingError['TD']**2 TE = np.sqrt(sum(df_Elastic_TrackingError['TD_squared'])/(len(df_Elastic_TrackingError)-1)) num_trial = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100] min_te_num = 10 min_te = 10000 TE_ = [] for i in num_trial: df_Elastic = 
pd.DataFrame(coefficients_elastic) df_Elastic.columns = ['weight'] df_Elastic['symbol'] = assets_names df_Elastic['abs_weight'] = abs(df_Elastic['weight']) df_Elastic_select = df_Elastic.sort_values(by=['abs_weight'], ascending= False) df_Elastic_select = df_Elastic_select.head(i) train_Elastic_feature = train_features[df_Elastic_select.symbol] train_Elastic_feature_T = train_Elastic_feature.T.reset_index() train_Elastic_feature_T = train_Elastic_feature_T.rename(columns = {'index':'symbol'}) d = train_Elastic_feature_T.drop(columns=['symbol']) weights = np.array([list(df_Elastic_select['weight'])]) data = np.array(d) result = np.dot(weights, data) result = result[0] df_Elastic_TrackingError = pd.DataFrame(train_index) df_Elastic_TrackingError['RepIdx'] = result df_Elastic_TrackingError['SPY_ret'] = df_Elastic_TrackingError['spy'].pct_change() df_Elastic_TrackingError['RepIdx_ret'] = df_Elastic_TrackingError['RepIdx'].pct_change() df_Elastic_TrackingError = df_Elastic_TrackingError.dropna() df_Elastic_TrackingError['TD'] = df_Elastic_TrackingError['SPY_ret'] - df_Elastic_TrackingError['RepIdx_ret'] df_Elastic_TrackingError['TD_squared'] = df_Elastic_TrackingError['TD']**2 TE = np.sqrt(sum(df_Elastic_TrackingError['TD_squared'])/(len(df_Elastic_TrackingError)-1)) if TE < min_te: min_te = TE min_te_num = i TE_.append(TE) if self.if_manual_set_number_elasticNet: return list(df_Elastic_select.head(self.num_elasticNet)['symbol']) return list(df_Elastic_select.head(min_te_num)['symbol']) if method == 'groupSelection': # Calculate every stock's tracking error ret_daily = daily_2.set_index(['time']).pct_change() tracking_error_dict = {} ticker_list_new = daily_2.columns[1:-1] for i in ticker_list_new: tmp_df = ret_daily[[i,'spy']].dropna() tmp_df['TD'] = tmp_df['spy'] - tmp_df[i] tmp_df['TD_squared'] = tmp_df['TD']**2 TE = np.sqrt(sum(tmp_df['TD_squared'])/(len(tmp_df)-1)) tracking_error_dict[i] = TE fund_df_new = fund_df[fund_df['ticker'].isin(ticker_list_new)] 
fund_df_new = fund_df_new[['ticker','market_cap','sector']] fund_df_new['tracking_error'] = fund_df_new['ticker'].apply(lambda x:tracking_error_dict[x]) fund_df_group_industry = fund_df_new.groupby(['sector']) industry_dict = {} select_stock = [] for industry_sec, industry_sec_df in fund_df_group_industry: # try: industry_sec_df = industry_sec_df.sort_values(by = ['market_cap']) industry_sec_df = industry_sec_df[industry_sec_df['market_cap'] > 0 ] industry_sec_df['market_size'] = industry_sec_df['market_cap'].transform(lambda x:pd.qcut(x,[0,0.33,0.67,1],labels=['small','medium','large'])) industry_dict[industry_sec[0]] = industry_sec_df for a, b in industry_sec_df.groupby(['market_size']): num = int(0.1*len(b)) + 1 b = b.sort_values(by = ['tracking_error'], ascending = True) select_stock = select_stock + list(b['ticker'])[:num] # except: # continue return select_stock # if method == 'passive_Ownership': # if method == 'compare': return stock_list def optimal_portfolio(self, method='markowitz'): random.seed(10) if method == 'markowitz': # Obtain returns daily_1 = self.get_historical_data()[0] opt_df1 = daily_1[['time']+self.selected_stocks] returns = opt_df1.set_index(['time']).pct_change().dropna() # Optimization n = len(returns) returns = np.asmatrix(returns) N = 3 mus = [10**(5.0 * t/N - 1.0) for t in range(N)] # 转化为cvxopt matrices S = opt.matrix(np.cov(returns)) pbar = opt.matrix(np.mean(returns, axis=1)) # 约束条件 G = -opt.matrix(np.eye(n)) # opt默认是求最大值,因此要求最小化问题,还得乘以一个负号 h = opt.matrix(1/500, (n ,1)) A = opt.matrix(1.0, (1, n)) b = opt.matrix(1.0) # 使用凸优化计算有效前沿 portfolios = [solvers.qp(mu*S, -pbar, G, h, A, b)['x'] for mu in mus] ## 计算有效前沿的收益率和风险 returns = [blas.dot(pbar, x) for x in portfolios] risks = [np.sqrt(blas.dot(x, S*x)) for x in portfolios] m1 = np.polyfit(returns, risks, 2) # polyfit函数可以使用最小二乘法将一些点拟合成一条曲线. 
x1 = np.sqrt(m1[2] / m1[0]) # 计算最优组合 try: wt = solvers.qp(opt.matrix(x1 * S), -pbar, G, h, A, b)['x'] # return np.asarray(wt), returns, risks return np.asarray(wt) except: self.rebalance_signal = False return 0 if method == 'StockRanker': def prepare_groups(df): df_copy = df.copy(deep=True) df_copy['day'] = df_copy.index group = df_copy.groupby('day')['day'].count() return group def train(x_train, y_train, q_train, model_save_path): ''' 模型的训练和保存 ''' train_data = lgb.Dataset(x_train, label=y_train, group=q_train) params = { 'task': 'train', # 执行的任务类型 'boosting_type': 'gbrt', # 基学习器 'objective': 'lambdarank', # 排序任务(目标函数) 'metric': 'ndcg', # 度量的指标(评估函数) 'max_position': 10, # @NDCG 位置优化 'metric_freq': 1, # 每隔多少次输出一次度量结果 'train_metric': True, # 训练时就输出度量结果 'ndcg_at': [10], 'max_bin': 255, # 一个整数,表示最大的桶的数量。默认值为 255。lightgbm 会根据它来自动压缩内存。如max_bin=255 时,则lightgbm 将使用uint8 来表示特征的每一个值。 'num_iterations': 500, # 迭代次数 'learning_rate': 0.01, # 学习率 'num_leaves': 31, # 叶子数 # 'max_depth':6, 'tree_learner': 'serial', # 用于并行学习,‘serial’: 单台机器的tree learner 'min_data_in_leaf': 30, # 一个叶子节点上包含的最少样本数量 'verbose': 2 # 显示训练时的信息 } gbm = lgb.train(params, train_data, valid_sets=[train_data]) # import pickle # serializedObject = pickle.dumps(gbm) # modelKey = "MyModel" # qb.ObjectStore.SaveBytes(modelKey, serializedObject) # serializedObject = bytes(qb.ObjectStore.ReadBytes(modelKey)) gbm.save_model(model_save_path) # qb.ObjectStore.Save(model_key, clf) def predict(x_test, model_input_path): ''' 预测得分并排序 ''' # clf = pickle.loads(serializedObject) gbm = lgb.Booster(model_file=model_input_path) # 加载model ypred = gbm.predict(x_test) predicted_sorted_indexes = np.argsort(ypred)[::-1] # 返回从大到小的索引 # t_results = comments[predicted_sorted_indexes] # 返回对应的comments,从大到小的排序 # return t_results return predicted_sorted_indexes # daily2_ = pd.concat(daily_dfs2, axis = 0, sort = False, join = 'outer') # daily2_ = daily2_.dropna() # daily2_ = daily2_.set_index([daily2_.index, 'ticker']).sort_index() # 
daily2_['label'] = daily2_.groupby(['time'])['ret'].transform(lambda x:pd.qcut(x,[0,0.2,0.4,0.6,0.8,1], labels=[1,2,3,4,5])) # test_df = pd.concat(test_dfs, axis = 0, sort = False, join = 'outer') # test_df = test_df.dropna() # # test_df = test_df[test_df['time'] == datetime(2024, 6, 29)] # test_df = test_df.set_index([test_df.index, 'ticker']).sort_index() # test_df['label'] = test_df.groupby(['time'])['ret'].transform(lambda x:pd.qcut(x,[0,0.2,0.4,0.6,0.8,1], labels=[1,2,3,4,5])) # test_df = test_df.loc[(datetime(2024, 6,29),slice(None)),:] _, _, daily2_, test_df = self.get_historical_data() x_train = daily2_[['slope_20']] y_train = daily2_['label'].astype(int) q_train = prepare_groups(x_train) x_test = test_df[['slope_20']] y_test = test_df['label'].astype(int) q_test = prepare_groups(x_test) train(x_train, y_train, q_train, 'stock_ranker.pkl') t_results = predict(x_test, 'stock_ranker.pkl') # self.debug(test_df.index) new_seq = [i[1] for i in test_df.index] rank = [new_seq[x] for x in t_results ] self.selected_stocks = rank stock_weights = np.array( [1 / math.log(i + 2) for i in range(0, len(rank))] ) stock_weights = stock_weights / stock_weights.sum() return stock_weights if method == 'Momentum': daily_1, _, _, _, = self.get_historical_data() pct_daily = daily_1.set_index(['time']).pct_change() winrate_daily = pct_daily.dropna().copy() winrate_daily = winrate_daily.apply(lambda x:np.where(x>0, 1,0)) winrate_daily = winrate_daily.tail(self.mom_window) winrate_dict = {} # for i in self.ticker_list: # winrate_dict[i] = winrate_daily[i].sum()/self.mom_window winrate_dict = {} for i in pct_daily.columns: winrate_dict[i] = winrate_daily[i].sum()/self.mom_window winrate_df = pd.DataFrame.from_dict(winrate_dict, orient='index',columns=['winrate']) winrate_df = winrate_df.reset_index().rename(columns= {'index':'ticker'}) # stocks = list(df_Lasso_select['symbol']) stocks = self.selected_stocks winrate_df = winrate_df[winrate_df['ticker'].isin(stocks)] winrate_df = 
winrate_df.sort_values(by = ['winrate'], ascending= False).set_index(['ticker']).reset_index() weight_df = pd.DataFrame.from_dict(self.weight_universe,orient='index',columns=['weight'] ).reset_index().rename(columns= {'index':'ticker'}) winrate_df = winrate_df.merge(weight_df, how = 'left', on = ['ticker']) winrate_df['weight_adj'] = winrate_df['weight']/winrate_df['weight'].sum() #winrate_df['weight_new'] = winrate_df['weight_adj'].apply(lambda x:x+0.001*(len(winrate_df) - x.index)) winrate_df['weight_new'] = winrate_df['weight_adj'] + 0.001 * (len(winrate_df) - winrate_df.index) winrate_df['weight_new_adj'] = winrate_df['weight_new']/winrate_df['weight_new'].sum() self.selected_stocks = list(winrate_df['ticker']) return list(winrate_df['weight_new_adj']) def rebalance_weights(self): self.selected_stocks = self.replicate_portfolio(method = self.replicate_method) # Obtain stocks list self.rebalance_signal = True weights = self.optimal_portfolio(method = self.opt_method) if self.rebalance_signal: if self.opt_method== 'markowitz': for stock, weight in zip(self.selected_stocks, weights): self.portfolio_weights[stock] = weight[0] if self.opt_method== 'StockRanker' or self.opt_method == 'Momentum': for stock, weight in zip(self.selected_stocks, weights): self.portfolio_weights[stock] = weight return def submit_orders(self): price = self.curr_spy if self.if_hedge: for i in self.selected_stocks: self.set_holdings(i, self.portfolio_weights[i]) # self.set_holdings('SPY', - 1) self.contract_size = int(self.portfolio.total_portfolio_value * self.hedge_size / (price *100)) self.market_order(self.contract.Symbol, self.contract_size) self.has_contract = True else: # self.contract = None # self.contract_size = None # self.strike = None for i in self.selected_stocks: self.set_holdings(i, self.portfolio_weights[i]) # self.debug(f"test: {i} weight is {self.portfolio_weights[i]} ") self.has_contract = False def on_data(self, slice: Slice): # if slice.Bars.ContainsKey("SPY"): # bar = 
slice["SPY"].close # self.debug('yes we have SPY') # else: # self.debug('no we do not have SPY') # if not self.portfolio.invested: # self.market_order(self.contract.Symbol, 1) if self.rebalance_signal: self.curr_spy = slice["SPY"].close # if self.has_contract and slice[self.spy.symbol].close < self.strike: # self.exercise_option(self.contract.Symbol, self.contract_size) # self.debug('exercise the option') chain = slice.OptionChains.get(self.option_symbol) self.liquidate() self.has_contract = False if chain: # self.debug('has chain') contracts = [i for i in chain if i.Right == OptionRight.PUT and i.Strike < i.UnderlyingLastPrice ] e = [i.expiry for i in contracts] e = sorted(list(set(sorted(e, reverse = True)))) e = [i.expiry for i in contracts if i.expiry- self.Time> timedelta(5)] expiry = e[0] contracts = [contract for contract in contracts if contract.expiry == expiry] #self.debug(f"{len(contracts)}") S = [i.Strike for i in contracts] S = list(set(sorted(S, reverse=True))) S = sorted(S, reverse= True) # self.debug(f"the expiry is {expiry}, time is {self.time.date()}") TTM = expiry.date() - self.time.date() T = (TTM.days-7)/252 r = 0.0525 q = 0 b = r-q ivol = contracts[0].ImpliedVolatility # self.debug(f"The implied vol is {ivol}") for i in S: put = GBSM(self.curr_spy*(1-self.risk_pct), i, T, r, b, ivol,'put') delta = put.Delta p = [contract for contract in contracts if contract.Strike == i][0] # self.debug(f"Strike: {i} | Delta: {delta} | Current Price: {self.Securities[p.Symbol].Price} | ratio : {self.Securities[p.Symbol].Price/(abs(delta))} ") # e = [i.expiry for i in contracts] # e = sorted(list(set(sorted(e)))) # self.debug(f"strike max is {max(S)}, min is {min(S)}") # self.debug(f"expiry max is {max(e)}, min is {min(e)}") # strike = sorted(contracts, key=lambda x: x.Strike, reverse = True)[0].Strike # expiry = sorted(contracts, key=lambda x: x.expiry)[2].expiry strike = S[0] self.strike = strike # for i in S: # self.debug(f"{self.Time}: {i}") 
#self.debug(f"Date is {self.time}, the expiry is {expiry}") # self.debug( sorted(contracts, key=lambda x: x.expiry)) # self.contract = [contract for contract in contracts if contract.Strike == strike and contract.expiry == expiry][0] # self.debug(f"expiry max is {max(e)}, min is {min(e)}") # for i in e: # self.debug(f'{self.Time}: {i}') # self.debug(f'{self.Time} has {len(e)}') # self.debug(f"{self.Time}: expiry is {expiry}, strike is {strike}") self.contract = [contract for contract in contracts if contract.Strike == strike][0] # self.debug(f"{self.Time}: expiry is {expiry}, strike is {strike}, underlying is {self.contract.UnderlyingLastPrice}") self.submit_orders() self.rebalance_signal = False # self.debug(f"latent_states_sequence is {self.latent_states_sequence}") # self.debug(f"best state is {self.best_status_sequence}") return ''' stop loss and take profit (if necessary) ''' return def OnOrderEvent(self, orderEvent): if orderEvent.Status != OrderStatus.Filled: return # Webhook Notification symbol = orderEvent.symbol price = orderEvent.FillPrice quantity = orderEvent.quantity # self.debug(f"SP500 Enhanced-Indexing Paper order update] \nSymbol: {symbol} \nPrice: {price} \nQuantity: {quantity}") a = { "text": f"[SP500 Enhanced-Indexing Paper order update] \nSymbol: {symbol} \nPrice: {price} \nQuantity: {quantity}" } payload = json.dumps(a) self.notify.web("https://hooks.slack.com/services/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", payload)