Overall Statistics |
Total Trades 2894 Average Win 0.25% Average Loss -0.22% Compounding Annual Return 4.890% Drawdown 5.200% Expectancy 0.127 Net Profit 46.508% Sharpe Ratio 1.029 Loss Rate 47% Win Rate 53% Profit-Loss Ratio 1.14 Alpha 0.043 Beta 0.076 Annual Standard Deviation 0.047 Annual Variance 0.002 Information Ratio -0.132 Tracking Error 0.172 Treynor Ratio 0.641 Total Fees $12233.15 |
import math
import bisect
import operator
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import itertools
from sklearn.mixture import BayesianGaussianMixture as BGM
import random
import talib as tb
import numpy as np
import pandas as pd
import sklearn as sn
from sklearn.neighbors import KNeighborsRegressor as KNR
from sklearn.tree import DecisionTreeRegressor as DTR
from sklearn.metrics import mean_absolute_error as mae
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.ensemble import RandomForestRegressor as RFR
from clr import AddReference
AddReference("System")
AddReference("QuantConnect.Algorithm")
AddReference("QuantConnect.Common")
from System import *
from QuantConnect import *
from QuantConnect.Algorithm import *
from sklearn.model_selection import cross_validate as CV

# Fixed seed so the randomized indicator periods / feature subsets are
# reproducible across backtest runs.
seed = 1
random.seed(seed)
np.random.seed(seed)


class Data_preparator():
    """Builds feature matrices and forward-return labels from raw OHLCV data.

    The algorithm assigns the rolling windows to ``hist_open``, ``hist_close``,
    ``hist_high``, ``hist_low`` and ``hist_vol`` before calling
    ``get_data_for_train`` / ``get_data_for_eval``.  Features are TA-Lib
    indicators with randomized periods, computed on the base timeframe and on
    coarser aggregated timeframes; labels are forward SMA returns.
    """

    def __init__(self, y_periods=[10, 20, 30, 60], max_timeframe=2, max_for_period=15):
        # NOTE(review): mutable default argument for ``y_periods`` — shared
        # between instances if mutated in place (callers here pass a fresh list).
        self.eval_lookback = 500    # bars used to build evaluation features
        self.train_lookback = 6000  # extra bars used for training
        self.warmup_count = self.eval_lookback + self.train_lookback
        self.y_periods = y_periods            # forward-SMA horizons for the labels
        self.max_timeframe = max_timeframe    # exclusive upper bound of timeframe multipliers
        self.max_for_period = max_for_period  # exclusive upper bound of indicator period multipliers

    def get_labels(self, df_xy):
        """Append one forward-return column per horizon in ``self.y_periods``.

        Label 'y<p>' = (SMA(close, p) taken p bars ahead - close) / close,
        i.e. the relative move of the forward p-bar SMA versus the current
        close.  The last p rows keep the 0 placeholder (no future data yet).
        Also records the label column names in ``self.outputs``.
        """
        #self.df_y = pd.DataFrame(index = np.arange(self.train_lookback))
        # NOTE(review): this local is never used — the labels below are built
        # from ``self.close`` set by ``market_data_interface``.
        close = np.array(list(self.hist_close), float)
        self.outputs = []
        for y_period in self.y_periods:
            self.outputs.append('y' + str(y_period))
            df_xy['y' + str(y_period)] = 0
            # Chained assignment: relies on legacy pandas writing through to
            # df_xy (would warn / no-op on modern pandas).
            df_xy['y' + str(y_period)][:-y_period] = (tb.SMA(self.close, timeperiod=y_period)[y_period:] \
                - self.close[:-y_period]) / self.close[:-y_period]
        return df_xy

    def market_data_interface(self, how='train'):
        """Snapshot the rolling OHLCV windows into numpy arrays.

        how='train' keeps the last ``warmup_count`` elements, how='eval' the
        last ``eval_lookback``.  Any other value leaves ``length`` unbound
        and raises NameError.
        """
        if how == 'train':
            length = self.warmup_count
        if how == 'eval':
            length = self.eval_lookback
        # NOTE(review): QC RollingWindow iterates newest-first; the
        # commented-out [::-1] suggests the ordering was experimented with —
        # confirm the arrays end up in the intended chronological order.
        self.opn = np.array(list(self.hist_open), float)[-length:]  #[::-1]
        self.close = np.array(list(self.hist_close), float)[-length:]  #[::-1]
        self.high = np.array(list(self.hist_high), float)[-length:]  #[::-1]
        self.low = np.array(list(self.hist_low), float)[-length:]  #[::-1]
        self.vol = np.array(list(self.hist_vol), float)[-length:]  #[::-1]

    def get_data_for_train(self):
        """Build the full (x, y) training matrices.

        Re-rolls ``rand_vec`` (the random indicator periods) on every call,
        so each monthly retrain uses a fresh random feature parameterisation.
        Returns (features, labels) as numpy arrays.
        """
        self.rand_vec = np.random.randint(high=15, low=2, size=10)
        self.market_data_interface()
        df_xy = pd.DataFrame(index=np.arange(self.warmup_count))
        df_xy = self.timeframe_aggregator(df_x=df_xy)
        df_xy = self.get_labels(df_xy)
        # Drop rows made NaN by indicator warm-up periods.
        df_xy = df_xy.dropna(axis=0, how='any')
        self.df_xy = df_xy
        # NOTE(review): DataFrame.as_matrix() was removed in pandas 0.25 —
        # this runs only on the old pandas pinned by this environment.
        x = self.df_xy.drop(self.outputs, axis=1).as_matrix()
        y = self.df_xy[self.outputs].as_matrix()
        return x, y

    def get_data_for_eval(self):
        """Build the feature row for the most recent bar (shape (1, n_features))."""
        self.market_data_interface(how='eval')
        #df_xy = pd.DataFrame(index = np.arange(self.eval_lookback))
        df_xy = self.timeframe_aggregator()
        x = df_xy.values[-1, :]
        return x.reshape(1, -1)

    def timeframe_aggregator(self, df_x=None):
        """Compute indicator features for timeframe multipliers 1, 3, 5, ...

        With the step of 2 the loop visits odd multipliers below
        ``max_timeframe``.  Returns ``self.df_x`` (the frame last written by
        ``compute_indics`` — the same object as ``df_x`` when one was passed).
        """
        flag = 0  # training by default
        # NOTE(review): ``flag`` is set but never read afterwards.
        if df_x is None:
            flag = 1
            df_x = pd.DataFrame(index=np.arange(self.eval_lookback))
        for i in range(1, self.max_timeframe, 2):
            self.compute_indics(df_x=df_x, idd='_' + str(i), multiplier=i)
        return self.df_x

    def reset_indicators(self):
        """Rebuild all features/labels from the current windows (re-rolls rand_vec)."""
        self.get_data_for_train()

    def compute_indics(self, df_x=None, idd=None, multiplier=1):
        """Fill ``df_x`` with TA-Lib indicator columns for one timeframe.

        Periods are ``self.rand_vec[k] * i`` for i in 1..max_for_period-1;
        ``idd`` is a per-timeframe column-name suffix.  Stores the result in
        ``self.df_x`` as a side effect.
        NOTE(review): the numeric parts of column names (e.g. 'CCI_14',
        'AROON_10', 'sma_slow_25') do NOT reflect the actual randomized
        periods used — they are fixed-format labels only.
        """
        scaler_trigger = 0  # NOTE(review): unused.
        if multiplier == 1:
            opn, high, low, close, vol = self.opn, self.high, self.low, self.close, self.vol
        else:
            # get_bigger_tf returns (opn, close, high, low, vol) — the unpack
            # order here matches that return order.
            opn, close, high, low, vol = self.get_bigger_tf(multiplier)
        if df_x is None:
            df_x = pd.DataFrame(index=np.arange(self.eval_lookback))
        for i in range(1, self.max_for_period):
            df_x['CCI_' + str(14 * i) + idd] = tb.CCI(high, low, close, timeperiod=self.rand_vec[0] * i)
            df_x['AROON_' + str(10 * i) + idd] = tb.AROONOSC(high, low, timeperiod=self.rand_vec[1] * i)
            df_x['sma_slow_' + str(i * 25) + idd] = tb.SMA(close, timeperiod=self.rand_vec[2] * i) / close
            df_x['sma' + str(i) + idd] = tb.ROCP(close, timeperiod=self.rand_vec[3] * i)
            df_x['atr' + str(i) + idd] = tb.WILLR(high, low, close, timeperiod=self.rand_vec[4] * i)
            df_x['natr' + str(i) + idd] = tb.NATR(high, low, close, timeperiod=self.rand_vec[5] * i)
            df_x['cci' + str(i) + idd] = tb.DX(high, low, close, timeperiod=i * self.rand_vec[6])
            df_x['mfi' + str(i) + idd] = tb.MFI(high, low, close, vol, timeperiod=self.rand_vec[7] * i)
        #df_x = df_x.dropna(axis = 0, how = 'any')
        # Fixed-period features, one set per timeframe suffix.
        df_x['atr_fast' + idd] = tb.ATR(high, low, close, timeperiod=30)
        df_x['atr_slow' + idd] = tb.ATR(high, low, close, timeperiod=160)
        df_x['ad' + idd] = tb.AD(high, low, close, vol)
        df_x['obv' + idd] = tb.OBV(close, vol)
        df_x['hil1' + idd] = tb.HT_DCPERIOD(close)
        df_x['hil2' + idd] = tb.HT_DCPHASE(close)
        self.df_x = df_x

    def get_bigger_tf(self, multiplier):
        """Approximate a coarser timeframe by step-holding OHLCV values.

        Every ``multiplier``-bar chunk is overwritten with the chunk's
        aggregate (open of the previous chunk start, max high / min low / sum
        of volume over the window, close at the chunk boundary), producing
        arrays of the original length.

        Returns (opn, close, high, low, vol) — note the order differs from
        the usual OHLCV and is matched by the caller's unpack.
        NOTE(review): all five output arrays are seeded from ``opn``, so the
        first ``multiplier`` entries and any unprocessed tail hold OPEN
        prices for high/low/close/vol as well — looks unintended; confirm.
        """
        opn, high, low, close, vol = self.opn, self.high, self.low, self.close, self.vol
        new_opn = np.array(opn)
        new_high = np.array(opn)
        new_close = np.array(opn)
        new_low = np.array(opn)
        new_vol = np.array(opn)
        for i in range(multiplier, opn.shape[0] - multiplier + 1, multiplier):
            new_opn[i:i + multiplier] = opn[i - multiplier]
            new_high[i:i + multiplier] = np.max(high[i - multiplier:i + 1])
            new_low[i:i + multiplier] = np.min(low[i - multiplier:i + 1])
            new_close[i:i + multiplier] = close[i]
            new_vol[i:i + multiplier] = np.sum(vol[i - multiplier:i + 1])
        return new_opn, new_close, new_high, new_low, new_vol


class Interactor():
    """Random feature-subset search ("interaction detector").

    Draws random index subsets of the feature matrix, scores each subset by
    the cross-validated error of a KNN regressor, and returns the
    ``num_of_best`` highest-scoring subsets.
    """

    def __init__(self, num_of_subsets=2000, num_of_best=5, max_num_of_feat=6, y_period_coeffs=[0.1, 0.1, 0.1, 0.1]):
        # NOTE(review): mutable default argument for ``y_period_coeffs``.
        self.num_of_subsets = num_of_subsets    # how many random subsets to score
        self.num_of_best = num_of_best          # how many top subsets to keep
        self.max_num_of_feat = max_num_of_feat  # exclusive upper bound on subset size
        self.eval_lookback = 1000
        self.train_lookback = 6000
        self.warmup_count = self.eval_lookback + self.train_lookback
        self.y_period_coeffs = y_period_coeffs  # per-horizon score weights

    def get_pred_minus_test(self, X, y):
        """Return the mean cross-validated absolute error of KNN on (X, y).

        Uses sklearn ``cross_validate`` with neg-MAE scoring, negated back
        to a positive error (lower is better).
        """
        esti = KNR()
        if y.ndim < 2:
            y = y.reshape(-1, 1)
        if X.ndim < 2:
            X = X.reshape(-1, 1)
        esti.fit(X, y)
        cv = CV(estimator=esti, n_jobs=-1, X=X, y=y, scoring='neg_mean_absolute_error')
        res = -np.average(cv['test_score'])
        return res

    def get_score_diff(self, x, y):
        """Score a feature subset across all label horizons.

        Returns (score_diff, score): ``score_diff`` is currently a weighted
        constant (the single-feature comparison is disabled, see comment);
        ``score`` is the weighted mean of 1/CV-error, so higher is better.
        """
        #each sample error, take mininum for each features, aggregate; compare to the subset
        score_diff_final = []
        score_final = []
        #coeffs = [0.1,0.2,0.3,0.5]
        for y_period in range(y.shape[1]):
            subset_pred = self.get_pred_minus_test(x, y[:, y_period])
            score_diff = 1  #np.sum(singles_pred_aggregate)-np.sum(subset_pred) # the more the better
            score = 1.0 / subset_pred  #*np.std(y_test[:,y_period])
            score_final.append(score * self.y_period_coeffs[y_period])
            score_diff_final.append(score_diff * self.y_period_coeffs[y_period])
        score_diff_final = np.sum(score_diff_final) / float(len(score_diff_final))
        score_final = np.sum(score_final) / float(len(score_final))
        return score_diff_final, score_final

    def split(self, x, y):
        """Chronological split: last 2000 rows become the test set."""
        split = 2000
        X_train, X_test, y_train, y_test = x[:-split, :], x[-split:, :], y[:-split], y[-split:]
        return X_train, X_test, y_train, y_test

    def scale(self, X_train, X_test, y_train, y_test):
        """Standardize features and labels; scalers are fit on train only."""
        self.scaler_x = StandardScaler()
        self.scaler_y = StandardScaler()
        self.scaler_x.fit(X_train)
        self.scaler_y.fit(y_train)
        X_train, X_test = self.scaler_x.transform(X_train), self.scaler_x.transform(X_test)
        y_train, y_test = self.scaler_y.transform(y_train), self.scaler_y.transform(y_test)
        return X_train, X_test, y_train, y_test

    def generate_subsets(self, x, y):
        """Score ``num_of_subsets`` unique random feature subsets.

        Subsets are re-drawn until they have no duplicate indices and were
        not seen before (string key of the sorted index array).
        NOTE(review): scoring uses the UNscaled x — the scaled split above is
        computed but not passed to ``get_score_diff``.
        NOTE(review): the re-draw loop can spin for a long time if the
        feature count is small relative to ``num_of_subsets``.
        """
        num_of_inds = x.shape[1]
        X_train, X_test, y_train, y_test = self.split(x, y)
        X_train, X_test, y_train, y_test = self.scale(X_train, X_test, y_train, y_test)
        df_res = pd.DataFrame(index=np.arange(self.num_of_subsets), columns=['score_diff', 'subs_score', 'indexes'])
        temp = []  # string keys of subsets already drawn
        for i in (range(self.num_of_subsets)):
            temp_subset_size = np.random.randint(low=2, high=self.max_num_of_feat)
            rand_subs_inds = np.random.randint(low=0, high=num_of_inds, size=temp_subset_size)
            while len(rand_subs_inds) > len(set(rand_subs_inds)) or str(np.sort(rand_subs_inds)) in temp:
                rand_subs_inds = np.random.randint(low=0, high=num_of_inds, size=temp_subset_size)
            temp.append(str(np.sort(rand_subs_inds)))
            # Chained attribute assignment onto .loc views — legacy pandas
            # behaviour; modern pandas would not write through reliably.
            df_res.loc[i].score_diff, df_res.loc[i].subs_score = self.get_score_diff(x[:, rand_subs_inds], y)
            df_res.loc[i].indexes = list(rand_subs_inds)
        return df_res

    def get_top_subsets(self, x, y):
        """Return the index lists of the ``num_of_best`` subsets by subs_score.

        Also stashes the best score in ``self.tempy`` for inspection.
        """
        df = self.generate_subsets(x, y)
        #df_1 = df.sort_values(by='score_diff',ascending=False).iloc[:self.num_of_best].indexes.values
        df_2 = df.sort_values(by='subs_score', ascending=False).iloc[:self.num_of_best].indexes.values
        final = df_2
        self.tempy = df.sort_values(by='subs_score', ascending=False).iloc[0].subs_score
        return final


class Estimator():
    """A KNN regressor restricted to one feature-index subset.

    The neighbor count is drawn at random in [2, 15) at construction time.
    """

    def __init__(self, indx):
        self.inds = list(np.array(indx))  # feature column indices this model sees
        self.create()

    def create(self):
        # Random k per estimator adds diversity to the ensemble.
        hehe = np.random.randint(low=2, high=15)
        self.classy = KNR(n_neighbors=hehe, weights='distance')  #RFR()

    def fit(self, tr_x, tr_y, te_x=None, te_y=None):
        """Fit on the subset columns only; te_x/te_y are accepted but unused."""
        self.classy.fit(tr_x[:, self.inds], tr_y)
        #self.score = 1#self.classy.score(te_x[:,self.inds],te_y)

    def predict(self, x):
        """Predict from the subset columns of ``x``."""
        return self.classy.predict(x[:, self.inds])


class Predictor():
    """Ensemble of subset-KNN estimators with a learned "manager".

    A RandomForest "manager" picks which estimator to trust per sample, and a
    magnitude gate (``avg_pred`` vs ``filter_coef``) can veto a trade.
    NOTE: 'ensamble' spelling is kept — callers use this exact name.
    """

    def __init__(self, y_period_ind=0, filter_coef=1.):
        self.y_period_ind = y_period_ind  # which label horizon column to model
        self.error_frac = 0.25            # NOTE(review): unused.
        # if prediction magnitude vs historical average fails the
        # filter_coef test -> do not trade (see ensamble_predict).
        self.filter_coef = filter_coef

    def split(self, x, y):
        """Chronological split where TRAIN is the most recent 3000 rows.

        NOTE(review): the left-hand names are deliberately swapped versus
        position — the older remainder becomes the "test" set used to fit
        the error filter; the newest 3000 rows train the estimators.
        """
        X_test, X_train, y_test, y_train = x[:-3000, :], x[-3000:, :], y[:-3000], y[-3000:]
        return X_train, X_test, y_train, y_test

    def train(self, x, y, subsets):
        """Fit one Estimator per subset, then the manager and the filter.

        Features are standardized (scaler kept for prediction time); labels
        are NOT scaled (scaler_y is commented out).
        """
        self.scaler_x = StandardScaler()
        #self.scaler_y = StandardScaler()
        self.x = self.scaler_x.fit_transform(x)
        if y.ndim == 1:
            y = y.reshape(-1, 1)
        self.y = y[:, self.y_period_ind].reshape(-1, 1)  #self.scaler_y.fit_transform(y[:,self.y_period_ind].reshape(-1,1))
        self.estimators = []
        self.X_train, self.X_test, self.y_train, self.y_test = self.split(self.x, self.y)
        for subset in subsets:
            self.estimators.append(Estimator(subset))
            self.estimators[-1].fit(self.X_train, self.y_train)
        self.train_manager_estimator()

    def train_filter(self):
        """Fit a "good sample" classifier and record the mean |prediction|.

        Per-sample relative errors per estimator are capped at 1.0 when > 5,
        averaged across estimators, and thresholded at (overall mean / 1.5):
        label 1 = low-error sample.  An RFC is fit on these labels.
        NOTE(review): ``self.filter`` is trained but its use in
        ``ensamble_predict`` is commented out — only ``avg_pred`` is used.
        """
        errors = np.zeros((self.X_test.shape[0], len(self.estimators)), float)
        preds = np.zeros((self.X_test.shape[0], len(self.estimators)), float)
        it = 0
        for est in self.estimators:
            y_pred = est.predict(self.X_test)
            preds[:, it] = y_pred.reshape(-1,)
            # Relative error; blows up when y_test is near zero, hence the cap.
            errors[:, it] = abs(self.y_test.reshape(-1,) - y_pred.reshape(-1,)) / abs(self.y_test.reshape(-1,))
            it += 1
        errors[errors > 5] = 1.0
        agg_errs = np.average(errors, axis=1)
        avg = np.sum(errors.flatten()) / float(len(errors.flatten())) / 1.5
        labels_temp = np.array(agg_errs)
        labels = np.array(labels_temp)
        labels[labels_temp > avg] = 0
        labels[labels_temp <= avg] = 1
        self.labels = np.array(np.count_nonzero(labels))  # diagnostic: count of "good" samples
        self.filter = RFC()
        self.filter.fit(self.X_test, labels)  #preds
        self.avg_pred = np.average(abs(preds.flatten()))

    def train_manager_estimator(self):
        """Fit the RFC "manager" that maps a sample to its best estimator.

        The target for each row is the index of the estimator with the
        smallest absolute error on that row.  Chains into ``train_filter``.
        """
        predic_errors = np.zeros((self.x.shape[0], len(self.estimators)), float)
        it = 0
        for est in self.estimators:
            y_pred = est.predict(self.x).reshape(-1, 1)
            difference = abs(y_pred - self.y.reshape(-1, 1))
            predic_errors[:, it] = difference.reshape(-1,)
            it += 1
        best_esti = np.argmin(predic_errors, axis=1)
        self.manager = RFC()
        self.manager.fit(self.x, best_esti)
        self.train_filter()

    def ensamble_predict(self, x):
        """Predict via the manager-selected estimator.

        Single-row input: returns 0 (no trade) when the historical mean
        |prediction| exceeds the current average ensemble prediction scaled
        by ``filter_coef``; otherwise returns the chosen estimator's
        prediction.  Multi-row input: per-row manager dispatch, returns a
        column vector of predictions.
        """
        if x.shape[0] == 1:
            x = self.scaler_x.transform(x.reshape(1, -1))
            #if self.filter.predict(x) != 1:
            #    return 0
            estimator_num = self.manager.predict(x[-1, :].reshape(1, -1))[0]
            #-------------------------------
            preds = []
            for est in self.estimators:
                preds.append(est.predict(x))
            #if self.filter.predict(np.array(x).reshape(1,-1)) != 1: #preds
            #    return 0
            # Magnitude veto: historical avg |pred| vs scaled current avg.
            if self.avg_pred > np.average(preds) * self.filter_coef:
                return 0
            #-------------------------------
            prediction = self.estimators[estimator_num].predict(x.reshape(1, -1))
            return prediction
        else:
            x = self.scaler_x.transform(x)
            prediction = []
            for i in range(x.shape[0]):
                estimator_num = self.manager.predict(x[i, :].reshape(1, -1))[0]
                prediction.extend(self.estimators[estimator_num].predict(x[i, :].reshape(1, -1)))
            return np.array(prediction).reshape(-1, 1)
        return prediction  # NOTE(review): unreachable — both branches return above.


# class Y_preparator():
#     def __init__(self,x):
#         self.periods = [30]
#         self.x = x # assuming that x[:,0] = close
#     def get_atr_result(self,period):
#         y = np.zeros(self.x[:,0])
#         atr = tb.ATR(self.x[:,0],period)
#         time_coef = 1.0/np.log(np.arange(2+self.periods[0])
#         for i in range(period,self.x.shape[0]-self.periods[0],1):
#             y[i-self.periods[0]] = ((x[i-self.periods[0],0]-np.max(x[i-self.periods[0]:i+1,0]*time_coef)) +
#                 (x[i-self.periods[0],0]-np.min(x[i-self.periods[0]:i+1,0]*time_coef)))/atr[i]
#         y = y[y!=0]
#         atr = atr[atr is not np.nan]
#         return np.sum(y)*np.average(atr)
#     def optimize_atr_period(self):
#         periods = []
#         results = []
#         for atr_period in range(10,100,5):
#             reults.append(self.get_atr_result(atr_period))
#         best = periods[np.argmax(results)]


class Position():
    """Pyramiding / stop state machine for a single instrument.

    The algorithm updates ``volatility`` (ATR), ``current_price`` and
    ``price`` (last fill); ``manage`` compares the price move since the last
    fill against an ATR band and signals add / close / hold.
    """

    def __init__(self):
        self.max_position = 4.0     # maximum number of pyramiding units
        self.num_of_stds = 24.0     # band width in ATR units
        self.price = 0.0            # last entry/add reference price
        self.position = 0.0         # signed unit count: >0 long, <0 short
        self.volatility = 0.0       # current ATR value, set by the algorithm
        self.current_price = 0.0    # latest market price, set by the algorithm
        self.pl_fac = 1.0           # divides the stop band (1.0 = symmetric with the add band)

    def manage(self):
        """Return the position action for the current price.

        1.0  -> favourable move past the ATR band: reference price reset and,
                if below ``max_position``, one unit added.
        -1.0 -> adverse move past the (stop) band: caller should liquidate.
        0.0  -> hold (also returned when flat or at max size).
        """
        if self.position != 0.0:
            sign = abs(self.position) / self.position  # NOTE(review): unused.
            # Long + price broke UP through the band: pyramid.
            if (self.price + self.num_of_stds * self.volatility < self.current_price and self.position > 0.0):  #self.num_of_stds*self.volatility
                self.price = self.current_price
                if abs(self.position) < self.max_position:
                    self.position += 1.0
                    return 1.0
                else:
                    return 0.0
            # Short + price broke DOWN through the band: pyramid.
            if (self.price - self.num_of_stds * self.volatility > self.current_price and self.position < 0.0):  #self.num_of_stds*self.volatility
                self.price = self.current_price
                if abs(self.position) < self.max_position:
                    self.position -= 1.0
                    return 1.0
                else:
                    return 0.0
            # Long + price fell below the stop band: close.
            if (self.price - self.num_of_stds * self.volatility / self.pl_fac > self.current_price and self.position > 0):  #self.volatility*(self.num_of_stds)
                return -1.0
            # Short + price rose above the stop band: close.
            if (self.price + self.num_of_stds * self.volatility / self.pl_fac < self.current_price and self.position < 0):  #self.volatility*(self.num_of_stds)
                return -1.0
        return 0.0


class BasicTemplateAlgorithm(QCAlgorithm):
    """Monthly-retrained KNN ensemble strategy on consolidated minute bars.

    Workflow: consolidated bars feed two sets of rolling windows (short ones
    for evaluation, long ones for training).  At each month start
    ``reset_train`` arms ``ttrig``; the next consolidated bar then rebuilds
    features, searches feature subsets and retrains the predictor.  On every
    other bar the predictor's sign opens a position, which is then managed by
    ``Position`` (ATR-band pyramiding and stops) and force-flattened one
    minute before each close.
    """

    def Initialize(self):
        self.position1 = Position()
        self.data_prep = Data_preparator(y_periods=[10], max_timeframe=3, max_for_period=10)
        self.inter_detect = Interactor(num_of_subsets=200, num_of_best=5, max_num_of_feat=7)
        self.predictor = Predictor(y_period_ind=0, filter_coef=0.75)
        self.cash = 100000
        self.SetStartDate(2010, 1, 1)  #Set Start Date
        self.SetEndDate(2018, 1, 1)    #Set End Date
        self.SetCash(self.cash)        #Set Strategy Cash
        self.multiplier = 100.0        # NOTE(review): unused.
        self.init_coef = 0.6  # initial trade size - fraction of the deposit
        self.add_coef = 0.2   # position management trade size - fraction of the deposit
        # Find more symbols here: http://quantconnect.com/data
        self.symbol = "XOM"
        #self.model.symbol = self.symbol
        self.granularity = Resolution.Minute
        # Short (_p) windows build evaluation features; long windows build
        # the training set.
        self.HighBar_p = RollingWindow[float](self.data_prep.eval_lookback)
        self.LowBar_p = RollingWindow[float](self.data_prep.eval_lookback)
        self.OpenBar_p = RollingWindow[float](self.data_prep.eval_lookback)
        self.CloseBar_p = RollingWindow[float](self.data_prep.eval_lookback)
        self.VolBar_p = RollingWindow[float](self.data_prep.eval_lookback)
        self.HighBar = RollingWindow[float](self.data_prep.warmup_count)
        self.LowBar = RollingWindow[float](self.data_prep.warmup_count)
        self.OpenBar = RollingWindow[float](self.data_prep.warmup_count)
        self.CloseBar = RollingWindow[float](self.data_prep.warmup_count)
        self.VolBar = RollingWindow[float](self.data_prep.warmup_count)
        self.ttrig = 0  # 1 -> retrain on the next consolidated bar
        self.SetBrokerageModel(BrokerageName.InteractiveBrokersBrokerage, AccountType.Margin)
        mm = self.AddEquity(self.symbol, self.granularity)
        mm.MarginModel = PatternDayTradingMarginModel()
        self.SetBenchmark(self.symbol)
        self.SetWarmUp(self.data_prep.warmup_count)
        # NOTE(review): TradeBarConsolidator(5) — presumably the 5-bar
        # (count-based) overload rather than a timespan; confirm against the
        # LEAN API for the deployed version.
        self.consolidator = TradeBarConsolidator(5)
        self.consolidator.DataConsolidated += self.OnDataConsolidated
        self.SubscriptionManager.AddConsolidator(self.symbol, self.consolidator)
        sPlot = Chart('Strategy Equity')
        self.atr_slow = self.ATR(self.symbol, 160, MovingAverageType.Simple, Resolution.Minute);
        self.atr_fast = self.ATR(self.symbol, 30, MovingAverageType.Simple, Resolution.Minute);
        self.previous = None
        # Arm a retrain at every month start; flatten 1 minute before close.
        self.Schedule.On(self.DateRules.MonthStart(self.symbol), self.TimeRules.AfterMarketOpen(self.symbol), \
            Action(self.reset_train))
        self.Schedule.On(self.DateRules.EveryDay(self.symbol), self.TimeRules.BeforeMarketClose(self.symbol, 1), \
            Action(self.liqui))

    def liqui(self):
        """Scheduled before each close: flatten and reset the position state."""
        self.Liquidate(self.symbol)
        self.position1.position = 0.0

    def OnDataConsolidated(self, sender, TradeBar):
        """Per-consolidated-bar handler: update windows, retrain or trade.

        NOTE(review): the parameter name ``TradeBar`` shadows the LEAN type
        of the same name inside this method.
        """
        try:
            self.HighBar_p.Add(float(TradeBar.High))
            self.LowBar_p.Add(float(TradeBar.Low))
            self.OpenBar_p.Add(float(TradeBar.Open))
            self.CloseBar_p.Add(float(TradeBar.Close))
            self.VolBar_p.Add(float(TradeBar.Volume))
            self.HighBar.Add(float(TradeBar.High))
            self.LowBar.Add(float(TradeBar.Low))
            self.OpenBar.Add(float(TradeBar.Open))
            self.CloseBar.Add(float(TradeBar.Close))
            self.VolBar.Add(float(TradeBar.Volume))
        except:
            # NOTE(review): bare except hides any real failure, not just a
            # missing quote — consider narrowing.
            self.Debug('Failed to retrieve quotes')
        if not self.VolBar.IsReady:
            return
        # Trade size in shares, based on starting cash (not current equity).
        stock_coef = self.cash / float(self.Securities[self.symbol].Price)
        #Evaluated once a month - generates features and estimators
        if self.ttrig == 1:
            self.data_prep.hist_open = self.OpenBar
            self.data_prep.hist_close = self.CloseBar
            self.data_prep.hist_high = self.HighBar
            self.data_prep.hist_low = self.LowBar
            self.data_prep.hist_vol = self.VolBar
            x, y = self.data_prep.get_data_for_train()
            subsets = self.inter_detect.get_top_subsets(x, y)
            #self.Debug('Number of subsets:'+str(len(subsets)))
            self.predictor.train(x, y, subsets)
            self.ttrig = 0
            return
        # Regular bar: build the evaluation feature row and predict.
        self.data_prep.hist_open = self.OpenBar_p
        self.data_prep.hist_close = self.CloseBar_p
        self.data_prep.hist_high = self.HighBar_p
        self.data_prep.hist_low = self.LowBar_p
        self.data_prep.hist_vol = self.VolBar_p
        x = self.data_prep.get_data_for_eval()
        prediction = self.predictor.ensamble_predict(x.reshape(1, -1))
        #self.Debug(str(predictio.shape))
        holdings = float(self.Portfolio[self.symbol].Quantity)
        self.position1.volatility = float(self.atr_fast.Current.Value)
        self.position1.current_price = float(self.Securities[self.symbol].Price)
        #position management
        if self.position1.position != 0.0 and holdings != 0.0:
            self.position1.volatility = float(self.atr_fast.Current.Value)
            action = self.position1.manage()
            if self.position1.position > 0:
                direc = OrderDirection.Buy
            else:
                direc = OrderDirection.Sell
            #bp = self.Portfolio.GetBuyingPower(self.symbol, direction = direc)
            bp = float(self.Portfolio.GetMarginRemaining(self.symbol))
            if self.position1.position < 0:
                ordr = -1
            else:
                ordr = 1
            # Pyramid: add a unit if margin covers 1.5x the add size.
            if action == 1.0 and abs(bp) > abs(stock_coef * self.add_coef) * 1.5 * self.position1.current_price:
                self.MarketOrder(self.symbol, ordr * stock_coef * self.add_coef)
                #self.Debug('Added: '+str(ordr*stock_coef*self.add_coef))
                self.position1.price = float(self.Securities[self.symbol].Price)
            if action == -1.0:
                # Stop hit: flatten and fall through (entry below is skipped
                # this bar because ``holdings`` is still the stale non-zero
                # value).
                self.position1.position = 0.0
                self.Liquidate(self.symbol)
                #self.Debug('Closed all')
            else:
                # Holding with no stop signal: nothing more to do this bar.
                return
        bp = float(self.Portfolio.GetMarginRemaining(self.symbol))
        # Entry: only when flat both in our state and in the portfolio.
        if self.position1.position == 0.0 and holdings == 0.0:
            if prediction > 0.0 and abs(bp) > stock_coef * self.init_coef * self.position1.current_price:
                self.position1.position = 1.0
                self.MarketOrder(self.symbol, stock_coef * self.init_coef)
                #self.Debug('Initiated: '+str(stock_coef*self.init_coef))
                self.position1.price = float(self.Securities[self.symbol].Price)
                self.position1.current_price = float(self.Securities[self.symbol].Price)
                self.position1.volatility = float(self.atr_fast.Current.Value)
            if prediction < 0.0 and abs(bp) > stock_coef * self.init_coef * self.position1.current_price:
                self.position1.position = -1.0
                self.MarketOrder(self.symbol, -stock_coef * self.init_coef)
                #self.Debug('Initiated: '+str(-stock_coef*self.init_coef))
                self.position1.volatility = float(self.atr_fast.Current.Value)
                self.position1.price = float(self.Securities[self.symbol].Price)
                self.position1.current_price = float(self.Securities[self.symbol].Price)
        # Resync: portfolio holds shares but our state says flat — adopt the
        # portfolio's direction and flag the inconsistency.
        if float(self.Portfolio[self.symbol].Quantity) != 0 and self.position1.position == 0:
            self.position1.position = abs(float(self.Portfolio[self.symbol].Quantity)) / float(self.Portfolio[self.symbol].Quantity)
            self.Debug('ACHTUNG!!!!')
        self.previous = self.Time

    def reset_train(self):
        """Scheduled at month start: arm a retrain for the next bar."""
        self.ttrig = 1