Overall Statistics

Total Trades               1484
Average Win                0.23%
Average Loss               -0.18%
Compounding Annual Return  -2.605%
Drawdown                   10.000%
Expectancy                 -0.038
Net Profit                 -5.464%
Sharpe Ratio               -0.292
Loss Rate                  57%
Win Rate                   43%
Profit-Loss Ratio          1.23
Alpha                      -0.026
Beta                       0.021
Annual Standard Deviation  0.079
Annual Variance            0.006
Information Ratio          -0.74
Tracking Error             0.194
Treynor Ratio              -1.101
Total Fees                 $6111.40
# QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals.
# Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import clr
clr.AddReference("System")
clr.AddReference("QuantConnect.Algorithm")
clr.AddReference("QuantConnect.Indicators")
clr.AddReference("QuantConnect.Common")

from System import *
from QuantConnect import *
from QuantConnect.Algorithm import *
from QuantConnect.Indicators import *

import decimal as d
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.linear_model import LogisticRegression
from CNN_classifier import CNNClassifier


def calcSignal(data, algo):
    """Compute the trading signal for one symbol from a minute-bar OHLCV frame."""
    r = data.p.diff().fillna(0)
    s_brk = dual_thr(data, 15, 0.25)
    s_rsi = rsi(r, 15, 0.2)
    s_mom = px_mom(r, 15, 300)
    s_chmom = chan_mom(data, 60, 5)
    s_wr = williamsr(data, 15, 0.3)
    # Convolutional-net path (disabled): build moving-average-difference
    # features of volume-dollar pressure and predict with the latest model.
    # clf = algo.fitted_models[-1]
    vdp = data.v * r
    n1 = 2 ** np.arange(0, 8)
    n2 = 2 ** np.arange(1, 8)
    X = madiff(vdp, n1, n2)
    # X = X.reshape((1, X.shape[0], X.shape[1]))
    # X = np.concatenate((X[:,0].T, X[:,1].T)).reshape((1, X.shape[1]*algo.window))
    # yhat = clf.predict(X)
    return s_chmom  # only the channel-momentum signal is traded


def dual_thr(data, n, gamma):
    """Dual-thrust breakout: distance of price outside an open +/- gamma*range band."""
    rg1 = data.h.iloc[-n:].max() - data.p.iloc[-n:].min()
    rg2 = data.p.iloc[-n:].max() - data.l.iloc[-n:].min()
    rg = max(rg1, rg2)
    p = data.p.values[-1]
    o = data.o.values[-1]
    return max(p - (o + gamma * rg), 0) + min(p - (o - gamma * rg), 0)


def williamsr(data, n, thr=0.2):
    """Williams %R, re-centred to [-0.5, 0.5]."""
    h = data.h.iloc[-n:].max()
    ratio = (h - data.p.iloc[-1]) / (h - data.l.iloc[-n:].min())
    return ratio - 0.5  # -np.int8(ratio > 1.0-thr) + np.int8(ratio < thr)


def rsi(ts, n, thr):
    """RSI mapped to [-0.5, 0.5]; zero inside the +/- thr dead band."""
    s = ts.rolling(window=n, min_periods=n).sum()
    a = np.abs(ts).rolling(window=n, min_periods=n).sum()
    rsi = (0.5 * (s + a) / a - 0.5).fillna(0).values
    return max(np.abs(rsi[-1]) - thr, 0) * np.sign(rsi[-1])


def px_mom(ts, n1, n2):
    """Normalised difference between a fast and a slow rolling mean of returns."""
    s1 = ts.rolling(window=n1, min_periods=n1).mean().fillna(0)
    v1 = np.abs(s1).rolling(window=n2, min_periods=1).mean()
    s2 = ts.rolling(window=n2, min_periods=n2).mean().fillna(0)
    mom = ((s1 - s2) / v1).fillna(0).values
    return mom[-1]


def chan_mom(data, n, n_s):
    """Channel momentum: direction of the high/low channel drift, gated by a volume z-score."""
    h = data.h.values[-n:]
    l = data.l.values[-n:]
    v = data.v.values[-n:]
    rg = h.max() - l.min()
    mh = (h[-n_s:].mean() - h[:n_s].mean()) / rg
    ml = (l[-n_s:].mean() - l[:n_s].mean()) / rg
    vz = (v[-n_s:].mean() - v.mean()) / v.std()
    return np.sign(mh + ml) * np.int8(vz > 1)


def build_acc(r, n_f):
    """Score candidate moving-average features by cosine similarity with next-bar returns."""
    cand = []
    N = len(n_f)
    for i1 in range(N - 2):
        # pandas rolling has no .ema(); use an exponentially weighted mean instead
        m1 = r.ewm(span=n_f[i1], min_periods=n_f[-1]).mean()
        s = m1.rolling(window=n_f[-1], min_periods=n_f[-1]).std()
        cand.append(m1 / s)  # was m1/s1: s1 is undefined in this scope
        for i2 in range(i1 + 1, N):
            m2 = r.ewm(span=n_f[i2], min_periods=n_f[-1]).mean()
            cand.append((m1 - m2) / s)
    cand.append(r.shift(-1).fillna(0).values)  # target: next-bar return
    ds = np.column_stack(cand)
    T = len(ds)
    ds_train = ds[T // 4:]
    ds_test = ds[:T // 4]
    n_c = ds.shape[1] - 1
    metrics_train = np.zeros(n_c)
    metrics_test = np.zeros(n_c)
    for n in range(n_c):
        # cosine_similarity expects 2-D arrays, hence the reshapes
        metrics_train[n] = cosine_similarity(ds_train[:, n].reshape(1, -1),
                                             ds_train[:, -1].reshape(1, -1))
        # penalise turnover: divide by the feature's total variation
        metrics_test[n] = metrics_train[n] / np.abs(np.diff(ds_train[:, n])).sum()
    del ds
    return metrics_train, metrics_test
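
# The signal helpers above can be exercised in isolation; below is a minimal
# sketch on a synthetic random-walk OHLCV frame (the RandomState data and the
# fixed 0.1 high/low offsets are illustrative assumptions, not part of the
# strategy). The guard keeps the algorithm importable by Lean.
if __name__ == "__main__":
    rs = np.random.RandomState(0)
    px = 100 + rs.normal(0, 0.1, 400).cumsum()
    bars = pd.DataFrame({'o': px, 'h': px + 0.1, 'l': px - 0.1, 'p': px,
                         'v': rs.randint(1000, 5000, 400).astype(float)})
    r_demo = bars.p.diff().fillna(0)
    print(dual_thr(bars, 15, 0.25))   # breakout distance outside the open band
    print(rsi(r_demo, 15, 0.2))       # signed RSI excess beyond the dead band
    print(px_mom(r_demo, 15, 300))    # fast-minus-slow return momentum
    print(chan_mom(bars, 60, 5))      # volume-gated channel drift in {-1, 0, 1}
    print(williamsr(bars, 15, 0.3))   # Williams %R re-centred to [-0.5, 0.5]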
def prepareDataForCNN(df, params):
    """Build (window, features) tensors and one-hot labels for the CNN."""
    train_test_split = params.get('train_test_split', 0.75)
    ds = df.values
    N = len(ds)
    window = params['window']
    pred_window = params['pred_window']
    threshold = params['threshold']
    data = np.ones(ds.shape)
    data[1:, 0] = ds[1:, 0] / ds[0, 0]  # price relative to the first bar
    data[1:, 1] = ds[1:, 1] / ds[0, 1]  # was data[1:,1]/data[0,1], which is identically 1
    X = np.atleast_3d(np.array([data[start:start + window]
                                for start in range(0, N - window)]))
    r = np.ones(N)
    r[:-pred_window] = data[pred_window:, 0] / data[:-pred_window, 0]
    y = np.ones((N, 1))
    y[r > 1.0 + threshold] = 2  # up move
    y[r < 1.0 - threshold] = 0  # down move; 1 is the flat class
    onehot_encoder = OneHotEncoder(sparse=False)
    Y = onehot_encoder.fit_transform(y[window:])
    k = int((N - window) * train_test_split)
    X_train = X[:k]
    Y_train = Y[:k]
    X_test = X[k:]
    Y_test = Y[k:]
    return (X_train, Y_train, X_test, Y_test)


def prepareDataForRFC(df, params):
    """Build flattened window features and {-1, 0, 1} labels for a shallow classifier."""
    train_test_split = params.get('train_test_split', 0.75)
    pred_window = params['pred_window']
    r = df['p'].diff().fillna(0)
    R = r.rolling(window=pred_window, min_periods=pred_window).sum().fillna(0)
    R = R.shift(-pred_window)  # forward pred_window-bar return
    ds = df.values
    N = len(ds)
    window = params['window']
    threshold = params['threshold']
    data = np.ones(ds.shape)
    data[1:, 0] = ds[1:, 0] / ds[0, 0] - 1.0
    data[1:, 1] = ds[1:, 1] / ds[0, 1] - 1.0  # was data[1:,1]/data[0,1]-1.0, which is identically 0
    X = pd.DataFrame([np.concatenate((data[start:start + window, 0].T,
                                      data[start:start + window, 1].T))
                      for start in range(0, N - window)]).values
    y = np.zeros((N, 1))
    y[R > threshold] = 1
    y[R < -threshold] = -1
    # onehot_encoder = OneHotEncoder(sparse=False)
    Y = y[window:]  # was: onehot_encoder.fit_transform(y[window:])
    k = int((N - window) * train_test_split)
    X_train = X[:k]
    Y_train = Y[:k].reshape((k,))
    X_test = X[k:]
    Y_test = Y[k:].reshape((N - window - k,))
    return (X_train, Y_train, X_test, Y_test)


def prepareVdpDataForRFC(df, params):
    """Volume-dollar-pressure (madiff) features with {-1, 0, 1} forward-return labels."""
    train_test_split = params.get('train_test_split', 0.75)
    pred_window = params['pred_window']
    threshold = params['threshold']
    N = df.shape[0]
    r = (df['p'].diff() / df['p'].shift()).fillna(0)
    R = r.rolling(window=pred_window, min_periods=pred_window).sum().fillna(0)
    R = R.shift(-pred_window)
    vdp = df.v * r
    n1 = 2 ** np.arange(0, 8)  # fast windows 1..128
    n2 = 2 ** np.arange(1, 8)  # slow windows 2..128
    window = n2[-1]
    X = madiff(vdp, n1, n2)
    y = np.zeros((N, 1))
    y[R > threshold] = 1
    y[R < -threshold] = -1
    Y = y[window - 1:]
    k = int((N - window + 1) * train_test_split)
    X_train = X[:k]
    Y_train = Y[:k].reshape((k,))
    X_test = X[k:]
    Y_test = Y[k:].reshape((N - window + 1 - k,))
    return (X_train, Y_train, X_test, Y_test)


def madiff(ts, n1, n2):
    """Matrix of normalised fast-minus-slow moving-average differences of ts."""
    window = n2[-1]
    mm = []
    cols = []
    for n_s in n1:
        m1 = ts.rolling(window=n_s, min_periods=n_s).mean()
        s = m1.rolling(window=n2[-1], min_periods=n2[-1]).std()
        for n_l in n2[int(np.log2(n_s)) + 1:]:  # np.int is deprecated; use int
            m2 = ts.rolling(window=n_l, min_periods=n_l).mean()
            mm.append((m1 - m2) / s)
            cols.append('ts' + str(n_s) + 'x' + str(n_l))
    return pd.DataFrame(np.column_stack(mm)).fillna(0).values[window - 1:]
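
# The prepare-and-fit path that modelFit uses below can also be exercised
# offline; a minimal sketch on a synthetic 2000-bar price/volume frame (the
# RandomState data are an illustrative assumption; the parameters mirror the
# algorithm's window=128, pred_window=30, threshold=0.0001).
if __name__ == "__main__":
    rs = np.random.RandomState(1)
    px = 100 + rs.normal(0, 0.05, 2000).cumsum()
    df_demo = pd.DataFrame({'p': px,
                            'v': rs.randint(1000, 5000, 2000).astype(float)})
    X_train, Y_train, X_test, Y_test = prepareVdpDataForRFC(
        df_demo, {'window': 128, 'pred_window': 30, 'threshold': 0.0001})
    print(X_train.shape)  # (1404, 21): 21 madiff columns, rows after warm-up
    clf = LogisticRegression(C=0.1)
    clf.fit(X_train, Y_train)
    print(clf.score(X_test, Y_test))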
class MovingAverageCrossAlgorithm(QCAlgorithm):

    def Initialize(self):
        '''Initialise the data and resolution required, as well as the cash and
        start-end dates for your algorithm. All algorithms must be initialized.'''
        self.SetStartDate(2011, 1, 2)   # Set Start Date
        self.SetEndDate(2016, 12, 23)   # Set End Date
        self.SetCash(100000)            # Set Strategy Cash
        self.symbols = ["SPY"]  # ,"FXI","EWJ","EWG","EWQ","EQI","EWZ"]
        for s in self.symbols:
            self.AddEquity(s, Resolution.Minute)
        # self.previous = None
        self.n_fit = 2000
        self.SetWarmup(self.n_fit)
        self.n_f = [1, 2, 4, 8, 16, 32, 64]  # ,128,256,512]
        self.window = 128
        self.pred_window = 30
        self.threshold = 0.0001
        self.conv_layers = [{'kernel_size': 4, 'num_filters': 4},
                            {'kernel_size': 4, 'num_filters': 4}]
        self.dropout = 0.5
        self.pooling = 'avg'
        self.target_dim = 3
        self.calc_len = self.window
        # self.Schedule.On(self.DateRules.MonthStart("SPY"),
        #                  self.TimeRules.BeforeMarketClose("SPY", 5),
        #                  Action(self.modelFit))
        # start at 11:50 and trade in 15-minute increments until 15:35
        for i in range(16):
            self.Schedule.On(self.DateRules.EveryDay("SPY"),
                             self.TimeRules.AfterMarketOpen("SPY", 140 + i * 15),
                             Action(self.runAndTrade))
        # self.Schedule.On(self.DateRules.EveryDay("SPY"),
        #                  self.TimeRules.BeforeMarketClose("SPY", 10),
        #                  Action(self.runAndTrade))
        self.fitted_models = []
        self.signal_values = []

    def runAndTrade(self):
        # wait for the warm-up history to fully initialize
        if self.IsWarmingUp:
            return
        # only once per day
        # if self.previous is not None and self.previous.date() == self.Time.date():
        #     return
        for s in self.symbols:
            history = self.History(s, self.calc_len, Resolution.Minute)
            data = []
            index = []
            for bar in history:
                data.append([float(bar.Open), float(bar.High), float(bar.Low),
                             float(bar.Close), float(bar.Volume)])
                index.append(bar.Time.date())
            df = pd.DataFrame(data, columns=['o', 'h', 'l', 'p', 'v'],
                              index=pd.Series(index))
            if len(df) == self.calc_len:
                # no fitted model yet: fall back to the rule-based signal
                if len(self.fitted_models) == 0:
                    signal = calcSignal(df, self)
                    self.signal_values.append(signal)
                    self.SetHoldings(s, signal / self.Securities.Count)

    def modelFit(self):
        # wait for the warm-up history to fully initialize
        if self.IsWarmingUp:
            return
        else:
            self.Debug("Running modelFit on " + str(self.Time.date()))
        # only once per day
        # if self.previous is not None and self.previous.date() == self.Time.date():
        #     return
        for s in self.symbols:
            history = self.History(s, self.n_fit, Resolution.Minute)
            data = []
            index = []
            for bar in history:
                index.append(bar.Time.date())
                data.append([float(bar.Close), float(bar.Volume)])
                # data.append(float(bar.Close))
                # data.append([float(bar.Open), float(bar.High), float(bar.Low),
                #              float(bar.Close), float(bar.Volume)])
            df = pd.DataFrame(data, columns=['p', 'v'], index=pd.Series(index))
            # self.Debug("Loaded " + str(len(df)) + " rows")
            if len(data) == self.n_fit:
                '''
                input_shape = (self.window, df.shape[1])
                clf = CNNClassifier(input_shape, self.conv_layers, self.dropout,
                                    self.pooling, self.target_dim)
                self.Debug(str(clf.model.summary()))
                (X_train, Y_train, X_test, Y_test) = prepareDataForCNN(
                    df, {'window': self.window, 'pred_window': self.pred_window,
                         'threshold': self.threshold})
                clf.fit(X_train, Y_train, validation_data=(X_test, Y_test),
                        sample_weight=None)
                self.Debug(clf.model.evaluate(X_test, Y_test))
                yhat = np.argmax(clf.predict_proba(X_test), axis=1) - 1
                y_test = np.argmax(Y_test, axis=1) - 1
                '''
                (X_train, Y_train, X_test, Y_test) = prepareVdpDataForRFC(
                    df, {'window': self.window, 'pred_window': self.pred_window,
                         'threshold': self.threshold})
                # self.Debug("Y_train: " + str(pd.Series(Y_train).value_counts()))
                # self.Debug("Y_test: " + str(pd.Series(Y_test).value_counts()))
                # self.Debug("X_train shape: " + str(X_train.shape))
                # self.Debug("X_test shape: " + str(X_test.shape))
                # clf = RFC(n_estimators=100)
                clf = LogisticRegression(C=0.1)
                clf.fit(X_train, Y_train)
                yhat = clf.predict(X_test)
                # pin the row/column order to [-1, 0, 1] so the accuracies
                # below stay correct even if a class is absent from Y_test
                cm = confusion_matrix(Y_test, yhat, labels=[-1, 0, 1])
                self.Debug("Short acc: " + str(1.0 * cm[0, 0] / cm[0, :].sum()))
                self.Debug("Long acc: " + str(1.0 * cm[2, 2] / cm[2, :].sum()))
                self.fitted_models.append(clf)
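
# The monthly modelFit schedule and the prediction branch in calcSignal are
# both commented out above. If they were re-enabled, one plausible way to turn
# the latest fitted classifier into a trade signal is sketched below.
# calcModelSignal is a hypothetical helper, not the author's code; it mirrors
# the percentage-return feature construction used by prepareVdpDataForRFC.
def calcModelSignal(data, algo):
    r = (data.p.diff() / data.p.shift()).fillna(0)  # match the training returns
    vdp = data.v * r
    n1 = 2 ** np.arange(0, 8)
    n2 = 2 ** np.arange(1, 8)
    X = madiff(vdp, n1, n2)  # columns whose windows have not filled are zero
    clf = algo.fitted_models[-1]  # most recently fitted classifier
    return float(clf.predict(X[-1:])[0])  # -1 short, 0 flat, 1 long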
# CNN_classifier.py: a 1-D convolutional network wrapped in a scikit-learn
# classifier interface (imported above as `from CNN_classifier import CNNClassifier`).

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.preprocessing import OneHotEncoder
from keras.losses import cosine_proximity
from keras.models import Sequential
from keras.layers import LSTM, Dense, Activation, Flatten
from keras.layers.convolutional import Conv1D, AveragePooling1D, MaxPooling1D
from keras.layers.advanced_activations import LeakyReLU, ELU
from keras.layers.core import Dropout
from keras.layers.normalization import BatchNormalization
from keras import optimizers
from keras import initializers
from keras import regularizers
from keras.callbacks import ReduceLROnPlateau
from keras import backend as K


class CNNClassifier(BaseEstimator, ClassifierMixin):

    def __init__(self, input_shape, conv_layers, dropout, pooling, target_dim):
        self.input_shape = input_shape
        self.conv_layers = conv_layers
        self.dropout = dropout
        if pooling == 'max':
            self.pooling = MaxPooling1D
        else:
            self.pooling = AveragePooling1D
        self.n_classes_ = target_dim
        # build the model
        self._build_model()

    def fit(self, X, Y, validation_data=None, sample_weight=None):
        """Y must be one-hot encoded."""
        self.history = self.model.fit(X, Y,
                                      epochs=50,
                                      batch_size=256,
                                      verbose=1,
                                      validation_data=validation_data,
                                      sample_weight=sample_weight,
                                      shuffle=True,
                                      callbacks=self.callbacks)

    def predict_proba(self, X):
        return self.model.predict(X)

    def _build_model(self):
        layer_params = self.conv_layers[0]
        model = Sequential()
        model.add(Conv1D(input_shape=self.input_shape,
                         filters=layer_params['num_filters'],
                         kernel_size=layer_params['kernel_size'],
                         activity_regularizer=regularizers.l2(0.001),
                         activation=layer_params.get('activation', 'selu')))
        model.add(self.pooling())
        model.add(BatchNormalization())
        model.add(Dropout(self.dropout))
        for n in range(1, len(self.conv_layers)):
            layer_params = self.conv_layers[n]
            model.add(Conv1D(filters=layer_params['num_filters'],
                             kernel_size=layer_params['kernel_size'],
                             activity_regularizer=regularizers.l2(0.001),
                             activation=layer_params.get('activation', 'selu')))
            model.add(self.pooling())
            model.add(BatchNormalization())
            model.add(Dropout(self.dropout))
        # flatten the 3-d convolution output before the dense softmax head
        model.add(Flatten())
        model.add(Dense(3, activation='softmax'))
        opt = optimizers.Nadam(lr=0.001)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.8,
                                      patience=5, min_lr=0.000001, verbose=1)
        model.compile(optimizer=opt, loss='categorical_crossentropy',
                      metrics=['accuracy'])
        self.model = model
        self.callbacks = [reduce_lr]
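
# The CNN path is never reached in the backtest above (the corresponding block
# in modelFit is commented out). As a reference, a minimal offline sketch of
# the wrapper on randomly generated tensors shaped like prepareDataForCNN's
# output, assuming the Keras 2.x environment the imports above target; the
# synthetic data and seed are illustrative only.
if __name__ == "__main__":
    import numpy as np
    rs = np.random.RandomState(2)
    X = rs.normal(size=(500, 128, 2))        # 500 samples, 128 bars x 2 features
    Y = np.eye(3)[rs.randint(0, 3, 500)]     # one-hot labels over 3 classes
    conv_layers = [{'kernel_size': 4, 'num_filters': 4},
                   {'kernel_size': 4, 'num_filters': 4}]
    clf = CNNClassifier((128, 2), conv_layers, dropout=0.5,
                        pooling='avg', target_dim=3)
    clf.fit(X[:400], Y[:400], validation_data=(X[400:], Y[400:]))
    yhat = np.argmax(clf.predict_proba(X[400:]), axis=1) - 1  # back to {-1, 0, 1}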