Overall Statistics |
Total Trades 10960 Average Win 0.10% Average Loss -0.08% Compounding Annual Return 25.744% Drawdown 14.000% Expectancy 0.154 Net Profit 92.677% Sharpe Ratio 1.52 Loss Rate 48% Win Rate 52% Profit-Loss Ratio 1.20 Alpha 0.197 Beta -0.012 Annual Standard Deviation 0.129 Annual Variance 0.017 Information Ratio 0.717 Tracking Error 0.173 Treynor Ratio -16.882 Total Fees $13611.46 |
# Derek M Tishler - 2017
# https://tishlercapital.com/

# Based on the MNIST TensorFlow Softmax Classification Example
# https://www.tensorflow.org/get_started/mnist/beginners
# https://www.tensorflow.org/get_started/mnist/pros

# Extended from single class to multi class probabilities using:
# "Multi-label image classification with Inception net" - Radek Bartyzal
# https://towardsdatascience.com/multi-label-image-classification-with-inception-net-cbb2ee538e30

# In case you missed it, there is a slightly simpler single asset tensorflow
# example located here (use second post in thread):
# https://www.quantconnect.com/forum/discussion/2880/machine-learning---tensorflow-basic-example/p1/comment-8880

import random

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

seed = 1
random.seed(seed)
np.random.seed(seed)
tf.set_random_seed(seed)


class BasicTemplateAlgorithm(QCAlgorithm):
    """Daily long-or-cash rebalancer driven by a multi-label linear classifier.

    Every morning the model is retrained on a sliding window of open-to-open
    price changes and each asset it flags is bought with equal weight.

    NOTE(review): QCAlgorithm, Resolution, BrokerageName, AccountType, Chart,
    Series, SeriesType and Action are not imported in this file; presumably
    they are injected by the QuantConnect runtime -- confirm.
    """

    def Initialize(self):
        """Configure the backtest, universe, model, charts and daily schedule."""
        # Setup backtest.
        self.SetStartDate(2015, 1, 1)   # Set Start Date
        self.SetEndDate(2017, 11, 10)   # Set End Date
        self.SetCash(100000)            # Set Strategy Cash

        # We think (step through minute data) faster than we act (daily
        # forecast signal/rebalance) for better execution estimation.
        self.resolution = Resolution.Minute

        self.SetBrokerageModel(BrokerageName.InteractiveBrokersBrokerage, AccountType.Margin)

        # Current top market cap, simple universe, lots of bias but easy to
        # work with data wise.
        tickers = [
            "AAPL", "GOOGL", "MSFT", "NVDA", "FB",
            "BABA", "AMZN", "JNJ", "JPM", "XOM",
            "BAC", "WFC", "WMT", "V", "CVX",
            "INTC", "ORCL", "PFE", "T", "KO",
        ]
        self.portfolio = [self.AddEquity(ticker, self.resolution).Symbol for ticker in tickers]

        # Init the tensorflow model object and pass our portfolio so we know
        # the number of classes in the output layer.
        self.model = Model(symbols=self.portfolio)

        # Custom charting for model performance.
        sPlot = Chart('Strategy Equity')
        sPlot.AddSeries(Series('Model_Accuracy', SeriesType.Line, 2))
        sPlot.AddSeries(Series('Train_Model_Accuracy', SeriesType.Line, 2))
        sPlot.AddSeries(Series('Test_Model_Accuracy', SeriesType.Line, 2))
        sPlot.AddSeries(Series('Loss', SeriesType.Line, 3))
        sPlot.AddSeries(Series('Train_Model_Cross_Entropy_x100', SeriesType.Line, 3))
        sPlot.AddSeries(Series('Test_Model_Cross_Entropy_x100', SeriesType.Line, 3))
        self.AddChart(sPlot)

        # Our big history call, only done once to save time.
        self.model.hist_data = self.History(
            self.portfolio, self.model.warmup_count, Resolution.Daily).astype(np.float32)

        # Flag to know when to start gathering history in Rebalance.
        self.do_once = True

        # Prevent order spam by tracking the current weight target and
        # comparing against new targets.
        self.target = np.zeros((len(self.portfolio),))

        # We are forecasting and trading on open-to-open price changes on a
        # daily time scale. So work every morning.
        self.Schedule.On(self.DateRules.EveryDay(self.portfolio[0]),
                         self.TimeRules.AfterMarketOpen(self.portfolio[0]),
                         Action(self.Rebalance))

    def Rebalance(self):
        """Refresh data, retrain, forecast and rebalance to the new weights."""
        # Update the current price dictionary; the ticker STRING (not the
        # Symbol object) is used as the key.
        for asset in self.portfolio:
            ticker = str(asset)
            self.model.current_price[ticker] = float(self.Securities[ticker].Price)

        # Accrue history over time vs making huge, slow history calls each
        # step. The very first call skips this because Initialize has just
        # loaded the full window.
        if not self.do_once:
            new_hist = self.History(self.portfolio, 1, Resolution.Daily).astype(np.float32)
            # Roll the window per symbol (the previous `.append(...).iloc[1:]`
            # dropped a single row in total, not one row per symbol).
            self.model.update_history(new_hist)
            self.Log(str([str(asset) for asset in self.portfolio]))
        else:
            self.do_once = False

        # Prepare our data now that it has been updated.
        self.model.preproessing(self)

        # Perform a number of training steps with the new data.
        self.model.train(self)

        # Using the latest input feature set, get the assets expected to make
        # the desired profit by the next open.
        self.weights = self.model.predict(self)

        # Some charting of model metrics.
        self.Checkpoint()

        # A little ugly, but lets keep our weight info in the log.
        self.Log(str(self.weights))

        # In case of a repeated forecast, skip the rebalance to reduce
        # fees/orders (not much help in this case because of the large universe).
        if np.any(self.weights != self.target):
            # Track our current target to allow for the above filter.
            self.target = self.weights

            # Weights are already scaled to a total leverage of 1 (see
            # Model.predict), so they can be applied directly.
            for asset, weight in zip(self.portfolio, self.weights):
                self.SetHoldings(asset, weight)

    def Checkpoint(self):
        """Plot the latest train/test accuracy and cross entropy (all x100)."""
        self.Plot("Strategy Equity", 'Train_Model_Accuracy', 100.*self.model.train_accuracy)
        self.Plot("Strategy Equity", 'Test_Model_Accuracy', 100.*self.model.test_accuracy)
        self.Plot("Strategy Equity", 'Train_Model_Cross_Entropy_x100', 100.*self.model.train_ce)
        self.Plot("Strategy Equity", 'Test_Model_Cross_Entropy_x100', 100.*self.model.test_ce)


class Model():
    """Single-layer multi-label classifier over windows of open-price changes.

    `hist_data` must be assigned by the owner before `preproessing`/`predict`
    are called; the code indexes it as `hist_data.loc[ticker].open`, i.e. a
    frame whose first index level is the ticker and that has an `open` column.
    `current_price` maps each ticker string to its latest price.
    """

    def __init__(self, symbols):
        """Store the universe, derive the network dimensions and build the graph.

        symbols: sequence of portfolio symbols; `str(symbol)` is used as the
            lookup key everywhere in this class.
        """
        self.symbols = symbols

        # Number of inputs for training (will lose 1).
        self.eval_lookback = 252*4 + 1

        # The past n open-to-open price changes per asset, to be assembled
        # into the full feature set per input sample.
        self.n_features_per_asset = 15

        # The input window holds every asset's price data.
        self.n_features = self.n_features_per_asset * len(self.symbols)

        # Each asset gets its own probability of being relevant to the input
        # 'image'.
        self.n_classes = len(self.symbols)

        # How much historical data do we need?
        self.warmup_count = self.eval_lookback + self.n_features

        # Filled in by the algorithm right after construction.
        self.hist_data = None

        # Define our tensorflow model/network.
        self.network_setup()

        # Every asset's current open price at rebalance time, keyed by ticker.
        self.current_price = {}

    def network_setup(self):
        """Build the TensorFlow graph, the training op and the metric ops."""
        # The Tensorflow tutorial does a great job (with illustrations), so
        # comments are mostly left out here:
        # https://www.tensorflow.org/get_started/mnist/beginners
        self.sess = tf.InteractiveSession()

        # Our feed dicts pipe data into these tensors on runs/evals.
        # Input layer and correct labels.
        self.x = tf.placeholder(tf.float32, shape=[None, self.n_features])
        self.y_ = tf.placeholder(tf.float32, shape=[None, self.n_classes])

        def weight_variable(shape):
            initial = tf.truncated_normal(shape, stddev=0.1)
            return tf.Variable(initial)

        def bias_variable(shape):
            initial = tf.constant(0.1, shape=shape)
            return tf.Variable(initial)

        # The brain of our network: the weights and biases.
        self.W = weight_variable([self.n_features, self.n_classes])
        self.b = bias_variable([self.n_classes])

        # The actual model is a painfully simple linear layer.
        self.y = tf.matmul(self.x, self.W) + self.b

        # Output layer: sigmoid instead of softmax so probabilities are not
        # normalised across classes and we get a per-class probability, source:
        # https://towardsdatascience.com/multi-label-image-classification-with-inception-net-cbb2ee538e30
        self.y_pred = tf.nn.sigmoid(self.y)

        self.cross_entropy = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y_, logits=self.y))

        # For fun we use AdamOptimizer instead of the basic vanilla
        # GradientDescentOptimizer.
        self.train_step = tf.train.AdamOptimizer(1e-3).minimize(self.cross_entropy)

        # Metric ops, adjusted for multi class/label as per the multi-label
        # tutorial.
        self.correct_prediction = tf.equal(tf.round(self.y_pred), self.y_)
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))

        # Done later than in the Tensorflow tutorial because AdamOptimizer
        # creates its own variables, which must also be initialised.
        self.sess.run(tf.global_variables_initializer())

    def update_history(self, new_hist):
        """Append `new_hist` and keep at most `warmup_count` rows per symbol.

        new_hist: frame with the same layout as `hist_data`. An empty frame
            leaves the history unchanged.
        """
        if new_hist.empty:
            return
        combined = pd.concat([self.hist_data, new_hist])
        # groupby(...).tail keeps the original row order and trims each
        # symbol independently, so all series stay the same bounded length.
        self.hist_data = combined.groupby(level=0, sort=False).tail(self.warmup_count)

    def _open_series(self):
        """Return {ticker: historical opens with the current price appended}."""
        series = {}
        for asset in self.symbols:
            ticker = str(asset)
            opens = self.hist_data.loc[ticker].open.values.flatten().astype(np.float32)
            series[ticker] = np.append(opens, self.current_price[ticker])
        return series

    @staticmethod
    def _window_returns(prices):
        """Return the step-to-step fractional changes of a price window."""
        return np.diff(prices) / prices[:-1]

    def preproessing(self, algo_context):
        """Build the train/test feature and label arrays from the price data.

        Sets `X_train`, `X_test`, `y_train` and `y_test`. The most recent 5%
        of samples (at least one) are held out as the test set.

        algo_context: unused; kept for interface compatibility.

        Raises:
            ValueError: if fewer than two samples can be built.
        """
        # Input features: a sliding window of past changes in open price per
        # asset acts as our input "image". By no means a good idea to
        # discover alpha...
        tickers = [str(asset) for asset in self.symbols]
        all_data = self._open_series()

        # Right-align every series on the shortest one so that position i
        # refers to the same observation for every asset.
        n_obs = min(len(prices) for prices in all_data.values())
        all_data = {ticker: prices[-n_obs:] for ticker, prices in all_data.items()}

        window = self.n_features_per_asset
        features = []
        labels = []
        # The window ends at price i (inclusive) and the label is the move
        # i -> i+1, mirroring predict(), whose window ends at the newest price.
        for i in range(window, n_obs - 1):
            sample = [self._window_returns(all_data[ticker][i - window:i + 1])
                      for ticker in tickers]
            features.append(np.array(sample).flatten())

            # Open-to-open change for the next day per asset, in percent so
            # assets are comparable.
            next_change = np.array([
                100.*(all_data[ticker][i + 1] - all_data[ticker][i])/all_data[ticker][i]
                for ticker in tickers
            ])

            # Multi-label ground truth: 1 for every asset that ended far
            # enough in the green to be favorable.
            ml = np.zeros_like(next_change, dtype=np.float32)
            ml[next_change > 0.001] = 1.0
            labels.append(ml)

        if len(labels) < 2:
            raise ValueError(
                "Not enough history to build a train/test split: %d sample(s)" % len(labels))

        features = np.array(features)
        labels = np.array(labels)

        # Test/train split: unfortunate to lose recent data, but we need data
        # never seen by the train set. Due to the overlapping sliding windows
        # this may still lead to rapid overfitting.
        # Clamp to >= 1: a zero split would make `[:-0]` an empty train set.
        split_len = max(1, int(len(labels)*0.05))

        self.X_train = features[:-split_len]
        self.X_test = features[-split_len:]
        self.y_train = labels[:-split_len]
        self.y_test = labels[-split_len:]

    def train(self, algo_context, n_steps=100):
        """Run `n_steps` full-batch training steps and record the metrics.

        Sets `train_accuracy`, `test_accuracy`, `train_ce` and `test_ce`.

        algo_context: unused; kept for interface compatibility.
        n_steps: number of optimizer steps to run on the training set.
        """
        train_feed = {self.x: self.X_train, self.y_: self.y_train}
        test_feed = {self.x: self.X_test, self.y_: self.y_test}

        for _ in range(n_steps):
            self.train_step.run(session=self.sess, feed_dict=train_feed)

        # Collect some metrics for charting.
        self.train_accuracy = self.accuracy.eval(session=self.sess, feed_dict=train_feed)
        self.test_accuracy = self.accuracy.eval(session=self.sess, feed_dict=test_feed)

        self.train_ce = self.cross_entropy.eval(session=self.sess, feed_dict=train_feed)
        self.test_ce = self.cross_entropy.eval(session=self.sess, feed_dict=test_feed)

    def predict(self, algo_context):
        """Return per-asset target weights from the latest price window.

        Each asset whose rounded predicted probability is 1 gets an equal
        share so that the weights sum to 1; if none is selected all weights
        are 0. Also stores the raw probabilities in `current_forecast`.

        algo_context: unused; kept for interface compatibility.
        """
        all_data = self._open_series()
        window = self.n_features_per_asset

        # Latest window per asset, ending at the current price.
        sample = [self._window_returns(all_data[str(asset)][-window - 1:])
                  for asset in self.symbols]
        pred_feat = np.array(sample).flatten()

        pred_proba = self.y_pred.eval(session=self.sess, feed_dict={self.x: [pred_feat]})
        self.current_forecast = pred_proba[0]

        # Cash or long; additionally ensure no NaNs to prevent a crash
        # (NOT IDEAL, CAN FLAG BUYS).
        classified = np.clip(np.nan_to_num(np.round(pred_proba[0])), 0., 1.)

        # Every asset with a 1.0 is purchased; scale so leverage stays at 1.0.
        total = np.sum(classified)
        if total != 0.:
            classified /= total

        return classified