# Backtest
#
# Overall Statistics
# Total Trades 10960, Average Win 0.10%, Average Loss -0.08%, Compounding Annual Return 25.744%,
# Drawdown 14.000%, Expectancy 0.154, Net Profit 92.677%, Sharpe Ratio 1.52, Loss Rate 48%,
# Win Rate 52%, Profit-Loss Ratio 1.20, Alpha 0.197, Beta -0.012, Annual Standard Deviation 0.129,
# Annual Variance 0.017, Information Ratio 0.717, Tracking Error 0.173, Treynor Ratio -16.882,
# Total Fees $13611.46
# Derek M Tishler - 2017
# https://tishlercapital.com/

# Based on the MNIST TensorFlow Softmax Classification Example
# https://www.tensorflow.org/get_started/mnist/beginners
# https://www.tensorflow.org/get_started/mnist/pros

# Extended from single class to multi class probabilities using:
# "Multi-label image classification with Inception net" - Radek Bartyzal
# https://towardsdatascience.com/multi-label-image-classification-with-inception-net-cbb2ee538e30

# In case you missed it, there is a slightly simpler single asset tensorflow example located here(use second post in thread):
# https://www.quantconnect.com/forum/discussion/2880/machine-learning---tensorflow-basic-example/p1/comment-8880

import random
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Seed every random number generator this script touches (Python's `random`,
# NumPy and TensorFlow) with the same value at import time. The intent is
# repeatable weight initialisation and training between runs.
seed  = 1
random.seed(seed)
np.random.seed(seed)
tf.set_random_seed(seed)

class BasicTemplateAlgorithm(QCAlgorithm):
    """Daily long-or-cash rebalancing driven by a multi-label TensorFlow classifier.

    On every scheduled rebalance the algorithm refreshes the model's price
    history, retrains the model, asks it which assets to hold and moves the
    portfolio to the returned weights.
    """

    def Initialize(self):
        """Configure the backtest, the universe, the model, charts and the schedule."""

        # setup backtest
        self.SetStartDate(2015, 1, 1)   # Set Start Date
        self.SetEndDate(2017, 11, 10)   # Set End Date
        self.SetCash(100000)            # Set Strategy Cash

        # We think (step through minute data) faster than we act (daily forecast
        # signal/rebalance) for better execution estimation.
        self.resolution = Resolution.Minute
        self.SetBrokerageModel(BrokerageName.InteractiveBrokersBrokerage, AccountType.Margin)

        # Current top market cap, simple universe, lots of bias but easy to work with data wise.
        # The order of this list fixes the order of the model outputs and of the weights.
        tickers = [
            "AAPL", "GOOGL", "MSFT", "NVDA", "FB",
            "BABA", "AMZN", "JNJ", "JPM", "XOM",
            "BAC", "WFC", "WMT", "V", "CVX",
            "INTC", "ORCL", "PFE", "T", "KO",
        ]
        self.portfolio = [self.AddEquity(ticker, self.resolution).Symbol for ticker in tickers]

        # Init the tensorflow model object and pass our portfolio so it knows
        # the number of classes in the output layer.
        self.model = Model(symbols=self.portfolio)

        # Custom charting for model performance
        sPlot = Chart('Strategy Equity')
        sPlot.AddSeries(Series('Model_Accuracy', SeriesType.Line, 2))
        sPlot.AddSeries(Series('Train_Model_Accuracy', SeriesType.Line, 2))
        sPlot.AddSeries(Series('Test_Model_Accuracy',  SeriesType.Line, 2))
        sPlot.AddSeries(Series('Loss',  SeriesType.Line, 3))
        sPlot.AddSeries(Series('Train_Model_Cross_Entropy_x100',  SeriesType.Line, 3))
        sPlot.AddSeries(Series('Test_Model_Cross_Entropy_x100',  SeriesType.Line, 3))
        self.AddChart(sPlot)

        # Our big history call, only done once to save time
        self.model.hist_data = self.History(
            self.portfolio, self.model.warmup_count, Resolution.Daily).astype(np.float32)
        # True until the first Rebalance: the initial history call above already
        # covers that day, so no extra bar is fetched then.
        self.do_once = True

        # Prevent order spam by tracking the current weight target and comparing against new targets
        self.target = np.zeros((len(self.portfolio),))

        # We are forecasting and trading on open-to-open price changes on a daily
        # time scale, so do the work every morning.
        self.Schedule.On(self.DateRules.EveryDay(self.portfolio[0]),
            self.TimeRules.AfterMarketOpen(self.portfolio[0]),
            Action(self.Rebalance))

    def Rebalance(self):
        """Refresh the data, retrain, forecast and move the portfolio to the new weights."""

        # Update the current price dictionary; the key is the symbol converted to a string.
        for asset in self.portfolio:
            asset = str(asset)
            self.model.current_price[asset] = float(self.Securities[asset].Price)

        # Accrue history over time instead of making huge, slow history calls each step.
        if not self.do_once:
            new_hist = self.History(self.portfolio, 1, Resolution.Daily).astype(np.float32)
            combined = pd.concat([self.model.hist_data, new_hist])
            # The model selects rows with hist_data.loc[asset], so the first index
            # level is the symbol. Trim per symbol: dropping a single row of the
            # whole frame would shorten only one asset while the rest keep growing,
            # which misaligns the per-asset series.
            self.model.hist_data = combined.groupby(level=0, sort=False).tail(self.model.warmup_count)
            self.Log(str([str(asset) for asset in self.portfolio]))
        else:
            self.do_once = False

        # Prepare our data now that it has been updated
        self.model.preproessing(self)

        # Perform a number of training steps with the new data
        self.model.train(self)

        # Using the latest input feature set, get the per-asset target weights
        self.weights = self.model.predict(self)

        # Some charting of model metrics
        self.Checkpoint()

        # A little ugly, but lets keep our weight info in the log
        self.Log(str(self.weights))

        # In case of a repeated forecast, skip the rebalance to reduce fees/orders.
        if np.any(self.weights != self.target):

            # Track our current target to allow for the filter above
            self.target = self.weights

            # The weights are already normalised by Model.predict to sum to 1 (or are all 0).
            for asset, weight in zip(self.portfolio, self.weights):
                self.SetHoldings(asset, weight)

    def Checkpoint(self):
        """Plot the model's train/test accuracy and cross entropy, each scaled by 100."""
        self.Plot("Strategy Equity", 'Train_Model_Accuracy', 100.*self.model.train_accuracy)
        self.Plot("Strategy Equity", 'Test_Model_Accuracy', 100.*self.model.test_accuracy)
        self.Plot("Strategy Equity", 'Train_Model_Cross_Entropy_x100', 100.*self.model.train_ce)
        self.Plot("Strategy Equity", 'Test_Model_Cross_Entropy_x100', 100.*self.model.test_ce)
        
        

class Model():
    """Single-layer sigmoid (multi-label) classifier over windows of open-to-open returns.

    Each sample concatenates, for every asset, its last `n_features_per_asset`
    fractional price changes. Each label vector marks the assets whose next
    price change exceeded 0.001 percent.

    Attributes the caller must set before `preproessing`/`predict`:
        hist_data: DataFrame whose first index level is the asset and which has
            an `open` column.
        current_price: dict mapping str(asset) to the latest price.
    """

    def __init__(self, symbols):
        """Store the universe, derive the layer sizes and build the network.

        symbols: ordered sequence of assets; str(symbol) is used as the data key.
        """
        self.symbols = symbols

        # Number of inputs for training (will lose 1)
        self.eval_lookback        = 252*4 + 1

        # The past n price changes per asset that make up one asset's slice of a sample
        self.n_features_per_asset = 15

        # The input window holds every asset's price changes
        self.n_features   = self.n_features_per_asset * len(self.symbols)

        # One independent output probability per asset
        self.n_classes    = len(self.symbols)

        # Number of daily bars the owner requests as initial history
        self.warmup_count = self.eval_lookback + self.n_features

        # Define our tensorflow model/network
        self.network_setup()

        # Every asset's current price at rebalance time, keyed by str(asset)
        self.current_price = {}

    def network_setup(self):
        """Build the TensorFlow graph (one dense layer, sigmoid output) and initialise it."""
        self.sess = tf.InteractiveSession()

        # Feed dicts pipe data into these tensors: input features and correct labels.
        self.x  = tf.placeholder(tf.float32, shape=[None, self.n_features])
        self.y_ = tf.placeholder(tf.float32, shape=[None, self.n_classes])

        def weight_variable(shape):
            initial = tf.truncated_normal(shape, stddev=0.1)
            return tf.Variable(initial)

        def bias_variable(shape):
            initial = tf.constant(0.1, shape=shape)
            return tf.Variable(initial)

        self.W = weight_variable([self.n_features, self.n_classes])
        self.b = bias_variable([self.n_classes])

        # The logits are a plain linear map of the inputs
        self.y = tf.matmul(self.x, self.W) + self.b

        # Sigmoid instead of softmax: each class gets its own probability rather
        # than all classes sharing one normalised distribution. Source:
        # https://towardsdatascience.com/multi-label-image-classification-with-inception-net-cbb2ee538e30
        self.y_pred = tf.nn.sigmoid(self.y)
        self.cross_entropy = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y_, logits=self.y))

        self.train_step = tf.train.AdamOptimizer(1e-3).minimize(self.cross_entropy)

        # Accuracy is the fraction of individual (sample, class) entries whose
        # rounded probability equals the label.
        self.correct_prediction = tf.equal(tf.round(self.y_pred), self.y_)
        self.accuracy           = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))

        # Initialise last so the optimizer's own variables are initialised too
        self.sess.run(tf.global_variables_initializer())

    def _price_series(self, asset):
        """Return the asset's historical opens with its current price appended."""
        opens = self.hist_data.loc[asset].open.values.flatten().astype(np.float32)
        return np.append(opens, self.current_price[asset])

    def preproessing(self, algo_context):
        """Build the train/test feature and label arrays from the price data.

        algo_context: unused; kept so callers can keep passing the algorithm.

        Sets X_train, y_train, X_test and y_test. The test set is the most
        recent 5% of the samples (at least one sample).
        """
        n_lags = self.n_features_per_asset
        assets = [str(asset) for asset in self.symbols]
        series = [self._price_series(asset) for asset in assets]

        # Every series ends with the current price, so keep the shortest common
        # length counted from the end. Equal positions then refer to the same
        # observation for every asset, even when histories differ in length.
        n_obs  = min(len(s) for s in series)
        prices = np.array([s[len(s) - n_obs:] for s in series], dtype=np.float64)

        features = []
        labels   = []
        for i in range(n_lags, n_obs - 1):
            # n_lags fractional changes ending at price i. This matches predict(),
            # whose window ends at the newest price with the target being the
            # change that follows it.
            window = prices[:, i - n_lags:i + 1]
            features.append((np.diff(window, axis=1) / window[:, :-1]).flatten())

            # Percent change from price i to price i+1, per asset
            next_change = 100. * (prices[:, i + 1] - prices[:, i]) / prices[:, i]

            # Multi-label ground truth: 1 for each asset that went up by more than 0.001 percent
            labels.append((next_change > 0.001).astype(np.float32))

        # reshape keeps a 2-D shape even when there are no samples at all
        features = np.array(features, dtype=np.float64).reshape(-1, n_lags * len(assets))
        labels   = np.array(labels, dtype=np.float32).reshape(-1, len(assets))

        # Hold out the most recent samples so the test data was never trained on.
        # At least one sample: a split length of 0 would make [:-0] an empty
        # training set and [-0:] a test set holding everything.
        split_len    = max(1, int(len(labels) * 0.05))
        self.X_train = features[:-split_len]
        self.X_test  = features[-split_len:]
        self.y_train = labels[:-split_len]
        self.y_test  = labels[-split_len:]

    def train(self, algo_context):
        """Run 100 full-batch training steps and record train/test metrics.

        algo_context: unused; kept so callers can keep passing the algorithm.

        Sets train_accuracy, test_accuracy, train_ce and test_ce.
        """
        train_feed = {self.x: self.X_train, self.y_: self.y_train}
        test_feed  = {self.x: self.X_test, self.y_: self.y_test}

        for _ in range(100):
            self.train_step.run(session=self.sess, feed_dict=train_feed)

        # Collect some metrics for charting
        self.train_accuracy = self.accuracy.eval(session=self.sess, feed_dict=train_feed)
        self.test_accuracy  = self.accuracy.eval(session=self.sess, feed_dict=test_feed)
        self.train_ce       = self.cross_entropy.eval(session=self.sess, feed_dict=train_feed)
        self.test_ce        = self.cross_entropy.eval(session=self.sess, feed_dict=test_feed)

    def _latest_features(self):
        """Return the newest feature vector: each asset's last n fractional price changes."""
        n_lags = self.n_features_per_asset
        per_asset = []
        for asset in self.symbols:
            window = self._price_series(str(asset))[-(n_lags + 1):]
            per_asset.append(np.diff(window) / window[:-1])
        return np.array(per_asset).flatten()

    @staticmethod
    def _to_weights(probabilities):
        """Round probabilities to 0/1 (NaN becomes 0) and scale the ones to sum to 1."""
        # Cash or long; NaNs are zeroed so a bad forecast cannot crash the rebalance
        classified = np.clip(np.nan_to_num(np.round(probabilities)), 0., 1.)

        # Every selected asset gets an equal share so the total leverage is 1.0
        total = np.sum(classified)
        if total != 0.:
            classified /= total
        return classified

    def predict(self, algo_context):
        """Forecast with the latest features and return per-asset portfolio weights.

        algo_context: unused; kept so callers can keep passing the algorithm.

        Returns an array ordered like `symbols` that is either all zeros or
        sums to 1. The raw probabilities are stored in current_forecast.
        """
        pred_feat  = self._latest_features()
        pred_proba = self.y_pred.eval(session=self.sess, feed_dict={self.x: [pred_feat]})

        self.current_forecast = pred_proba[0]

        return self._to_weights(pred_proba[0])