Backtest

Overall Statistics
Total Trades: 3960; Average Win: 0.47%; Average Loss: -0.47%; Compounding Annual Return: 14.084%; Drawdown: 12.800%; Expectancy: 0.147; Net Profit: 273.746%; Sharpe Ratio: 0.926; Probabilistic Sharpe Ratio: 32.905%; Loss Rate: 42%; Win Rate: 58%; Profit-Loss Ratio: 0.99; Alpha: 0; Beta: 0; Annual Standard Deviation: 0.11; Annual Variance: 0.012; Information Ratio: 0.926; Tracking Error: 0.11; Treynor Ratio: 0; Total Fees: $18948.32; Estimated Strategy Capacity: $110000000.00; Lowest Capacity Asset: QQQ RIWIV7K5Z9LX; Portfolio Turnover: 57.34%

# region imports
from AlgorithmImports import *
from sklearn.ensemble import ExtraTreesClassifier
# endregion

class MuscularFluorescentPinkSnake(QCAlgorithm):
    """Framework algorithm that trades SPY and QQQ from per-symbol classifiers.

    One ``ExtraTreesClassifier`` is kept per symbol in ``self.models`` and is
    refitted on four years of daily history, using ``volume`` as the single
    (dummy) feature and "next one-day return is positive" as the target.
    ``ClassifierAlpha`` reads ``self.models`` and calls
    ``GetHistoryAndFeatures`` to turn predictions into insights.
    """

    def Initialize(self):
        """Configure dates, cash, brokerage, universe, training and framework models."""
        self.SetStartDate(2010, 1, 1)  # Set Start Date
        self.SetEndDate(2020, 1, 1)  # Set End Date
        self.SetCash(100000)  # Set Strategy Cash
        self.SetBrokerageModel(BrokerageName.InteractiveBrokersBrokerage, AccountType.Margin)

        # Manual universe of individual tickers
        tickers = ["SPY", "QQQ"]
        self.symbols = [Symbol.Create(ticker, SecurityType.Equity, Market.USA) for ticker in tickers]
        self.AddUniverseSelection(ManualUniverseSelectionModel(self.symbols))

        self.resolution = Resolution.Daily
        self.UniverseSettings.Resolution = self.resolution

        # One entry per symbol; None means "no fitted model yet", so consumers
        # can look a symbol up without risking a KeyError.
        self.models = {symbol: None for symbol in self.symbols}

        # Train once right away, then retrain every Sunday at 08:00
        self.Train(self.TrainModel)
        self.Train(self.DateRules.Every(DayOfWeek.Sunday), self.TimeRules.At(8, 0), self.TrainModel)

        self.SetAlpha(ClassifierAlpha(self))

        # Insight weighting Portfolio Construction Model
        self.SetPortfolioConstruction(InsightWeightingPortfolioConstructionModel())
        self.SetExecution(ImmediateExecutionModel())
        self.SetRiskManagement(NullRiskManagementModel())

        self.SetWarmup(2)

    def TrainModel(self):
        """Refit one classifier per symbol on the trailing four years of history.

        A symbol with no usable training rows is skipped and keeps whatever
        model (or ``None``) it had before. A model is only published to
        ``self.models`` after ``fit`` has succeeded.
        """
        end = self.Time
        start = end - timedelta(days=4*365)

        for symbol in self.symbols:
            # Train a model per symbol on 4 years of history, use 'volume' as the feature
            X, y = self.GetHistoryAndFeatures(symbol, ['volume'], start, end, include_y=True)

            if X.empty:
                self.Debug(f"TrainModel: no training data for {symbol}, keeping previous model")
                continue

            model = ExtraTreesClassifier(random_state=42)
            model.fit(X, y)
            self.models[symbol] = model

    def GetHistoryAndFeatures(self, symbol, features, start = None, end = None, include_y = False, lookback = None):
        """Fetch history for ``symbol`` and return the feature frame (and target).

        Args:
            symbol: Symbol to request history for.
            features: Names of the history columns to keep as features.
                Names that are not present in the history are ignored.
            start: Start of the history window; used when ``lookback`` is falsy.
            end: End of the history window; used when ``lookback`` is falsy.
            include_y: When true, also return the target and drop every row
                that contains a NaN in any column (this removes the last row,
                whose forward return is unknown).
            lookback: Number of bars to request instead of a start/end window.

        Returns:
            ``X`` when ``include_y`` is false, otherwise ``(X, y)``. ``X`` is
            indexed by (normalized time, symbol); ``y`` is a boolean Series that
            is True where the next one-day return is positive. When the history
            request yields no usable data, ``X`` is an empty DataFrame and ``y``
            an empty boolean Series.
        """
        if not lookback:
            history = self.History(symbol, start, end, self.resolution)
        else:
            history = self.History(symbol, lookback, self.resolution)

        # Nothing to build features from: hand back empty containers instead of
        # failing on the missing 'close' column below.
        if history.empty or 'close' not in history.columns:
            if include_y:
                return pd.DataFrame(), pd.Series(dtype=bool)
            return pd.DataFrame()

        # the target is the forward one day return: close[t+1] / close[t] - 1
        # (NaN on the last row, where tomorrow's close is not known yet)
        history['returns_1d'] = history['close'].pct_change().shift(-1)

        # clean up data and prepare for split into X and y
        history = history.reset_index().set_index('time').sort_index()
        history.index = history.index.normalize()
        history = history.set_index('symbol', append=True)

        # columns that carry no data at all cannot be used
        history = history.dropna(axis=1, how='all')

        # keep the requested features, in the order they appear in the history
        selected = [col for col in history.columns if col in features]

        if include_y:
            history = history.dropna()
            return history[selected], history['returns_1d'] > 0

        return history[selected].dropna()

class ClassifierAlpha(AlphaModel):
    """Alpha model that emits insights from the algorithm's per-symbol classifiers.

    For every symbol in ``algo.symbols`` it takes the most recent feature row,
    asks the symbol's model whether the next one-day return is positive, and
    emits an Up insight weighted by the predicted probability, or a Flat
    insight with weight 0 otherwise.
    """

    def __init__(self, algo):
        """Keep a reference to the algorithm that owns the models and feature builder.

        Args:
            algo: Algorithm instance exposing ``symbols``, ``models``,
                ``IsWarmingUp`` and ``GetHistoryAndFeatures``.
        """
        self.algo = algo

    def Update(self, algo, data):
        """Create one insight per symbol that has a model and recent features.

        Args:
            algo: Algorithm instance passed in by the framework (unused; the
                reference stored in ``__init__`` is used instead).
            data: Current data slice (unused; features come from a history request).

        Returns:
            List of insights; empty while the algorithm is warming up.
        """
        insights = []

        if self.algo.IsWarmingUp:
            return insights

        for symbol in self.algo.symbols:
            symbol = SymbolCache.GetSymbol(symbol)

            # Skip symbols that have no trained model. Compare against None
            # explicitly: truth-testing an ensemble estimator goes through its
            # __len__, which is only defined once it has been fitted.
            model = self.algo.models.get(symbol)
            if model is None:
                continue

            # In the original algo we need a certain length of history to run several moving averages etc.
            # this isn't really needed here as we're only using 'volume' as our dummy feature,
            # but we're getting 2 days anyway
            X_symbol = self.algo.GetHistoryAndFeatures(symbol, ['volume'], lookback = 2)
            if X_symbol.empty:
                continue

            # use the most recent row of the feature dataframe
            time = X_symbol.index[-1][0]
            X_symbol = X_symbol.loc[pd.IndexSlice[time, :], :]

            # predict whether the next 1d return is positive
            predicted_up = bool(model.predict(X_symbol)[0])

            # Look the "True" class up by label rather than by position: a model
            # trained on a single class only has one probability column.
            classes = list(model.classes_)
            probabilities = model.predict_proba(X_symbol)[0]
            proba = float(probabilities[classes.index(True)]) if True in classes else 0.0

            # self.algo.Debug("Algorithm Time "+str(self.algo.Time)+" - Last row in features "+str(time))

            # create the insights and use the prediction probability as weight
            if predicted_up:
                insights.append(
                    Insight.Price(symbol, timedelta(days=7), InsightDirection.Up, weight = proba))
            else:
                insights.append(
                    Insight.Price(symbol, timedelta(days=7), InsightDirection.Flat, weight = 0))

        return insights