# Backtest
#
# Overall Statistics
#   Total Orders:                 196
#   Average Win:                  14.57%
#   Average Loss:                 -8.06%
#   Compounding Annual Return:    47.945%
#   Drawdown:                     73.600%
#   Expectancy:                   0.547
#   Start Equity:                 10000
#   End Equity:                   103703.10
#   Net Profit:                   937.031%
#   Sharpe Ratio:                 0.905
#   Sortino Ratio:                0.835
#   Probabilistic Sharpe Ratio:   24.920%
#   Loss Rate:                    45%
#   Win Rate:                     55%
#   Profit-Loss Ratio:            1.81
#   Alpha:                        0.397
#   Beta:                         0.862
#   Annual Standard Deviation:    0.537
#   Annual Variance:              0.289
#   Information Ratio:            0.738
#   Tracking Error:               0.518
#   Treynor Ratio:                0.564
#   Total Fees:                   $629.60
#   Estimated Strategy Capacity:  $340000000.00
#   Lowest Capacity Asset:        MSTR RBGP9S2961YD
#   Portfolio Turnover:           8.97%
from AlgorithmImports import *
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd

class MLTradingAlgorithm(QCAlgorithm):
    """Long-only MSTR strategy driven by a weekly re-trained random forest.

    Daily bars are collected in a rolling window. Every Monday the model is
    re-fitted on Bollinger-band and historical-volatility features derived
    from that window; on every new bar the model predicts whether the next
    close will be higher than the current one and the position is opened or
    liquidated accordingly.
    """

    # Number of daily bars kept in the rolling window (and warmed up).
    WINDOW_SIZE = 200
    # Look-back of the Bollinger-band moving average / standard deviation.
    BB_PERIOD = 20
    # Look-back of the annualized historical-volatility estimate.
    HV_PERIOD = 30
    # Minimum number of labeled rows required before the model is fitted.
    MIN_TRAINING_ROWS = 50

    def Initialize(self):
        """Configure dates, cash, the MSTR subscription, model and schedule."""
        # 1. Setup Algorithm Parameters
        self.SetStartDate(2019, 1, 1)  # Start date
        self.SetEndDate(2024, 12, 31)  # End date
        self.SetCash(10000)            # Initial capital

        # 2. Add Equity (MSTR)
        self.symbol = self.AddEquity("MSTR", Resolution.Daily).Symbol

        # 3. Rolling window holding the most recent TradeBars
        self.data = RollingWindow[TradeBar](self.WINDOW_SIZE)

        # 4. Warm-up period so the rolling window is filled before trading
        self.SetWarmUp(self.WINDOW_SIZE)

        # 5. Initialize Machine Learning Model
        self.model = RandomForestClassifier(n_estimators=100, random_state=42)
        self.training_count = 0
        self.is_model_trained = False  # Flag to check if model is trained

        # 6. Schedule Training Every Monday at 10:00 AM
        self.Schedule.On(self.DateRules.Every(DayOfWeek.Monday),
                         self.TimeRules.At(10, 0),
                         self.TrainModel)

    def OnData(self, data):
        """Store the new bar, then trade on the model's next-day prediction.

        Args:
            data: The Slice delivered by the engine for the current time step.
        """
        # 7. Only TradeBars may go into the window; looking the symbol up in
        #    data.Bars avoids picking up other data types for the same symbol.
        bars = data.Bars
        if not bars.ContainsKey(self.symbol):
            return
        trade_bar = bars[self.symbol]
        if trade_bar is None:
            return

        # 8. Add TradeBar data to rolling window (also during warm-up, which
        #    is exactly what fills the window)
        self.data.Add(trade_bar)

        # Never run the trading logic on warm-up data.
        if self.IsWarmingUp:
            return

        # 9. Ensure rolling window is ready
        if not self.data.IsReady or self.data.Count < self.WINDOW_SIZE:
            return

        # Check if the model is trained
        if not self.is_model_trained:
            self.Debug("Model not trained yet. Skipping prediction.")
            return

        # 10. Prepare the latest feature row
        df = self.GetFeatureDataFrame()
        if df is None or len(df) == 0:
            return

        # Most recent bar's features, kept as a one-row DataFrame so the
        # column names match the ones the model was fitted with.
        latest_features = df.iloc[[-1], :-1]

        # 11. Predict; a failure is reported with its cause instead of being
        #     swallowed by a bare except.
        try:
            prediction = self.model.predict(latest_features)[0]  # 1 = Buy, 0 = Sell
        except Exception as err:
            self.Debug(f"Error during prediction: {err}")
            return

        # 12. Execute Trading Logic
        holdings = self.Portfolio[self.symbol].Quantity

        # If prediction is Buy (1), and we don't currently hold shares, go long
        if prediction == 1 and holdings <= 0:
            self.SetHoldings(self.symbol, 1.0)
        # If prediction is Sell (0), and we do hold shares, liquidate
        elif prediction == 0 and holdings > 0:
            self.Liquidate(self.symbol)

    def TrainModel(self):
        """Re-fit the classifier on the current window and log its accuracy.

        Uses a chronological 80/20 split; the last 20% of the labeled rows
        are only used to report out-of-sample accuracy.
        """
        # Only rows whose next-day outcome is already known may be used.
        df = self.GetFeatureDataFrame(for_training=True)
        if df is None or len(df) < self.MIN_TRAINING_ROWS:
            self.Debug("Insufficient data for training.")
            return

        # Split into X (features) and y (target)
        X = df.iloc[:, :-1]
        y = df.iloc[:, -1]

        # 80/20 time-based split (no shuffling, so no random_state is needed)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, shuffle=False
        )

        # Fit the model
        self.model.fit(X_train, y_train)
        self.is_model_trained = True

        # Evaluate model accuracy
        train_accuracy = accuracy_score(y_train, self.model.predict(X_train))
        test_accuracy = accuracy_score(y_test, self.model.predict(X_test))

        self.training_count += 1
        self.Debug(f"Training #{self.training_count}: "
                   f"Train Accuracy: {train_accuracy:.2%}, "
                   f"Test Accuracy: {test_accuracy:.2%}")

    def GetFeatureDataFrame(self, for_training=False):
        """Build the feature/target table from the rolling window.

        Columns, in order: ``Close``, ``BB_mid``, ``BB_std``, ``BB_upper``,
        ``BB_lower``, ``HV_30`` and, last, ``Target`` (1 if the next day's
        close is above the current day's close, else 0). Rows are in
        chronological order, oldest first.

        Args:
            for_training: When True, the most recent row is removed because
                its next-day close is not known yet, so its ``Target`` would
                be a fabricated 0. When False (default) that row is kept so
                its features can be used for prediction; its ``Target`` value
                is then a placeholder and must not be used as a label.

        Returns:
            The DataFrame described above, or None while the window holds
            fewer than ``WINDOW_SIZE`` bars.
        """
        if self.data.Count < self.WINDOW_SIZE:
            return None

        # RollingWindow enumerates newest-first (index 0 is the latest bar);
        # reverse so every rolling/shift operation below runs forward in time.
        close_prices = [float(bar.Close) for bar in self.data]
        close_prices.reverse()
        df = pd.DataFrame(close_prices, columns=["Close"])

        # -----------------------------------------
        # 1) Bollinger Bands
        # -----------------------------------------
        df["BB_mid"] = df["Close"].rolling(self.BB_PERIOD).mean()
        df["BB_std"] = df["Close"].rolling(self.BB_PERIOD).std()
        df["BB_upper"] = df["BB_mid"] + 2 * df["BB_std"]
        df["BB_lower"] = df["BB_mid"] - 2 * df["BB_std"]

        # -----------------------------------------
        # 2) Historical Volatility (HV_30)
        #    annualized = std dev of daily returns * sqrt(252)
        # -----------------------------------------
        df["daily_returns"] = df["Close"].pct_change()
        df["HV_30"] = (
            df["daily_returns"].rolling(window=self.HV_PERIOD).std()
            * np.sqrt(252)
        )

        # -----------------------------------------
        # 3) Target variable
        #    1 = next day close > current day close
        #    0 = otherwise
        #    The comparison against the NaN produced by shift(-1) on the last
        #    row evaluates to False, so that row gets 0 rather than NaN.
        # -----------------------------------------
        df["Target"] = (df["Close"].shift(-1) > df["Close"]).astype(int)

        # Drop the leading rows whose indicators are not yet defined.
        df.dropna(inplace=True)

        # daily_returns is intermediate; keep HV_30 and the Bollinger bands.
        df.drop(columns=["daily_returns"], inplace=True)

        if for_training:
            # The newest bar has no known outcome yet; exclude its label.
            df = df.iloc[:-1]

        return df