# Backtest
#
# Overall Statistics
#   Total Orders:                 112
#   Average Win:                  2.51%
#   Average Loss:                 -1.56%
#   Compounding Annual Return:    6.053%
#   Drawdown:                     19.100%
#   Expectancy:                   0.447
#   Start Equity:                 10000
#   End Equity:                   14206.57
#   Net Profit:                   42.066%
#   Sharpe Ratio:                 0.214
#   Sortino Ratio:                0.16
#   Probabilistic Sharpe Ratio:   7.499%
#   Loss Rate:                    45%
#   Win Rate:                     55%
#   Profit-Loss Ratio:            1.61
#   Alpha:                        0.003
#   Beta:                         0.142
#   Annual Standard Deviation:    0.086
#   Annual Variance:              0.007
#   Information Ratio:            -0.53
#   Tracking Error:               0.164
#   Treynor Ratio:                0.13
#   Total Fees:                   $112.00
#   Estimated Strategy Capacity:  $480000000.00
#   Lowest Capacity Asset:        MSTR RBGP9S2961YD
#   Portfolio Turnover:           1.03%
from AlgorithmImports import *
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd

class MLTradingAlgorithm(QCAlgorithm):
    """Daily MSTR strategy driven by an SVM up/down classifier.

    Every daily bar is pushed into a fixed-size rolling window. Once a week
    the window is turned into a feature table (SMA, RSI, MACD, historical
    volatility) and an SVM is fitted to predict whether the next close will
    be above the current close. On each new bar the fitted model scores the
    latest bar; the strategy holds ``allocation_fraction`` of the portfolio
    while the predicted probability of an up-move exceeds the threshold and
    is flat otherwise.
    """

    # Number of daily bars kept in the rolling window (also the warm-up length).
    LOOKBACK_BARS = 200
    # Minimum number of labeled rows required before a (re)fit is attempted.
    MIN_TRAINING_ROWS = 50
    # Probability of class 1 ("up") above which the model is considered bullish.
    PROBABILITY_THRESHOLD = 0.5

    def Initialize(self):
        """Configure the backtest window, data subscription, model and schedule."""
        # 1. Algorithm Parameters
        self.SetStartDate(2019, 1, 1)         # Start date
        self.SetEndDate(2024, 12, 31)         # End date
        self.SetCash(10000)                   # Initial capital

        # 2. Add MSTR Equity
        self.symbol = self.AddEquity("MSTR", Resolution.Daily).Symbol

        # 3. RollingWindow to store the most recent TradeBars
        self.data = RollingWindow[TradeBar](self.LOOKBACK_BARS)

        # 4. Warm-Up Period (enough bars to fill the rolling window)
        self.SetWarmUp(self.LOOKBACK_BARS)

        # 5. Initialize SVM Model
        #    probability=True so we can get class probabilities
        self.model = SVC(probability=True, random_state=42)
        self.training_count = 0
        self.is_model_trained = False  # Tracks if the model is trained

        # 6. Partial allocation: 20% of total capital
        self.allocation_fraction = 0.2

        # 7. Schedule Training Every Monday at 10:00 AM
        self.Schedule.On(self.DateRules.Every(DayOfWeek.Monday),
                         self.TimeRules.At(10, 0),
                         self.TrainModel)

    def OnData(self, data):
        """Record the new bar and trade on the model's prediction for it.

        Args:
            data: The Slice delivered by the engine for the current time step.
        """
        # Read from the TradeBar collection so that only TradeBars ever reach
        # the RollingWindow[TradeBar].
        if not data.Bars.ContainsKey(self.symbol):
            return

        trade_bar = data.Bars[self.symbol]
        if trade_bar is None:
            return

        # Add TradeBar to Rolling Window
        self.data.Add(trade_bar)

        # Keep collecting bars during warm-up, but do not trade: orders are
        # not accepted by the engine while the algorithm is warming up.
        if self.IsWarmingUp or not self.data.IsReady:
            return

        # Ensure Model is Fitted Before Using It
        if not self.is_model_trained:
            self.Debug("Model is not trained yet. Skipping prediction.")
            return

        # Extract Features for Prediction
        df = self.GetFeatureDataFrame()
        if df is None or df.empty:
            return

        # The frame is indexed by chronological position; if the newest bar's
        # row was dropped (undefined indicator) there is nothing fresh to score.
        if df.index[-1] != self.data.Count - 1:
            return

        # Keep a one-row DataFrame (not a bare array) so the column names
        # match those the model was fitted with.
        latest_features = df.iloc[[-1], :-1]

        # Make Predictions using Probability Threshold
        try:
            # predict_proba gives [prob_class0, prob_class1]
            prob_up = self.model.predict_proba(latest_features)[0][1]
        except Exception as e:
            self.Debug(f"Error: Model prediction failed. {e}")
            return

        prediction = 1 if prob_up > self.PROBABILITY_THRESHOLD else 0

        # Trading Logic
        holdings = self.Portfolio[self.symbol].Quantity

        # Buy if prediction == 1 and not currently invested
        if prediction == 1 and holdings <= 0:
            self.SetHoldings(self.symbol, self.allocation_fraction)
        # Sell if prediction == 0 and currently invested
        elif prediction == 0 and holdings > 0:
            self.Liquidate(self.symbol)

    def TrainModel(self):
        """Refit the SVM on the labeled rows of the current rolling window.

        The most recent bar is excluded because its next-day outcome is not
        known yet. The remaining rows are split chronologically 80/20; the
        model is fitted on the older part and both accuracies are logged.
        """
        # Prepare Training Data
        df = self.GetFeatureDataFrame()
        if df is None:
            self.Debug("Insufficient data for training.")
            return

        # Drop the newest bar: its Target is only a placeholder.
        labeled = df.drop(index=self.data.Count - 1, errors="ignore")
        if len(labeled) < self.MIN_TRAINING_ROWS:  # Require enough data to train
            self.Debug("Insufficient data for training.")
            return

        # Split Data (chronological, no shuffle)
        X = labeled.iloc[:, :-1]  # Features
        y = labeled.iloc[:, -1]   # Target (0 or 1)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, shuffle=False
        )

        # SVC cannot be fitted on a single class; skip this round and keep
        # whatever model (if any) was fitted previously.
        if y_train.nunique() < 2:
            self.Debug("Training skipped: training window contains a single class.")
            return

        # Train SVM Model
        self.model.fit(X_train, y_train)
        self.is_model_trained = True

        # Evaluate Model Performance (Use predicted probabilities)
        train_pred = (self.model.predict_proba(X_train)[:, 1]
                      > self.PROBABILITY_THRESHOLD).astype(int)
        train_accuracy = accuracy_score(y_train, train_pred)

        test_pred = (self.model.predict_proba(X_test)[:, 1]
                     > self.PROBABILITY_THRESHOLD).astype(int)
        test_accuracy = accuracy_score(y_test, test_pred)

        self.training_count += 1
        self.Debug(f"Training #{self.training_count}: "
                   f"Train Accuracy: {train_accuracy:.2%}, "
                   f"Test Accuracy: {test_accuracy:.2%}")

    def GetFeatureDataFrame(self):
        """Build the chronological feature/target table from the rolling window.

        Returns:
            ``None`` while the window holds fewer than ``LOOKBACK_BARS`` bars.
            Otherwise a DataFrame ordered oldest -> newest whose index is the
            chronological position of each bar in the window and whose columns
            are ``Close, SMA_10, SMA_50, RSI, MACD, MACD_Signal, HV_30, Target``
            (``Target`` last). Rows with an undefined indicator are removed.

            ``Target`` is 1 when the following bar closed higher, else 0. The
            newest bar (index ``Count - 1``) has no following bar, so its
            ``Target`` is a placeholder 0 and must not be used as a label.
        """
        # Wait until the rolling window is full
        if self.data.Count < self.LOOKBACK_BARS:
            return None

        # RollingWindow enumerates newest-first (index 0 is the latest bar);
        # reverse it so rolling statistics and shift(-1) run forward in time.
        close_prices = [float(bar.Close) for bar in self.data]
        close_prices.reverse()
        df = pd.DataFrame(close_prices, columns=["Close"])

        # Feature Engineering
        df["SMA_10"] = df["Close"].rolling(window=10).mean()
        df["SMA_50"] = df["Close"].rolling(window=50).mean()

        # RSI Calculation (simple 14-bar averages of gains and losses)
        delta = df["Close"].diff()
        gain = (delta.where(delta > 0, 0)).rolling(14).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
        # loss == 0 gives rs == inf and RSI == 100; gain == loss == 0 gives
        # NaN, and that row is removed by dropna below.
        rs = gain / loss
        df["RSI"] = 100 - (100 / (1 + rs))

        # MACD Calculation
        ema_fast = df["Close"].ewm(span=12, adjust=False).mean()
        ema_slow = df["Close"].ewm(span=26, adjust=False).mean()
        df["MACD"] = ema_fast - ema_slow
        df["MACD_Signal"] = df["MACD"].ewm(span=9, adjust=False).mean()

        # Historical Volatility (HV_30), annualized
        df["HV_30"] = df["Close"].pct_change().rolling(window=30).std() * np.sqrt(252)

        # Define Target: 1 if next day's Close > today's Close, else 0.
        # Computed before dropna so each label refers to the true next bar.
        df["Target"] = (df["Close"].shift(-1) > df["Close"]).astype(int)

        # Remove rows with NaN values from rolling calculations
        df.dropna(inplace=True)

        return df