Overall Statistics
Total Orders
15
Average Win
14.41%
Average Loss
-3.32%
Compounding Annual Return
123.934%
Drawdown
16.900%
Expectancy
2.818
Start Equity
100000
End Equity
223439.95
Net Profit
123.440%
Sharpe Ratio
2.286
Sortino Ratio
3.193
Probabilistic Sharpe Ratio
80.796%
Loss Rate
29%
Win Rate
71%
Profit-Loss Ratio
4.34
Alpha
0.833
Beta
-0.197
Annual Standard Deviation
0.375
Annual Variance
0.141
Information Ratio
2.213
Tracking Error
0.444
Treynor Ratio
-4.359
Total Fees
$124.93
Estimated Strategy Capacity
$140000000.00
Lowest Capacity Asset
SQQQ UK280CGTCB51
Portfolio Turnover
1.92%
# region imports
from AlgorithmImports import *

import tensorflow as tf
from transformers import TFBertForSequenceClassification, BertTokenizer, set_seed
from pathlib import Path
# endregion

class FinbertBaseModelAlgorithm(QCAlgorithm):
    """Trade a single equity long/short based on FinBERT sentiment of its news.

    Universe: on each month start, pick the one symbol — among the 10 largest
    by dollar volume — with the highest standard deviation of daily returns
    over the trailing year. Signal: run the pre-trained ProsusAI/finbert model
    over the 10 most recent Tiingo news descriptions for that symbol, aggregate
    the class probabilities with exponentially increasing (recency) weights,
    and go fully long if positive sentiment outweighs negative, otherwise take
    a -25% short position. Rebalances at most once every 14 days.
    """

    def initialize(self):
        """Set up dates, cash, the scheduled universe, the FinBERT model, and trading schedule."""
        self.set_start_date(2022, 1, 1)
        self.set_end_date(2023, 1, 1)
        self.set_cash(100_000)

        # SPY is used only as the reference symbol for month-start date rules.
        spy = Symbol.create("SPY", SecurityType.EQUITY, Market.USA)
        self.universe_settings.resolution = Resolution.DAILY
        self.universe_settings.schedule.on(self.date_rules.month_start(spy))
        # Select exactly one symbol per run: of the top 10 by dollar volume,
        # the one whose daily close-to-close returns over the past 365 days
        # have the highest standard deviation (i.e. the most volatile).
        self._universe = self.add_universe(
            lambda fundamental: [
                self.history(
                    [f.symbol for f in sorted(fundamental, key=lambda f: f.dollar_volume)[-10:]], 
                    timedelta(365), Resolution.DAILY
                )['close'].unstack(0).pct_change().iloc[1:].std().idxmax()
            ]
        )

        # Seed the transformers RNGs for reproducible model outputs.
        # NOTE(review): second positional arg presumably enables deterministic
        # ops in the backend — confirm against the installed transformers version.
        set_seed(1, True)
        
        # Load the tokenizer and the model from the local HF cache only
        # (local_files_only=True avoids any network fetch at runtime).
        model_path = "ProsusAI/finbert"
        self._tokenizer = BertTokenizer.from_pretrained(model_path, local_files_only=True)
        self._model = TFBertForSequenceClassification.from_pretrained(model_path, local_files_only=True)

        # datetime.min guarantees the 14-day rebalance guard passes on the first call.
        self._last_rebalance_time = datetime.min
        self.schedule.on(
            self.date_rules.month_start(spy, 1),
            self.time_rules.midnight,
            self._trade
        )

        # Warm up 30 days so history/news data is available before trading.
        self.set_warm_up(timedelta(30))

    def on_warmup_finished(self):
        """Place the initial trade as soon as the warm-up period ends."""
        self._trade()

    def on_securities_changed(self, changes):
        """Attach a TiingoNews feed to each added security; detach it on removal.

        The news dataset symbol is stored on the security object itself
        (security.dataset_symbol) so _trade can look it up later.
        """
        for security in changes.removed_securities:
            self.remove_security(security.dataset_symbol)
        for security in changes.added_securities:
            security.dataset_symbol = self.add_data(TiingoNews, security.symbol).symbol

    def _trade(self):
        """Score recent news with FinBERT and rebalance the single-asset position."""
        # Skip while warming up, and throttle rebalances to once per 14 days.
        if self.is_warming_up or self.time - self._last_rebalance_time < timedelta(14):
            return

        # Get the target security (the universe selects exactly one symbol).
        security = self.securities[list(self._universe.selected)[0]]

        # Get the latest news articles (10 most recent daily TiingoNews items).
        articles = self.history[TiingoNews](security.dataset_symbol, 10, Resolution.DAILY)
        article_text = [article.description for article in articles]
        if not article_text:
            return

        # Prepare the input sentences (batch-tokenize with padding/truncation).
        inputs = self._tokenizer(article_text, padding=True, truncation=True, return_tensors='tf')

        # Get the model outputs
        outputs = self._model(**inputs)

        # Apply softmax to the outputs to get probabilities
        # (one row per article; columns are the three sentiment classes).
        scores = tf.nn.softmax(outputs.logits, axis=-1).numpy()
        self.log(f"{str(scores)}")
        # Collapse per-article probabilities into one recency-weighted vector.
        scores = self._aggregate_sentiment_scores(scores)
        
        # Class order negative/neutral/positive matches the plot labels below.
        # NOTE(review): assumes FinBERT's label order is [negative, neutral,
        # positive] — confirm against the model's id2label config.
        self.plot("Sentiment Probability", "Negative", scores[0])
        self.plot("Sentiment Probability", "Neutral", scores[1])
        self.plot("Sentiment Probability", "Positive", scores[2])

        # Rebalance: fully long when positive outweighs negative, else a
        # modest -25% short. The True flag liquidates any other holdings
        # before setting the new target.
        weight = 1 if scores[2] > scores[0] else -0.25
        self.set_holdings(security.symbol, weight, True)
        self._last_rebalance_time = self.time

    def _aggregate_sentiment_scores(self, sentiment_scores):
        """Return a recency-weighted average of per-article class probabilities.

        sentiment_scores is an (n_articles, n_classes) array; weights grow
        exponentially with row index, so later rows (assumed to be the more
        recent articles) count more. The result is a 1-D array of length
        n_classes summing to ~1.
        """
        n = sentiment_scores.shape[0]
        
        # Generate exponentially increasing weights
        weights = np.exp(np.linspace(0, 1, n))
        
        # Normalize weights to sum to 1
        weights /= weights.sum()
        
        # Apply weights to sentiment scores
        weighted_scores = sentiment_scores * weights[:, np.newaxis]
        
        # Aggregate weighted scores by summing them
        aggregated_scores = weighted_scores.sum(axis=0)
        
        return aggregated_scores