Overall Statistics
Total Orders
15
Average Win
14.41%
Average Loss
-3.32%
Compounding Annual Return
123.934%
Drawdown
16.900%
Expectancy
2.818
Start Equity
100000
End Equity
223439.95
Net Profit
123.440%
Sharpe Ratio
2.286
Sortino Ratio
3.193
Probabilistic Sharpe Ratio
80.796%
Loss Rate
29%
Win Rate
71%
Profit-Loss Ratio
4.34
Alpha
0.833
Beta
-0.197
Annual Standard Deviation
0.375
Annual Variance
0.141
Information Ratio
2.213
Tracking Error
0.444
Treynor Ratio
-4.359
Total Fees
$124.93
Estimated Strategy Capacity
$140000000.00
Lowest Capacity Asset
SQQQ UK280CGTCB51
Portfolio Turnover
1.92%
# region imports
from AlgorithmImports import *

import tensorflow as tf
from transformers import TFBertForSequenceClassification, BertTokenizer, set_seed
from pathlib import Path
# endregion

class FinbertBaseModelAlgorithm(QCAlgorithm):
    """Trade a single equity long/short based on FinBERT sentiment of its news.

    Universe: on each month start, pick the one symbol — among the 10 largest
    by dollar volume — with the highest standard deviation of daily returns
    over the trailing year. Signal: run the pre-trained ProsusAI/finbert model
    over the 10 most recent Tiingo news descriptions for that symbol, aggregate
    the class probabilities with exponentially increasing (recency) weights,
    and go fully long if positive sentiment outweighs negative, otherwise take
    a -25% short position. Rebalances at most once every 14 days.
    """

    def initialize(self):
        """Set up dates, cash, the scheduled universe, the FinBERT model, and trading schedule."""
        self.set_start_date(2022, 1, 1)
        self.set_end_date(2023, 1, 1)
        self.set_cash(100_000)

        # SPY is used only as the reference symbol for month-start date rules.
        spy = Symbol.create("SPY", SecurityType.EQUITY, Market.USA)
        self.universe_settings.resolution = Resolution.DAILY
        self.universe_settings.schedule.on(self.date_rules.month_start(spy))
        # Select exactly one symbol per run: of the top 10 by dollar volume,
        # the one whose daily close-to-close returns over the past 365 days
        # have the highest standard deviation (i.e. the most volatile).
        self._universe = self.add_universe(
            lambda fundamental: [
                self.history(
                    [f.symbol for f in sorted(fundamental, key=lambda f: f.dollar_volume)[-10:]], 
                    timedelta(365), Resolution.DAILY
                )['close'].unstack(0).pct_change().iloc[1:].std().idxmax()
            ]
        )

        # Seed the transformers RNGs for reproducible model outputs.
        # NOTE(review): second positional arg presumably enables deterministic
        # ops in the backend — confirm against the installed transformers version.
        set_seed(1, True)
        
        # Load the tokenizer and the model from the local HF cache only
        # (local_files_only=True avoids any network fetch at runtime).
        model_path = "ProsusAI/finbert"
        self._tokenizer = BertTokenizer.from_pretrained(model_path, local_files_only=True)
        self._model = TFBertForSequenceClassification.from_pretrained(model_path, local_files_only=True)

        # datetime.min guarantees the 14-day rebalance guard passes on the first call.
        self._last_rebalance_time = datetime.min
        self.schedule.on(
            self.date_rules.month_start(spy, 1),
            self.time_rules.midnight,
            self._trade
        )

        # Warm up 30 days so history/news data is available before trading.
        self.set_warm_up(timedelta(30))

    def on_warmup_finished(self):
        """Place the initial trade as soon as the warm-up period ends."""
        self._trade()

    def on_securities_changed(self, changes):
        """Attach a TiingoNews feed to each added security; detach it on removal.

        The news dataset symbol is stored on the security object itself
        (security.dataset_symbol) so _trade can look it up later.
        """
        for security in changes.removed_securities:
            self.remove_security(security.dataset_symbol)
        for security in changes.added_securities:
            security.dataset_symbol = self.add_data(TiingoNews, security.symbol).symbol

    def _trade(self):
        """Score recent news with FinBERT and rebalance the single-asset position."""
        # Skip while warming up, and throttle rebalances to once per 14 days.
        if self.is_warming_up or self.time - self._last_rebalance_time < timedelta(14):
            return

        # Get the target security (the universe selects exactly one symbol).
        security = self.securities[list(self._universe.selected)[0]]

        # Get the latest news articles (10 most recent daily TiingoNews items).
        articles = self.history[TiingoNews](security.dataset_symbol, 10, Resolution.DAILY)
        article_text = [article.description for article in articles]
        if not article_text:
            return

        # Prepare the input sentences (batch-tokenize with padding/truncation).
        inputs = self._tokenizer(article_text, padding=True, truncation=True, return_tensors='tf')

        # Get the model outputs
        outputs = self._model(**inputs)

        # Apply softmax to the outputs to get probabilities
        # (one row per article; columns are the three sentiment classes).
        scores = tf.nn.softmax(outputs.logits, axis=-1).numpy()
        self.log(f"{str(scores)}")
        # Collapse per-article probabilities into one recency-weighted vector.
        scores = self._aggregate_sentiment_scores(scores)
        
        # Class order negative/neutral/positive matches the plot labels below.
        # NOTE(review): assumes FinBERT's label order is [negative, neutral,
        # positive] — confirm against the model's id2label config.
        self.plot("Sentiment Probability", "Negative", scores[0])
        self.plot("Sentiment Probability", "Neutral", scores[1])
        self.plot("Sentiment Probability", "Positive", scores[2])

        # Rebalance: fully long when positive outweighs negative, else a
        # modest -25% short. The True flag liquidates any other holdings
        # before setting the new target.
        weight = 1 if scores[2] > scores[0] else -0.25
        self.set_holdings(security.symbol, weight, True)
        self._last_rebalance_time = self.time

    def _aggregate_sentiment_scores(self, sentiment_scores):
        """Return a recency-weighted average of per-article class probabilities.

        sentiment_scores is an (n_articles, n_classes) array; weights grow
        exponentially with row index, so later rows (assumed to be the more
        recent articles) count more. The result is a 1-D array of length
        n_classes summing to ~1.
        """
        n = sentiment_scores.shape[0]
        
        # Generate exponentially increasing weights
        weights = np.exp(np.linspace(0, 1, n))
        
        # Normalize weights to sum to 1
        weights /= weights.sum()
        
        # Apply weights to sentiment scores
        weighted_scores = sentiment_scores * weights[:, np.newaxis]
        
        # Aggregate weighted scores by summing them
        aggregated_scores = weighted_scores.sum(axis=0)
        
        return aggregated_scores