Overall Statistics

Total Orders                    14
Average Win                     9.04%
Average Loss                    -9.56%
Compounding Annual Return       14.613%
Drawdown                        35.800%
Expectancy                      -0.027
Start Equity                    100000
End Equity                      114570.54
Net Profit                      14.571%
Sharpe Ratio                    0.495
Sortino Ratio                   0.547
Probabilistic Sharpe Ratio      25.745%
Loss Rate                       50%
Win Rate                        50%
Profit-Loss Ratio               0.95
Alpha                           0.08
Beta                            -1.626
Annual Standard Deviation       0.574
Annual Variance                 0.329
Information Ratio               0.578
Tracking Error                  0.708
Treynor Ratio                   -0.175
Total Fees                      $59.62
Estimated Strategy Capacity     $260000000.00
Lowest Capacity Asset           SQQQ UK280CGTCB51
Portfolio Turnover              2.99%
# region imports
from AlgorithmImports import *
import tensorflow as tf
from transformers import TFBertForSequenceClassification, BertTokenizer, set_seed
from pathlib import Path
from datasets import Dataset
import pytz
import torch
# endregion


class FinbertBaseModelAlgorithm(QCAlgorithm):

    def initialize(self):
        self.set_start_date(2022, 1, 1)
        self.set_end_date(2023, 1, 1)
        self.set_cash(100_000)

        spy = Symbol.create("SPY", SecurityType.EQUITY, Market.USA)
        self.universe_settings.resolution = Resolution.DAILY
        self.universe_settings.schedule.on(self.date_rules.month_start(spy))
        # Select the most volatile of the 10 most-liquid US equities,
        # measured by the standard deviation of daily returns over the
        # trailing year.
        self._universe = self.add_universe(
            lambda fundamental: [
                self.history(
                    [f.symbol for f in sorted(fundamental, key=lambda f: f.dollar_volume)[-10:]],
                    timedelta(365), Resolution.DAILY
                )['close'].unstack(0).pct_change().iloc[1:].std().idxmax()
            ]
        )

        set_seed(1, True)
        self._last_rebalance_time = datetime.min
        self.schedule.on(
            self.date_rules.month_start(spy, 1),
            self.time_rules.midnight,
            self._trade
        )
        self.set_warm_up(timedelta(30))

        self._model_name = "ProsusAI/finbert"
        self._tokenizer = BertTokenizer.from_pretrained(self._model_name)

    def on_warmup_finished(self):
        self._trade()

    def on_securities_changed(self, changes):
        for security in changes.removed_securities:
            self.remove_security(security.dataset_symbol)
        for security in changes.added_securities:
            security.dataset_symbol = self.add_data(
                TiingoNews, security.symbol
            ).symbol

    def _trade(self):
        if (self.is_warming_up or
            self.time - self._last_rebalance_time < timedelta(14)):
            return

        # Get the target security.
        security = self.securities[list(self._universe.selected)[0]]

        # Get samples to fine-tune the model.
        samples = pd.DataFrame(columns=['text', 'label'])
        news_history = self.history(security.dataset_symbol, 30, Resolution.DAILY)
        if news_history.empty:
            return
        news_history = news_history.loc[security.dataset_symbol]['description']
        asset_history = self.history(
            security.symbol, timedelta(30), Resolution.SECOND
        ).loc[security.symbol]['close']
        for i in range(len(news_history.index) - 1):
            # Get the factor (article description).
            factor = news_history.iloc[i]
            if not factor:
                continue
            # Get the label (the market reaction to the news, for now).
            release_time = self._convert_to_eastern(news_history.index[i])
            next_release_time = self._convert_to_eastern(news_history.index[i+1])
            reaction_period = asset_history[
                (asset_history.index > release_time) &
                (asset_history.index < next_release_time + timedelta(seconds=1))
            ]
            if reaction_period.empty:
                continue
            label = (
                (reaction_period.iloc[-1] - reaction_period.iloc[0])
                / reaction_period.iloc[0]
            )
            # Save the training sample.
            samples.loc[len(samples), :] = [factor, label]
        samples = samples.iloc[-100:]
        if samples.shape[0] < 10:
            self.liquidate()
            return

        # Classify the market reaction into positive/negative/neutral:
        #   75% of the most negative labels => class 0 (negative)
        #   75% of the most positive labels => class 2 (positive)
        #   Remaining labels                => class 1 (neutral)
        sorted_samples = samples.sort_values(by='label', ascending=False).reset_index(drop=True)
        percent_signed = 0.75
        positive_cutoff = (
            int(percent_signed * len(sorted_samples[sorted_samples.label > 0]))
        )
        negative_cutoff = (
            len(sorted_samples)
            - int(percent_signed * len(sorted_samples[sorted_samples.label < 0]))
        )
        sorted_samples.loc[list(range(negative_cutoff, len(sorted_samples))), 'label'] = 0
        sorted_samples.loc[list(range(positive_cutoff, negative_cutoff)), 'label'] = 1
        sorted_samples.loc[list(range(0, positive_cutoff)), 'label'] = 2
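        # Hypothetical worked example of the cutoffs above: with 20 samples,
        # 8 positive and 6 negative raw returns, positive_cutoff =
        # int(0.75 * 8) = 6 and negative_cutoff = 20 - int(0.75 * 6) = 16.
        # After the descending sort, rows 0-5 become class 2 (positive),
        # rows 6-15 class 1 (neutral), and rows 16-19 class 0 (negative).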
        # Load the pre-trained model.
        model = TFBertForSequenceClassification.from_pretrained(
            self._model_name, num_labels=3, from_pt=True
        )
        # Compile the model.
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        )
        # Create the training dataset.
        dataset = Dataset.from_pandas(sorted_samples)
        dataset = dataset.map(
            lambda sample: self._tokenizer(
                sample['text'], padding='max_length', truncation=True
            )
        )
        dataset = model.prepare_tf_dataset(
            dataset, shuffle=True, tokenizer=self._tokenizer
        )
        # Train the model.
        model.fit(dataset, epochs=2)

        # Prepare the input sentences.
        inputs = self._tokenizer(
            list(samples['text'].values), padding=True, truncation=True,
            return_tensors='tf'
        )
        # Get the model outputs.
        outputs = model(**inputs)
        # Apply softmax to the outputs to get probabilities.
        scores = tf.nn.softmax(outputs.logits, axis=-1).numpy()
        scores = self._aggregate_sentiment_scores(scores)
        self.plot("Sentiment Probability", "Negative", scores[0])
        self.plot("Sentiment Probability", "Neutral", scores[1])
        self.plot("Sentiment Probability", "Positive", scores[2])

        # Rebalance.
        weight = 1 if scores[2] > scores[0] else -0.25
        self.set_holdings(security.symbol, weight, True)
        self._last_rebalance_time = self.time

    def _convert_to_eastern(self, dt):
        return dt.astimezone(pytz.timezone('US/Eastern')).replace(tzinfo=None)

    def _aggregate_sentiment_scores(self, sentiment_scores):
        n = sentiment_scores.shape[0]
        # Generate exponentially increasing weights.
        weights = np.exp(np.linspace(0, 1, n))
        # Normalize the weights to sum to 1.
        weights /= weights.sum()
        # Apply the weights to the sentiment scores.
        weighted_scores = sentiment_scores * weights[:, np.newaxis]
        # Aggregate the weighted scores by summing them.
        aggregated_scores = weighted_scores.sum(axis=0)
        return aggregated_scores
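To make the recency weighting in _aggregate_sentiment_scores concrete, the following is a minimal standalone sketch of the same computation outside the algorithm; the three-article probability matrix is hypothetical:

import numpy as np

# Per-article [negative, neutral, positive] probabilities, oldest first.
# These numbers are made up for illustration.
scores = np.array([
    [0.7, 0.2, 0.1],   # oldest article: mostly negative
    [0.2, 0.6, 0.2],   # middle article: mostly neutral
    [0.1, 0.2, 0.7],   # newest article: mostly positive
])

# Exponentially increasing weights, so newer articles count more.
weights = np.exp(np.linspace(0, 1, scores.shape[0]))
weights /= weights.sum()
print(weights)         # ~[0.186, 0.307, 0.506]

# Weighted sum over articles gives one probability per class.
aggregated = (scores * weights[:, np.newaxis]).sum(axis=0)
print(aggregated)      # ~[0.243, 0.323, 0.435]; the newest article dominates

Because the newest article carries roughly half the total weight here, the aggregated vector leans positive even though the oldest article was strongly negative, which is exactly the behavior the trading rule (scores[2] > scores[0]) relies on.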