Overall Statistics

Total Trades                   1450
Average Win                    0.31%
Average Loss                   -0.20%
Compounding Annual Return      2.534%
Drawdown                       16.000%
Expectancy                     0.098
Net Profit                     14.487%
Sharpe Ratio                   0.366
Probabilistic Sharpe Ratio     4.746%
Loss Rate                      56%
Win Rate                       44%
Profit-Loss Ratio              1.50
Alpha                          0.006
Beta                           0.148
Annual Standard Deviation      0.053
Annual Variance                0.003
Information Ratio              -0.626
Tracking Error                 0.109
Treynor Ratio                  0.132
Total Fees                     $3369.15
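As a quick sanity check on the table, the reported expectancy is consistent with the standard definition (win rate times profit-loss ratio, minus loss rate). The snippet below is illustrative and assumes that definition; the small residual comes from rounding in the displayed figures.

# Illustrative sanity check, assuming the standard expectancy formula
# (not part of the strategy code): expectancy = W * R - L, where W is
# the win rate, L the loss rate, and R the profit-loss ratio.
win_rate, loss_rate, profit_loss_ratio = 0.44, 0.56, 1.50
expectancy = win_rate * profit_loss_ratio - loss_rate
print(expectancy)  # 0.10, matching the reported 0.098 up to rounding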
# coding: utf-8
# Natural Language Toolkit: vader
#
# Copyright (C) 2001-2019 NLTK Project
# Author: C.J. Hutto <Clayton.Hutto@gtri.gatech.edu>
#         Ewan Klein <ewan@inf.ed.ac.uk> (modifications)
#         Pierpaolo Pantone <24alsecondo@gmail.com> (modifications)
#         George Berry <geb97@cornell.edu> (modifications)
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT
#
# Modifications to the original VADER code have been made in order to
# integrate it into NLTK. These have involved changes to
# ensure Python 3 compatibility, and refactoring to achieve greater modularity.

"""
If you use the VADER sentiment analysis tools, please cite:

Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for
Sentiment Analysis of Social Media Text. Eighth International Conference on
Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
"""

import math
import re
import string
from itertools import product
from itertools import tee

#import nltk.data
#from .util import pairwise

##Constants##

# (empirically derived mean sentiment intensity rating increase for booster words)
B_INCR = 0.293
B_DECR = -0.293

# (empirically derived mean sentiment intensity rating increase for using
# ALLCAPs to emphasize a word)
C_INCR = 0.733
N_SCALAR = -0.74

# for removing punctuation
REGEX_REMOVE_PUNCTUATION = re.compile('[{0}]'.format(re.escape(string.punctuation)))

PUNC_LIST = [
    ".", "!", "?", ",", ";", ":", "-", "'", "\"",
    "!!", "!!!", "??", "???", "?!?", "!?!", "?!?!", "!?!?",
]

NEGATE = {
    "aint", "arent", "cannot", "cant", "couldnt", "darent", "didnt", "doesnt",
    "ain't", "aren't", "can't", "couldn't", "daren't", "didn't", "doesn't",
    "dont", "hadnt", "hasnt", "havent", "isnt", "mightnt", "mustnt", "neither",
    "don't", "hadn't", "hasn't", "haven't", "isn't", "mightn't", "mustn't",
    "neednt", "needn't", "never", "none", "nope", "nor", "not", "nothing",
    "nowhere", "oughtnt", "shant", "shouldnt", "uhuh", "wasnt", "werent",
    "oughtn't", "shan't", "shouldn't", "uh-uh", "wasn't", "weren't",
    "without", "wont", "wouldnt", "won't", "wouldn't", "rarely", "seldom",
    "despite",
}

# booster/dampener 'intensifiers' or 'degree adverbs'
# http://en.wiktionary.org/wiki/Category:English_degree_adverbs
BOOSTER_DICT = {
    "absolutely": B_INCR, "amazingly": B_INCR, "awfully": B_INCR,
    "completely": B_INCR, "considerably": B_INCR, "decidedly": B_INCR,
    "deeply": B_INCR, "effing": B_INCR, "enormously": B_INCR,
    "entirely": B_INCR, "especially": B_INCR, "exceptionally": B_INCR,
    "extremely": B_INCR, "fabulously": B_INCR, "flipping": B_INCR,
    "flippin": B_INCR, "fricking": B_INCR, "frickin": B_INCR,
    "frigging": B_INCR, "friggin": B_INCR, "fully": B_INCR,
    "fucking": B_INCR, "greatly": B_INCR, "hella": B_INCR,
    "highly": B_INCR, "hugely": B_INCR, "incredibly": B_INCR,
    "intensely": B_INCR, "majorly": B_INCR, "more": B_INCR, "most": B_INCR,
    "particularly": B_INCR, "purely": B_INCR, "quite": B_INCR,
    "really": B_INCR, "remarkably": B_INCR, "so": B_INCR,
    "substantially": B_INCR, "thoroughly": B_INCR, "totally": B_INCR,
    "tremendously": B_INCR, "uber": B_INCR, "unbelievably": B_INCR,
    "unusually": B_INCR, "utterly": B_INCR, "very": B_INCR,
    "almost": B_DECR, "barely": B_DECR, "hardly": B_DECR,
    "just enough": B_DECR, "kind of": B_DECR, "kinda": B_DECR,
    "kindof": B_DECR, "kind-of": B_DECR, "less": B_DECR, "little": B_DECR,
    "marginally": B_DECR, "occasionally": B_DECR, "partly": B_DECR,
    "scarcely": B_DECR, "slightly": B_DECR, "somewhat": B_DECR,
    "sort of": B_DECR, "sorta": B_DECR, "sortof": B_DECR, "sort-of": B_DECR,
}

# check for special case idioms using a sentiment-laden keyword known to SAGE
SPECIAL_CASE_IDIOMS = {
    "the shit": 3,
    "the bomb": 3,
    "bad ass": 1.5,
    "yeah right": -2,
    "cut the mustard": 2,
    "kiss of death": -1.5,
    "hand to mouth": -2,
}


##Static methods##

def negated(input_words, include_nt=True):
    """
    Determine if input contains negation words
    """
    neg_words = NEGATE
    if any(word.lower() in neg_words for word in input_words):
        return True
    if include_nt:
        if any("n't" in word.lower() for word in input_words):
            return True
    for first, second in pairwise(input_words):
        if second.lower() == "least" and first.lower() != 'at':
            return True
    return False


def normalize(score, alpha=15):
    """
    Normalize the score to be between -1 and 1 using an alpha that
    approximates the max expected value
    """
    norm_score = score / math.sqrt((score * score) + alpha)
    return norm_score


def allcap_differential(words):
    """
    Check whether just some words in the input are ALL CAPS

    :param list words: The words to inspect
    :returns: `True` if some but not all items in `words` are ALL CAPS
    """
    is_different = False
    allcap_words = 0
    for word in words:
        if word.isupper():
            allcap_words += 1
    cap_differential = len(words) - allcap_words
    if 0 < cap_differential < len(words):
        is_different = True
    return is_different


def scalar_inc_dec(word, valence, is_cap_diff):
    """
    Check if the preceding words increase, decrease, or negate/nullify the
    valence
    """
    scalar = 0.0
    word_lower = word.lower()
    if word_lower in BOOSTER_DICT:
        scalar = BOOSTER_DICT[word_lower]
        if valence < 0:
            scalar *= -1
        # check if booster/dampener word is in ALLCAPS (while others aren't)
        if word.isupper() and is_cap_diff:
            if valence > 0:
                scalar += C_INCR
            else:
                scalar -= C_INCR
    return scalar


class SentiText(object):
    """
    Identify sentiment-relevant string-level properties of input text.
    """

    def __init__(self, text):
        if not isinstance(text, str):
            text = str(text.encode('utf-8'))
        self.text = text
        self.words_and_emoticons = self._words_and_emoticons()
        # doesn't separate words from adjacent punctuation
        # (keeps emoticons & contractions)
        self.is_cap_diff = allcap_differential(self.words_and_emoticons)

    def _words_plus_punc(self):
        """
        Returns mapping of form:
        {
            'cat,': 'cat',
            ',cat': 'cat',
        }
        """
        # removes punctuation (but loses emoticons & contractions)
        no_punc_text = REGEX_REMOVE_PUNCTUATION.sub('', self.text)
        words_only = no_punc_text.split()
        # remove singletons
        words_only = set(w for w in words_only if len(w) > 1)
        # the product gives ('cat', ',') and (',', 'cat')
        punc_before = {''.join(p): p[1] for p in product(PUNC_LIST, words_only)}
        punc_after = {''.join(p): p[0] for p in product(words_only, PUNC_LIST)}
        words_punc_dict = punc_before
        words_punc_dict.update(punc_after)
        return words_punc_dict

    def _words_and_emoticons(self):
        """
        Removes leading and trailing punctuation
        Leaves contractions and most emoticons
        Does not preserve punc-plus-letter emoticons (e.g. :D)
        """
        wes = self.text.split()
        words_punc_dict = self._words_plus_punc()
        wes = [we for we in wes if len(we) > 1]
        for i, we in enumerate(wes):
            if we in words_punc_dict:
                wes[i] = words_punc_dict[we]
        return wes


class SentimentIntensityAnalyzer(object):
    """
    Give a sentiment intensity score to sentences.
""" def __init__(self, lexicon_file): self.lexicon_file = lexicon_file self.lexicon = self.make_lex_dict() def make_lex_dict(self): """ Convert lexicon file to a dictionary """ lex_dict = {} for line in self.lexicon_file.split('\n'): (word, measure) = line.strip().split('\t')[0:2] lex_dict[word] = float(measure) return lex_dict def polarity_scores(self, text): """ Return a float for sentiment strength based on the input text. Positive values are positive valence, negative value are negative valence. """ sentitext = SentiText(text) # text, words_and_emoticons, is_cap_diff = self.preprocess(text) sentiments = [] words_and_emoticons = sentitext.words_and_emoticons for item in words_and_emoticons: valence = 0 i = words_and_emoticons.index(item) if ( i < len(words_and_emoticons) - 1 and item.lower() == "kind" and words_and_emoticons[i + 1].lower() == "of" ) or item.lower() in BOOSTER_DICT: sentiments.append(valence) continue sentiments = self.sentiment_valence(valence, sentitext, item, i, sentiments) sentiments = self._but_check(words_and_emoticons, sentiments) return self.score_valence(sentiments, text) def sentiment_valence(self, valence, sentitext, item, i, sentiments): is_cap_diff = sentitext.is_cap_diff words_and_emoticons = sentitext.words_and_emoticons item_lowercase = item.lower() if item_lowercase in self.lexicon: # get the sentiment valence valence = self.lexicon[item_lowercase] # check if sentiment laden word is in ALL CAPS (while others aren't) if item.isupper() and is_cap_diff: if valence > 0: valence += C_INCR else: valence -= C_INCR for start_i in range(0, 3): if ( i > start_i and words_and_emoticons[i - (start_i + 1)].lower() not in self.lexicon ): # dampen the scalar modifier of preceding words and emoticons # (excluding the ones that immediately preceed the item) based # on their distance from the current item. 
                    s = scalar_inc_dec(
                        words_and_emoticons[i - (start_i + 1)], valence, is_cap_diff
                    )
                    if start_i == 1 and s != 0:
                        s = s * 0.95
                    if start_i == 2 and s != 0:
                        s = s * 0.9
                    valence = valence + s
                    valence = self._never_check(
                        valence, words_and_emoticons, start_i, i
                    )
                    if start_i == 2:
                        valence = self._idioms_check(valence, words_and_emoticons, i)

                        # future work: consider other sentiment-laden idioms
                        # other_idioms =
                        # {"back handed": -2, "blow smoke": -2, "blowing smoke": -2,
                        #  "upper hand": 1, "break a leg": 2,
                        #  "cooking with gas": 2, "in the black": 2, "in the red": -2,
                        #  "on the ball": 2, "under the weather": -2}

            valence = self._least_check(valence, words_and_emoticons, i)
        sentiments.append(valence)
        return sentiments

    def _least_check(self, valence, words_and_emoticons, i):
        # check for negation case using "least"
        if (
            i > 1
            and words_and_emoticons[i - 1].lower() not in self.lexicon
            and words_and_emoticons[i - 1].lower() == "least"
        ):
            if (
                words_and_emoticons[i - 2].lower() != "at"
                and words_and_emoticons[i - 2].lower() != "very"
            ):
                valence = valence * N_SCALAR
        elif (
            i > 0
            and words_and_emoticons[i - 1].lower() not in self.lexicon
            and words_and_emoticons[i - 1].lower() == "least"
        ):
            valence = valence * N_SCALAR
        return valence

    def _but_check(self, words_and_emoticons, sentiments):
        # check for modification in sentiment due to contrastive conjunction 'but'
        if 'but' in words_and_emoticons or 'BUT' in words_and_emoticons:
            try:
                bi = words_and_emoticons.index('but')
            except ValueError:
                bi = words_and_emoticons.index('BUT')
            # enumerate() keeps the true index even when scores repeat,
            # unlike list.index(), which always finds the first occurrence
            for si, sentiment in enumerate(sentiments):
                if si < bi:
                    sentiments[si] = sentiment * 0.5
                elif si > bi:
                    sentiments[si] = sentiment * 1.5
        return sentiments

    def _idioms_check(self, valence, words_and_emoticons, i):
        onezero = "{0} {1}".format(words_and_emoticons[i - 1], words_and_emoticons[i])

        twoonezero = "{0} {1} {2}".format(
            words_and_emoticons[i - 2],
            words_and_emoticons[i - 1],
            words_and_emoticons[i],
        )

        twoone = "{0} {1}".format(
            words_and_emoticons[i - 2], words_and_emoticons[i - 1]
        )

        threetwoone = "{0} {1} {2}".format(
            words_and_emoticons[i - 3],
            words_and_emoticons[i - 2],
            words_and_emoticons[i - 1],
        )

        threetwo = "{0} {1}".format(
            words_and_emoticons[i - 3], words_and_emoticons[i - 2]
        )

        sequences = [onezero, twoonezero, twoone, threetwoone, threetwo]

        for seq in sequences:
            if seq in SPECIAL_CASE_IDIOMS:
                valence = SPECIAL_CASE_IDIOMS[seq]
                break

        if len(words_and_emoticons) - 1 > i:
            zeroone = "{0} {1}".format(
                words_and_emoticons[i], words_and_emoticons[i + 1]
            )
            if zeroone in SPECIAL_CASE_IDIOMS:
                valence = SPECIAL_CASE_IDIOMS[zeroone]
        if len(words_and_emoticons) - 1 > i + 1:
            zeroonetwo = "{0} {1} {2}".format(
                words_and_emoticons[i],
                words_and_emoticons[i + 1],
                words_and_emoticons[i + 2],
            )
            if zeroonetwo in SPECIAL_CASE_IDIOMS:
                valence = SPECIAL_CASE_IDIOMS[zeroonetwo]

        # check for booster/dampener bi-grams such as 'sort of' or 'kind of'
        if threetwo in BOOSTER_DICT or twoone in BOOSTER_DICT:
            valence = valence + B_DECR
        return valence

    def _never_check(self, valence, words_and_emoticons, start_i, i):
        if start_i == 0:
            if negated([words_and_emoticons[i - 1]]):
                valence = valence * N_SCALAR
        if start_i == 1:
            if words_and_emoticons[i - 2] == "never" and (
                words_and_emoticons[i - 1] == "so"
                or words_and_emoticons[i - 1] == "this"
            ):
                valence = valence * 1.5
            elif negated([words_and_emoticons[i - (start_i + 1)]]):
                valence = valence * N_SCALAR
        if start_i == 2:
            if (
                words_and_emoticons[i - 3] == "never"
                and (
                    words_and_emoticons[i - 2] == "so"
"so" or words_and_emoticons[i - 2] == "this" ) or ( words_and_emoticons[i - 1] == "so" or words_and_emoticons[i - 1] == "this" ) ): valence = valence * 1.25 elif negated([words_and_emoticons[i - (start_i + 1)]]): valence = valence * N_SCALAR return valence def _punctuation_emphasis(self, sum_s, text): # add emphasis from exclamation points and question marks ep_amplifier = self._amplify_ep(text) qm_amplifier = self._amplify_qm(text) punct_emph_amplifier = ep_amplifier + qm_amplifier return punct_emph_amplifier def _amplify_ep(self, text): # check for added emphasis resulting from exclamation points (up to 4 of them) ep_count = text.count("!") if ep_count > 4: ep_count = 4 # (empirically derived mean sentiment intensity rating increase for # exclamation points) ep_amplifier = ep_count * 0.292 return ep_amplifier def _amplify_qm(self, text): # check for added emphasis resulting from question marks (2 or 3+) qm_count = text.count("?") qm_amplifier = 0 if qm_count > 1: if qm_count <= 3: # (empirically derived mean sentiment intensity rating increase for # question marks) qm_amplifier = qm_count * 0.18 else: qm_amplifier = 0.96 return qm_amplifier def _sift_sentiment_scores(self, sentiments): # want separate positive versus negative sentiment scores pos_sum = 0.0 neg_sum = 0.0 neu_count = 0 for sentiment_score in sentiments: if sentiment_score > 0: pos_sum += ( float(sentiment_score) + 1 ) # compensates for neutral words that are counted as 1 if sentiment_score < 0: neg_sum += ( float(sentiment_score) - 1 ) # when used with math.fabs(), compensates for neutrals if sentiment_score == 0: neu_count += 1 return pos_sum, neg_sum, neu_count def score_valence(self, sentiments, text): if sentiments: sum_s = float(sum(sentiments)) # compute and add emphasis from punctuation in text punct_emph_amplifier = self._punctuation_emphasis(sum_s, text) if sum_s > 0: sum_s += punct_emph_amplifier elif sum_s < 0: sum_s -= punct_emph_amplifier compound = normalize(sum_s) # discriminate between positive, negative and neutral sentiment scores pos_sum, neg_sum, neu_count = self._sift_sentiment_scores(sentiments) if pos_sum > math.fabs(neg_sum): pos_sum += punct_emph_amplifier elif pos_sum < math.fabs(neg_sum): neg_sum -= punct_emph_amplifier total = pos_sum + math.fabs(neg_sum) + neu_count pos = math.fabs(pos_sum / total) neg = math.fabs(neg_sum / total) neu = math.fabs(neu_count / total) else: compound = 0.0 pos = 0.0 neg = 0.0 neu = 0.0 sentiment_dict = { "neg": round(neg, 3), "neu": round(neu, 3), "pos": round(pos, 3), "compound": round(compound, 4), } return sentiment_dict def pairwise(iterable): """s -> (s0,s1), (s1,s2), (s2, s3), ...""" a, b = tee(iterable) next(b, None) return zip(a, b)
from QuantConnect.Data.Custom.Tiingo import *
from vaderSentiment import SentimentIntensityAnalyzer


class CompetitionExampleAlgorithm(QCAlgorithm):

    def Initialize(self):
        self.SetStartDate(2014, 10, 1)
        self.SetCash(100000)

        ## Set Universe Selection Model
        self.SetUniverseSelection(TechnologyETFUniverse())

        # Download the VADER lexicon once; the alpha model builds its
        # SentimentIntensityAnalyzer from this string
        self.vaderData = self.Download("https://www.dropbox.com/s/q5udnl4ou35o78f/vader_lexicon.txt?dl=1")

        ## Set Alpha Model
        self.SetAlpha(NewsSentimentAlphaModel(self.vaderData))

        ## Set Portfolio Construction Model
        self.SetPortfolioConstruction(InsightWeightingPortfolioConstructionModel())

        ## Set Execution Model
        self.SetExecution(ImmediateExecutionModel())

        ## Set Risk Management Model
        self.SetRiskManagement(NullRiskManagementModel())


class NewsSentimentAlphaModel:

    def __init__(self, vaderData):
        # __init__ must accept the downloaded lexicon string that
        # Initialize() passes in; the original referenced an undefined name
        self.day = -1
        self.custom = []
        # Build the analyzer once instead of re-parsing the lexicon for
        # every security on every update
        self.sid = SentimentIntensityAnalyzer(lexicon_file=vaderData)

    def Update(self, algorithm, data):
        insights = []

        # Run the model daily
        if algorithm.Time.day == self.day:
            return insights
        self.day = algorithm.Time.day

        weights = {}

        # Fetch the Tiingo news for the active securities and score each article
        for security in self.custom:
            if not data.ContainsKey(security):
                continue
            news = data[security]

            # Use the VADER sentiment model defined above
            sentiment = self.sid.polarity_scores(news.Description.lower())
            if sentiment["compound"] > 0:
                weights[security.Underlying] = sentiment["compound"]

        # Sort securities by sentiment ranking
        count = min(10, len(weights))
        if count == 0:
            return insights

        # Order the sentiment by value and select the top 10
        sortedbyValue = sorted(weights.items(), key=lambda x: x[1], reverse=True)
        selected = {kv[0]: kv[1] for kv in sortedbyValue[:count]}

        # Populate the list of insights with the selected data, where the
        # sentiment sign is the direction and its value is the weight
        closeTimeLocal = Expiry.EndOfDay(algorithm.Time)
        for symbol, weight in selected.items():
            insights.append(Insight.Price(symbol, closeTimeLocal, InsightDirection.Up, None, None, None, abs(weight)))

        return insights

    def OnSecuritiesChanged(self, algorithm, changes):
        for security in changes.AddedSecurities:
            # Tiingo's news is for US Equities only
            if security.Type == SecurityType.Equity:
                self.custom.append(algorithm.AddData(TiingoNews, security.Symbol).Symbol)
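For reference, make_lex_dict() assumes the downloaded lexicon is tab-separated, with the token in the first column and its mean valence in the second; any further columns are ignored. The sample line below is illustrative rather than copied from the actual file.

# Illustrative sketch of the line format make_lex_dict() expects
# (token<TAB>mean valence<TAB>optional extra columns); sample values
# are made up, not copied from vader_lexicon.txt.
sample_line = "amazing\t2.8\t0.61\t[3, 4, 3, 2, 2, 3, 3, 3, 2, 3]"
word, measure = sample_line.strip().split('\t')[0:2]
print(word, float(measure))  # amazing 2.8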