Overall Statistics

Total Orders: 2093
Average Win: 0.56%
Average Loss: -0.40%
Compounding Annual Return: 19.362%
Drawdown: 21.800%
Expectancy: 0.440
Start Equity: 100000
End Equity: 596186.11
Net Profit: 496.186%
Sharpe Ratio: 0.914
Sortino Ratio: 0.936
Probabilistic Sharpe Ratio: 39.581%
Loss Rate: 40%
Win Rate: 60%
Profit-Loss Ratio: 1.39
Alpha: 0.049
Beta: 0.846
Annual Standard Deviation: 0.143
Annual Variance: 0.02
Information Ratio: 0.347
Tracking Error: 0.099
Treynor Ratio: 0.155
Total Fees: $8323.09
Estimated Strategy Capacity: $350000000.00
Lowest Capacity Asset: RHAT RN45PLW5RGBP
Portfolio Turnover: 5.11%

#region imports
from AlgorithmImports import *

from scipy import stats
import statsmodels.api as sm
#endregion


class ExpectedIdiosyncraticSkewness(QCAlgorithm):
    '''Step 1. Calculate the Fama-French daily regression residuals
       Step 2. Use the daily residuals to calculate historical monthly moments
       Step 3. Run a regression on the historical monthly moments to estimate the regression coefficients
       Step 4. Use the historical monthly moments and the estimated coefficients to calculate expected skewness
       Step 5. Sort symbols by expected skewness and go long the ones with the lowest skewness

       Note: Fama-French daily factor data is only available up to 06/30/2019, so the backtest
       runs only through the end of June 2019 and live trading is not feasible with the current version.

       Reference:
       [1] "Expected Idiosyncratic Skewness" by Boyer, Mitton and Vorkink, Rev Financ Stud, June 2009
           URL: https://academic.oup.com/rfs/article-abstract/23/1/169/1578688?redirectedFrom=PDF
       [2] Fama-French official data: https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
    '''

    def initialize(self):
        self.set_start_date(2009, 7, 1)                          # Set Start Date: Right after original paper published
        self.set_end_date(2019, 7, 30)                           # Set End Date
        self.set_cash(100000)                                   # Set Strategy Cash

        # Download Fama French factors data as a dataframe
        self._fama_french_factors_per_day = self._get_fama_french_factors()

        self._number_of_coarse_symbol = 200                     # Number of coarse-selected symbols to be ranked by expected skewness
        self._bottom_percent = 0.05                             # Fraction of the coarse universe to long, taken from the bottom of the skewness ranking
        self._weights = {}                                      # Dictionary of desired weights keyed by symbol
        self._next_rebalance = self.time                        # Next rebalance time

        self.universe_settings.resolution = Resolution.DAILY    # Subscribe to daily data for the selected symbols in the universe
        self.add_universe(self._coarse_selection_and_skewness_sorting, self._get_weights_in_fine_selection)

    def _coarse_selection_and_skewness_sorting(self, coarse):
        '''Coarse selection to get an initial universe for the skewness-sorting trade logic,
           then select the symbols to trade each month based on the skewness ranking.
        '''
        # Before next rebalance time, keep the current universe unchanged
        if self.time < self._next_rebalance:
            return Universe.UNCHANGED

        ### Run the coarse selection to narrow down the universe
        # Filter stocks by price and whether they have fundamental data in QC,
        # then sort by daily dollar volume in descending order
        sorted_by_volume = sorted([ x for x in coarse if x.has_fundamental_data and x.price > 5 ],
                                    key=lambda x: x.dollar_volume, reverse=True)
        high_volume_stocks = [ x.symbol for x in sorted_by_volume[:self._number_of_coarse_symbol] ]

        ### Select symbols to trade based on expected skewness at each month end

        # Estimate expected idiosyncratic skewness and select the lowest 5%
        symbol_and_skew = self._calculate_expected_skewness(high_volume_stocks)
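        # symbol_and_skew is sorted by expected skewness in ascending order, so the first rows hold the lowest-skewness names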
        symbol_and_skew = symbol_and_skew.loc[:math.ceil(self._number_of_coarse_symbol * self._bottom_percent)]

        # Return the symbols
        return [self.symbol(x) for x in symbol_and_skew.symbol.values]

    def _get_weights_in_fine_selection(self, fine):
        '''Get fine fundamental data and calculate portfolio weights based on market capitalization
        '''
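        # Value weights: w_i = market_cap_i / sum_j(market_cap_j), so larger companies receive larger allocations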
        # Calculate market cap as shares outstanding multiplied by stock price
        self._weights = { f.symbol: f.earning_reports.basic_average_shares.three_months * f.price
            for f in fine }

        # Form value-weighted portfolio
        total_cap = sum(self._weights.values())

        # Compute the weights and sort them in descending order to place bigger orders first
        self._weights = { k: v / total_cap 
            for k, v in sorted(self._weights.items(), key=lambda kv: kv[1], reverse=True) }

        return [ x.symbol for x in fine ]

    def on_securities_changed(self, changes):
        '''Liquidate symbols that are removed from the dynamic universe
        '''
        for security in changes.removed_securities:
            if security.invested:
                self.liquidate(security.symbol, 'Removed from universe')

    def on_data(self, data):
        '''Rebalance the portfolio once a month with weights based on market cap
        '''
        # Before next rebalance, do nothing
        if self.time < self._next_rebalance:
            return

        # Placing orders
        for symbol, weight in self._weights.items():
            if np.isnan(weight) or self.securities[symbol].price == 0:
                continue
            self.set_holdings(symbol, weight)

        # Schedule the next rebalance for the end of the current month
        self._next_rebalance = Expiry.end_of_month(self.time)

    def _calculate_expected_skewness(self, universe):
        '''Calculate expected skewness using historical moments and estimated regression coefficients
        '''
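        # In notation, the three computations below are:
        #   (1) per stock and month, a Fama-French time-series regression on daily excess returns,
        #         r - rf = a + b*(Mkt-RF) + s*SMB + h*HML + e,
        #       whose residuals e give that month's idiosyncratic skewness and volatility;
        #   (2) a cross-sectional regression across stocks, skew_t = c0 + c1*skew_{t-1} + c2*vol_{t-1} + u;
        #   (3) the expected skewness forecast, E[skew_{t+1}] = c0 + c1*skew_t + c2*vol_t.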

        ### Get predictors

        # Get historical returns for two months
        month_end_this = self.time
        month_end_lag_1 = (self.time - timedelta(days = 10)).replace(day = 1)
        month_end_lag_2 = (month_end_lag_1 - timedelta(days = 10)).replace(day = 1)
        history = self.history(universe, month_end_lag_2 - timedelta(days = 1), month_end_this, Resolution.DAILY)
        history = history["close"].unstack(level = 0)
        daily_returns = (np.log(history) - np.log(history.shift(1)))[1:]

        # Merge Fama-French factors to daily returns based on dates available in return series
        daily_returns.reset_index(inplace=True)
        daily_returns = daily_returns.merge(self._fama_french_factors_per_day, left_on = 'time', right_on = 'time')

        daily_returns_this = daily_returns[daily_returns['time'] > month_end_lag_1]
        daily_returns_last = daily_returns[daily_returns['time'] <= month_end_lag_1]
        daily_returns_dict = {month_end_this: daily_returns_this, month_end_lag_1: daily_returns_last}
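        # daily_returns_dict maps each month boundary to that month's daily returns, so the loop below can compute moments per month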

        # For each stock and each month, run the Fama-French time-series regression and calculate historical moments
        column_list = list(daily_returns.columns)
        predictor_list = []

        for month, returns in daily_returns_dict.items():

            for symbol in universe:

                if str(symbol) not in column_list:
                    predictor_list.append([str(symbol), month, np.nan, np.nan])
                    continue

                Y = (returns[str(symbol)] - returns['RF']).values
                X = returns[['Mkt_RF', 'SMB', 'HML']].values
                X = sm.add_constant(X)
                results = sm.OLS(Y, X).fit()

                # Use the daily residuals to calculate the monthly moments
                hist_skew, hist_vol = stats.skew(results.resid), stats.tstd(results.resid)
                predictor_list.append([str(symbol), month, hist_skew, hist_vol])

        predictor = pd.DataFrame(predictor_list, columns=['symbol', 'time', 'skew', 'vol'])

        ### Estimate coefficients by regressing the current month's skewness on last month's historical moments
        Y = predictor[predictor['time'] == month_end_this]['skew'].values
        X = predictor[predictor['time'] == month_end_lag_1][['skew', 'vol']].values
        X = sm.add_constant(X)
        results = sm.OLS(Y, X, missing='drop').fit()
        coef = results.params
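        # coef holds [c0, c1, c2]: the intercept and the loadings on lagged skewness and lagged volatility, in that order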

        ### Calculate expected skewness
        predictor_t = predictor[predictor['time'] == month_end_this][['skew', 'vol']].values
        ones = np.ones([len(predictor_t), 1])
        predictor_t = np.append(ones, predictor_t, 1)
        exp_skew = np.inner(predictor_t, coef)

        skew_df = predictor[predictor['time'] == month_end_this][['symbol']].reset_index(drop=True)
        skew_df.loc[:,'skew'] = exp_skew
        skew_df = skew_df.sort_values(by=['skew']).reset_index(drop=True)

        return skew_df

    def _get_fama_french_factors(self):
        '''Download the Fama-French factor data from a GitHub repo and read it into a DataFrame.
           The data originally comes from Kenneth French's official homepage; the zip archive was extracted and the CSV uploaded to the GitHub repo.
        '''
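        # The CSV is assumed to contain a few description lines at the top, a short footer at the bottom,
        # and daily rows of the form "YYYYMMDD,Mkt-RF,SMB,HML,RF" in between (layout inferred from the parsing below)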
        content = self.download("https://raw.githubusercontent.com/QuantConnect/Tutorials/master/04%20Strategy%20Library/354%20Expected%20Idiosyncratic%20Skewness/F-F_Research_Data_Factors_daily.CSV")

        # Drop the first 5 and last 2 lines, which are descriptive text rather than data rows
        data = content.splitlines()
        data = [x.split(',') for x in data[5:-2]]

        df = pd.DataFrame(data, columns = ['time', 'Mkt_RF', 'SMB', 'HML', 'RF'], dtype=np.float64)
        # Add one day to match Lean's convention: daily data is timestamped at midnight UTC of the day after the trading day that produced it
        df.time = pd.to_datetime(df.time, format='%Y%m%d') + timedelta(1)
        return df.set_index('time')