Overall Statistics
Total Orders
1475
Average Win
0.55%
Average Loss
-0.41%
Compounding Annual Return
18.610%
Drawdown
19.000%
Expectancy
0.575
Start Equity
100000
End Equity
559406.86
Net Profit
459.407%
Sharpe Ratio
0.941
Sortino Ratio
0.919
Probabilistic Sharpe Ratio
44.655%
Loss Rate
33%
Win Rate
67%
Profit-Loss Ratio
1.36
Alpha
0.058
Beta
0.68
Annual Standard Deviation
0.131
Annual Variance
0.017
Information Ratio
0.251
Tracking Error
0.108
Treynor Ratio
0.181
Total Fees
$9734.83
Estimated Strategy Capacity
$260000000.00
Lowest Capacity Asset
ZUO WTMF35A5I3QD
Portfolio Turnover
3.48%
#region imports
from AlgorithmImports import *

from scipy import stats
import statsmodels.api as sm
#endregion

class ExpectedIdiosyncraticSkewness(QCAlgorithm):
    """Monthly strategy holding the stocks with the lowest expected idiosyncratic skewness.

    Each rebalance the most liquid stocks are ranked by a forecast of the
    skewness of their market-model residuals (daily log returns regressed on
    SPY). The lowest-ranked fraction is held, weighted by a market-cap proxy
    (three-month basic average shares times price).
    """

    def initialize(self):
        """Set the backtest window and cash, then register SPY and the universe."""
        self.set_start_date(2009, 7, 1)
        self.set_end_date(2019, 7, 30)
        self.set_cash(100000)

        # Number of most-liquid stocks that are ranked by expected skewness.
        self._number_of_coarse_symbol = 200
        # Fraction of the ranked stocks (lowest expected skewness) that is held.
        self._bottom_percent = 0.05
        # Target portfolio weight per symbol, rebuilt by the fine selection.
        self._weights = {}
        # Selection and trading are skipped while the algorithm time is before this.
        self._next_rebalance = self.time
        # SPY is the market regressor used in _calculate_expected_skewness.
        self.add_equity("SPY", Resolution.DAILY)

        self.universe_settings.resolution = Resolution.DAILY
        self.add_universe(self._coarse_selection_and_skewness_sorting, self._get_weights_in_fine_selection)

    def _coarse_selection_and_skewness_sorting(self, coarse):
        """Pick the liquid stocks with the lowest expected idiosyncratic skewness.

        Args:
            coarse: Coarse universe data; each item exposes ``symbol``,
                ``price``, ``dollar_volume`` and ``has_fundamental_data``.

        Returns:
            ``Universe.UNCHANGED`` before the next rebalance time or when no
            stock passes the filter; otherwise the symbols of the
            ``ceil(_number_of_coarse_symbol * _bottom_percent)`` stocks with
            the lowest expected skewness.
        """
        if self.time < self._next_rebalance:
            return Universe.UNCHANGED

        candidates = [x for x in coarse if x.has_fundamental_data and x.price >= 5]
        candidates.sort(key=lambda x: x.dollar_volume, reverse=True)
        high_volume_stocks = [x.symbol for x in candidates[:self._number_of_coarse_symbol]]
        if not high_volume_stocks:
            # Nothing to rank; the regressions below cannot run on empty input.
            return Universe.UNCHANGED

        symbol_and_skew = self._calculate_expected_skewness(high_volume_stocks)

        # Select by position: a label slice (.loc[:n]) includes its end label
        # and would return n + 1 rows. Rows without a forecast are dropped so
        # they cannot be selected.
        selection_count = math.ceil(self._number_of_coarse_symbol * self._bottom_percent)
        lowest_skew = symbol_and_skew.dropna(subset=['skew']).iloc[:selection_count]

        return [self.symbol(x) for x in lowest_skew['symbol'].values]

    def _get_weights_in_fine_selection(self, fine):
        """Compute market-cap-proportional target weights for the selected stocks.

        The cap proxy is three-month basic average shares times price. Stocks
        whose proxy is not a finite positive number are left out, so a single
        bad value cannot turn every weight into NaN or cause a division by zero.

        Args:
            fine: Fine fundamental objects for the coarse-selected symbols.

        Returns:
            The symbols that received a weight, largest cap first.
        """
        market_caps = {f.symbol: f.earning_reports.basic_average_shares.three_months * f.price for f in fine}
        usable_caps = {symbol: cap for symbol, cap in market_caps.items() if np.isfinite(cap) and cap > 0}
        total_cap = sum(usable_caps.values())

        ranked = sorted(usable_caps.items(), key=lambda kv: kv[1], reverse=True)
        # When nothing is usable, ranked is empty and no division takes place.
        self._weights = {symbol: cap / total_cap for symbol, cap in ranked}
        return list(self._weights)

    def on_securities_changed(self, changes):
        """Liquidate positions in securities that were removed from the universe."""
        for security in changes.removed_securities:
            if security.invested:
                self.liquidate(security.symbol, 'Removed from universe')

    def on_data(self, data):
        """Rebalance to the target weights once the rebalance time is reached.

        Args:
            data: Current data slice (not read; trading uses ``self._weights``).
        """
        if self.time < self._next_rebalance:
            return

        for symbol, weight in self._weights.items():
            # Skip entries that cannot be traded: no valid weight or no price yet.
            if np.isnan(weight) or self.securities[symbol].price == 0:
                continue
            self.set_holdings(symbol, weight)

        self._next_rebalance = Expiry.end_of_month(self.time)

    def _calculate_expected_skewness(self, universe):
        """Forecast idiosyncratic skewness for every symbol in ``universe``.

        Steps:
          1. Split daily log returns into a recent and a previous window at
             the first day of the month containing ``self.time - 10 days``.
          2. Per window and symbol, regress the stock's returns on SPY's and
             record the skewness and standard deviation of the residuals.
          3. Cross-sectionally regress recent skewness on the previous
             window's skewness and volatility.
          4. Apply those coefficients to the recent skewness and volatility
             to get each symbol's expected skewness.

        Args:
            universe: List of symbols to evaluate.

        Returns:
            DataFrame with columns ``symbol`` (string form of the symbol) and
            ``skew`` (expected skewness), sorted ascending by ``skew`` with a
            fresh 0..n-1 index. Symbols with missing inputs have NaN ``skew``
            and sort last.
        """
        month_end_this = self.time
        month_end_lag_1 = (self.time - timedelta(days=10)).replace(day=1)
        month_end_lag_2 = (month_end_lag_1 - timedelta(days=10)).replace(day=1)
        # Start one day early so the first return of the previous window has a prior close.
        history = self.history(universe + ['SPY'], month_end_lag_2 - timedelta(days=1), month_end_this, Resolution.DAILY)
        history = history["close"].unstack(level=0)
        daily_returns = (np.log(history) - np.log(history.shift(1)))[1:]

        daily_returns_this = daily_returns[daily_returns.index > month_end_lag_1]
        daily_returns_last = daily_returns[daily_returns.index <= month_end_lag_1]
        daily_returns_dict = {month_end_this: daily_returns_this, month_end_lag_1: daily_returns_last}

        predictor_list = []
        for month, returns in daily_returns_dict.items():
            # The market regressor is the same for every symbol in a window.
            market = sm.add_constant(returns['SPY'].values)  # Assume 'SPY' as market return
            for symbol in universe:
                ticker = str(symbol)
                if ticker not in returns.columns:
                    predictor_list.append([ticker, month, np.nan, np.nan])
                    continue

                # Missing returns propagate to NaN statistics here; the
                # cross-sectional fit below drops such rows.
                residuals = sm.OLS(returns[ticker].values, market).fit().resid
                predictor_list.append([ticker, month, stats.skew(residuals), stats.tstd(residuals)])

        predictor = pd.DataFrame(predictor_list, columns=['symbol', 'time', 'skew', 'vol'])
        recent = predictor[predictor['time'] == month_end_this]
        previous = predictor[predictor['time'] == month_end_lag_1]

        # Both windows list the symbols in universe order, so rows line up by symbol.
        Y = recent['skew'].values
        X = sm.add_constant(previous[['skew', 'vol']].values)
        coef = sm.OLS(Y, X, missing='drop').fit().params

        # Project the recent skewness/volatility through the fitted coefficients.
        predictor_t = recent[['skew', 'vol']].values
        ones = np.ones([len(predictor_t), 1])
        predictor_t = np.append(ones, predictor_t, 1)
        exp_skew = np.inner(predictor_t, coef)

        skew_df = recent[['symbol']].reset_index(drop=True)
        skew_df['skew'] = exp_skew
        skew_df = skew_df.sort_values(by=['skew']).reset_index(drop=True)

        return skew_df