Overall Statistics
Total Orders
198
Average Win
0.67%
Average Loss
-0.76%
Compounding Annual Return
6.081%
Drawdown
49.200%
Expectancy
0.725
Start Equity
100000
End Equity
176131.22
Net Profit
76.131%
Sharpe Ratio
0.277
Sortino Ratio
0.31
Probabilistic Sharpe Ratio
0.739%
Loss Rate
8%
Win Rate
92%
Profit-Loss Ratio
0.88
Alpha
-0.045
Beta
1.032
Annual Standard Deviation
0.152
Annual Variance
0.023
Information Ratio
-0.505
Tracking Error
0.083
Treynor Ratio
0.041
Total Fees
$203.72
Estimated Strategy Capacity
$7400000.00
Lowest Capacity Asset
GE R735QTJ8XC9X
Portfolio Turnover
0.10%
#region imports
from AlgorithmImports import *

import statsmodels.api as sm
from sklearn.decomposition import PCA
#endregion


class PcaStatArbitrageAlgorithm(QCAlgorithm):
    '''Mean-reversion strategy built on PCA residuals.

    On every rebalance the universe selector keeps the most liquid equities,
    regresses each stock's centered log price on the leading principal
    components and selects the stocks whose latest residual z-score is below
    ``self._zscore_threshold``. ``on_data`` then targets ``-weight`` for each
    selected stock; with the default (negative) threshold every weight is
    negative, so every target is a long position and the targets sum to 1.
    '''

    def initialize(self):
        '''Set the backtest window, the strategy parameters and the universe.'''
        self.set_start_date(2010, 1, 1)       # Set Start Date
        self.set_end_date(2019, 8, 1)         # Set End Date
        self.set_cash(100000)                 # Set Strategy Cash

        self._next_rebalance = self.time      # Initialize next rebalance time
        self._rebalance_days = 30             # Rebalance every 30 days

        self._lookback = 60                   # Length (days) of historical data
        self._num_components = 3              # Number of principal components used as factors
        self._num_equities = 20               # Size of the liquid equity pool
        self._zscore_threshold = -1.5         # Select stocks whose residual z-score is below this
        # Series (index: symbol) holding the signed weight of each selected stock.
        # Created as a Series because _get_weights always returns a Series.
        self._weights = pd.Series(dtype=float)

        self.universe_settings.resolution = Resolution.HOUR   # Use hour resolution for speed
        self.add_universe(self._coarse_selection_and_pca)      # Coarse selection + PCA


    def _coarse_selection_and_pca(self, coarse):
        '''Universe selector: liquidity filter followed by the PCA selection.

        Keeps securities priced above 5, ranks them by dollar volume and takes
        the top ``self._num_equities``. Their daily closes are passed to
        ``_get_weights`` and the result is stored in ``self._weights``.

        Returns ``Universe.UNCHANGED`` before the next rebalance time or when
        no stock is selected; otherwise the coarse-selected symbols that
        received a weight.
        '''
        # Before next rebalance time, just keep the current universe
        if self.time < self._next_rebalance:
            return Universe.UNCHANGED

        ### Simple coarse selection first

        # Sort the equities by dollar volume, descending
        liquid = sorted((x for x in coarse if x.price > 5),
                        key=lambda x: x.dollar_volume, reverse=True)
        symbols = [x.symbol for x in liquid[:self._num_equities]]

        ### After coarse selection, we do PCA and linear regression to get our selected symbols

        # Daily closes, one column per symbol. An empty candidate list or an
        # empty history response leaves `closes` empty so that _get_weights
        # reports "nothing selected" instead of failing on a missing column.
        closes = pd.DataFrame()
        if symbols:
            history = self.history(symbols, self._lookback, Resolution.DAILY)
            if not history.empty:
                closes = history.close.unstack(level=0)

        # Select the desired symbols and their weights for the portfolio
        self._weights = self._get_weights(closes)

        # If there are no selected symbols, leave the universe unchanged
        if self._weights.empty:
            return Universe.UNCHANGED

        return [x for x in symbols if str(x) in self._weights.index]


    def _get_weights(self, history):
        '''Compute the weights of the stocks whose PCA residual is stretched.

        ``history`` is a DataFrame of closing prices with one column per
        symbol. Columns containing any missing value are dropped. Each
        remaining column of centered log prices is regressed (OLS with
        intercept) on the first ``self._num_components`` principal-component
        scores, and the residual of the most recent row is standardised.

        Returns a Series indexed by symbol holding ``zscore / sum(|zscore|)``
        for the stocks whose z-score is below ``self._zscore_threshold``,
        sorted ascending. The Series is empty when no column survives the
        missing-value filter or when no z-score passes the threshold.
        '''
        nothing_selected = pd.Series(dtype=float)

        # PCA cannot be fitted without at least one complete price column
        prices = history.dropna(axis=1)
        if prices.empty:
            return nothing_selected

        # Sample data for PCA (smooth it using the np.log function)
        sample = np.log(prices)
        sample -= sample.mean() # Center it column-wise

        # Fit the PCA model for sample data
        pca = PCA().fit(sample)

        # Get the first n_components factors
        factors = np.dot(sample, pca.components_.T)[:, :self._num_components]

        # Add 1's to fit the linear regression (intercept)
        factors = sm.add_constant(factors)

        # Residuals of an Ordinary Least Squares fit of each stock on the factors
        resids = pd.DataFrame({
            ticker: sm.OLS(sample[ticker], factors).fit().resid
            for ticker in sample.columns
        })

        # Standardise the residuals and keep those of the most recent day
        zscores = ((resids - resids.mean()) / resids.std()).iloc[-1]

        # Get the stocks far from the mean (for mean reversion)
        selected = zscores[zscores < self._zscore_threshold]

        # Nothing to normalise: avoid dividing by a zero gross exposure
        gross = selected.abs().sum()
        if selected.empty or gross == 0:
            return nothing_selected

        # Return the weights for each selected stock
        weights = selected * (1 / gross)
        return weights.sort_values()


    def on_data(self, data):
        '''
        Rebalance every self._rebalance_days
        '''
        ### Do nothing until next rebalance
        if self.time < self._next_rebalance:
            return

        ### Open positions
        for symbol, weight in self._weights.items():
            # If the residual is way deviated from 0, we enter the position in the opposite way (mean reversion)
            self.set_holdings(symbol, -weight)

        ### Update next rebalance time
        self._next_rebalance = self.time + timedelta(self._rebalance_days)


    def on_securities_changed(self, changes):
        '''
        Liquidate the securities that were removed from the universe
        '''
        for security in changes.removed_securities:
            if security.invested:
                self.liquidate(security.symbol, 'Removed from Universe')