Backtest

Overall Statistics
Total Trades 1957 Average Win 0.33% Average Loss -0.34% Compounding Annual Return 15.286% Drawdown 28.300% Expectancy 0.099 Net Profit 47.941% Sharpe Ratio 0.773 Probabilistic Sharpe Ratio 30.684% Loss Rate 44% Win Rate 56% Profit-Loss Ratio 0.96 Alpha 0.108 Beta 0.312 Annual Standard Deviation 0.185 Annual Variance 0.034 Information Ratio 0.132 Tracking Error 0.226 Treynor Ratio 0.459 Total Fees $8896.57 Estimated Strategy Capacity $630000.00 Lowest Capacity Asset TREE U5BI1YAU3NMT

### PRODUCT INFORMATION --------------------------------------------------------------------------------
# Copyright Emilio Freire Bauzano
# Use entirely at your own risk.
# This algorithm contains open source code from other sources and no claim is being made to such code.
# Do not remove this copyright notice.
### ----------------------------------------------------------------------------------------------------

#from FactorModelUniverseSelection import FactorModelUniverseSelectionModel
from LiquidGrowthUniverse import SMIDGrowth

from LongShortAlphaCreation import LongShortAlphaCreationModel
from CustomEqualWeightingPortfolioConstruction import CustomEqualWeightingPortfolioConstructionModel

class LongShortEquityFrameworkAlgorithm(QCAlgorithmFramework):

    '''
    Trading Logic:
        Long-Short Equity Strategy assembled from Algorithm Framework modules
    Modules:
        Universe:
            SMIDGrowth (imported from LiquidGrowthUniverse); the factor-model
            universe is kept in Initialize as a commented-out alternative
        Alpha:
            LongShortAlphaCreationModel
        Portfolio:
            CustomEqualWeightingPortfolioConstructionModel, driven by the same
            rebalancing function as the universe (Expiry.EndOfMonth)
        Execution:
            ImmediateExecutionModel
        Risk:
            NullRiskManagementModel
    '''

    def Initialize(self):

        ''' Configure the backtest, the universe settings, the custom charts and the framework modules '''

        ### user-defined inputs ---------------------------------------------------------------------------

        # backtest window and starting capital
        self.SetStartDate(2018, 1, 1)
        self.SetEndDate(2020, 10, 1)
        self.SetCash(1000000)

        # ticker used as benchmark
        benchmarkTicker = 'SPY'

        # function giving the next rebalancing time (shared by the universe and portfolio modules)
        rebalanceRule = Expiry.EndOfMonth

        # number of stocks to keep for factor modelling calculations
        # (only referenced by the commented-out FactorModelUniverseSelectionModel below)
        nStocks = 100

        # number of positions to hold on each side (long/short)
        perSide = 20

        # lookback for historical data to calculate factors
        historyLookback = 252

        # leverage factor
        leverageFactor = 1

        ### --------------------------------------------------------------------------------------------------

        # exposure per position and total number of positions
        allocationPerSecurity = (1 / perSide) * leverageFactor
        totalPositions = perSide * 2

        # requested data resolution
        self.UniverseSettings.Resolution = Resolution.Hour

        # leverage given to newly added securities (holdings already in the account are not affected)
        # NOTE(review): leverageFactor is already part of allocationPerSecurity, so it is applied
        # twice in this product - harmless while leverageFactor == 1, confirm the intent otherwise
        grossExposure = totalPositions * allocationPerSecurity * leverageFactor
        requiredLeverage = max(1, grossExposure)
        self.UniverseSettings.Leverage = requiredLeverage + 1
        self.UniverseSettings.DataNormalizationMode = DataNormalizationMode.Raw
        self.UniverseSettings.FillForward = False

        # custom security initializer plus the brokerage model (slippage and fees)
        self.SetSecurityInitializer(self.security_initializer)
        # cash-account alternative: AccountType.Cash instead of AccountType.Margin
        self.SetBrokerageModel(BrokerageName.InteractiveBrokersBrokerage, AccountType.Margin)

        # custom charts: (chart name, ((series name, unit), ...)), registered in this order
        chartSpecs = (
            ('Chart Total Portfolio Exposure %', (('Daily Portfolio Exposure %', ''),)),
            ('Chart Number Of Longs/Shorts', (('Daily N Longs', ''), ('Daily N Shorts', ''))),
            ('Chart Drawdown %', (('Drawdown %', '%'),)),
        )
        for chartName, seriesSpecs in chartSpecs:
            chart = Chart(chartName)
            for seriesName, unit in seriesSpecs:
                chart.AddSeries(Series(seriesName, SeriesType.Line, unit))
            self.AddChart(chart)

        # add benchmark
        self.SetBenchmark(benchmarkTicker)

        # select modules
        # alternative universe (disabled):
        #   self.SetUniverseSelection(FactorModelUniverseSelectionModel(benchmark = benchmarkTicker,
        #                                                               nStocks = nStocks,
        #                                                               lookback = historyLookback,
        #                                                               maxNumberOfPositions = totalPositions,
        #                                                               rebalancingFunc = rebalanceRule))
        universeModel = SMIDGrowth(benchmark = benchmarkTicker,
                                   maxNumberOfPositions = totalPositions,
                                   rebalancingFunc = rebalanceRule)
        self.SetUniverseSelection(universeModel)

        alphaModel = LongShortAlphaCreationModel(maxNumberOfPositions = totalPositions,
                                                 lookback = historyLookback)
        self.SetAlpha(alphaModel)

        portfolioModel = CustomEqualWeightingPortfolioConstructionModel(initialAllocationPerSecurity = allocationPerSecurity,
                                                                        rebalancingFunc = rebalanceRule)
        self.SetPortfolioConstruction(portfolioModel)

        self.SetExecution(ImmediateExecutionModel())

        self.SetRiskManagement(NullRiskManagementModel())


    def security_initializer(self, security):

        '''
        Description:
            Seed option securities with their last known price;
            every other security type is left untouched
        Args:
            security: Security which characteristics we want to change
        '''

        if security.Type != SecurityType.Option:
            return

        security.SetMarketPrice(self.GetLastKnownPrice(security))

from clr import AddReference
AddReference("QuantConnect.Common")
AddReference("QuantConnect.Algorithm")
AddReference("QuantConnect.Algorithm.Framework")

from QuantConnect import *
from QuantConnect.Algorithm import *
from QuantConnect.Algorithm.Framework import *
from QuantConnect.Algorithm.Framework.Alphas import AlphaModel, Insight, InsightType, InsightDirection

from HelperFunctions import GetFundamentalDataDict, MakeCalculations, GetLongShortLists
from datetime import timedelta, datetime
import pandas as pd
import numpy as np

class LongShortAlphaCreationModel(AlphaModel):

    '''
    Description:
        Alpha model emitting one price insight per selected long/short symbol
    Details:
        - The long/short selection is recomputed on every OnSecuritiesChanged event
        - Insights are emitted on the first Update call of a day (tracked by day of month) whose hour is greater than 9
        - Every insight expires at Expiry.EndOfDay
    '''

    def __init__(self, maxNumberOfPositions = 10, lookback = 252):

        '''
        Description:
            Initialize a new instance of LongShortAlphaCreationModel
        Args:
            maxNumberOfPositions: Total number of positions (longs + shorts); read by GetLongShortLists
            lookback: Number of daily bars of history requested for the calculations
        '''

        self.maxNumberOfPositions = maxNumberOfPositions
        self.lookback = lookback

        self.securities = []
        self.day = 0

        # symbol -> direction (1 for long, -1 for short)
        # created here so that Update is safe to call before the first OnSecuritiesChanged event
        self.insightsDict = {}

    def Update(self, algorithm, data):

        '''
        Description:
            Create the daily insights from the current long/short selection
        Args:
            algorithm: The algorithm instance
            data: The new data available
        Returns:
            List of insights (empty when insights were already emitted today or the hour is not past 9)
        '''

        insights = [] # list to store the new insights to be created

        if algorithm.Time.day != self.day and algorithm.Time.hour > 9:
            for symbol, direction in self.insightsDict.items():
                # only emit for symbols with data in this slice, still active and with a valid price
                if data.ContainsKey(symbol) and symbol in algorithm.ActiveSecurities.Keys and algorithm.ActiveSecurities[symbol].Price > 0:
                    insights.append(Insight.Price(symbol, Expiry.EndOfDay, direction))

            self.day = algorithm.Time.day

        return insights

    def OnSecuritiesChanged(self, algorithm, changes):

        '''
        Description:
            Event fired each time we add/remove securities from the data feed.
            Updates self.securities and rebuilds the long/short selection in self.insightsDict
        Args:
            algorithm: The algorithm instance that experienced the change in securities
            changes: The security additions and removals from the algorithm
        '''

        # check current securities in our self.securities list
        securitiesList = [x.Symbol.Value for x in self.securities]
        algorithm.Log('(Alpha module) securities in self.securities before OnSecuritiesChanged: ' + str(securitiesList))

        # add new securities
        addedSecurities = [x for x in changes.AddedSecurities if x not in self.securities]
        self.securities.extend(addedSecurities)

        newSecuritiesList = [x.Symbol.Value for x in addedSecurities]
        algorithm.Log('(Alpha module) new securities added to self.securities:'+ str(newSecuritiesList))

        # remove securities
        removedSecurities = [x for x in changes.RemovedSecurities if x in self.securities]
        for removed in removedSecurities:
            self.securities.remove(removed)

        removedList = [x.Symbol.Value for x in removedSecurities]
        algorithm.Log('(Alpha module) securities removed from self.securities: ' + str(removedList))

        # print the final securities in self.securities for today
        securitiesList = [x.Symbol.Value for x in self.securities]
        algorithm.Log('(Alpha module) final securities in self.securities after OnSecuritiesChanged: ' + str(securitiesList))

        # generate dictionary with factors -------------------------------------------------------
        fundamentalDataBySymbolDict = GetFundamentalDataDict(algorithm, self.securities, 'alpha')

        # make calculations to create long/short lists -------------------------------------------
        currentSymbols = list(fundamentalDataBySymbolDict.keys())
        calculations = MakeCalculations(algorithm, currentSymbols, self.lookback, Resolution.Daily, fundamentalDataBySymbolDict)

        # nothing to rank: drop the previous selection instead of ranking an empty table
        if len(calculations) == 0:
            algorithm.Log('(Alpha module) no symbols with valid calculations, clearing long/short selection')
            self.insightsDict = {}
            return

        # get long/short lists
        longs, shorts = GetLongShortLists(self, algorithm, calculations, 'alpha')

        # update the insightsDict dictionary with long/short signals
        # (a symbol present in both lists is treated as a long)
        self.insightsDict = { symbol: (1 if symbol in longs else -1) for symbol in longs + shorts }

from clr import AddReference
AddReference("QuantConnect.Common")
AddReference("QuantConnect.Algorithm.Framework")

from QuantConnect import Resolution, Extensions
from QuantConnect.Algorithm.Framework.Alphas import *
from QuantConnect.Algorithm.Framework.Portfolio import *
from itertools import groupby
from datetime import datetime, timedelta

class CustomEqualWeightingPortfolioConstructionModel(PortfolioConstructionModel):

    '''
    Description:
        Provide a custom implementation of IPortfolioConstructionModel that gives the same fixed weighting to all active securities
    Details:
        - The target percent holdings of each security is initialAllocationPerSecurity multiplied by the insight direction
        - For InsightDirection.Up, long targets are returned
        - For InsightDirection.Down, short targets are returned
        - For InsightDirection.Flat, closing position targets are returned
        - Securities removed from the universe get a zero target
    '''

    def __init__(self, initialAllocationPerSecurity = 0.1, rebalancingFunc = Expiry.EndOfMonth):

        '''
        Description:
            Initialize a new instance of CustomEqualWeightingPortfolioConstructionModel
        Args:
            initialAllocationPerSecurity: Portfolio exposure per security (as a % of total equity)
            rebalancingFunc: Function called with the current algorithm time to obtain the next rebalancing time
        '''

        # portfolio exposure per security (as a % of total equity)
        self.initialAllocationPerSecurity = initialAllocationPerSecurity
        self.rebalancingFunc = rebalancingFunc

        self.insightCollection = InsightCollection()
        self.removedSymbols = []

        # None until the first rebalance has happened
        self.nextRebalance = None

    def CreateTargets(self, algorithm, insights):

        '''
        Description:
            Create portfolio targets from the specified insights
        Args:
            algorithm: The algorithm instance
            insights: The insights to create portfolio targets from
        Returns:
            An enumerable of portfolio targets to be sent to the execution model
        '''

        targets = []

        if len(insights) == 0:
            return targets

        # apply rebalancing logic: between rebalancing times we only act when there are removed symbols to flatten
        if self.nextRebalance is not None and algorithm.Time < self.nextRebalance and len(self.removedSymbols) == 0:
            return targets
        self.nextRebalance = self.rebalancingFunc(algorithm.Time)

        # here we get the new insights and add them to our insight collection
        for insight in insights:
            self.insightCollection.Add(insight)

        # create flatten target for each security that was removed from the universe
        if len(self.removedSymbols) > 0:
            universeDeselectionTargets = [ PortfolioTarget(symbol, 0) for symbol in self.removedSymbols ]
            targets.extend(universeDeselectionTargets)
            algorithm.Log('(Portfolio module) liquidating: ' + str([x.Value for x in self.removedSymbols]))
            self.removedSymbols = []

        # get insight that have not expired of each symbol that is still in the universe
        activeInsights = self.insightCollection.GetActiveInsights(algorithm.UtcTime)

        # get the last generated active insight for each symbol
        # (itertools.groupby only merges consecutive items with equal keys, so on unsorted input it
        #  could yield several groups - and therefore several targets - for the same symbol;
        #  a dictionary keyed by symbol guarantees exactly one insight per symbol)
        lastInsightBySymbol = {}
        for insight in activeInsights:
            current = lastInsightBySymbol.get(insight.Symbol)
            if current is None or insight.GeneratedTimeUtc >= current.GeneratedTimeUtc:
                lastInsightBySymbol[insight.Symbol] = insight

        # determine target percent for the given insights
        for insight in lastInsightBySymbol.values():
            allocationPercent = self.initialAllocationPerSecurity * insight.Direction
            target = PortfolioTarget.Percent(algorithm, insight.Symbol, allocationPercent)
            targets.append(target)

        return targets

    def OnSecuritiesChanged(self, algorithm, changes):

        '''
        Description:
            Event fired each time we add/remove securities from the data feed
        Args:
            algorithm: The algorithm instance that experienced the change in securities
            changes: The security additions and removals from the algorithm
        '''

        newRemovedSymbols = [x.Symbol for x in changes.RemovedSecurities if x.Symbol not in self.removedSymbols]

        # get removed symbol and invalidate them in the insight collection
        self.removedSymbols.extend(newRemovedSymbols)
        self.insightCollection.Clear(self.removedSymbols)

        removedList = [x.Value for x in self.removedSymbols]
        algorithm.Log('(Portfolio module) securities removed from Universe: ' + str(removedList))

import pandas as pd
from scipy.stats import zscore
from classSymbolData import SymbolData

def MakeCalculations(algorithm, symbols, lookback, resolution, fundamentalDataBySymbolDict):

    '''
    Description:
        Make required calculations using historical data for each symbol
    Args:
        algorithm: The algorithm instance (used for the history request and for logging)
        symbols: The symbols to make calculations for
        lookback: Lookback period for historical data
        resolution: Resolution for historical data
        fundamentalDataBySymbolDict: Dictionary of symbols containing factors and the direction of the factor (for sorting)
    Return:
        calculations: Dictionary containing the calculations (SymbolData) per symbol;
                      symbols without a complete history, or whose factor calculation raised, are left out
    '''

    # store calculations
    calculations = {}

    if len(symbols) == 0:
        return calculations

    # get historical prices for the symbols
    history = GetHistory(algorithm, symbols,
                        lookbackPeriod = lookback,
                        resolution = resolution)

    for symbol in symbols:
        # fetch the close series once; a missing symbol or a missing 'close' column both give None,
        # so the None check really runs before anything indexes into the series
        closes = history.loc[symbol].get('close') if symbol in history.index else None

        # skip symbols with no history, fewer than lookback bars, or gaps in the closes
        if closes is None or len(closes) < lookback or closes.isna().any():
            algorithm.Log('no history found for: ' + str(symbol.Value))
            continue

        symbolData = SymbolData(symbol)
        try:
            symbolData.CalculateFactors(history, fundamentalDataBySymbolDict)
        except Exception as e:
            # deliberate best-effort: a symbol whose factors cannot be computed is left out
            algorithm.Log('removing from calculations due to ' + str(e))
            continue

        calculations[symbol] = symbolData

    return calculations
    
def GetFundamentalDataDict(algorithm, securitiesData, module = 'universe'):

    '''
    Build a dictionary with one entry per symbol holding its fundamental factors ready for sorting.

    With module 'universe' each item is used directly as the source of fundamentals; with module
    'alpha' the fundamentals are read from algorithm.ActiveSecurities and items whose symbol is
    not active are skipped. Any other module value raises ValueError on the first item.
    Every factor is currently commented out, so each symbol maps to an empty dictionary.
    '''

    factorsBySymbol = {}

    for item in securitiesData:
        if module == 'universe':
            fundamental = item
        elif module == 'alpha':
            if item.Symbol not in algorithm.ActiveSecurities.Keys:
                continue
            fundamental = algorithm.ActiveSecurities[item.Symbol].Fundamentals
        else:
            raise ValueError('module argument must be either universe or alpha')

        # factor value -> direction (1 for sorting descending and -1 for sorting ascending)
        # candidate factors, all currently disabled:
        #   fundamental.ValuationRatios.BookValuePerShare: 1,
        #   fundamental.FinancialStatements.BalanceSheet.TotalEquity.Value: -1,
        #   fundamental.OperationRatios.OperationMargin.Value: 1,
        #   fundamental.OperationRatios.ROE.Value: 1,
        #   fundamental.OperationRatios.TotalAssetsGrowth.Value: 1,
        #   fundamental.ValuationRatios.NormalizedPERatio: 1,
        #   fundamental.ValuationRatios.PBRatio: -1,
        #   fundamental.OperationRatios.TotalDebtEquityRatio.Value: -1,
        #   fundamental.ValuationRatios.FCFRatio: -1,
        #   fundamental.ValuationRatios.PEGRatio: -1,
        #   fundamental.MarketCap: 1,
        factors = {}
        factorsBySymbol[item.Symbol] = factors

        # a missing (None) factor value invalidates the whole symbol
        if None in list(factors):
            factorsBySymbol.pop(item.Symbol)

    return factorsBySymbol
    
def GetLongShortLists(self, algorithm, calculations, module = 'universe'):

    '''
    Description:
        Rank the symbols by their combined normalized factors and create lists of long/short stocks
    Args:
        self: Object exposing maxNumberOfPositions (total number of positions, longs + shorts)
        algorithm: The algorithm instance (used for Debug/Log output and LiveMode)
        calculations: Dictionary of symbol -> object exposing factorsList
        module: Name of the calling module; with 'alpha' in live mode the selection is also logged
    Return:
        longs: The top maxNumberOfPositions / 2 symbols by combined factor
        shorts: The bottom maxNumberOfPositions / 2 symbols by combined factor that are not already longs
        Both lists are empty when there is nothing to rank
    '''

    # get factors
    factorsDict = { symbol: symbolData.factorsList for symbol, symbolData in calculations.items() if symbolData.factorsList is not None }

    # nothing to rank: the factor columns used below would not exist
    if len(factorsDict) == 0:
        return [], []

    factorsDf = pd.DataFrame.from_dict(factorsDict, orient = 'index')

    # normalize factor
    normFactorsDf = factorsDf.apply(zscore)
    normFactorsDf.columns = ['Factor_' + str(x + 1) for x in normFactorsDf.columns]

    # combine factors using equal weighting (only the first two factors are used)
    normFactorsDf['combinedFactor'] = normFactorsDf['Factor_1'] * 1 + normFactorsDf['Factor_2'] * 1

    # sort descending
    sortedNormFactorsDf = normFactorsDf.sort_values(by = 'combinedFactor', ascending = False)
    rankedSymbols = list(sortedNormFactorsDf.index)

    # create long/short lists
    positionsEachSide = max(0, int(self.maxNumberOfPositions / 2))
    longs = rankedSymbols[:positionsEachSide]
    # a [-0:] slice would return every symbol, so zero positions must be handled explicitly
    bottomSymbols = rankedSymbols[-positionsEachSide:] if positionsEachSide > 0 else []
    # with fewer than 2 * positionsEachSide symbols the two ends overlap; longs keep priority
    shorts = [symbol for symbol in bottomSymbols if symbol not in longs]

    algorithm.Debug({'shorts': {x.Value: factorsDict[x] for x in shorts}})

    if module == 'alpha' and algorithm.LiveMode:
        algorithm.Log({'longs': {x.Value: factorsDict[x] for x in longs}, 'shorts': {x.Value: factorsDict[x] for x in shorts}})

    return longs, shorts

def GetHistory(algorithm, symbols, lookbackPeriod, resolution, batchSize = 50):

    '''
    Description:
        Pull historical data in batches
    Args:
        algorithm: The algorithm instance providing the History method
        symbols: List of symbols to request history for
        lookbackPeriod: Lookback period passed to each History request
        resolution: Resolution passed to each History request
        batchSize: Maximum number of symbols per History request
    Return:
        The result of the single History request when all symbols fit in one batch,
        otherwise the per-batch results concatenated in symbol order
    Raises:
        ValueError: if batchSize is smaller than 1
    '''

    if batchSize < 1:
        raise ValueError('batchSize must be a positive integer')

    total = len(symbols)
    if total <= batchSize:
        return algorithm.History(symbols, lookbackPeriod, resolution)

    # stop at total (not total + 1) so that no empty trailing batch is requested
    # when the number of symbols is an exact multiple of batchSize
    frames = [algorithm.History(symbols[start:(start + batchSize)], lookbackPeriod, resolution)
              for start in range(0, total, batchSize)]

    return pd.concat(frames)
    
def UpdateBenchmarkValue(self, algorithm):

    '''
    Track the value of a simulated buy-and-hold position in the Benchmark.

    While self.initBenchmarkPrice is 0 the current cash and benchmark price are recorded as the
    starting point; afterwards self.benchmarkValue is the starting cash scaled by the benchmark
    price change since that point.
    '''

    if self.initBenchmarkPrice != 0:
        priceNow = algorithm.Benchmark.Evaluate(algorithm.Time)
        priceRatio = priceNow / self.initBenchmarkPrice
        self.benchmarkValue = priceRatio * self.initBenchmarkCash
        return

    # first call (or the benchmark price was still 0 so far): record the starting point
    startingCash = algorithm.Portfolio.Cash
    self.initBenchmarkCash = startingCash
    self.initBenchmarkPrice = algorithm.Benchmark.Evaluate(algorithm.Time)
    self.benchmarkValue = startingCash
        
def UpdatePlots(self, algorithm):

    '''
    Description:
        Update the benchmark, portfolio exposure, number of longs/shorts and drawdown plots
    Args:
        self: Object holding the plotting state (benchmark, initBenchmarkPrice,
              portfolioValueHigh and portfolioValueHighInitialized)
        algorithm: The algorithm instance
    '''

    # simulate buy and hold the benchmark and plot its daily value --------------
    UpdateBenchmarkValue(self, algorithm)
    algorithm.Plot('Strategy Equity', self.benchmark, self.benchmarkValue)

    # get current portfolio value
    currentTotalPortfolioValue = algorithm.Portfolio.TotalPortfolioValue

    # read the holdings once, they are used by several plots
    holdings = list(algorithm.Portfolio.Values)

    # plot the daily total portfolio exposure % --------------------------------
    # NOTE(review): this adds the long and short holdings values as they come; if short
    # holdings values are negative the result is a net (not gross) exposure - confirm
    longHoldings = sum([x.HoldingsValue for x in holdings if x.IsLong])
    shortHoldings = sum([x.HoldingsValue for x in holdings if x.IsShort])
    totalHoldings = longHoldings + shortHoldings
    # exposure is undefined for a zero portfolio value, so the point is skipped
    if currentTotalPortfolioValue != 0:
        totalPortfolioExposure = (totalHoldings / currentTotalPortfolioValue) * 100
        algorithm.Plot('Chart Total Portfolio Exposure %', 'Daily Portfolio Exposure %', totalPortfolioExposure)

    # plot the daily number of longs and shorts --------------------------------
    nLongs = sum(x.IsLong for x in holdings)
    nShorts = sum(x.IsShort for x in holdings)
    algorithm.Plot('Chart Number Of Longs/Shorts', 'Daily N Longs', nLongs)
    algorithm.Plot('Chart Number Of Longs/Shorts', 'Daily N Shorts', nShorts)

    # plot the drawdown % from the most recent high ---------------------------
    if not self.portfolioValueHighInitialized:
        # seed the trailing high with the first observed portfolio value
        # (this used to be written to a differently named attribute that nothing read)
        self.portfolioValueHigh = currentTotalPortfolioValue
        self.portfolioValueHighInitialized = True

    # update trailing high value of the portfolio
    if self.portfolioValueHigh < currentTotalPortfolioValue:
        self.portfolioValueHigh = currentTotalPortfolioValue

    # drawdown is undefined for a zero high, so the point is skipped
    if self.portfolioValueHigh != 0:
        currentDrawdownPercent = ((float(currentTotalPortfolioValue) / float(self.portfolioValueHigh)) - 1.0) * 100
        algorithm.Plot('Chart Drawdown %', 'Drawdown %', currentDrawdownPercent)

import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import statsmodels.api as sm
import pandas as pd
import numpy as np

import seaborn as sns
sns.set_style('darkgrid')
pd.plotting.register_matplotlib_converters()

from statsmodels.regression.rolling import RollingOLS
from io import StringIO

class RiskAnalysis:

    '''
    Description:
        Research helper comparing strategy returns against external factor returns
        through cumulative-return plots, OLS regression and rolling OLS regression
    '''

    def __init__(self, qb):

        '''
        Description:
            Download the default external factor tables
        Args:
            qb: QuantBook (used for its Download method)
        '''

        # Fama-French five factors and 12 industry portfolios (daily CSV files)
        sources = {
            'industryFactorsDf': 'https://www.dropbox.com/s/24bjtztzglo3eyf/12_Industry_Portfolios_Daily.CSV?dl=1',
            'ffFiveFactorsDf': 'https://www.dropbox.com/s/88m1nohi597et20/F-F_Research_Data_5_Factors_2x3_daily.CSV?dl=1',
        }
        for attributeName, url in sources.items():
            setattr(self, attributeName, self.GetExternalFactorsDf(qb, url))

    def GetExternalFactorsDf(self, qb, url):

        '''
        Description:
            Download a DataFrame with data from external sources
        Details:
            - The 'Date' column (format YYYYMMDD) becomes the index
            - All values are divided by 100 (the files presumably quote returns in percent)
            - The 'RF' column is dropped when present
        Args:
            qb: QuantBook
            url: URL for the data source
        Returns:
            SingleIndex Dataframe
        '''

        rawCsv = qb.Download(url)
        factorsDf = pd.read_csv(StringIO(rawCsv), sep = ',')
        factorsDf['Date'] = pd.to_datetime(factorsDf['Date'], format = '%Y%m%d')
        factorsDf = factorsDf.set_index('Date').div(100)

        return factorsDf.drop(columns = 'RF', errors = 'ignore')

    def GetCombinedReturnsDf(self, returnsDf, externalFactorsDf = None):

        '''
        Description:
            Merge two DataFrames on their indexes (only dates present in both are kept)
        Args:
            returnsDf: SingleIndex Dataframe with returns from our strategy
            externalFactorsDf: SingleIndex Dataframe with returns from external factors
                               (defaults to the Fama-French Five Factors)
        Returns:
            SingleIndex Dataframe with returns
        '''

        factorsDf = self.ffFiveFactorsDf if externalFactorsDf is None else externalFactorsDf

        return pd.merge(returnsDf, factorsDf, left_index = True, right_index = True)

    def GetCumulativeReturnsDf(self, returnsDf):

        '''
        Description:
            Convert a DataFrame of returns into a DataFrame of cumulative returns
        Args:
            returnsDf: SingleIndex Dataframe with returns
        Returns:
            SingleIndex Dataframe with cumulative returns
        '''

        growthDf = (returnsDf + 1).cumprod()

        return growthDf - 1

    def RunRegression(self, returnsDf, dependentColumn = 'Strategy'):

        '''
        Description:
            Run Regression using the dependentColumn against the rest of the columns
        Args:
            returnsDf: SingleIndex Dataframe with returns
            dependentColumn: Name for the column to be used as dependent variable
        Returns:
            Fitted OLS model (its summary is printed as a side effect)
        '''

        predictors = [column for column in returnsDf.columns if column != dependentColumn]

        # dependent variable and predictors (with a constant term)
        endog = returnsDf[[dependentColumn]]
        exog = sm.add_constant(returnsDf[predictors])

        fittedModel = sm.OLS(endog, exog).fit()
        print(fittedModel.summary())

        return fittedModel

    def RunRollingRegression(self, returnsDf, dependentColumn = 'Strategy', lookback = 126):

        '''
        Description:
            Run Rolling Regression using the dependentColumn against the rest of the columns
        Args:
            returnsDf: SingleIndex Dataframe with returns
            dependentColumn: Name for the column to be used as dependent variable
            lookback: Number of observations for the lookback window
        Returns:
            Rolling Regression Model
        '''

        predictors = [column for column in returnsDf.columns if column != dependentColumn]

        dependentDf = returnsDf[[dependentColumn]]
        predictorsDf = sm.add_constant(returnsDf[predictors])

        return RollingOLS(dependentDf, predictorsDf, window = lookback).fit()

    # plotting functions ----------------------------------------------------------------------------------------

    def PlotCumulativeReturns(self, returnsDf):

        '''
        Description:
            Plot cumulative log-returns; the first column is drawn as the portfolio (black),
            the remaining columns as factors
        Args:
            returnsDf: SingleIndex Dataframe with returns
        Returns:
            Plot cumulative returns
        '''

        # cumulative returns, in logarithms for better visualization
        logCumulativeDf = np.log(1 + self.GetCumulativeReturnsDf(returnsDf))
        allColumns = logCumulativeDf.columns

        fig, ax = plt.subplots(figsize = (12, 5))

        # portfolio first, then the factors (if any)
        ax.plot(logCumulativeDf[allColumns[0]], color = 'black', linewidth = 2)
        if len(allColumns) > 1:
            ax.plot(logCumulativeDf[allColumns[1:]], alpha = 0.5)

        # formatting
        ax.axhline(y = 0, color = 'black', linestyle = '--', linewidth = 0.5)
        ax.set_title('Cumulative Log-Returns', fontdict = {'fontsize': 15})
        ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0))
        ax.legend(allColumns, loc = 'best')

        plt.show()

    def PlotRegressionModel(self, returnsDf, dependentColumn = 'Strategy'):

        '''
        Description:
            Run Regression and plot partial regression
        Args:
            returnsDf: SingleIndex Dataframe with returns
            dependentColumn: Name for the column to be used as dependent variable
        Returns:
            Summary of the regression model and partial regression plots
        '''

        fittedModel = self.RunRegression(returnsDf, dependentColumn)

        # the figure height grows with the number of predictors
        nPredictors = len([column for column in returnsDf.columns if column != dependentColumn])
        canvas = plt.figure(figsize = (10, nPredictors * 2))
        sm.graphics.plot_partregress_grid(fittedModel, fig = canvas)

        plt.show()

    def PlotRollingRegressionCoefficients(self, returnsDf, dependentColumn = 'Strategy', lookback = 126):

        '''
        Description:
            Run Rolling Regression and plot the time series of estimated coefficients for each predictor
        Args:
            returnsDf: SingleIndex Dataframe with returns
            dependentColumn: Name for the column to be used as dependent variable
            lookback: Number of observations for the lookback window
        Returns:
            Plot of time series of estimated coefficients for each predictor
        '''

        rollingFit = self.RunRollingRegression(returnsDf, dependentColumn, lookback)
        predictors = [column for column in returnsDf.columns if column != dependentColumn]

        # the figure height grows with the number of predictors
        rollingFit.plot_recursive_coefficient(variables = predictors,
                                              figsize = (10, len(predictors) * 3))

        plt.show()

    def PlotBoxPlotRollingFactorExposure(self, returnsDf, dependentColumn = 'Strategy', lookback = 126):

        '''
        Description:
            Run Rolling Regression and make a box plot with the distributions of the estimated coefficients
        Args:
            returnsDf: SingleIndex Dataframe with returns
            dependentColumn: Name for the column to be used as dependent variable
            lookback: Number of observations for the lookback window
        Returns:
            Box plot with distributions of estimated coefficients during the rolling regression
        '''

        rollingFit = self.RunRollingRegression(returnsDf, dependentColumn, lookback)

        # rows with missing estimates and the constant term are left out of the plot
        exposuresDf = rollingFit.params.dropna().drop(columns = 'const')

        fig, ax = plt.subplots(figsize = (10, 8))
        ax = sns.boxplot(data = exposuresDf,
                        width = 0.5,
                        palette = "colorblind",
                        orient = 'h')
        ax.axvline(x = 0, color = 'black', linestyle = '--', linewidth = 0.5)
        ax.set_title('Distribution of Risk Factor Rolling Exposures', fontdict = {'fontsize': 15})

        plt.show()

    # run full risk analysis --------------------------------------------------------------------------------------

    def RunRiskAnalysis(self, returnsDf, externalFactorsDf = None, dependentColumn = 'Strategy', lookback = 126):

        '''
        Description:
            Merge the strategy returns with the external factors and run every plot/regression in turn
        Args:
            returnsDf: SingleIndex Dataframe with returns from our strategy
            externalFactorsDf: SingleIndex Dataframe with returns from external factors
                               (defaults to the Fama-French Five Factors)
            dependentColumn: Name for the column to be used as dependent variable
            lookback: Number of observations for the rolling regression window
        '''

        separator = '---------------------------------------------------------------------------------------------'

        # merge returnsDf with externalFactorsDf (Fama-French Five Factors by default)
        combinedReturnsDf = self.GetCombinedReturnsDf(returnsDf, externalFactorsDf)

        # plot
        self.PlotCumulativeReturns(combinedReturnsDf)

        print(separator)
        print('---- Regression Analysis --------------------------------------------------------------------')
        print(separator)
        self.PlotRegressionModel(combinedReturnsDf, dependentColumn)

        print(separator)
        print('---- Rolling Regression Analysis (Rolling Coefficients) -------------------------------------')
        print(separator)
        self.PlotRollingRegressionCoefficients(combinedReturnsDf, dependentColumn, lookback)
        self.PlotBoxPlotRollingFactorExposure(combinedReturnsDf, dependentColumn, lookback)

from clr import AddReference
AddReference("System")
AddReference("QuantConnect.Common")
AddReference("QuantConnect.Indicators")
AddReference("QuantConnect.Algorithm.Framework")

from QuantConnect.Data.UniverseSelection import *
from Selection.FundamentalUniverseSelectionModel import FundamentalUniverseSelectionModel

from HelperFunctions import GetFundamentalDataDict, MakeCalculations, GetLongShortLists, UpdatePlots
import pandas as pd
import numpy as np

class FactorModelUniverseSelectionModel(FundamentalUniverseSelectionModel):

    '''
    Description:
        Universe selection model that keeps the most liquid names with fundamental data in the coarse
        step, the largest by market cap in the fine step, and finally the long/short candidates
        returned by GetLongShortLists
    '''

    def __init__(self,
                benchmark = 'SPY',
                nStocks = 500,
                lookback = 252,
                maxNumberOfPositions = 20,
                rebalancingFunc = Expiry.EndOfMonth,
                filterFineData = True,
                universeSettings = None,
                securityInitializer = None):
        
        '''
        Args:
            benchmark: Benchmark ticker, stored on the instance
            nStocks: Number of stocks kept after the market cap sort (twice as many survive the coarse step)
            lookback: History length passed to MakeCalculations
            maxNumberOfPositions: Stored on the instance (presumably read by the helper functions - confirm)
            rebalancingFunc: Function mapping the current time to the next rebalancing time
            filterFineData, universeSettings, securityInitializer: Forwarded to the base class
        '''
        
        # selection parameters
        self.benchmark = benchmark
        self.nStocks = nStocks
        self.lookback = lookback
        self.maxNumberOfPositions = maxNumberOfPositions

        # rebalancing schedule: nextRebalance stays None until the first selection happens
        self.rebalancingFunc = rebalancingFunc
        self.nextRebalance = None
        
        # state kept for plotting
        self.initBenchmarkPrice = 0
        self.portfolioValueHigh = 0 # initialize portfolioValueHigh for drawdown calculation
        self.portfolioValueHighInitialized = False # initialize portfolioValueHighInitialized for drawdown calculation
        
        super().__init__(filterFineData, universeSettings, securityInitializer)

    def SelectCoarse(self, algorithm, coarse):
        
        ''' Keep the symbols with fundamental data that have the highest dollar volume '''
        
        # plots are refreshed on every call, before the rebalancing check
        UpdatePlots(self, algorithm)
        
        # leave the universe untouched until the next rebalancing time is reached
        if self.nextRebalance is not None and algorithm.Time < self.nextRebalance:
            return Universe.Unchanged
        self.nextRebalance = self.rebalancingFunc(algorithm.Time)
        
        # only securities with fundamental data, most liquid first
        withFundamentalData = (x for x in coarse if x.HasFundamentalData)
        byDollarVolume = sorted(withFundamentalData, key = lambda x: x.DollarVolume, reverse = True)
        
        # keep twice nStocks so the fine step has room to narrow down by market cap
        return [x.Symbol for x in byDollarVolume[:(self.nStocks * 2)]]
        
    def SelectFine(self, algorithm, fine):
        
        ''' Keep the long and short candidates produced by the factor model '''
        
        # largest nStocks by market cap
        largestByMarketCap = sorted(fine, key = lambda x: x.MarketCap, reverse = True)[:self.nStocks]

        # dictionary with the fundamental factors per symbol
        fundamentalDataBySymbolDict = GetFundamentalDataDict(algorithm, largestByMarketCap, 'universe')
                    
        # run the factor calculations for every symbol present in the dictionary
        candidateSymbols = list(fundamentalDataBySymbolDict.keys())
        calculations = MakeCalculations(algorithm, candidateSymbols, self.lookback,
                                        Resolution.Daily, fundamentalDataBySymbolDict)
        
        # the final universe is the union of both lists, longs first
        longs, shorts = GetLongShortLists(self, algorithm, calculations, 'universe')

        return longs + shorts

import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis

class SymbolData:
    
    ''' Hold the factor values of one symbol and the calculations that produce them '''
    
    def __init__(self, symbol):
        self.Symbol = symbol
        
        # fundamental factors of this symbol, filled in by CalculateFactors
        self.fundamentalDataDict = {}
        
        # technical factors; they stay None until they are calculated
        self.momentum = None
        self.volatility = None
        self.skewness = None
        self.kurt = None
        self.positionVsHL = None
        self.meanOvernightReturns = None
    
    def CalculateFactors(self, history, fundamentalDataBySymbolDict):
        
        '''
        Description:
            Store the fundamental factors of this symbol and calculate the active technical factors
        Args:
            history: DataFrame whose first index level is the symbol, with at least a 'close' column
            fundamentalDataBySymbolDict: Dictionary keyed by symbol with the fundamental factors
        '''
        
        self.fundamentalDataDict = fundamentalDataBySymbolDict[self.Symbol]
        self.momentum = self.CalculateMomentum(history)
        self.volatility = self.CalculateVolatility(history)
        #self.skewness = self.CalculateSkewness(history)
        #self.kurt = self.CalculateKurtosis(history)
        #self.distanceVsHL = self.CalculateDistanceVsHL(history)
        #self.meanOvernightReturns = self.CalculateMeanOvernightReturns(history)
    
    def _GetClosePrices(self, history):
        
        ''' Return the series of close prices of this symbol '''
        
        return history.loc[self.Symbol]['close']
    
    def _GetCloseReturns(self, history):
        
        ''' Return the close-to-close returns of this symbol without the leading NaN '''
        
        return self._GetClosePrices(history).pct_change().dropna()
    
    def CalculateMomentum(self, history, period = 252):
        
        '''
        Description:
            Return of the last close versus the close found period rows from the end
        Args:
            history: DataFrame whose first index level is the symbol, with a 'close' column
            period: Position counted from the end used as the starting price (default 252)
        Returns:
            The momentum as a fraction (0.10 means +10%)
        Raises:
            IndexError: When the symbol has fewer than period rows of history
        '''
        
        closePrices = self._GetClosePrices(history)
        # .iloc makes the positional lookup explicit: plain [] with an integer on a
        # time-indexed Series relies on a positional fallback that pandas has deprecated
        momentum = (closePrices.iloc[-1] / closePrices.iloc[-period]) - 1
        
        return momentum
        
    def CalculateVolatility(self, history):
        
        ''' Standard deviation (numpy default, ddof = 0) of the close-to-close returns '''
        
        returns = self._GetCloseReturns(history)
        volatility = np.nanstd(returns, axis = 0)
        
        return volatility
        
    def CalculateSkewness(self, history):
        
        ''' Skewness (scipy.stats.skew) of the close-to-close returns '''
        
        returns = self._GetCloseReturns(history)
        skewness = skew(returns)
        
        return skewness
        
    def CalculateKurtosis(self, history):
        
        ''' Kurtosis (scipy.stats.kurtosis) of the close-to-close returns '''
        
        returns = self._GetCloseReturns(history)
        kurt = kurtosis(returns)
        
        return kurt
        
    def CalculateDistanceVsHL(self, history):
        
        '''
        Description:
            Position of the last close inside the range of closes in history
        Returns:
            0 when the last close is the lowest close, 1 when it is the highest,
            NaN when all closes are equal and the range is therefore zero
        '''
        
        closePrices = self._GetClosePrices(history)
        # Series.max/min skip NaN, unlike the builtins whose result depends on where a NaN sits
        highestClose = closePrices.max()
        lowestClose = closePrices.min()
        priceRange = highestClose - lowestClose
        
        # a flat series has no range to position the last close in
        if priceRange == 0:
            return np.nan
        
        distanceVsHL = (closePrices.iloc[-1] - lowestClose) / priceRange
        
        return distanceVsHL
        
    def CalculateMeanOvernightReturns(self, history):
        
        ''' Mean of the returns between each close and the following open, ignoring NaN '''
        
        symbolHistory = history.loc[self.Symbol]
        overnightReturns = (symbolHistory['open'] / symbolHistory['close'].shift(1)) - 1
        meanOvernightReturns = np.nanmean(overnightReturns, axis = 0)
        return meanOvernightReturns
            
    @property
    def factorsList(self):
        
        '''
        Description:
            List with momentum and volatility followed by the fundamental factors, where each
            fundamental value is multiplied by its dictionary key converted to float
            (presumably the key encodes a sign or weight - confirm against GetFundamentalDataDict)
        Returns:
            The list of factors, or None while momentum or volatility have not been calculated
        '''
        
        technicalFactors = [self.momentum, self.volatility]
        fundamentalFactors = [float(key) * value for key, value in self.fundamentalDataDict.items()]
        
        if all(v is not None for v in technicalFactors):
            return technicalFactors + fundamentalFactors
        else:
            return None

import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import scipy.stats as stats
import pandas as pd
import numpy as np

import seaborn as sns
sns.set_style('darkgrid')
pd.plotting.register_matplotlib_converters()

from datetime import timedelta

class FactorAnalysis:
    
    def __init__(self, qb, tickers, startDate, endDate, resolution):
        
        '''
        Description:
            Subscribe to the given tickers and load their historical data into self.ohlcvDf
        Args:
            qb: Object providing AddEquity and History (e.g. a QuantBook)
            tickers: Iterable of tickers to add
            startDate: Start of the history request
            endDate: End of the history request
            resolution: Resolution used both for the subscriptions and for the history request
        '''
        
        # subscribe to each ticker and keep its symbol
        symbols = []
        for ticker in tickers:
            symbols.append(qb.AddEquity(ticker, resolution).Symbol)
        
        # historical data for all symbols in a single request
        historyDf = qb.History(symbols, startDate, endDate, resolution)
        
        # when using daily resolution, QuantConnect uses the date at midnight after the trading day
        # hence skipping Mondays and showing Saturdays. We avoid this by subtracting one day from the index
        shiftedTimes = historyDf.index.levels[1] - timedelta(1)
        historyDf.index = historyDf.index.set_levels(shiftedTimes, level = 'time')
        
        # keep only complete rows
        self.ohlcvDf = historyDf.dropna()
        
    def GetFactorsDf(self, fct = None):

        '''
        Description:
            Apply a function to a MultiIndex Dataframe of historical data
            Group on symbol first to get a ohlcv series per symbol, and apply a custom function to it
            in order to get a factor value per symbol and day
        Args:
            fct: Function to calculate the custom factor
        Returns:
            MultiIndex Dataframe (symbol/time indexes) with the factor values
        '''
        
        if fct is None:
            raise ValueError('fct arguments needs to be provided to calculate factors')
        
        # group by symbol to get a timeseries of historical data per symbol and apply CustomFactor function
        factorsDf = self.ohlcvDf.groupby('symbol', group_keys = False).apply(lambda x: fct(x)).dropna()
        factorsDf.columns = ['Factor_' + str(i + 1) for i in range(len(factorsDf.columns))]
        # sort indexes
        factorsDf = factorsDf.sort_index(level = ['symbol', 'time'])

        return factorsDf
    
    def GetStandardizedFactorsDf(self, factorsDf):
        
        '''
        Description:
            Winsorize and standardize factors
        Args:
            factorsDf: MultiIndex Dataframe (symbol/time indexes) with the factor values
        Returns:
            MultiIndex Dataframe (symbol/time indexes) with standardized factor values
        '''
        
        # winsorization
        winsorizedFactorsDf = factorsDf.apply(stats.mstats.winsorize, limits = [0.025, 0.025])
        # zscore standardization
        standardizedFactorsDf = winsorizedFactorsDf.apply(stats.zscore)
        
        return standardizedFactorsDf
    
    def GetCombinedFactorsDf(self, factorsDf, combinedFactorWeightsDict = None):
        
        '''
        Description:
            Create a combined factor as a linear combination of individual factors
        Args:
            factorsDf: MultiIndex Dataframe (symbol/time indexes) with the factor values
            combinedFactorWeightsDict: Dictionary with factor names and weights to calculate a combined factor
        Returns:
            MultiIndex Dataframe (symbol/time indexes) with the individual factors and the combined factor
        '''
        
        # make a deep copy of the DataFrame
        combinedFactorsDf = factorsDf.copy(deep = True)
        
        # calculate a combined factor
        if combinedFactorWeightsDict is None:
            return combinedFactorsDf
        elif not combinedFactorWeightsDict:
            combinedFactorsDf['Combined_Factor'] = combinedFactorsDf.sum(axis = 1)
        else:
            combinedFactorsDf['Combined_Factor'] = sum(combinedFactorsDf[key] * value
                                                       for key, value in combinedFactorWeightsDict.items())
        
        return combinedFactorsDf
    
    def GetFinalFactorsDf(self, fct = None, combinedFactorWeightsDict = None, standardize = True):

        '''
        Description:
            - Apply a function to a MultiIndex Dataframe of historical data
              Group on symbol first to get a ohlcv series per symbol, and apply a custom function to it
                  in order to get a factor value per symbol and day
            - If required, standardize the factors and remove potential outliers
            - If required, add a combined factor as a linear combination of individual factors
        Args:
            fct: Function to calculate the custom factor
            standardize: Boolean to standardize data
            combinedFactorWeightsDict: Dictionary with factor names and weights to calculate a combined factor
        Returns:
            MultiIndex Dataframe (symbol/time indexes) with the factor values
        '''
        
        # get factorsDf
        factorsDf = self.GetFactorsDf(fct)
        
        # standardize
        if standardize:
            factorsDf = self.GetStandardizedFactorsDf(factorsDf)
        
        # add combined factor
        if combinedFactorWeightsDict is not None:
            factorsDf = self.GetCombinedFactorsDf(factorsDf, combinedFactorWeightsDict)

        return factorsDf
    
    def GetClosePricesDf(self):
    
        '''
        Description:
            Get a MultiIndex Dataframe of close prices
        Returns:
            MultiIndex Dataframe (symbol/time indexes) with close prices
        '''
        
        # select only close prices and turn into a dataframe
        closePricesDf = self.ohlcvDf['close'].to_frame()
        closePricesDf.columns = ['price']
        # forward fill nas and after that drop rows with some nas left
        closePricesDf = closePricesDf.sort_index(level = ['symbol', 'time'])
        closePricesDf = closePricesDf.groupby('symbol').fillna(method = 'ffill').dropna()

        return closePricesDf
    
    def GetFactorsPricesDf(self, factorsDf):
    
        '''
        Description:
            Get a MultiIndex Dataframe (symbol/time indexes) with all the factors and close prices
        Args:
            factorsDf: MultiIndex Dataframe (symbol/time indexes) with the factor values
        Returns:
            MultiIndex Dataframe (symbol/time indexes) with all the factors and chosen prices
        '''

        # get the closePricesDf
        closePricesDf = self.GetClosePricesDf()
        
        # merge factorsDf and closePricesDf and fill forward nans by symbol
        factorsPricesDf = pd.merge(factorsDf, closePricesDf, how = 'right', left_index = True, right_index = True)
        factorsPricesDf = factorsPricesDf.sort_index(level = ['symbol', 'time'])
        factorsPricesDf = factorsPricesDf.groupby('symbol').fillna(method = 'ffill').dropna()

        return factorsPricesDf
    
    def GetFactorsForwardReturnsDf(self, factorsPricesDf, forwardPeriods = [1, 5, 21]):
    
        '''
        Description:
            Generate a MultiIndex Dataframe (symbol/time indexes) with all previous info plus forward returns
        Args:
            factorsPricesDf:  MultiIndex Dataframe (symbol/time indexes) with all the factors and close prices
            forwardPeriods: List of integers defining the different periods for forward returns
        Returns:
            MultiIndex Dataframe (symbol/time indexes) with the factor values and forward returns
        '''

        # make sure 1 day forward returns are calculated even if not provided by user
        if 1 not in forwardPeriods:
            forwardPeriods.append(1)
        
        # calculate forward returns per period
        for period in forwardPeriods:
            factorsPricesDf[str(period) + 'D'] = (factorsPricesDf.groupby('symbol', group_keys = False)
                                                    .apply(lambda x: x['price'].pct_change(period).shift(-period)))
        
        # drop column price
        factorsForwardReturnsDf = factorsPricesDf.dropna().drop('price', axis = 1)

        return factorsForwardReturnsDf
    
    def GetFactorQuantilesForwardReturnsDf(self, factorsDf,
                                            forwardPeriods = [1, 5, 21],
                                            factor = 'Factor_1', q = 5):
        
        '''
        Description:
            Create a MultiIndex Dataframe (symbol/time indexes) with the factor values,
            forward returns and the quantile groups
        Args:
            factorsDf: MultiIndex Dataframe (symbol/time indexes) with the factor values
            forwardPeriods: List of integers defining the different periods for forward returns
            factor: Chosen factor to create quantiles for
            q: Number of quantile groups
        Returns:
            MultiIndex Dataframe (symbol/time indexes) with the factor values, forward returns and the quantile groups
        '''
        
        # get factorsForwardReturnsDf
        factorsPricesDf = self.GetFactorsPricesDf(factorsDf)
        factorsForwardReturnsDf = self.GetFactorsForwardReturnsDf(factorsPricesDf, forwardPeriods)
        
        # reorder index levels to have time and then symbols so we can then create quantiles per day
        factorsForwardReturnsDf = factorsForwardReturnsDf.reorder_levels(['time', 'symbol'])
        factorsForwardReturnsDf = factorsForwardReturnsDf.sort_index(level = ['time', 'symbol'])
        
        # calculate quintiles given the chosen factor and rename columns
        factorsForwardReturnsDf['Quantile'] = factorsForwardReturnsDf[factor].groupby('time').apply(lambda x: pd.qcut(x, q, labels = False, duplicates = 'drop')).add(1)
        factorsForwardReturnsDf['Quantile'] = 'Group_' + factorsForwardReturnsDf['Quantile'].astype(str)
        
        # remove the other factor columns
        factorCols = [x for x in factorsForwardReturnsDf.columns if 'Factor' not in x or x == factor]
        factorQuantilesForwardReturnsDf = factorsForwardReturnsDf[factorCols]
        
        return factorQuantilesForwardReturnsDf
    
    def GetReturnsByQuantileDf(self, factorQuantilesForwardReturnsDf, forwardPeriod = 1, weighting = 'mean'):
    
        '''
        Description:
            Generate a SingleIndex Dataframe with period forward returns by quantile and time
        Args:
            factorQuantilesForwardReturnsDf: MultiIndex Dataframe (symbol/time indexes) with the factor values,
                                             forward returns and the quantile groups
            forwardPeriod: The period of forward returns
            weighting: The weighting to apply to the returns in each quantile after grouping:
                        - mean: Take the average of all the stock returns within each quantile
                        - factor: Take a factor-weighted return within each quantile
        Returns:
            SingleIndex Dataframe with period forward returns by quantile and time
        '''

        # we drop the symbols and convert to a MultiIndex Dataframe with Quantile and time as indexes and forward returns
        df = factorQuantilesForwardReturnsDf.droplevel(['symbol'])
        df.set_index('Quantile', append = True, inplace = True)
        df = df.reorder_levels(['Quantile', 'time'])
        df = df.sort_index(level = ['Quantile', 'time'])
        
        # get the column name for the factor and period
        factorCol = [x for x in df.columns if 'Factor' in x][0]
        periodCol = [str(forwardPeriod) + 'D'][0]
        
        if weighting == 'mean':
            df = df[[periodCol]]
            # group by Quantile and time and get the mean returns (equal weight across all stocks within each quantiles)
            returnsByQuantileDf = df.groupby(['Quantile', 'time']).mean()
        elif weighting == 'factor':
            relevantCols = [factorCol, periodCol]
            df = df[relevantCols]
            # group by Quantile and time and create a column with weights based on factor values
            df['Factor_Weights'] = (df.groupby(['Quantile', 'time'], group_keys = False)
                                    .apply(lambda x: x[factorCol].abs() / x[factorCol].abs().sum()))
            # group by Quantile and time and calculate the factor weighted average returns
            returnsByQuantileDf = (df.groupby(['Quantile', 'time'], group_keys = False)
                                    .apply(lambda x: (x['Factor_Weights'] * x[periodCol]).sum())).to_frame()

        # unstack to convert to SingleIndex Dataframe
        returnsByQuantileDf = returnsByQuantileDf.unstack(0).fillna(0)
        returnsByQuantileDf.columns = returnsByQuantileDf.columns.droplevel(0)
        returnsByQuantileDf.columns.name = None
        
        # finally keep every nth row to match with the forward period returns
        returnsByQuantileDf = returnsByQuantileDf.iloc[::forwardPeriod, :]

        return returnsByQuantileDf
    
    def GetMeanReturnsByQuantileDf(self, factorQuantilesForwardReturnsDf):
            
        '''
        Description:
            Generate a SingleIndex Dataframe with mean returns by quantile and time
        Args:
            factorQuantilesForwardReturnsDf: MultiIndex Dataframe (symbol/time indexes) with the factor values,
                                             forward returns and the quantile groups
        Returns:
            SingleIndex Dataframe with mean returns by quantile and time
        '''
    
        # remove factor columns, group by quantile and take the average return
        factorCol = [x for x in factorQuantilesForwardReturnsDf.columns if 'Factor' in x]
        quantileMeanReturn = factorQuantilesForwardReturnsDf.drop(factorCol, axis = 1).groupby('Quantile').mean()
        
        return quantileMeanReturn
    
    def GetPortfolioLongShortReturnsDf(self, returnsByQuantileDf, portfolioWeightsDict = None):
        
        '''
        Description:
            Generate a SingleIndex Dataframe with the returns of a Long-Short portfolio
        Args:
            returnsByQuantileDf: SingleIndex Dataframe with period forward returns by quantile and time
            portfolioWeightsDict: Dictionary with quantiles and weights to create a portfolio of returns
        Returns:
            SingleIndex Dataframe with the returns of Long-Short portfolio
        '''
        
        # if no portfolioWeightsDict are provided, create a default one
        # going 100% long top quintile and 100% short bottom quintile
        if portfolioWeightsDict is None:
            quantileGroups = sorted(list(returnsByQuantileDf.columns))
            topQuantile = quantileGroups[-1]
            bottomQuantile = quantileGroups[0]
            portfolioWeightsDict = {topQuantile: 1, bottomQuantile: -1}
        
        # we calculate the weighted average portfolio returns based on given weights for each quintile
        col = list(portfolioWeightsDict.keys())
        portfolioLongShortReturnsDf = returnsByQuantileDf.loc[: , col]
        portfolioLongShortReturnsDf[col[0]] = portfolioLongShortReturnsDf[col[0]] * portfolioWeightsDict[col[0]]
        portfolioLongShortReturnsDf[col[1]] = portfolioLongShortReturnsDf[col[1]] * portfolioWeightsDict[col[1]]
        portfolioLongShortReturnsDf['Strategy'] = portfolioLongShortReturnsDf.sum(axis = 1)
        portfolioLongShortReturnsDf = portfolioLongShortReturnsDf[['Strategy']]

        return portfolioLongShortReturnsDf
    
    def GetCumulativeReturnsDf(self, returnsDf):
        
        '''
        Description:
            Convert a DataFrame of returns into a DataFrame of cumulative returns
        Args:
            returnsDf: SingleIndex Dataframe with returns
        Returns:
            SingleIndex Dataframe with cumulative returns
        '''
        
        cumulativeReturnsDf = returnsDf.add(1).cumprod().add(-1)

        return cumulativeReturnsDf

    # plotting functions ----------------------------------------------------------------------------------------
    
    def PlotFactorsCorrMatrix(self, factorsDf):
        
        '''
        Description:
            Show the correlations between the factor columns as an annotated heatmap
        Args:
            factorsDf: MultiIndex Dataframe (symbol/time indexes) with the factor values
        Returns:
            Plot the factors correlation matrix
        '''

        # correlations rounded to two decimals for readable annotations
        roundedCorrDf = factorsDf.corr().round(2)
        
        # square figure that grows with the number of factors
        side = len(factorsDf.columns)
        plt.subplots(figsize = (side, side))
        sns.heatmap(roundedCorrDf, annot = True)
        
        plt.show()
        
    def PlotHistograms(self, factorsDf):
        
        '''
        Description:
            Show one 50-bin histogram per factor column
        Args:
            factorsDf: MultiIndex Dataframe (symbol/time indexes) with the factor values
        Returns:
            Plot the histogram for each factor
        '''
        
        # figure size scales with the number of factors
        nFactors = len(factorsDf.columns)
        figureSize = (nFactors * 3, nFactors * 2)
        factorsDf.hist(figsize = figureSize, bins = 50)
        
        plt.show()
        
    def PlotBoxPlotQuantilesCount(self, factorQuantilesForwardReturnsDf):
                
        '''
        Description:
            Plot a box plot with the distributions of number of stocks in each quintile.
            The objective is to make sure each quintile has an almost equal number of stocks most of the time
        Args:
            factorQuantilesForwardReturnsDf: MultiIndex Dataframe (symbol/time indexes) with the factor values,
                                             forward returns and the quantile groups
        Returns:
            Plot a box plot with the distributions of number of stocks in each quintile
        '''
        
        # number of stocks per quantile and time, counted on the factor column
        factorCol = [x for x in factorQuantilesForwardReturnsDf.columns if 'Factor' in x]
        countsDf = factorQuantilesForwardReturnsDf.groupby(['Quantile', 'time'])[factorCol].count()
        
        # one column per quantile, one row per time
        countsDf = countsDf.unstack(0)
        countsDf.columns = countsDf.columns.droplevel(0)
        # clear the name left on the columns by unstack, as GetReturnsByQuantileDf does;
        # assigning to countsDf.name would only set a stray attribute on the DataFrame
        countsDf.columns.name = None
        
        ax = sns.boxplot(data = countsDf, width = 0.5, palette = "colorblind", orient = 'h')
        ax.set_title('Distribution Of Number Of Assets Within Quintiles')
        
        plt.show()
    
    def PlotMeanReturnsByQuantile(self, factorQuantilesForwardReturnsDf):
        
        '''
        Description:
            Show a bar chart with the mean return of every quantile group, one bar per forward period
        Args:
            factorQuantilesForwardReturnsDf: MultiIndex Dataframe (symbol/time indexes) with the factor values,
                                             forward returns and the quantile groups
        Returns:
            Plot with the mean return for each quantile group and forward return period
        '''
        
        # bar chart straight from the mean returns table
        ax = (self.GetMeanReturnsByQuantileDf(factorQuantilesForwardReturnsDf)
                .plot(kind = 'bar', figsize = (12, 5)))
        
        # formatting: returns are fractions, shown as percentages
        ax.set_title('Mean Returns By Quantile Group And Forward Period Return', fontdict = {'fontsize': 15})
        ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0))
        
        plt.show()    
        
    def PlotCumulativeReturnsByQuantile(self, factorQuantilesForwardReturnsDf,
                                        forwardPeriod = 1, weighting = 'mean'):
        
        '''
        Description:
            Show the cumulative log-returns of every quantile group, highlighting the first
            (red) and last (green) quantile columns
        Args:
            factorQuantilesForwardReturnsDf: MultiIndex Dataframe (symbol/time indexes) with the factor values,
                                             forward returns and the quantile groups
            forwardPeriod: The period of forward returns
            weighting: The weighting to apply to the returns in each quantile after grouping:
                        - mean: Take the average of all the stock returns within each quantile
                        - factor: Take a factor-weighted return within each quantile
        Returns:
            Plot with the cumulative returns per quantile group
        '''
        
        # cumulative returns per quantile
        periodReturnsDf = self.GetReturnsByQuantileDf(factorQuantilesForwardReturnsDf, forwardPeriod, weighting)
        cumulativeDf = self.GetCumulativeReturnsDf(periodReturnsDf)
        
        # take logarithm for better visualization
        logCumulativeDf = np.log(1 + cumulativeDf)
        
        # first and last columns are the extreme quantiles, everything else is the middle
        allCols = logCumulativeDf.columns
        colBottom = allCols[0]
        colTop = allCols[-1]
        colMiddle = logCumulativeDf.drop([colTop, colBottom], axis = 1).columns
        
        # plot in column order (bottom, middle, top) so the legend labels match the lines
        fig, ax = plt.subplots(figsize = (12, 5))
        ax.plot(logCumulativeDf[colBottom], color = 'red', linewidth = 2)
        ax.plot(logCumulativeDf[colMiddle], alpha = 0.3)
        ax.plot(logCumulativeDf[colTop], color = 'green', linewidth = 2)
        
        # formatting
        ax.axhline(y = 0, color = 'black', linestyle = '--', linewidth = 0.5)
        ax.set_title('Cumulative Log-Returns By Quantile Group', fontdict = {'fontsize': 15})
        ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0))
        ax.legend(allCols, loc = 'best')
        
        plt.show()
        
    def PlotPortfolioLongShortCumulativeReturns(self, factorQuantilesForwardReturnsDf,
                                                forwardPeriod = 1, weighting = 'mean',
                                                portfolioWeightsDict = None):

        '''
        Description:
            Show the cumulative returns of the long-short portfolio built from the quantile returns
        Args:
            factorQuantilesForwardReturnsDf: MultiIndex Dataframe (symbol/time indexes) with the factor values,
                                             forward returns and the quantile groups
            forwardPeriod: The period of forward returns
            weighting: The weighting to apply to the returns in each quantile after grouping:
                        - mean: Take the average of all the stock returns within each quantile
                        - factor: Take a factor-weighted return within each quantile
            portfolioWeightsDict: Dictionary with quantiles and weights to create a portfolio of returns
        Returns:
            Plot cumulative returns for a long-short portfolio
        '''
        
        # quantile returns -> long-short portfolio returns -> cumulative returns
        quantileReturnsDf = self.GetReturnsByQuantileDf(factorQuantilesForwardReturnsDf, forwardPeriod, weighting)
        portfolioReturnsDf = self.GetPortfolioLongShortReturnsDf(quantileReturnsDf, portfolioWeightsDict)
        cumulativeDf = self.GetCumulativeReturnsDf(portfolioReturnsDf)
        
        fig, ax = plt.subplots(figsize = (12, 5))
        
        # the first column is the portfolio and gets the prominent line
        plottedCols = cumulativeDf.columns
        ax.plot(cumulativeDf[plottedCols[0]], color = 'black', linewidth = 2)
        
        # any further column is drawn faded behind it
        remainingCols = plottedCols[1:]
        if len(plottedCols) > 1:
            ax.plot(cumulativeDf[remainingCols], alpha = 0.3)
            
        # formatting
        ax.axhline(y = 0, color = 'black', linestyle = '--', linewidth = 0.5)
        ax.set_title('Cumulative Returns Long-Short Portfolio', fontdict = {'fontsize': 15})
        ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0))
        ax.legend(plottedCols, loc = 'best')
        
        plt.show()
        
    def PlotIC(self, factorQuantilesForwardReturnsDf):
        
        '''
        Description:
            For every forward return column, plot the Information Coefficient per time
            (Spearman Rank Correlation between factor values and forward returns)
            along with its 21-row moving average
        Args:
            factorQuantilesForwardReturnsDf: MultiIndex Dataframe (symbol/time indexes) with the factor values,
                                             forward returns and the quantile groups
        Returns:
            Plot of the Information Coefficient (Spearman Rank Correlation) for different periods along with a moving average
        '''
        
        def WindowMean(window):
            # mean of one rolling window
            return np.mean(window)
        
        # forward return columns are the ones containing 'D' (e.g. '5D'); factor columns contain 'Factor'
        allCols = factorQuantilesForwardReturnsDf.columns
        forwardPeriods = [int(name.split('D', 1)[0]) for name in allCols if 'D' in name]
        factorCol = [name for name in allCols if 'Factor' in name]
        
        # one figure per forward period
        for period in forwardPeriods:
            col = str(period) + 'D'
            
            # spearman rank coefficient for each time between the factor values and forward returns
            correlationByTime = (factorQuantilesForwardReturnsDf.groupby('time')
                                    .apply(lambda x: stats.spearmanr(x[factorCol], x[col])[0]))
            icDf = correlationByTime.to_frame().dropna()
            icDf.columns = ['IC']
            
            # apply a moving average for smoothing
            icDf['21D Moving Average'] = icDf.rolling(21).apply(WindowMean)
            
            # raw coefficient (faded), its moving average and a zero reference line
            fig, ax = plt.subplots(figsize = (12, 5))
            ax.plot(icDf['IC'], alpha = 0.5)
            ax.plot(icDf['21D Moving Average'])
            ax.axhline(y = 0, color = 'black', linestyle = '--', linewidth = 0.5)

            # text box with the mean and standard deviation of the coefficient
            mu = icDf['IC'].mean()
            sigma = icDf['IC'].std()
            statsLines = (r'$\mu=%.2f$' % (mu, ),
                          r'$\sigma=%.2f$' % (sigma, ))
            textstr = '\n'.join(statsLines)
            boxStyle = dict(boxstyle = 'round', facecolor = 'white', alpha = 0.5)
            ax.text(0.05, 0.95, textstr, transform = ax.transAxes, fontsize = 14,
                    verticalalignment = 'top', bbox = boxStyle)

            ax.set_title(col + ' Forward Return Information Coefficient (IC)', fontdict = {'fontsize': 15})
            ax.legend(icDf.columns, loc = 'upper right')
            
            plt.show()
            
    # run full factor analysis --------------------------------------------------------------------------------------
    
    def RunFactorAnalysis(self, factorQuantilesForwardReturnsDf, forwardPeriod = 1,
                        weighting = 'mean', portfolioWeightsDict = None, makePlots = True):
                                
        '''
        Description:
            Run all needed functions and generate relevant DataFrames and plots for analysis
        Args:
            factorQuantilesForwardReturnsDf: MultiIndex Dataframe (symbol/time indexes) with the factor values,
                                             forward returns and the quantile groups
            forwardPeriod: The period of forward returns
            weighting: The weighting to apply to the returns in each quantile after grouping:
                        - mean: Take the average of all the stock returns within each quantile
                        - factor: Take a factor-weighted return within each quantile
            portfolioWeightsDict: Dictionary with quantiles and weights to create a portfolio of returns
        Returns:
            Plots for factor analysis
        '''
        
        # plotting
        if makePlots:
            self.PlotMeanReturnsByQuantile(factorQuantilesForwardReturnsDf)
            self.PlotCumulativeReturnsByQuantile(factorQuantilesForwardReturnsDf)
            self.PlotPortfolioLongShortCumulativeReturns(factorQuantilesForwardReturnsDf)
            self.PlotIC(factorQuantilesForwardReturnsDf)
        
        # keep DataFrames
        self.returnsByQuantileDf = self.GetReturnsByQuantileDf(factorQuantilesForwardReturnsDf, forwardPeriod, weighting)
        self.cumulativeReturnsByQuantileDf = self.GetCumulativeReturnsDf(self.returnsByQuantileDf)
        self.portfolioLongShortReturnsDf = self.GetPortfolioLongShortReturnsDf(self.returnsByQuantileDf, portfolioWeightsDict)
        self.portfolioLongShortCumulativeReturnsDf = self.GetCumulativeReturnsDf(self.portfolioLongShortReturnsDf)

from clr import AddReference
AddReference("System")
AddReference("QuantConnect.Common")
AddReference("QuantConnect.Indicators")
AddReference("QuantConnect.Algorithm.Framework")

from datetime import timedelta
from QuantConnect.Data.UniverseSelection import *
from Selection.FundamentalUniverseSelectionModel import FundamentalUniverseSelectionModel

from HelperFunctions import GetFundamentalDataDict, MakeCalculations, GetLongShortLists, UpdatePlots
#from RiskManagement import *
from ManualInputs import ManualInputs

import pandas as pd
import numpy as np

# Define the Universe Model Class
class SMIDGrowth(FundamentalUniverseSelectionModel):

    '''
    Description:
        Universe selection model that:
            - Coarse: keeps the most liquid stocks with fundamental data
            - Fine: screens by market cap, leverage and valuation ratios, then delegates the
                    final long/short selection to the helper functions
    Notes:
        NOTE(review): OnSecuritiesChanged, BuyPut and BuyCall rely on members that this class does not
        define (Log, Debug, Time, Securities, OptionChainProvider, AddOptionContract, MarketOrder).
        They look like QCAlgorithm members - confirm they are reachable from this object before
        relying on those methods.
    '''

    # integer values compared against contract.ID.OptionRight when filtering option contracts
    # NOTE(review): presumably LEAN's OptionRight enum values (Call = 0, Put = 1) - confirm
    _CALL_RIGHT = 0
    _PUT_RIGHT = 1

    def __init__(self,
                benchmark = 'SPY',
                nStocks = 500,
                lookback = 252,
                maxNumberOfPositions = 20,
                rebalancingFunc = Expiry.EndOfMonth,
                filterFineData = True,
                universeSettings = None,
                securityInitializer = None):

        '''
        Description:
            Store the selection settings and initialize the rebalancing/plotting state
        Args:
            benchmark: Stored on the instance (not read in this class; the helper functions
                       receiving this instance may use it)
            nStocks: Maximum number of stocks kept by the fine screen
                     (the coarse filter passes on up to twice as many)
            lookback: Lookback forwarded to MakeCalculations
            maxNumberOfPositions: Stored on the instance (not read in this class; the helper
                                  functions receiving this instance may use it)
            rebalancingFunc: Function that takes the current algorithm time and
                             returns the time of the next rebalancing
            filterFineData: Forwarded to the base class constructor
            universeSettings: Forwarded to the base class constructor
            securityInitializer: Accepted but currently not forwarded to the base class
        '''

        self.benchmark = benchmark

        self.nStocks = nStocks
        self.lookback = lookback
        self.maxNumberOfPositions = maxNumberOfPositions

        self.rebalancingFunc = rebalancingFunc
        self.nextRebalance = None # no rebalancing has happened yet

        self.initBenchmarkPrice = 0
        self.portfolioValueHigh = 0 # initialize portfolioValueHigh for drawdown calculation
        self.portfolioValueHighInitialized = False # initialize portfolioValueHighInitialized for drawdown calculation

        super().__init__(filterFineData, universeSettings)#, securityInitializer)

        # state used by the option-related methods
        self.averages = { }
        self.hist = RollingWindow[float](390*22)
        self.contract = None # last option contract selected by BuyPut/BuyCall
        self.buys = [] # securities reported as added in OnSecuritiesChanged
        self.sells = [] # securities reported as removed in OnSecuritiesChanged
        self.contract_by_equity = {} # equity symbol -> contract returned by BuyPut (may be None)

        # NOTE(review): AddEquity is not defined in this class; this loop only runs when
        # ManualInputs.m_tickers is not empty - confirm the method is available before adding tickers
        for x in ManualInputs.m_tickers:
            self.AddEquity(x, Resolution.Daily)

    def SelectCoarse(self, algorithm, coarse):

        '''
        Description:
            Coarse selection: refresh the plots and, when a rebalancing is due,
            return the most liquid symbols
        Args:
            algorithm: The algorithm instance (its Time drives the rebalancing schedule)
            coarse: Iterable of coarse fundamental objects
        Returns:
            Universe.Unchanged before the next rebalancing time; otherwise a list with
            up to 2 * nStocks symbols sorted by descending dollar volume
        '''

        # update plots -----------------------------------------------------------------------------------------------
        UpdatePlots(self, algorithm)

        # if it isn't time to rebalance, keep the previous symbols
        if self.nextRebalance is not None and algorithm.Time < self.nextRebalance:
            return Universe.Unchanged
        self.nextRebalance = self.rebalancingFunc(algorithm.Time)

        # filter by fundamental data availability, price and dollar volume
        filteredCoarse = [x for x in coarse if x.HasFundamentalData and x.Price > 10 and x.DollarVolume > 5e6]

        # select the top dollar volume stocks
        sortedDollarVolume = sorted(filteredCoarse, key = lambda x: x.DollarVolume, reverse = True)
        coarseSymbols = [x.Symbol for x in sortedDollarVolume][:(self.nStocks * 2)]

        return coarseSymbols

    def SelectFine(self, algorithm, fine):

        '''
        Description:
            Fine selection: screen the stocks, rank them by price-to-book ratio and let the
            helper functions produce the long/short lists
        Args:
            algorithm: The algorithm instance
            fine: Iterable of fine fundamental objects
        Returns:
            List with the long symbols followed by the short symbols
        '''

        # keep the stocks passing the screen, highest price-to-book ratio first
        screenedFine = [f for f in fine if self._PassesFineScreen(f)]
        sortedByRatios = sorted(screenedFine,
                                key = lambda f: f.ValuationRatios.PBRatio, reverse = True)[:self.nStocks]

        # generate dictionary with factors -----------------------------------------------------------------------------
        fundamentalDataBySymbolDict = GetFundamentalDataDict(algorithm, sortedByRatios, 'universe')

        # make calculations to create long/short lists -----------------------------------------------------------------
        fineSymbols = list(fundamentalDataBySymbolDict.keys())
        calculations = MakeCalculations(algorithm, fineSymbols, self.lookback, Resolution.Daily, fundamentalDataBySymbolDict)

        # get long/short lists of symbols
        longs, shorts = GetLongShortLists(self, algorithm, calculations, 'universe')
        finalSymbols = longs + shorts

        return finalSymbols

    def _PassesFineScreen(self, f):

        '''
        Description:
            Check whether a fine fundamental object belongs in the fine selection
        Args:
            f: Fine fundamental object
        Returns:
            True when the stock passes every screening criterion and is not in
            ManualInputs.restrictedList, or when it is listed in ManualInputs.m_tickers
            (manual tickers bypass the screen, including the restricted list)
        '''

        # disabled criteria: f.OperationRatios.FCFGrowth.Value > 10, f.OperationRatios.RevenueGrowth > 5
        passesScreen = (5e6 < f.MarketCap < 1e10
                        and f.OperationRatios.TotalDebtEquityRatio.OneMonth < 0.7
                        and f.ValuationRatios.PEGRatio > 5
                        and f.ValuationRatios.PBRatio >= 1
                        and f.Symbol not in ManualInputs.restrictedList)

        # explicit grouping of the original "a and b ... or c" condition
        return passesScreen or f.Symbol in ManualInputs.m_tickers

    def OnSecuritiesChanged(self, changes):

        '''
        Description:
            Log the universe changes, buy a put for every added security
            and keep track of added/removed securities
        Args:
            changes: Object exposing the AddedSecurities and RemovedSecurities collections
        '''

        self.Log(f'New Securities Added: {[security.Symbol.Value for security in changes.AddedSecurities]}')
        self.Log(f'Securities Removed{[security.Symbol.Value for security in changes.RemovedSecurities]}')

        for security in changes.AddedSecurities:
            self.buys.append(security)
            # BuyPut returns None when no suitable contract was found
            self.contract_by_equity[security.Symbol] = self.BuyPut(security.Symbol)

        for security in changes.RemovedSecurities:
            self.sells.append(security)

    def BuyPut(self, symbol, minExpiryDays = 0, maxExpiryDays = 40):

        '''
        Description:
            Buy one put contract on the given underlying
        Args:
            symbol: Symbol of the underlying
            minExpiryDays: Minimum number of days to expiry (inclusive)
            maxExpiryDays: Maximum number of days to expiry (inclusive)
        Returns:
            The contract bought, or None when no contract is available
        '''

        return self._BuyOption(symbol, self._PUT_RIGHT, 'BuyPut', minExpiryDays, maxExpiryDays)

    def BuyCall(self, symbol, minExpiryDays = 0, maxExpiryDays = 40):

        '''
        Description:
            Buy one call contract on the given underlying
        Args:
            symbol: Symbol of the underlying
            minExpiryDays: Minimum number of days to expiry (inclusive)
            maxExpiryDays: Maximum number of days to expiry (inclusive)
        Returns:
            The contract bought, or None when no contract is available
        '''

        return self._BuyOption(symbol, self._CALL_RIGHT, 'BuyCall', minExpiryDays, maxExpiryDays)

    def _BuyOption(self, symbol, optionRight, label, minExpiryDays, maxExpiryDays):

        '''
        Description:
            Shared implementation of BuyPut/BuyCall: pick the contract with the latest expiry
            inside the expiry window and the strike closest to the underlying price,
            subscribe to it and buy one contract with a market order
        Args:
            symbol: Symbol of the underlying
            optionRight: Value compared against contract.ID.OptionRight
            label: Prefix of the debug message (name of the public method)
            minExpiryDays: Minimum number of days to expiry (inclusive)
            maxExpiryDays: Maximum number of days to expiry (inclusive)
        Returns:
            The contract bought, or None when no contract is available
        '''

        contracts = self.OptionChainProvider.GetOptionContractList(symbol, self.Time)
        self.Debug(f"{label}: {symbol} {len(contracts)}")
        if len(contracts) == 0: return None

        # keep the contracts of the requested right expiring inside the window
        today = self.Time.date()
        candidates = [x for x in contracts
                        if minExpiryDays <= (x.ID.Date.date() - today).days <= maxExpiryDays
                        and x.ID.OptionRight == optionRight]
        if len(candidates) == 0: return None

        price = self.Securities[symbol].Price

        # latest expiry first; among those, the strike closest to the underlying price
        # (min keeps the first candidate on ties, like the stable sorts it replaces)
        latestExpiry = max(x.ID.Date for x in candidates)
        self.contract = min((x for x in candidates if x.ID.Date == latestExpiry),
                            key = lambda x: abs(price - x.ID.StrikePrice))

        self.AddOptionContract(self.contract, Resolution.Minute)
        self.MarketOrder(self.contract, 1)
        return self.contract

class ManualInputs:

    '''
    Description:
        Hand-maintained ticker lists read by the universe selection
    '''

    # tickers that SMIDGrowth.SelectFine always lets through its screen
    # examples: ['AAPL', 'AMZN', 'NFLX', 'GOOG', 'FB'] or ['AMZN']
    m_tickers = []

    # tickers that SMIDGrowth.SelectFine rejects unless they are also in m_tickers
    restrictedList = ['GME']