# Backtest
#
# Overall Statistics
#   Total Trades: 1117
#   Average Win: 0.92%
#   Average Loss: -0.83%
#   Compounding Annual Return: 10.768%
#   Drawdown: 34.000%
#   Expectancy: 0.429
#   Net Profit: 587.682%
#   Sharpe Ratio: 0.775
#   Probabilistic Sharpe Ratio: 10.711%
#   Loss Rate: 32%
#   Win Rate: 68%
#   Profit-Loss Ratio: 1.10
#   Alpha: 0.096
#   Beta: 0.001
#   Annual Standard Deviation: 0.124
#   Annual Variance: 0.015
#   Information Ratio: 0.038
#   Tracking Error: 0.215
#   Treynor Ratio: 116.662
#   Total Fees: $81447.59
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pandas as pd
import statsmodels.api as sm
import sklearn as sk
from sklearn import linear_model
import math
from datetime import datetime, timedelta, date
from pandas.tseries.offsets import MonthEnd
from dateutil.relativedelta import relativedelta
from io import StringIO

class FINA4803(QCAlgorithm):

    def Initialize(self):
        """Set up the backtest: period, cash, benchmark, signal tables, universe and schedules.

        Downloads the pre-computed alpha table and the NBER recession indicator,
        stores them on ``self.alpha_file_df`` / ``self.nber_df``, registers every
        ticker at daily resolution and schedules ``RegressionandTrade`` and
        ``PlotStuff``.
        """
        # Disabled draft: history look-back and in-algorithm regression (not working yet).
        #historydate = datetime.date(datetime.now()) - datetime.date(datetime(1998,12,23))
        #self.history_days = int(historydate.days)+1
        #self.alpha_file_df = self.getRegressionCoefficients()

        # Date of the first row of the alpha table; change it together with the file.
        self.First_Trading_Date = datetime(2002, 2, 28)

        # Backtest window. Earliest start is 2002/01/31 for the FF5 portfolio file and
        # 2002/02/28 for the ETF files; latest end is the last day of the previous month.
        self.Start_Date = datetime(2002, 2, 28)
        self.End_Date = datetime(2020, 12, 31)
        self.SetCash(1000000)  # starting cash

        # Whole months between the first table row and the backtest start.
        # RegressionandTrade uses this as the row position in both signal tables.
        table_start, backtest_start = self.First_Trading_Date, self.Start_Date
        year_gap = backtest_start.year - table_start.year
        month_gap = backtest_start.month - table_start.month
        self.counter = year_gap * 12 + month_gap

        # Derived from the attributes above - do not edit directly.
        self.SetStartDate(backtest_start.year, backtest_start.month, backtest_start.day)
        backtest_end = self.End_Date
        self.SetEndDate(backtest_end.year, backtest_end.month, backtest_end.day)

        #self.SetBrokerageModel(BrokerageName.InteractiveBrokersBrokerage, AccountType.Cash)
        self.Settings.FreePortfolioValuePercentage = 0.02  # share of the portfolio kept as cash

        # Benchmark
        self.benchmark = "SPY"
        self.AddEquity(self.benchmark)
        self.SetBenchmark(self.benchmark)

        # Available regression outputs (36-month rolling alpha, monthly trading).
        alpha_sources = {
            "FF5_Against_FF_Portfolio": "https://www.dropbox.com/s/a64gl0yxgx285xl/Alphas%20-%20FF%20Portfolio%20Against%20FF5%20Factors.csv?dl=1",
            "FF3_Against_FF_Portfolio": "https://www.dropbox.com/s/aqeqt8yag7cpydi/Alphas%20-%20FF%20Portfolio%20Against%20FF3%20Factors.csv?dl=1",
            "FF5_Against_6_ETFs": "https://www.dropbox.com/s/cn4v6oqhvvm3g5t/Alphas%20-%206%20ETF%20Against%20FF5%20Factors.csv?dl=1",
            "FF3_Against_6_ETFs": "https://www.dropbox.com/s/622igj5olkys3sv/Alphas%20-%206%20ETF%20Against%20FF3%20Factors.csv?dl=1",
            "FF5_Against_9_ETFs": "https://www.dropbox.com/s/x2c915qgyjmcgpw/Alphas%20-%209%20ETFs%20Against%20FF5%20Factors.csv?dl=1",
            "FF3_Against_9_ETFs": "https://www.dropbox.com/s/377ig3soc14oo65/Alphas%20-%209%20ETFs%20Against%20FF3%20Factors.csv?dl=1",
        }

        # Alpha table: the unnamed first CSV column is labelled 'Date' and then dropped,
        # so only the per-ticker alpha columns remain (rows are addressed by position).
        alpha_csv = self.Download(alpha_sources["FF5_Against_6_ETFs"])
        alpha_table = pd.read_csv(StringIO(alpha_csv))
        alpha_table = alpha_table.rename({'Unnamed: 0': 'Date'}, axis=1)
        alpha_table.drop('Date', axis=1, inplace=True)
        self.alpha_file_df = alpha_table

        # NBER US recession indicator.
        nber_link = "https://www.dropbox.com/s/rxigxh2fi0hb8si/USREC%20%28New%29.csv?dl=1"
        recession_table = pd.read_csv(StringIO(self.Download(nber_link)))
        # Subtracting MonthEnd(1) rolls each date back to a month-end date, which the
        # original note says keeps these dates consistent with the ETF dates.
        recession_table['DATE'] = pd.to_datetime(recession_table['DATE']) - MonthEnd(1)
        on_or_after_start = recession_table['DATE'] >= self.First_Trading_Date
        recession_table = recession_table.loc[on_or_after_start].reset_index(drop=True)
        # Recode the 0/1 indicator as 'NO'/'YES'.
        recession_table["USREC"] = recession_table["USREC"].replace([0], 'NO').replace([1], 'YES')
        self.nber_df = recession_table

        # Safe-haven switch and asset.
        self.safe_haven_status = True
        self.safe_haven = "TLT"

        sector_etfs = ["XLB", "XLE", "XLF", "XLI", "XLK", "XLP", "XLU", "XLV", "XLY"]
        for name in sector_etfs + [self.safe_haven, self.benchmark]:
            added = self.AddEquity(name, Resolution.Daily)
            security = self.Securities[added.Symbol]
            # Total-return prices: adjusted for dividends received.
            security.SetDataNormalizationMode(DataNormalizationMode.TotalReturn)
            #self.Consolidate(symbol, Calendar.Monthly, self.CalendarTradeBarHandler) #Consolidates daily data into monthly data https://github.com/QuantConnect/Lean/blob/master/Algorithm.Python/DataConsolidationAlgorithm.py
            security.SetLeverage(1.0)  # leverage of 1 so no margin is used

        # Monthly rebalance: month-start date rule for XLB, right after XLB's market open.
        self.Schedule.On(
            self.DateRules.MonthStart("XLB"),
            self.TimeRules.AfterMarketOpen("XLB"),
            self.RegressionandTrade,
        )

        # Monthly cash plot: month-end date rule for XLB, right after XLB's market open.
        self.Schedule.On(
            self.DateRules.MonthEnd("XLB"),
            self.TimeRules.AfterMarketOpen("XLB"),
            self.PlotStuff,
        )

    def CalendarTradeBarHandler(self, tradeBar):
        """Placeholder bar handler: ignores ``tradeBar`` and returns ``None``."""
        return None
    
    def RegressionandTrade(self):
        """Rebalance for the current month from the alpha and recession tables.

        Reads row ``self.counter`` of ``self.alpha_file_df`` (one alpha per
        ticker column) and of ``self.nber_df["USREC"]`` ('YES'/'NO').

        * Recession ('YES') with ``self.safe_haven_status`` enabled: put the
          whole portfolio into ``self.safe_haven``.
        * Otherwise ('YES' with the switch off, or 'NO'): split the portfolio
          equally across every ticker whose alpha is strictly positive. If no
          alpha is positive, no order is placed.
        * Any other flag value: no order is placed.

        ``self.counter`` is advanced by one on every call.

        Raises:
            IndexError: if ``self.counter`` is past the end of either table.
        """
        self.DefaultOrderProperties.TimeInForce = TimeInForce.Day

        row = int(self.counter)
        alphas = self.alpha_file_df.iloc[row]
        # Read the stored label directly. Comparing ``Series.all()`` to a string
        # is unreliable because ``all()`` is a truthiness reduction and may
        # return a bool rather than the stored 'YES'/'NO' value.
        recession_flag = self.nber_df["USREC"].iloc[row]

        if recession_flag == "YES" and self.safe_haven_status:
            # Third positional argument True: presumably the liquidate-existing-
            # holdings flag of SetHoldings - confirm against the QuantConnect API.
            self.SetHoldings(self.safe_haven, 1, True)
        elif recession_flag in ("YES", "NO"):
            selected = [ticker for ticker, alpha in alphas.items() if float(alpha) > 0]
            if selected:
                weight = 1 / len(selected)
                for position, ticker in enumerate(selected):
                    if position == 0:
                        # Only the first order carries the extra True flag, as before.
                        self.SetHoldings(ticker, weight, True)
                    else:
                        self.SetHoldings(ticker, weight)

        self.counter = self.counter + 1







    def Testfunct(self):
        """Scratch/test helper (free to modify): equal-weight the nine sector ETFs."""
        sector_funds = ("XLB", "XLE", "XLF", "XLI", "XLK", "XLP", "XLU", "XLV", "XLY")
        equal_share = 1/len(sector_funds)

        for fund in sector_funds:
            self.SetHoldings(fund, equal_share)
            
    def PlotStuff(self):
        """Plot the portfolio's current cash on the 'Cash' series of the 'Trade Plot' chart."""
        cash_now = self.Portfolio.Cash
        self.Plot('Trade Plot', 'Cash', cash_now)
    
    #Unused / Draft Functions
    
    def OnData(self, data):
        """Data event hook; this algorithm does nothing here.

        Arguments:
            data: Slice object keyed by symbol containing the stock data
        """
        return None
    def getRegressionCoefficients(self):
        """Compute 36-month rolling five-factor alphas for the nine sector ETFs.

        Draft ``.py`` port of Regression.ipynb (each ``qb`` replaced by ``self``).
        For every ETF, the monthly excess return (percent price change minus the
        Fama-French ``RF``) is regressed on the five factors over a sliding
        36-row window, and the regression intercept is kept as that window's alpha.

        Side effects:
            Adds each ETF via ``self.AddEquity`` and stores its history frame in
            the dict ``self.history_df``.

        Returns:
            pandas.DataFrame: one column per ETF ticker, one row per window,
            indexed by month-end dates starting 2002-02-28 (index name
            "Trading Dates").

        Notes:
            * Reads ``self.history_days``, which no active code visible in this
              class assigns; it must be set before calling or this raises
              ``AttributeError``.
            * The ETF series and the factor table are aligned purely by
              position, so they must have the same number of monthly rows.
        """
        window = 36  # months per regression
        factor_columns = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']
        etf_ticker_list = ['XLB', 'XLE', 'XLF', 'XLI', 'XLK', 'XLP', 'XLU', 'XLV', 'XLY']

        ### FF5 factors
        ff5_csv = self.Download("https://www.dropbox.com/s/8dyjtlyf1g4ulvn/F-F_Research_Data_5_Factors_2x3.CSV?dl=1")
        ff5_df = pd.read_csv(StringIO(ff5_csv), skiprows=3)  # skip the text header above the data
        ff5_df = ff5_df.rename({'Unnamed: 0': 'Date'}, axis=1)
        # YYYYMM -> timestamp, then moved forward to the month end to match the ETF dates.
        ff5_df['Date'] = pd.to_datetime(ff5_df['Date'], format='%Y%m') + MonthEnd(1)
        # Keep rows from the 1998-12-22 cut-off (ETF data starts after it), then drop
        # the first remaining row, which is not needed for the regression.
        ff5_df = ff5_df.loc[ff5_df['Date'] >= '1998-12-22'].iloc[1:].reset_index(drop=True)

        ### Daily history per ETF
        self.history_df = {}
        for ticker in etf_ticker_list:
            ticker_symbol = self.AddEquity(ticker).Symbol
            # Total-return prices: dividends reinvested and splits adjusted.
            self.Securities[ticker_symbol].SetDataNormalizationMode(DataNormalizationMode.TotalReturn)
            self.history_df[ticker] = self.History(ticker_symbol, self.history_days, Resolution.Daily)

        ### Monthly excess returns
        close_prices_monthly = {}
        for ticker in etf_ticker_list:
            # NOTE(review): resample needs a datetime index - confirm the frame
            # returned by self.History is indexed by time only.
            monthly = pd.DataFrame(self.history_df[ticker]["close"].resample("1M").last())
            # Month-over-month change in percent (same units as Fama-French RF);
            # NaNs are replaced by 0.
            monthly['Price Change'] = monthly['close'].pct_change(periods=1).fillna(0) * 100
            # The first month has no prior price to compare against.
            monthly = monthly.iloc[1:].copy()
            monthly["Price Change - RF"] = monthly['Price Change'] - ff5_df["RF"].values
            close_prices_monthly[ticker] = monthly

        ### Rolling alphas
        etf_alphas = {}
        for ticker in etf_ticker_list:
            excess_returns = close_prices_monthly[ticker]["Price Change - RF"]
            intercepts = []
            # Window ends run from `window` up to, but not including, the row count
            # (same bounds as the original while-loop).
            for stop in range(window, len(excess_returns.index)):
                start = stop - window
                X = ff5_df[factor_columns].iloc[start:stop]  # independent variables: the five factors
                Y = excess_returns.iloc[start:stop]          # dependent variable: ETF excess return

                regr = linear_model.LinearRegression()
                regr.fit(X, Y)
                intercepts.append(regr.intercept_)
            etf_alphas[ticker] = intercepts

        ### Combine into one frame, one column per ticker
        combined_etf_alpha = pd.concat(
            [pd.Series(etf_alphas[ticker], name=ticker, dtype=float) for ticker in etf_ticker_list],
            axis=1,
        )

        # Hard-coded dates: data starts 1999/01/31, plus 36 months gives 2002/01/31 (t);
        # the signal is only usable in month t+1, hence the 2002/02/28 start.
        # The index name is set on the date range itself; passing it as a second
        # positional argument to set_index would bind it to `drop`, not the name.
        combined_etf_alpha.index = pd.date_range(
            start='2/28/2002',
            periods=len(combined_etf_alpha),
            freq="M",
            name="Trading Dates",
        )

        return combined_etf_alpha