Rearrange Quandl DataFrame

Hello

I wrote an algo about pair trading (which is attached) but each time I want to use quandl datas to extract the historical prices of crude oil I receive an error because my quandl dataframe doesn't fit the dataframe of the equity

import numpy as np
import pandas as pd
import statsmodels.api as sm
from Selection.QC500UniverseSelectionModel import QC500UniverseSelectionModel
from QuantConnect.Python import PythonQuandl

class Oilsensibiltiy(QCAlgorithm):

    def Initialize(self):
        
        self.SetStartDate( 2012 , 1, 1)       # Set Start Date
        self.SetEndDate( 2020 , 10, 10)
        self.SetCash(100000)                # Set Strategy Cash

        self.lookback = 61                 # Length(days) of historical data
        self.weights_long,self.weights_short = pd.DataFrame(),pd.DataFrame()      # Pandas data frame (index: symbol) that stores the weight
        self.Portfolio.MarginModel = PatternDayTradingMarginModel()
        self.AGG = self.AddEquity("AGG", Resolution.Daily).Symbol
        self.nextLiquidate = self.Time   # Initialize last trade time
        self.rebalance_days = 30
        
        self.UniverseSettings.Resolution = Resolution.Daily   # Use hour resolution for speed
        self.oil = self.AddData(QuandlOil, 'FRED/DCOILBRENTEU', Resolution.Daily).Symbol
        self.AddUniverse(self.CoarseSelection, self.SelectFine)
        self.selectedequity = 1000
        self.numberOfSymbolsFine = 25
        self.Symbols_long = []
        self.Symbols_short = []
        self.zscore_keep_buy = []
        self.zscore_keep_short = []

    def CoarseSelection(self, coarse):
        
        if self.Time < self.nextLiquidate:
            return Universe.Unchanged
        
        selected = sorted([x for x in coarse if x.HasFundamentalData and x.Price > 5],
                          key=lambda x: x.DollarVolume, reverse=True)

        symbols = [x.Symbol for x in selected[:self.selectedequity ] ]

        return symbols
        
    def SelectFine(self, fine):
        
        filtered = [x.Symbol for x in fine if  x.AssetClassification.MorningstarSectorCode == 309]
        
        self.Symbols_long = filtered[:self.numberOfSymbolsFine]
        
        self.Symbols_short = self.Symbols_long
        
        return self.Symbols_long + self.Symbols_short

    def GetWeights(self, history , crudeoil_history,Long):
        
        crudeoil_history = crudeoil_history.pct_change().dropna()
        
        sample = history.dropna(axis=1).pct_change().dropna()
        
        zscore = self.ZscoreGrade(sample,crudeoil_history)

        # Train Ordinary Least Squares linear model for each stock
        OLSmodels = {ticker: sm.OLS(sample[ticker], crudeoil_history).fit() for ticker in sample.columns}
        
        if Long:
            
            zscore_buy = zscore[zscore>1.75].dropna(axis=1)
            
            zscore_keep = zscore[zscore>0.50].dropna(axis=1)
            
            try :
                
                weights = (zscore_buy * (1 / len(zscore_buy.columns))/zscore_buy).iloc[0,:].sort_values()
                
            except : 
                
                weights = pd.DataFrame()
        else:
            
            zscore_short = zscore[ zscore < - 1.50 ].dropna(axis=1)
            
            zscore_keep = zscore[ zscore < - 0.50 ].dropna(axis=1)
            
            try :
                
                weights = (zscore_short* (-1 / len(zscore_short.columns))/zscore_short).sort_values()
                
            except:
                
                weights = pd.DataFrame()
        
        return weights,zscore_keep
        
    def ZscoreGrade(self,sample, factors) :
        
        factors = sm.add_constant(factors)
        
        # Train Ordinary Least Squares linear model for each stock
        
        OLSmodels = {ticker: sm.OLS(sample[ticker], factors).fit() for ticker in sample.columns}
        
        # Get the residuals from the linear regression after PCA for each stoc
        
        resids = pd.DataFrame({ticker: model.resid for ticker, model in OLSmodels.items()})
        
        #Get the OU parameters 
        
        shifted_residuals = resids.cumsum().iloc[1:,:]
        
        resids = resids.cumsum().iloc[:-1,:]
        
        resids.index = shifted_residuals.index
        
        OLSmodels2 = {ticker: sm.OLS(resids[ticker],sm.add_constant(shifted_residuals[ticker])).fit() for ticker in resids.columns} 
        
        # Get the new residuals
        
        resids2 = pd.DataFrame({ticker: model.resid for ticker, model in OLSmodels2.items()})
        
        # Get the mean reversion parameters 
        
        a = pd.DataFrame({ticker : model.params[0] for ticker , model in OLSmodels2.items()},index=["a"])
    
        b = pd.DataFrame({ticker: model.params[1] for ticker , model in OLSmodels2.items()},index=["a"])
        
        e = (resids2.std())/(252**(-1/2))
    
        k = -np.log(b) * 252
        
        #Get the z-score
        var = (e**2 /(2 * k) )*(1 - np.exp(-2 * k * 252))
    
        num = -a * np.sqrt(1 - b**2)
    
        den = ( 1-b ) * np.sqrt( var )
    
        m  = ( a / ( 1 - b ) )
    
        zscores= num / den # zscores of the most recent day
    
        return zscores
        
    def OnData(self, data):
        
        history_long = self.History(self.Symbols_long, self.lookback, Resolution.Daily).close.unstack()
        
        crudeoil_history = self.changeDataFrame(history_long)
        
        self.weights_long,self.zscore_keep_buy = self.GetWeights(history_long,crudeoil_history,Long=True)
        
        #history_short = self.History(self.Symbols_short, self.lookback, Resolution.Daily).close.unstack(level=0)
        
        self.weight_short,self.zscore_keep_short = self.GetWeights(history_long,crudeoil_history,Long=False)
        
        for holding in self.Portfolio.Values:
            if holding.Symbol in self.zscore_keep_short.index or holding.Symbol in self.zscore_keep_buy.index or holding.Symbol == self.AGG :
                continue
            if holding.Invested:
                self.Liquidate(holding.Symbol)

        for symbol, weight in self.weights_long.items():
            self.SetHoldings(symbol,1*weight)
            
        for symbol, weight in self.weights_short.items():
            self.SetHoldings(symbol,-1*weight)
        
        if self.Time < self.nextLiquidate:
            return 
        
        self.SetHoldings('AGG', 0.70 )
        
        self.nextLiquidate = self.Time + timedelta(self.rebalance_days)
    
    def changeDataFrame(self,history_long):
        
        idxb = history_long.T.index[0]
        
        idxe = history_long.T.index[-1]
        
        crudeoil_history = self.History(QuandlOil,self.oil , 300, Resolution.Daily).droplevel(level=0)
        
        crudeoil_history = crudeoil_history[~crudeoil_history.index.duplicated(keep='last')].loc[idxb:idxe]
        
        return crudeoil_history
        
    def OnSecuritiesChanged(self, changes):

        for security in changes.RemovedSecurities:
            if security.Invested:
                self.Liquidate(security.Symbol, 'Removed from Universe')
        
class QuandlOil(PythonQuandl):
    def __init__(self):
        self.ValueColumnName = 'Value'

Can someone help me because I am stuck (everything else work)

Thank you .

The material on this website is provided for informational purposes only and does not constitute an offer to sell, a solicitation to buy, or a recommendation or endorsement for any security or strategy, nor does it constitute an offer to provide investment advisory services by QuantConnect. In addition, the material offers no opinion with respect to the suitability of any security or specific investment. QuantConnect makes no guarantees as to the accuracy or completeness of the views expressed in the website. The views are subject to change, and may have become unreliable for various reasons, including changes in market conditions or economic circumstances. All investments involve risk, including loss of principal. You should consult with an investment professional before making any investment decisions.

Hi Wawes23,

The error that the code above produces ("Runtime Error: ValueError : The indices for endog and exog are not aligned") is caused when trying to fit the OLS model. If my understanding is correct that the intention is to fit the model such that the oil return is the independent variable and the return of a single security in the universe is the dependent variable, the error surfaces because the model is being passed transposed data. That is, instead of passing the return of stock X from day Y to day Z, the OLS model is being passed return of all the stocks in the universe on day Y. To format the data correctly, see the attached backtest.

The algorithm still needs further work. Specifically, the timestamps of the DataFrames used to train the OLS model are still slightly different (see the logs). To resolve this, the definition of `changeDataFrame` needs to be adjusted as mentioned in this related thread.

Best,
Derek Melchin

Derek Melchin

STAFF ,

Laurent Crouzet

4.9k ,

Derek Melchin : thanks for having (better) explained the issue(s) than I did in the related thread!

wawes23 : is it clearer from now on?

Elyes Mahjoubi

3.8k ,

This can be adopted as a solution

Wawes23 INVESTOR

Update Backtest

Notebook

person upvoted this people upvoted this

To unlock posting to the community forums please complete at least 30% of Boot Camp.
You can continue your Boot Camp training progress from the terminal. We hope to see you in the community soon!

Platform

Radically Open-Source Algorithmic Trading Engine

Join Our Discord Channel

Quarterly Open-Source Trading Competition

Draft Discussions

Bookmarked Discussions

SEARCH DISCUSSIONS

TOP 5 Research PUblications

About Quant League

competition rules

previous competitions

335,200 Quants.

VOTE FOR UPCOMING FEATURES

Rearrange Quandl DataFrame

Organization

Team

Clone Strategy

Previous Ranking

IN THIS RESEARCH

PARTICIPANTS

Discussion Awards

Actions

Join QuantConnect for Free

Platform

SIGN IN

Radically Open-Source Algorithmic Trading Engine

Join Our Discord Channel

Quarterly Open-Source Trading Competition

Draft Discussions

Bookmarked Discussions

SEARCH DISCUSSIONS

TOP 5 Research PUblications

About Quant League

competition rules

previous competitions

335,200 Quants.

VOTE FOR UPCOMING FEATURES

Rearrange Quandl DataFrame

Organization

Team

Clone Strategy

Previous Ranking

IN THIS RESEARCH

PARTICIPANTS

Discussion Awards

SHARE RESEARCH

SHARE DISCUSSION

SHARE ARTICLE

SHARE

Actions

Join QuantConnect for Free