from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from datetime import datetime
from datetime import timedelta
import pandas as pd
import numpy as np

class TransdimensionalTachyonCompensator(QCAlgorithm):
    month = 0
    def Initialize(self):
        """Configure the backtest window, cash, universe and monthly schedule.

        Registers ``CoarseSelectionFunction`` as the coarse universe filter so
        that ``self.trading_symbols`` actually gets populated. Without that
        registration the selection function is never invoked, ``train`` always
        hits its empty-symbol guard, and the algorithm never trades.
        """
        self.SetStartDate(2000, 1, 1)
        # Short window; a longer run ending 2020-06-01 was previously
        # commented out here.
        self.SetEndDate(2000, 3, 1)
        self.SetCash(100000)

        # Number of daily bars requested per symbol by train().
        self.lookback_period = 20
        # Filled by CoarseSelectionFunction; read by train().
        self.trading_symbols = []

        self.UniverseSettings.Resolution = Resolution.Minute
        self.AddUniverse(self.CoarseSelectionFunction)

        # SPY is added only to anchor the schedule to the market open.
        self.spy = self.AddEquity("SPY").Symbol
        self.Schedule.On(
            self.DateRules.MonthStart(),
            self.TimeRules.AfterMarketOpen(self.spy, 15),
            self.train,
        )

    def avg_true_range(self, df, period=14):
        """Add "True Range" and "Avg TR" columns to ``df`` and return it.

        The true range of a bar is the largest of ``high - low``,
        ``|high - previous close|`` and ``|low - previous close|``. "Avg TR"
        is the simple rolling mean of the true range over ``period`` bars.

        When ``df`` has a ``symbol`` column the previous close and the rolling
        mean are taken within each symbol, so values never leak from one
        instrument into the next in a stacked multi-symbol frame.

        Args:
            df: Frame with ``high``, ``low`` and ``close`` columns, ordered
                oldest to newest (per symbol, if a ``symbol`` column exists).
                It is modified in place: the index is replaced by
                ``0..len(df)-1`` and the two columns are added.
            period: Window length of the rolling mean (default 14).

        Returns:
            The same ``df`` object. Bars without a previous close have NaN in
            "True Range"; "Avg TR" is NaN until ``period`` true ranges exist.
        """
        df.index = pd.RangeIndex(len(df))

        keys = df["symbol"] if "symbol" in df.columns else None
        if keys is None:
            prev_close = df["close"].shift(1)
        else:
            prev_close = df["close"].groupby(keys, sort=False).shift(1)

        candidates = pd.concat(
            [
                df["high"] - df["low"],
                (df["high"] - prev_close).abs(),
                (df["low"] - prev_close).abs(),
            ],
            axis=1,
        )
        # skipna=False keeps the true range undefined (NaN) for bars that have
        # no previous close instead of silently falling back to high - low.
        df["True Range"] = candidates.max(axis=1, skipna=False)

        def rolling_mean(series):
            return series.rolling(window=period, min_periods=period).mean()

        if keys is None or df.empty:
            df["Avg TR"] = rolling_mean(df["True Range"])
        else:
            df["Avg TR"] = (
                df["True Range"].groupby(keys, sort=False).transform(rolling_mean)
            )
        return df

    def CoarseSelectionFunction(self, coarse):
        """Choose the trading universe, at most once per calendar month.

        While ``self.Time.month`` equals the cached ``self.month`` the
        universe is left as is (``Universe.Unchanged``). Otherwise the month
        is cached and the 20 entries with the highest ``DollarVolume`` among
        those with ``HasFundamentalData`` are stored in
        ``self.trading_symbols`` and returned.
        """
        current_month = self.Time.month
        if current_month == self.month:
            return Universe.Unchanged

        self.month = current_month
        # Filtering before sorting yields the same list as sorting first:
        # sorted() is stable, so the relative order of the kept entries is
        # identical either way.
        with_fundamentals = (c for c in coarse if c.HasFundamentalData)
        ranked = sorted(
            with_fundamentals, key=lambda c: c.DollarVolume, reverse=True
        )
        self.trading_symbols = [c.Symbol for c in ranked[:20]]
        return self.trading_symbols
    def train(self):
        """Rebuild indicators, refit the classifier and rebalance.

        Steps:

        1. Request ``self.lookback_period`` daily bars for every symbol in
           ``self.trading_symbols``.
        2. Derive per-symbol indicators (RSI, stochastic %K, Williams %R,
           MACD, rate of change, average true range) and the up/down label
           ``Prediction``.
        3. Add chandelier exit levels and fit a random forest on the
           indicator columns.
        4. Trade the rows whose date equals the algorithm's current date.

        Side effects: sets ``self.price_data``, ``self.chandelier_long``,
        ``self.chandelier_short``, ``self.classifier`` and ``self.test_df``,
        and may submit orders. Returns ``None``. The run is skipped with a
        debug message when no symbols are selected yet, when the history is
        incomplete, or when fewer than two rows survive indicator warm-up.
        """
        if not self.trading_symbols:
            return

        price_history = self.History(
            self.trading_symbols, self.lookback_period, Resolution.Daily
        )
        frame = self._build_price_frame(price_history)
        if frame is None:
            return

        frame = self._add_indicators(frame)
        if len(frame) < 2:
            # train_test_split needs at least one row on each side.
            self.Debug("train: not enough complete rows to fit, skipping")
            return

        frame = self._add_chandelier_exits(frame)
        self.price_data = frame
        self.chandelier_long = frame["chandelier long"].tolist()
        self.chandelier_short = frame["chandelier short"].tolist()

        feature_columns = [
            "RSI", "k_percent", "r_percent", "Price_Rate_Of_Change", "MACD",
        ]
        X_train, _, y_train, _ = train_test_split(
            frame[feature_columns], frame["Prediction"], random_state=0
        )
        classifier = RandomForestClassifier(
            n_estimators=100, oob_score=True, criterion="gini", random_state=0
        )
        classifier.fit(X_train, y_train)
        # NOTE(review): the fitted model is kept for inspection only. Orders
        # below are driven by the realized "Prediction" label of the latest
        # bar, exactly as before, not by the model's output.
        self.classifier = classifier

        self.test_df = frame[
            [
                "symbol", "close", "datetime", "Prediction",
                "chandelier long", "chandelier short",
            ]
        ]
        self._rebalance()

    def _build_price_frame(self, price_history):
        """Flatten the History result into one row per symbol and day.

        Returns ``None`` (after a debug message) unless the history holds
        exactly ``lookback_period`` rows for every trading symbol.
        """
        expected_rows = len(self.trading_symbols) * self.lookback_period
        if price_history is None or len(price_history) != expected_rows:
            # NOTE(review): the original length check had lost its body;
            # skipping the run is the assumed intent.
            self.Debug("train: incomplete price history, skipping")
            return None

        # NOTE(review): rows are labelled positionally, i.e. this assumes the
        # history is grouped by symbol in self.trading_symbols order, as the
        # original code did. Confirm against the History frame's index.
        symbols = [
            symbol
            for symbol in self.trading_symbols
            for _ in range(self.lookback_period)
        ]
        # Second index level is the bar time; keep only its date part.
        dates = [str(key[1]).split(" ")[0] for key in price_history.index]
        return pd.DataFrame(
            {
                "symbol": symbols,
                "datetime": dates,
                "close": price_history["close"].values,
                "high": price_history["high"].values,
                "low": price_history["low"].values,
                "open": price_history["open"].values,
                "volume": price_history["volume"].values,
            }
        )

    def _add_indicators(self, frame):
        """Add per-symbol indicator and label columns, then drop warm-up rows.

        Every rolling or exponentially weighted statistic is computed within
        a symbol so that no value mixes bars of different instruments.
        """
        n = 14  # look-back shared by RSI, %K and %R
        roc_n = 9
        keys = frame["symbol"]

        def per_symbol(series, func):
            return series.groupby(keys, sort=False).transform(func)

        # Day-over-day change; NaN on the first bar of every symbol.
        change = frame["close"].groupby(keys, sort=False).diff()
        frame["change_in_price"] = change

        # RSI from exponentially weighted average gains and losses.
        gains = change.clip(lower=0)
        losses = change.clip(upper=0).abs()
        ewma_up = per_symbol(gains, lambda s: s.ewm(span=n).mean())
        ewma_down = per_symbol(losses, lambda s: s.ewm(span=n).mean())
        relative_strength = ewma_up / ewma_down
        frame["down_days"] = losses
        frame["up_days"] = gains
        frame["RSI"] = 100.0 - (100.0 / (1.0 + relative_strength))

        # Stochastic %K and Williams %R share the same n-bar extremes.
        low_n = per_symbol(frame["low"], lambda s: s.rolling(window=n).min())
        high_n = per_symbol(frame["high"], lambda s: s.rolling(window=n).max())
        frame["low_14"] = low_n
        frame["high_14"] = high_n
        frame["k_percent"] = 100 * ((frame["close"] - low_n) / (high_n - low_n))
        frame["r_percent"] = ((high_n - frame["close"]) / (high_n - low_n)) * -100

        # MACD and its 9-bar signal line.
        ema_26 = per_symbol(frame["close"], lambda s: s.ewm(span=26).mean())
        ema_12 = per_symbol(frame["close"], lambda s: s.ewm(span=12).mean())
        macd = ema_12 - ema_26
        frame["MACD"] = macd
        frame["MACD_EMA"] = per_symbol(macd, lambda s: s.ewm(span=9).mean())

        frame["Price_Rate_Of_Change"] = per_symbol(
            frame["close"], lambda s: s.pct_change(periods=roc_n)
        )

        # Label: 1.0 when the close rose versus the previous bar, -1.0 when
        # it fell. Flat days count as up so the label stays binary. Only the
        # label column is touched.
        frame["Prediction"] = np.sign(change).replace(0.0, 1.0)

        frame = self.avg_true_range(frame)

        # A zero high-low range makes %K / %R infinite; treat those rows as
        # missing so they are dropped instead of reaching the classifier.
        ratio_columns = ["RSI", "k_percent", "r_percent", "Price_Rate_Of_Change"]
        frame[ratio_columns] = frame[ratio_columns].replace(
            [np.inf, -np.inf], np.nan
        )
        return frame.dropna().reset_index(drop=True)

    def _add_chandelier_exits(self, frame, window=22, multiplier=3):
        """Add trailing per-symbol chandelier exit levels to ``frame``.

        Long exit  = highest high of the last ``window`` bars minus
                     ``multiplier`` times the mean "Avg TR" of those bars.
        Short exit = lowest low of the last ``window`` bars plus the same
                     ATR term.

        Windows look backwards only and stay inside one symbol. Fewer than
        ``window`` bars are accepted (``min_periods=1``) because the frame
        usually holds only a handful of rows per symbol after warm-up.
        """
        keys = frame["symbol"]

        def trailing(column, how):
            return frame[column].groupby(keys, sort=False).transform(
                lambda s: getattr(s.rolling(window=window, min_periods=1), how)()
            )

        atr_term = trailing("Avg TR", "mean") * multiplier
        frame["chandelier long"] = trailing("high", "max") - atr_term
        frame["chandelier short"] = trailing("low", "min") + atr_term
        return frame

    def _rebalance(self):
        """Trade every ``self.test_df`` row dated with the current date.

        Label 1 targets an equal-weight long, label -1 an equal-weight short.
        A position whose close has already breached its chandelier exit is
        liquidated instead of opened.
        """
        today = str(self.Time).split()[0]
        rows = self.test_df[self.test_df["datetime"] == today]
        weight = 1.0 / len(self.trading_symbols)

        columns = zip(
            rows["symbol"], rows["close"], rows["Prediction"],
            rows["chandelier long"], rows["chandelier short"],
        )
        for symbol, close, signal, long_exit, short_exit in columns:
            # NOTE(review): the action taken on a breached exit was missing
            # from the original source; liquidating is the assumed intent.
            if signal == 1:
                if close < long_exit:
                    self.Liquidate(symbol)
                else:
                    self.SetHoldings(symbol, weight)
            elif signal == -1:
                # A short is stopped out when price rises above its exit.
                if close > short_exit:
                    self.Liquidate(symbol)
                else:
                    self.SetHoldings(symbol, -weight)