Overall Statistics
Total Trades
355
Average Win
0.00%
Average Loss
0.00%
Compounding Annual Return
1.858%
Drawdown
0.000%
Expectancy
0.262
Net Profit
0.197%
Sharpe Ratio
5.086
Probabilistic Sharpe Ratio
90.982%
Loss Rate
58%
Win Rate
42%
Profit-Loss Ratio
1.99
Alpha
0.007
Beta
0.021
Annual Standard Deviation
0.003
Annual Variance
0
Information Ratio
-4.828
Tracking Error
0.075
Treynor Ratio
0.701
Total Fees
$355.00
Estimated Strategy Capacity
$220000.00
Lowest Capacity Asset
FXF TJSL8DEZVWBP
import datetime
import time
#from typeguard import typechecked
import tensorflow as tf
from tensorflow.keras.callbacks import Callback
from json import JSONEncoder
import numpy as np
import pandas as pd
import random
#@tf.keras.utils.register_keras_serializable(package="Addons")

# TODO: should I be using the approach from this answer instead?
# https://stackoverflow.com/questions/62031302/how-to-apply-monte-carlo-dropout-in-tensorflow-for-an-lstm-if-batch-normalizat
class MCLSTM(tf.keras.layers.LSTM):
    """LSTM layer whose ``call`` always runs with ``training=True``.

    The ``training`` argument supplied by the caller is accepted for
    signature compatibility but ignored, so the parent layer is always
    invoked in training mode (the Monte-Carlo dropout trick referenced in
    the link above this class).
    """

    def __init__(self, units, **kwargs):
        super().__init__(units, **kwargs)

    def call(self, inputs, mask=None, training=None, initial_state=None):
        # ``training`` is deliberately overridden with True.
        forwarded = dict(mask=mask, training=True, initial_state=initial_state)
        return super().call(inputs, **forwarded)

class TimeStopping(Callback):
    """Stop training when a specified amount of time has passed.

    The deadline is fixed when training begins and is only checked at the
    end of each epoch, so the epoch that is running when the deadline
    passes is allowed to finish.

    Args:
        seconds: maximum amount of time before stopping.
            Defaults to 86400 (1 day).
        verbose: verbosity mode. Defaults to 0.

    Example:
        from kerasutils import TimeStopping
        time_stopping_callback = TimeStopping(seconds=5, verbose=1)
    """

    def __init__(self, seconds: int = 86400, verbose: int = 0):
        super().__init__()

        self.seconds = seconds
        self.verbose = verbose
        self.stopped_epoch = None

    def on_train_begin(self, logs=None):
        # Reset per-run state so a callback instance reused across several
        # training runs does not report a stale stop from an earlier run.
        self.stopped_epoch = None
        self.stopping_time = time.time() + self.seconds

    def on_epoch_end(self, epoch, logs=None):
        # ``logs`` is unused; None replaces the former mutable ``{}`` default.
        if time.time() >= self.stopping_time:
            self.model.stop_training = True
            self.stopped_epoch = epoch

    def on_train_end(self, logs=None):
        if self.stopped_epoch is not None and self.verbose > 0:
            formatted_time = datetime.timedelta(seconds=self.seconds)
            msg = "Timed stopping at epoch {} after training for {}".format(
                self.stopped_epoch + 1, formatted_time
            )
            print(msg)

    def get_config(self):
        """Return the constructor arguments merged over the base config."""
        config = {
            "seconds": self.seconds,
            "verbose": self.verbose,
        }

        # NOTE(review): the Callback base class may not provide get_config()
        # in every Keras version - confirm. Fall back to an empty base config
        # rather than raising AttributeError when it is missing.
        parent_get_config = getattr(super(), "get_config", None)
        base_config = parent_get_config() if callable(parent_get_config) else {}
        return {**base_config, **config}

# TODO: should this class inherit from the QC algorithm?
# https://www.quantconnect.com/forum/discussion/12160/keltner-channel-universe/p1
# TODO: should the trading object move here as well, so everything lives in one place?
class generateSimData(object):
    """Build simulated OHLCV-style frames from model predictions.

    The normalised price predictions are cut into windows of ten rows. For
    every processed window the first row (open), last row (close), maximum
    (high) and minimum (low) are computed and repeated ten times. ``Data``
    then concatenates ``n_sims`` randomly sampled ten-row frames, while
    ``TradeData`` exposes one summary frame derived from the last processed
    window.

    Args:
        dataset_train: object with a ``shape`` attribute; ``shape[1]`` is the
            number of feature columns that may be sampled.
        y_predict: predictions; only its length is used (it drives the
            windowing).
        y_predict_pnorm: normalised price predictions, indexed as
            ``[row][0][feature]``.
        y_predict_volnorm: normalised volume predictions, indexed the same
            way.
        sim_strat: ``'average'`` blends three randomly chosen feature columns
            per sample; any other value uses a single column.
        n_sims: number of simulated frames concatenated by ``Data``. Defaults
            to 5000 (the previously hard-coded value); must be at least 1.
    """

    # Rows per prediction window; also the row count of each simulated frame.
    _WINDOW = 10

    def __init__(self, dataset_train, y_predict, y_predict_pnorm,
                 y_predict_volnorm, sim_strat, n_sims=5000):
        self.y_predict = y_predict
        self.y_predict_pnorm = y_predict_pnorm
        self.y_predict_volnorm = y_predict_volnorm
        self.sim_strat = sim_strat
        self.dataset_train = dataset_train
        self.n_sims = n_sims
        # Filled by _summarise_windows(); None until first computed.
        self.dataset_simlive = None

    def _summarise_windows(self):
        """Compute per-window OHLC arrays and refresh ``dataset_simlive``.

        Returns:
            dict mapping 'Open', 'Close', 'High' and 'Low' to a list with one
            repeated array per processed window.

        Raises:
            ValueError: if no window could be processed.
        """
        pnorm = self.y_predict_pnorm
        volnorm = self.y_predict_volnorm
        total = len(self.y_predict)
        size = self._WINDOW

        windows = {'Open': [], 'Close': [], 'High': [], 'Low': []}
        last_prices = None
        last_volumes = None
        for start in range(0, total, size):
            stop = start + size
            if stop == total:
                # NOTE(review): a window that ends exactly on the last row is
                # skipped. Behaviour kept from the original implementation -
                # confirm this is intended.
                continue
            prices = pnorm[start:stop]
            last_prices = prices
            last_volumes = volnorm[start:stop]
            windows['Open'].append(np.repeat(prices[0], size, axis=0))
            windows['Close'].append(np.repeat(prices[-1], size, axis=0))
            windows['High'].append(
                np.repeat(np.amax(prices, axis=0, keepdims=False), size, axis=0))
            windows['Low'].append(
                np.repeat(np.amin(prices, axis=0, keepdims=False), size, axis=0))

        if last_prices is None:
            raise ValueError(
                "generateSimData needs more than {} prediction rows to build "
                "a window; got {}".format(size, total))

        # NOTE(review): the summary frame is built from the LAST processed
        # window only (not from all windows), exactly as before - confirm.
        trade = pd.DataFrame()
        trade['Time'] = np.mean(windows['Close'][-1], axis=0, keepdims=False)
        trade['Open'] = np.mean(windows['Open'][-1], axis=0, keepdims=False)
        trade['Close'] = np.mean(last_prices, axis=0, keepdims=False)[0]
        trade['High'] = np.mean(windows['High'][-1], axis=0, keepdims=False)
        trade['Low'] = np.mean(windows['Low'][-1], axis=0, keepdims=False)
        trade['Volume'] = np.mean(last_volumes, axis=0, keepdims=False)[0]
        self.dataset_simlive = trade
        return windows

    @staticmethod
    def _sample(source, row, p1, p2, p3, blend):
        """Return one simulated value from ``source[row][0]``."""
        features = source[row][0]
        if blend:
            return (features[p3] + features[p2] + features[p1]) / 3
        return features[p1]

    @property
    def Data(self):
        """Concatenation of ``n_sims`` randomly sampled ten-row frames.

        Each frame has the columns Time, Open, High, Low, Close and Volume.
        Accessing this property also refreshes ``dataset_simlive`` (returned
        by ``TradeData``) and stores the result in ``dataset_live_temp``.
        """
        windows = self._summarise_windows()
        pnorm = self.y_predict_pnorm
        volnorm = self.y_predict_volnorm
        last_feature = self.dataset_train.shape[1] - 1
        last_row = len(pnorm) - 1
        last_window = len(windows['High']) - 1
        blend = self.sim_strat == 'average'

        frames = []
        for _ in range(self.n_sims):
            # The draw order (p1, p2, p3, window, then the ten rows) matches
            # the original code so seeded runs stay reproducible.
            p1 = random.randint(0, last_feature)
            p2 = random.randint(0, last_feature)
            p3 = random.randint(0, last_feature)
            window_idx = random.randint(1, last_window)
            rows = [random.randint(1, last_row) for _ in range(self._WINDOW)]

            frame = pd.DataFrame()
            frame['Time'] = windows['Close'][window_idx][:, p1]
            frame['Open'] = windows['Open'][window_idx][:, p1]
            frame['High'] = windows['High'][window_idx][:, p1]
            frame['Low'] = windows['Low'][window_idx][:, p1]
            # Values are still normalised here; any un-normalising happens
            # after simulation, outside this class.
            frame['Close'] = [
                self._sample(pnorm, row, p1, p2, p3, blend) for row in rows]
            # Volume is now produced for every strategy; previously it was
            # only filled for 'average', which broke the other strategies.
            frame['Volume'] = [
                self._sample(volnorm, row, p1, p2, p3, blend) for row in rows]
            frames.append(frame)

        self.dataset_live_temp = pd.concat(frames)
        return self.dataset_live_temp

    @property
    def TradeData(self):
        """Summary frame built from the last processed prediction window."""
        if self.dataset_simlive is None:
            # Allow reading TradeData without having read Data first.
            self._summarise_windows()
        return self.dataset_simlive



class SymbolDataPNorm(object):
    """Per-symbol consolidator, bar window and normalised indicators.

    The constructor creates a trade-bar consolidator wired to
    ``algo.OnDataConsolidated``, a rolling window of consolidated bars, and a
    set of price/volume indicators, each paired with a 60-value history
    window. The indicators are not fed here; the algorithm is expected to
    update them from its consolidation handler.

    Args:
        algo: algorithm instance providing ``OnDataConsolidated``,
            ``SubscriptionManager`` and ``CreateIndicatorName``.
        symbol: symbol this instance tracks.
        barPeriod: period passed to the trade-bar consolidator.
        indicPeriod: period of the price SMA/STD and of the volume MIN/MAX.
        windowSize: number of consolidated bars kept in ``Bars``.
        resolutionTime: resolution used when naming the indicators.
    """

    def __init__(self, algo, symbol, barPeriod, indicPeriod, windowSize, resolutionTime):
        self.Symbol = symbol
        self.BarPeriod = barPeriod
        self.Bars = RollingWindow[IBaseDataBar](windowSize)
        self.consolidator = TradeBarConsolidator(barPeriod)
        self.consolidator.DataConsolidated += algo.OnDataConsolidated
        algo.SubscriptionManager.AddConsolidator(symbol, self.consolidator)

        self.Volume = None

        # Every indicator history window keeps this many values.
        lookback = 60

        def indicator_name(prefix, period):
            return algo.CreateIndicatorName(symbol, prefix + str(period), resolutionTime)

        def history():
            return RollingWindow[float](lookback)

        # --- price: PNorm = (SMA(1) - SMA(indicPeriod)) / STD(indicPeriod) ---
        # SMA_ has period 1, i.e. it carries the latest price.
        self.SMA_ = SimpleMovingAverage(indicator_name("SMA", 1), 1)
        self.HistoricalSMA_ = history()

        self.SMA = SimpleMovingAverage(indicator_name("SMA", indicPeriod), indicPeriod)
        self.HistoricalSMA = history()

        self.STD = StandardDeviation(indicator_name("STD", indicPeriod), indicPeriod)
        self.HistoricalSTD = history()

        self.price_minus_sma = IndicatorExtensions.Minus(self.SMA_, self.SMA)
        self.HistoricalPriceMinusSMA = history()

        self.PNorm = IndicatorExtensions.Over(self.price_minus_sma, self.STD)
        self.HistoricalPNorm = history()

        # --- volume: volnorm = (VOLSMA(1) - VOLSMA(3)) / VOLSTD(3) ---
        self.VOLSMA_ = SimpleMovingAverage(indicator_name("VOLSMA", 1), 1)
        self.HistoricalVOLSMA_ = history()
        # NOTE(review): the name embeds indicPeriod but the period is fixed
        # at 3 (same for VOLSTD below) - confirm this is intended.
        self.VOLSMA = SimpleMovingAverage(indicator_name("VOLSMA", indicPeriod), 3)
        self.HistoricalVOLSMA = history()

        self.VOLSTD = StandardDeviation(indicator_name("VOLSTD", indicPeriod), 3)
        self.HistoricalVOLSTD = history()

        # Pieces of a min-max scaling, (x - min(x)) / (max(x) - min(x)); the
        # final ratio is not built, volnorm below uses the z-score form.
        self.VOLMIN = IndicatorExtensions.MIN(self.VOLSMA, indicPeriod)
        self.HistoricalVOLMIN = history()
        self.VOLMAX = IndicatorExtensions.MAX(self.VOLSMA, indicPeriod)
        self.HistoricalVOLMAX = history()
        self.volminusmin = IndicatorExtensions.Minus(self.VOLSMA, self.VOLMIN)
        self.HistoricalVOLMINUSMIN = history()
        self.maxvolminusminvol = IndicatorExtensions.Minus(self.VOLMAX, self.VOLMIN)
        self.HistoricalMAXVOLMINUSMINVOL = history()

        self.volminussma = IndicatorExtensions.Minus(self.VOLSMA_, self.VOLSMA)
        self.HistoricalVOLMINUSSMA = history()
        self.volnorm = IndicatorExtensions.Over(self.volminussma, self.VOLSTD)
        self.HistoricalVOLNORM = history()

        # A history-based warm-up of the indicators was sketched here and is
        # disabled; the indicators start empty.

    def IsReady(self):
        """Return True once the bars and every tracked indicator are ready.

        Previously this method had a stack of ``return`` statements, so only
        the first one (Bars, PNorm, HistoricalPNorm) was ever evaluated. All
        of the listed indicators and history windows are now required.
        """
        tracked = (
            self.Bars,
            self.PNorm, self.HistoricalPNorm,
            self.STD, self.HistoricalSTD,
            self.SMA, self.HistoricalSMA,
            self.SMA_, self.HistoricalSMA_,
            self.VOLSMA, self.HistoricalVOLSMA,
            self.VOLSTD, self.HistoricalVOLSTD,
            self.VOLMAX, self.HistoricalVOLMAX,
            self.VOLMIN, self.HistoricalVOLMIN,
            self.volnorm, self.HistoricalVOLNORM,
        )
        return all(item.IsReady for item in tracked)

    def WasJustUpdated(self, current):
        """Return True if the newest bar's time equals ``current - BarPeriod``.

        That match indicates a bar was added to this instance for the period
        that has just ended.
        """
        return self.Bars.Count > 0 and self.Bars[0].Time == current - self.BarPeriod
        
        


# could move this to library as use it enough
class EncodeNumpyArray(JSONEncoder):
    """JSON encoder that understands NumPy arrays and NumPy scalars.

    Arrays are emitted as (nested) lists and NumPy scalar types such as
    ``np.int64`` or ``np.float32`` as the matching Python scalar. Anything
    else is handed to ``JSONEncoder.default``, which raises ``TypeError``.
    """

    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.generic):
            # NumPy scalars are not JSON serialisable on their own.
            return obj.item()
        return super().default(obj)
        
        
        
        
# build out a class for this if you want it...   
#def get_mem_usage(self):                                                                                                                               
#    process = psutil.Process(os.getpid())                                                                                                          
#    return process.memory_info()      
from QuantConnect.Orders import *
from QuantConnect.Algorithm import *
from QuantConnect.Algorithm.Framework import *
from QuantConnect.Algorithm.Framework.Alphas import *
from QuantConnect.Algorithm.Framework.Execution import *
from QuantConnect.Algorithm.Framework.Portfolio import *
from QuantConnect.Algorithm.Framework.Selection import *
from clr import AddReference
AddReference("System")
AddReference("QuantConnect.Common")
AddReference("QuantConnect.Algorithm")
AddReference("QuantConnect.Indicators")
AddReference("QuantConnect.Algorithm.Framework")
from QuantConnect.Orders.Fees import ConstantFeeModel
from QuantConnect.Data.UniverseSelection import *
from QuantConnect.Indicators import *
from Selection.FundamentalUniverseSelectionModel import FundamentalUniverseSelectionModel
from System import *
from QuantConnect import *
from QuantConnect.Data.Consolidators import *
from QuantConnect.Data.Market import *
from QuantConnect.Orders import OrderStatus
from QuantConnect.Algorithm import QCAlgorithm
from datetime import timedelta, datetime
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, GaussianDropout, BatchNormalization, Flatten, LSTM
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import serialize_keras_object
import tensorflow as tf
from tensorflow.keras.constraints import max_norm, min_max_norm
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
import numpy as np
from numpy import hstack
import pandas as pd
import json
import time
from typing import Optional, Union
import heapq
from itertools import count
import random
from System.Drawing import Color
from kerasutils import TimeStopping, SymbolDataPNorm, EncodeNumpyArray, generateSimData
from stocksenv import Actions, Positions, StocksEnv, TradingEnv
import cloudpickle
import stable_baselines
from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines.deepq.policies import MlpPolicy
from stable_baselines import DQN, A2C
from stable_baselines.bench import Monitor
import os
from tensorflow.python.client import device_lib
import gc
import timeit
import psutil

class MultipleSymbolConsolidationAlgorithm(QCAlgorithm):
    
    def Initialize(self):
        """Set up cash, state, charting, universe, schedules and warm-up.

        Model training (``CreateModel``), prediction (``Predict``) and
        trading (``TradeRL``) are registered through ``self.Train`` on every
        weekday; research export (``WriteResearchData``) and RL caching
        (``CacheRL``) are registered through ``self.Schedule.On``.
        """
        # The trade strategy is not optimised yet and ran out of money, so
        # keep running in "limitless" mode to see how it performs
        # (previously SetCash(10000)).
        self.SetCash(10000000)
        self.model = None
        self.model_rl = None
        self.modelIsTraining = False
        self.quick_mape = 'none'
        self.dataset_live_append = []
        self.dataset_simlive_append = []
        # 'average': combine averages of three simulated variables to mask
        # the simulated data further.
        # 'single': many individual simulated variables, so the same data is
        # more likely to be simulated again (fewer fluctuations).
        self.sim_strat = 'average'
        self.SetStartDate(2021, 8, 1)
        self.DataPNorm = {}
        self.Indicators = {}
        self.HistoricalIndicators = {}
        self.train_range = 50  # how many bars to look back
        self.dataset_train = []
        self.generator_length = 3

        # Plot chart: every series is a line on chart index 0.
        stockPlot = Chart('Trade Plot')
        series_names = (
            'Avg Predict Price',
            'Avg Actual Price',
            'MAE Error',
            'Example1 Predicted',
            'Example1 Actual',
            'Example2 Predicted',
            'Example2 Actual',
            'Delta Negative',
            'Delta Positive',
            'Example D--',
            'Example D++',
            'Model Shape',
            'Example1 Predicted VOL',
            'Example1 Actual VOL',
        )
        for series_name in series_names:
            stockPlot.AddSeries(Series(series_name, SeriesType.Line, 0))

        self.AddChart(stockPlot)
        # NOTE(review): 'XLI' is listed twice, so self.key and
        # self.num_macros count it twice - confirm whether that is intended
        # before de-duplicating.
        self.easy_symbols = ['SPY','XLI','SHY','GLD','SLV','XLU','XLI','FXF','FXA','DBB','IGE','RINF','TIP','UUP','MSFT','AAPL','FB','AMZN','TSLA','NVDA','GOOG','AMD']
        self.key = str(len(self.easy_symbols)) # e.g. '13' or '21' or '103'

        self.symbols = [Symbol.Create(x, SecurityType.Equity, Market.USA) for x in self.easy_symbols]
        self.num_macros = len(self.symbols)
        self.SetUniverseSelection(ManualUniverseSelectionModel(self.symbols))
        self.UniverseSettings.Resolution = Resolution.Hour
        # Fixed: this used to assign to "SetDataNormalizationMode", which is
        # not the universe setting, so Raw mode was never actually applied.
        self.UniverseSettings.DataNormalizationMode = DataNormalizationMode.Raw
        self.UniverseSettings.Leverage = 1
        self.SetBrokerageModel(AlphaStreamsBrokerageModel())
        self.SetExecution(ImmediateExecutionModel())
        self.SetPortfolioConstruction(EqualWeightingPortfolioConstructionModel())

        weekdays = (
            DayOfWeek.Monday,
            DayOfWeek.Tuesday,
            DayOfWeek.Wednesday,
            DayOfWeek.Thursday,
            DayOfWeek.Friday,
        )

        self.Train(self.DateRules.Every(*weekdays), self.TimeRules.At(3,0), self.CreateModel)
        self.energy = LiquidETFUniverse.Energy
        self.metals = LiquidETFUniverse.Metals
        self.technology = LiquidETFUniverse.Technology
        self.treasuries = LiquidETFUniverse.Treasuries
        self.volatility = LiquidETFUniverse.Volatility
        self.sp500Sectors = LiquidETFUniverse.SP500Sectors

        # SPY is added explicitly; the market-open/close time rules below
        # are keyed on it.
        self.AddEquity("SPY")
        # Periodic model deletion (DeleteModel) is currently not scheduled.

        self.Train(self.DateRules.Every(*weekdays), self.TimeRules.AfterMarketOpen("SPY", 60), self.Predict)
        # TradeDeltaField was moved out (to scratchpad) while the RL trade
        # logic is being developed; TradeRL is used instead.
        self.Train(self.DateRules.Every(*weekdays), self.TimeRules.BeforeMarketClose("SPY", 60), self.TradeRL)
        self.Schedule.On(self.DateRules.Every(DayOfWeek.Friday), self.TimeRules.At(13, 0), self.WriteResearchData)

        self.Schedule.On(self.DateRules.MonthStart("SPY"), self.TimeRules.AfterMarketOpen("SPY", 300), self.CacheRL)
        # Caching the LSTM (CacheLSTM) is currently not scheduled.

        self.delta_threshold_positive = []
        self.delta_threshold_positive_append = []
        self.delta_threshold_negative = []
        self.delta_threshold_negative_append = []
        self.predict = []
        self.epsilon = 0.000000001

        # Alternatives tried earlier: 100 days, or 20 / 60 hourly bars.
        self.SetWarmUp(TimeSpan.FromDays(21))
    
            
    def OnSecuritiesChanged(self, changes):
        """Create or tear down per-symbol data as the universe changes.

        Added securities get a fresh ``SymbolDataPNorm``. Removed securities
        that are tracked have their consolidator unregistered, their entry
        dropped from ``self.DataPNorm`` and, if invested, are liquidated.
        """
        # Period of the consolidated bars. Keep FromMinutes(60): per the
        # original note, FromHours did not work here.
        bar_period = TimeSpan.FromMinutes(60)
        # Period of the SMA-based indicators (20 was used previously).
        indicator_period = 3
        # Number of consolidated bars held per symbol for reference.
        window_size = 60

        for added in changes.AddedSecurities:
            added_symbol = added.Symbol
            self.DataPNorm[added_symbol] = SymbolDataPNorm(
                self,
                added_symbol,
                bar_period,
                indicator_period,
                window_size,
                self.UniverseSettings.Resolution,
            )

        for removed in changes.RemovedSecurities:
            removed_symbol = removed.Symbol
            if removed_symbol not in self.DataPNorm:
                continue
            tracked = self.DataPNorm[removed_symbol]
            self.SubscriptionManager.RemoveConsolidator(removed_symbol, tracked.consolidator)
            self.DataPNorm.pop(removed_symbol, None)
            if removed.Invested:
                self.Liquidate(removed_symbol, "Universe Removed Security")



    def OnDataConsolidated(self, sender, bar):
        """Feed a consolidated bar into the symbol's window and indicators.

        The bar is stored once in the symbol's ``Bars`` window. Each
        indicator is then updated with the bar's close or volume, and its
        current value is appended to the matching history window once the
        indicator reports ready. Bars for untracked symbols are ignored.

        Args:
            sender: the consolidator that raised the event (unused).
            bar: the consolidated bar.
        """
        data = self.DataPNorm.get(bar.Symbol)
        if data is None:
            return

        # Fixed: the bar used to be added after every indicator update (ten
        # times per bar), filling the rolling window with duplicates.
        data.Bars.Add(bar)

        # (indicator, history window, input value), in the original update
        # order.
        feeds = (
            (data.STD, data.HistoricalSTD, bar.Close),
            (data.SMA, data.HistoricalSMA, bar.Close),
            (data.SMA_, data.HistoricalSMA_, bar.Close),
            (data.PNorm, data.HistoricalPNorm, bar.Close),
            (data.VOLSMA, data.HistoricalVOLSMA, bar.Volume),
            (data.VOLMIN, data.HistoricalVOLMIN, bar.Volume),
            (data.VOLMAX, data.HistoricalVOLMAX, bar.Volume),
            (data.volnorm, data.HistoricalVOLNORM, bar.Volume),
            (data.VOLSTD, data.HistoricalVOLSTD, bar.Volume),
            (data.VOLSMA_, data.HistoricalVOLSMA_, bar.Volume),
        )
        for indicator, history, value in feeds:
            indicator.Update(bar.EndTime, value)
            if indicator.IsReady:
                history.Add(indicator.Current.Value)


    def OnDataPrep(self,indicator):
        """Convert an indexable indicator history into float32 column vectors.

        Args:
            indicator: Object supporting ``indicator[i]`` for integer ``i``.

        Returns:
            ``(_tst, _tr)`` where ``_tst`` is built from indices
            ``generator_length - 1`` down to ``0`` and ``_tr`` from indices
            ``train_range - 1`` down to ``1``. Each is a 2-D float32 array
            with a single column.

        NOTE(review): the first value read for each vector appears twice at
        the top of the column, so ``_tst`` has ``generator_length + 1`` rows
        and ``_tr`` has ``train_range`` rows. Downstream code consumes these
        shapes, so the duplicate is kept here on purpose.
        """
        def as_column(indices):
            samples = [np.float32(indicator[i]) for i in indices]
            # Seed with the first sample and then append every sample
            # (the first one included), which yields the leading duplicate.
            stacked = np.append(samples[0], samples)
            return stacked.reshape(len(stacked), 1)

        _tst = as_column(range(self.generator_length - 1, -1, -1))
        _tr = as_column(range(self.train_range - 1, 0, -1))
        return (_tst, _tr)

    def OnData(self,data):
        """Rebuild the model datasets from freshly updated symbols and roll the delta thresholds.

        Args:
            data: Argument supplied by the caller; it is not read here, all
                inputs come from ``self.DataPNorm``.

        Steps:
            1. Return immediately (after a debug message) while warming up.
            2. Reset the per-call accumulator lists (``self.train``,
               ``self.test``, ``self.actual*``, ``self.train_*``,
               ``self.actual_symbols``).
            3. For every symbol that is ready and was just updated, append its
               feature columns to those lists.
            4. If anything was collected, ``hstack`` each list into the
               matching ``self.dataset_<name>`` attribute.
            5. If a model exists and both delta thresholds are non-empty,
               recompute the un-normalised prediction and average the new
               positive/negative deltas into the thresholds.
        """
        if self.IsWarmingUp:
            self.Debug('returning because still warming up')
            return

        # Accumulators that are later stacked into ``self.dataset_<name>``,
        # in the order the stacking happens.
        stacked_names = (
            'train', 'test',
            'actual', 'actual_std', 'actual_sma', 'actual_pnorm',
            'actual_volsma', 'actual_volstd', 'actual_volmin', 'actual_volmax', 'actual_volnorm',
            'train_c', 'train_std', 'train_sma', 'train_pnorm',
            'train_volsma', 'train_volstd', 'train_volmin', 'train_volmax', 'train_volnorm',
        )
        for name in stacked_names:
            setattr(self, name, [])
        self.actual_symbols = []

        # maybe these need to become dicts, if i'm going to not mix up symbols
        for symbol, symbolData in self.DataPNorm.items():
            # Only symbols that were updated right before this call contribute.
            if not (symbolData.IsReady() and symbolData.WasJustUpdated(self.Time)):
                continue

            # The close columns are not a real history yet: every row holds
            # the latest close (Bars[0]). Row counts match what OnDataPrep
            # returns (generator_length + 1 and train_range rows).
            close = np.float32(symbolData.Bars[0].Close)
            c_tst = np.full((self.generator_length + 1, 1), close, dtype=np.float32)
            c_tr = np.full((self.train_range, 1), close, dtype=np.float32)

            sma_tst, sma_tr = self.OnDataPrep(symbolData.HistoricalSMA)
            std_tst, std_tr = self.OnDataPrep(symbolData.HistoricalSTD)
            ma_tst, ma_tr = self.OnDataPrep(symbolData.HistoricalPNorm)
            volsma_tst, volsma_tr = self.OnDataPrep(symbolData.HistoricalVOLSMA)
            volstd_tst, volstd_tr = self.OnDataPrep(symbolData.HistoricalVOLSTD)
            volmin_tst, volmin_tr = self.OnDataPrep(symbolData.HistoricalVOLMIN)
            volmax_tst, volmax_tr = self.OnDataPrep(symbolData.HistoricalVOLMAX)
            volnorm_tst, volnorm_tr = self.OnDataPrep(symbolData.HistoricalVOLNORM)

            # Model feature: normalised price multiplied by normalised volume.
            self.train.append(np.multiply(ma_tr,volnorm_tr))
            self.test.append(np.multiply(ma_tst, volnorm_tst))

            # Components kept so the prediction can be un-normalised later.
            self.actual.append(c_tst)
            self.actual_std.append(std_tst)
            self.actual_sma.append(sma_tst)
            self.actual_pnorm.append(ma_tst)
            self.actual_volsma.append(volsma_tst)
            self.actual_volstd.append(volstd_tst)
            self.actual_volmin.append(volmin_tst)
            self.actual_volmax.append(volmax_tst)
            self.actual_volnorm.append(volnorm_tst)

            self.train_c.append(c_tr)
            self.train_std.append(std_tr)
            self.train_sma.append(sma_tr)
            self.train_pnorm.append(ma_tr)
            self.train_volsma.append(volsma_tr)
            self.train_volstd.append(volstd_tr)
            self.train_volmin.append(volmin_tr)
            self.train_volmax.append(volmax_tr)
            self.train_volnorm.append(volnorm_tr)

            self.actual_symbols.append(str(symbol))

        if self.train:
            # One column per contributing symbol in every dataset.
            for name in stacked_names:
                setattr(self, 'dataset_' + name, hstack(getattr(self, name)))

        if self.model is None:
            return
        # len() is used instead of ``!= []``: once the thresholds are numpy
        # arrays, comparing them with a list is an element-wise operation that
        # yields an empty array or raises, not a plain emptiness test.
        if len(self.delta_threshold_positive) == 0 or len(self.delta_threshold_negative) == 0:
            return

        ### WORKOUTLOGIC - need to change how this algebra to work with new volume interaction
        # The model predicts pnorm * volnorm; divide one factor out to get
        # the other, then undo the z-score normalisation.
        self.pnorm = self.predict/(self.dataset_actual_volnorm + self.epsilon)
        self.volnorm = self.predict/self.dataset_actual_pnorm
        self.unpnorm = ((self.pnorm[0] * self.dataset_actual_std[0]) + self.dataset_actual_sma[0])
        self.unvolnorm = ((self.volnorm[0] * self.dataset_actual_volstd[0]) + self.dataset_actual_volsma[0])

        # update these more frequently, both now in the predict, and once a day.
        temp_delta = ( self.dataset_actual[0]/self.unpnorm) - 1
        # Infinite ratios (division by zero) count as "no delta".
        delta = np.where(np.isinf(temp_delta),0,temp_delta)
        self.delta_threshold_positive = (self.delta_threshold_positive.clip(min=0) + delta.clip(min=0)) / 2
        self.delta_threshold_negative = (self.delta_threshold_negative.clip(max=0) + delta.clip(max=0)) / 2
    def quickSave(self, save_name, save_data):
        """Store ``save_data`` in the ObjectStore under ``save_name`` as JSON.

        Args:
            save_name: ObjectStore key to write.
            save_data: Value to serialise; encoded with ``EncodeNumpyArray``.
        """
        payload = json.dumps(save_data, cls=EncodeNumpyArray)
        self.ObjectStore.Save(save_name, payload)

    def quickRead(self, read_name):
        """Load a JSON value previously stored in the ObjectStore.

        Args:
            read_name: ObjectStore key to read.

        Returns:
            ``[]`` when the key does not exist. Otherwise the decoded JSON
            converted with ``np.float32``; if the payload cannot be converted
            (for example a list of symbol strings), the decoded JSON value is
            returned unchanged instead of raising.
        """
        if not self.ObjectStore.ContainsKey(read_name):
            return []

        decoded = json.loads(self.ObjectStore.Read(read_name))
        try:
            return np.float32(decoded)
        except (TypeError, ValueError):
            # Non-numeric payloads (e.g. the stored symbol names) cannot be
            # represented as float32; hand them back as decoded.
            return decoded
        

    def WriteResearchData(self):
        """Save the current datasets, thresholds and predictions via ``quickSave``.

        Does nothing when ``self.model`` is ``None``. Otherwise each attribute
        listed below is written under its ObjectStore key, in order, followed
        by a debug message.
        """
        if self.model is None:
            return

        # Attributes stored under a key equal to their own name.
        same_named = (
            'dataset_train', 'dataset_test',
            'dataset_actual', 'dataset_actual_std', 'dataset_actual_sma', 'dataset_actual_pnorm',
            'dataset_actual_volsma', 'dataset_actual_volstd', 'dataset_actual_volmin',
            'dataset_actual_volmax', 'dataset_actual_volnorm',
            'delta_threshold_negative', 'delta_threshold_positive',
            'dataset_train_c', 'dataset_train_std', 'dataset_train_sma', 'dataset_train_pnorm',
            'dataset_train_volsma', 'dataset_train_volstd', 'dataset_train_volmin',
            'dataset_train_volmax', 'dataset_train_volnorm',
        )
        # (ObjectStore key, attribute) pairs whose names differ.
        renamed = (
            ('dataset_predict', 'predict'),
            ('dataset_predict_pnorm', 'y_predict_pnorm'),
            ('dataset_predict_volnorm', 'y_predict_volnorm'),
            ('dataset_symbols', 'actual_symbols'),
        )

        for attribute in same_named:
            self.quickSave(attribute, getattr(self, attribute))
        for store_key, attribute in renamed:
            self.quickSave(store_key, getattr(self, attribute))

        self.Debug('write out some datasets for RL research')
        
    def CacheRL(self):
        """Snapshot the RL model's parameters and persist them in the ObjectStore.

        Does nothing when ``self.model_rl`` is ``None``. Otherwise stores the
        result of ``get_parameters()`` in ``self.model_rl_params``, the result
        of ``get_parameter_list()`` in ``self.model_rl_params_list``, and
        writes the parameters as JSON under the key ``'0a2c_persist'``.
        """
        if self.model_rl is None:
            return

        rl_model = self.model_rl
        self.model_rl_params = rl_model.get_parameters()
        self.model_rl_params_list = rl_model.get_parameter_list()
        # model/shared_fc0/w:0
        self.Debug('write out weights to persist between backtests')
        serialized = json.dumps(self.model_rl_params, cls=EncodeNumpyArray)
        self.ObjectStore.Save('0a2c_persist', serialized)

    def TradeRL(self):
        """Retrain the A2C agent on simulated data, then place one order per symbol.

        Returns early (with a debug message) when there is no model or when
        either delta threshold is still empty. Otherwise:

        1. ``generateSimData`` produces a training frame (``.Data``) and a
           trading frame (``.TradeData``); both are appended to the running
           history and concatenated into ``self.dataset_live`` /
           ``self.dataset_simlive``.
        2. ``self.model_rl`` is created (optionally seeded from the
           ``'0a2c_persist'`` ObjectStore entry) or reloaded from the ``"ac"``
           snapshot, trained for 10000 timesteps and saved back to ``"ac"``.
        3. For each entry of ``self.easy_symbols`` an episode is run on a
           ``StocksEnv`` over ``self.dataset_simlive``; when it finishes, the
           last action decides the order: ``1`` buys 25 units, ``0`` sells 25.
        """
        ### long way to go on the below
        # data is not very clean, but made progress on creating a simulated env based on predicted montecarlo values
        # still need live dataset to be fixed.
        self.Debug('got into traderl')
        if self.model is None:
            self.Debug('returning because model is none')
            return
        # len() is used instead of ``== []``: once the thresholds are numpy
        # arrays, comparing them with a list is an element-wise operation that
        # yields an empty array or raises, not a plain emptiness test.
        if len(self.delta_threshold_positive) == 0:
            self.Debug('returning because threshold positive is none')
            return
        if len(self.delta_threshold_negative) == 0:
            self.Debug('returning because threshold negative is none')
            return

        some_data = generateSimData(self.dataset_train, self.y_predict, self.y_predict_pnorm, self.y_predict_volnorm, self.sim_strat)

        # can further this do to some kind of limited experience replay, but for now doing full experience replay of all sets seen
        self.dataset_live_append.append(pd.concat([some_data.Data]))
        self.dataset_simlive_append.append(pd.concat([some_data.TradeData]))
        self.dataset_live = pd.concat(self.dataset_live_append)
        self.dataset_simlive = pd.concat(self.dataset_simlive_append)
        self.Debug(str(self.dataset_live.shape)) # shape of training env dataset
        self.Debug(len(self.dataset_live)) # shape of training env dataset

        window_size = 2
        start_index = window_size
        end_index = len(self.dataset_live)

        def make_live_env():
            # Training environment over the accumulated simulated history.
            return StocksEnv(self.dataset_live,window_size,frame_bound = (start_index, end_index))

        env = StocksEnv(self.dataset_live, window_size, (start_index, end_index))
        policy_kwargs = dict(net_arch=[64, 'lstm', dict(vf=[128, 128, 128], pi=[64, 64])])

        if self.model_rl is None:
            if self.ObjectStore.ContainsKey("0a2c_persist"):
                # Seed a fresh agent with weights persisted by an earlier backtest.
                self.Debug('found model between backtests')
                ac_weights = json.loads(self.ObjectStore.Read('0a2c_persist'))
                self.model_rl = A2C('MlpLstmPolicy', env, verbose=0, policy_kwargs=policy_kwargs)
                self.model_rl.load_parameters(ac_weights)
                self.model_rl.set_env(DummyVecEnv([make_live_env]))
            else:
                self.Debug('tried overwriting model')
                #https://readthedocs.org/projects/stable-baselines/downloads/pdf/master/
                self.model_rl = A2C('MlpLstmPolicy', env, verbose=0, policy_kwargs=policy_kwargs)
        else:
            # Continue from the snapshot written at the end of the previous call.
            self.Debug('found model between weeks')
            self.model_rl= A2C.load("ac")
            self.model_rl.set_env(DummyVecEnv([make_live_env]))

        self.model_rl.learn(total_timesteps=10000)
        self.model_rl.save("ac")

        for position, symbol in enumerate(self.easy_symbols):
            # NOTE(review): this stops before the last symbol, so it is never
            # traded - confirm whether that is intended.
            if position + 1 >= len(self.easy_symbols):
                return

            # NOTE(review): the frame bound is derived from dataset_live while
            # the environment wraps dataset_simlive - confirm they always
            # have the same length.
            env = StocksEnv(self.dataset_simlive, window_size, (start_index, end_index))
            observation = env.reset()

            while True:
                observation = observation[np.newaxis, ...]

                action, _states = self.model_rl.predict(observation)
                observation, reward, done, info = env.step(action)

                env.render()
                if not done:
                    continue

                # this needs to be moved to use more real time data...
                self.total_reward = info['total_reward']
                self.total_profit = info['total_profit']
                self.position = info['position']

                if action[0] == 1:
                    # then buy
                    self.MarketOrder(symbol, 25)
                elif action[0] == 0:
                    # then sell
                    self.MarketOrder(symbol, -25)
                break

    def Predict(self):
        """Run the model on the latest test window and refresh derived statistics.

        Returns early when there is no model, or when the number of feature
        columns in ``self.dataset_train`` differs from ``self.key`` and no
        ObjectStore entry exists for that column count (if one exists,
        ``LoadCachedModel`` is called first).

        Otherwise it stores a single prediction in ``self.predict``, a stack
        of 10000 repeated predictions in ``self.y_predict`` (also written to
        the ObjectStore as ``'dataset_yproba'``), un-normalises the
        prediction, plots summary series and averages the new deltas into
        ``self.delta_threshold_positive`` / ``self.delta_threshold_negative``.
        """
        if self.model is None:
            return

        # since introducing this, then i will need to
        # use dictionary of symbols and have clipping class strategy for this before mering deltas as size will change if symbol goes missing.
        feature_count = str(self.dataset_train[0:].shape[1])
        if feature_count != self.key:
            if not self.ObjectStore.ContainsKey(feature_count):
                return
            self.LoadCachedModel()

        # The model input does not change between the repeated predictions,
        # so build it once.
        model_input = np.array([self.dataset_test[0:self.generator_length]])
        self.predict  = self.model.predict(model_input)
        self.y_predict = np.stack([self.model.predict(model_input) for _ in range(10000)])
        self.y_predict_pnorm  = self.y_predict/(self.dataset_actual_volnorm[0] + self.epsilon)
        self.y_predict_volnorm  = self.y_predict/self.dataset_actual_pnorm[0]
        self.ObjectStore.Save('dataset_yproba', json.dumps(hstack(self.y_predict), cls=EncodeNumpyArray))
        self.predict_proba = self.y_predict.mean(axis=0)
        self.predict_std = self.y_predict.std(axis=0)

        self.mae = np.mean(np.abs(self.predict[0] - self.dataset_test[0:self.generator_length] ))
        self.avg_test = np.mean(self.dataset_test[0:self.generator_length])

        # The model predicts pnorm * volnorm; divide one factor out to get
        # the other, then undo the z-score normalisation: (pnorm * std) + sma.
        self.pnorm = self.predict/(self.dataset_actual_volnorm + self.epsilon)
        self.volnorm = self.predict/self.dataset_actual_pnorm
        self.unpnorm = ((self.pnorm[0] * self.dataset_actual_std[0]) + self.dataset_actual_sma[0])
        self.unvolnorm = ((self.volnorm[0] * self.dataset_actual_volstd[0]) + self.dataset_actual_volsma[0])
        self.avg_predict = np.mean(np.abs(self.unpnorm))
        self.diff_predict2 = self.dataset_actual[0] - self.unpnorm
        self.avg_actual=np.mean(np.abs(self.dataset_actual[0]))
        self.mae2 = np.mean(np.abs(self.diff_predict2))

        # pop off a few and check their perforamnce
        # NOTE(review): unpnorm/unvolnorm are derived from the prediction while
        # dataset_actual/dataset_actual_volsma are not, so the "actual" and
        # "predicted" names below look swapped - confirm before relying on
        # these series.
        self.ex1_actual = self.unpnorm[0]
        self.ex1_predicted = self.dataset_actual[0][0]
        self.ex1_actualVOL = self.unvolnorm[0]
        self.ex1_predictedVOL = self.dataset_actual_volsma[0][0]

        self.Plot('Trade Plot', 'Example1 Predicted VOL', self.ex1_predictedVOL)
        self.Plot('Trade Plot', 'Example1 Actual VOL', self.ex1_actualVOL)
        self.Plot('Trade Plot', 'Avg Predict Price', self.avg_predict)
        self.Plot('Trade Plot', 'Avg Actual Price', self.avg_actual)
        self.Plot('Trade Plot', 'MAE Error', self.mae*100)
        self.Plot('Trade Plot', 'Example1 Predicted Price', self.ex1_predicted)
        self.Plot('Trade Plot', 'Example1 Actual Price', self.ex1_actual)

        ### FIX LOGIC UNPNORM interaction VOLUME and price
        temp_delta = ( self.dataset_actual[0]/self.unpnorm) - 1
        # Infinite ratios (division by zero) count as "no delta".
        delta = np.where(np.isinf(temp_delta),0,temp_delta)

        # len() is used instead of ``== []``: after the first call the
        # thresholds are numpy arrays, and comparing an array with a list is
        # an element-wise operation that yields an empty array or raises.
        if len(self.delta_threshold_positive) == 0:
            self.delta_threshold_positive = np.zeros(len(self.unpnorm))
        self.delta_threshold_positive = (self.delta_threshold_positive.clip(min=0) + delta.clip(min=0)) / 2

        if len(self.delta_threshold_negative) == 0:
            self.delta_threshold_negative = np.zeros(len(self.unpnorm))
        self.delta_threshold_negative = (self.delta_threshold_negative.clip(max=0) + delta.clip(max=0)) / 2

        self.Plot('Trade Plot', 'Delta Positive', np.mean(np.abs(self.delta_threshold_positive)) * 100)
        self.Plot('Trade Plot', 'Delta Negative', np.mean(np.abs(self.delta_threshold_negative)) * 100)
        self.Plot('Trade Plot', 'AGQ D--', self.delta_threshold_negative[0] * 100)
        self.Plot('Trade Plot', 'AGQ D++', self.delta_threshold_positive[0] * 100)
        self.Plot('Trade Plot', 'Model Shape', self.key)
        
 
    
    def DeleteModel(self):
        """Drop the in-memory model and purge ObjectStore entries that are not kept.

        Sets ``self.model`` to ``None`` and walks every ObjectStore entry:

        * keys containing ``'dataset_'`` or ``'delta_'`` are left alone;
        * a key containing ``'0a2c_persist'`` is kept, and the size of the
          stored RL weights (as float32, in MB) is recorded in
          ``self.ac_weights_mb`` and reported via ``Debug`` and ``Log``;
        * every other key is deleted.
        """
        self.model = None
        self.Debug('deleting model')

        # Each entry is stringified; the key is the text between the first
        # character and the first comma. The list is built up front so that
        # deleting does not happen while enumerating the store.
        stored_keys = [str(entry).split(',')[0][1:] for entry in self.ObjectStore.GetEnumerator()]

        for key in stored_keys:
            if 'dataset_' in key or 'delta_' in key:
                # dont delete datasets
                continue

            if '0a2c_persist' in key:
                self.Debug('persisting rl model between backtests, create delete rule later based on memory size or some way to randomly prune')
                ac_weights = json.loads(self.ObjectStore.Read('0a2c_persist'))
                arrays = (np.float32(values) for values in ac_weights.values())
                self.ac_weights_mb = sum(arr.size * arr.itemsize * 1e-6 for arr in arrays)
                size_message = 'currently rl weights in mb is: ' + str(self.ac_weights_mb)
                self.Debug(size_message)
                self.Log(size_message)
                continue

            self.ObjectStore.Delete(key)
            self.Debug('not persisting any models including RL for some time')
    
    
    def CreateModel(self):
        """Build or reuse the LSTM forecaster, train it and return it.

        If ``self.dataset_train`` is empty, every cached dataset is first
        reloaded through ``self.quickRead``. If the training set is still empty
        afterwards, nothing is trained and ``None`` is returned.

        First call (``self.model is None``): a stacked-LSTM network is built,
        the architecture and per-layer weights stored in ``self.ObjectStore``
        under ``self.key`` (the feature count as a string) are restored when
        present, the model is trained, and architecture plus weights are
        written back to the store.

        Later calls: the in-memory ``self.model`` is trained further; nothing
        is read from or written to the store.

        ``self.modelIsTraining`` is ``True`` for the duration of the training
        section and is always reset, even when training raises.

        Returns:
            ``self.model`` after training, or ``None`` when no training data
            is available.
        """
        # Original author's note: dims may be wrong here; the goal is to be
        # able to start/stop so the model can be relaunched quickly (e.g. every
        # Friday or every two weeks).

        def _is_empty(data):
            # `data == []` is only an emptiness test for plain lists. On a
            # NumPy array it is an elementwise comparison (and raises on recent
            # NumPy versions), so test the length instead.
            return data is None or len(data) == 0

        if _is_empty(self.dataset_train):
            self.Debug('using cached dataset_train because it is empty still')
            self.dataset_train = self.quickRead('dataset_train')
            self.dataset_test = self.quickRead('dataset_test')
            self.dataset_actual = self.quickRead('dataset_actual')
            self.dataset_actual_std = self.quickRead('dataset_actual_std')
            self.dataset_actual_sma = self.quickRead('dataset_actual_sma')
            self.dataset_actual_pnorm = self.quickRead('dataset_actual_pnorm')
            self.dataset_actual_volsma = self.quickRead('dataset_actual_volsma')
            self.dataset_actual_volstd = self.quickRead('dataset_actual_volstd')
            self.dataset_actual_volmin = self.quickRead('dataset_actual_volmin')
            self.dataset_actual_volmax = self.quickRead('dataset_actual_volmax')
            self.dataset_actual_volnorm = self.quickRead('dataset_actual_volnorm')
            self.delta_threshold_negative = self.quickRead('delta_threshold_negative')
            self.delta_threshold_positive = self.quickRead('delta_threshold_positive')
            self.dataset_train_c = self.quickRead('dataset_train_c')
            self.dataset_train_std = self.quickRead('dataset_train_std')
            self.dataset_train_sma = self.quickRead('dataset_train_sma')
            self.dataset_train_pnorm = self.quickRead('dataset_train_pnorm')
            self.dataset_train_volsma = self.quickRead('dataset_train_volsma')
            self.dataset_train_volstd = self.quickRead('dataset_train_volstd')
            self.dataset_train_volmin = self.quickRead('dataset_train_volmin')
            self.dataset_train_volmax = self.quickRead('dataset_train_volmax')
            self.dataset_train_volnorm = self.quickRead('dataset_train_volnorm')
            self.predict = self.quickRead('dataset_predict')
            self.y_predict_pnorm = self.quickRead('dataset_predict_pnorm')
            self.y_predict_volnorm = self.quickRead('dataset_predict_volnorm')
            self.actual_symbols = self.quickRead('dataset_symbols')

        if _is_empty(self.dataset_train):
            self.Debug('cached datasets were empty, must have recently cleared.')
            return

        # bring this logic in
        # https://www.quantconnect.com/project/8925074
        # NOTE(review): `train_it` is computed but not yet used to gate the
        # training below; it is kept so the intended policy stays visible.
        if(self.quick_mape == 'none'):
            train_it = True
        elif((self.quick_mape < self.quick_mape_floor)):
            train_it = False
        elif((self.quick_mape_status == 'increasing')):
            train_it = True
        else:
            train_it = False

        some_epochs = 10

        self.modelIsTraining = True
        try:
            features_set = self.dataset_train[0:]
            # Inputs and targets are the same series: the generator yields a
            # window of `generator_length` rows and the row that follows it.
            self.generator = TimeseriesGenerator(features_set, features_set, length=self.generator_length, batch_size=128)
            feature_count = features_set.shape[1]
            self.key = str(feature_count)
            self.Debug(self.key)
            key = self.key
            store = self.ObjectStore
            _activation = 'elu'

            if self.model is None:
                # Every layer is called with training=True so dropout stays
                # active whenever the model is invoked, not only during fit().
                # NOTE(review): the time dimension is hard-coded to 3 while the
                # generator uses self.generator_length -- confirm they agree.
                inp = tf.keras.layers.Input(shape=(3, feature_count), name='input')
                x = tf.keras.layers.LSTM(256, name='lstm256', return_sequences=True, activation=_activation, kernel_constraint=tf.keras.constraints.max_norm(max_value=2, axis=0))(inp, training=True)
                x = tf.keras.layers.Dropout(0.30, name='dropout30')(x, training=True)
                x = tf.keras.layers.GaussianDropout(0.20, name='dropout20')(x, training=True)
                x = tf.keras.layers.LSTM(128, name='lstm128', dropout=0.3, recurrent_dropout=0.3, return_sequences=True, activation=_activation)(x, training=True)
                x = tf.keras.layers.Dropout(0.30, name='dropouttwo30')(x, training=True)
                x = tf.keras.layers.GaussianDropout(0.20, name='dropouttwo20')(x, training=True)
                x = tf.keras.layers.LSTM(16, name='lstm16', dropout=0.1, return_sequences=False, activation=_activation)(x, training=True)
                out = tf.keras.layers.Dense(feature_count, name='dense1')(x)

                model = tf.keras.models.Model(inputs=inp, outputs=out)
                self.model_uncompiled = model
                optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005, beta_1=0.9,
                    beta_2=0.999, epsilon=1e-07,
                    amsgrad=False, name='Adam')
                huber = tf.keras.losses.Huber()

                # A stored architecture, when present, replaces the freshly
                # built one (self.model_uncompiled keeps the fresh network).
                has_stored_model = store.ContainsKey(key)
                if has_stored_model:
                    model = tf.keras.models.model_from_json(store.Read(key))

                model.compile(optimizer=optimizer, loss=huber)

                if has_stored_model:
                    # Overlay each stored tensor on the layer's current
                    # weights; tensors without a stored entry keep their
                    # initial values.
                    for layer in model.layers:
                        layer_name = layer.name
                        print(layer_name)
                        layer_weights = layer.get_weights()
                        for iter_num in range(len(layer_weights)):
                            weight_key = f'{key}_{layer_name}_{iter_num}'
                            if store.ContainsKey(weight_key):
                                decodedWeights = json.loads(store.Read(weight_key))
                                layer_weights[iter_num] = np.float32(decodedWeights)
                        layer.set_weights(layer_weights)

                # Each fit() call is capped at 60 seconds to avoid timeouts.
                # NOTE(review): `epochs=epoch` runs 0, 1, ..., 9 epochs on
                # successive calls (45 in total), not 10 -- confirm intended.
                time_stopping_callback = TimeStopping(seconds=60, verbose=1)
                for epoch in range(some_epochs):
                    model.fit(self.generator, epochs=epoch, callbacks=[time_stopping_callback])

                self.model = model

                # Persist the architecture, then every weight tensor under
                # '<key>_<layer name>_<tensor index>'.
                store.Save(key, json.dumps(serialize_keras_object(model)))
                for layer in model.layers:
                    for iter_num, iter_val in enumerate(layer.get_weights()):
                        encoded_json_params = json.dumps(iter_val, cls=EncodeNumpyArray)
                        store.Save(f'{key}_{layer.name}_{iter_num}', encoded_json_params)

                self.Debug('not currently persisting LSTM, but can persist this across back tests if needed...')

            else:
                # Original author's memory notes: RSS was ~13.2 GB in the
                # April 2019 part of the run and ~13.9 GB around June 2019,
                # which is roughly where the process died. A way to inject
                # noise or reduce this model's memory usage is still needed.
                self.Debug('found LSTM self.model so no need to cache/load weights')
                time_stopping_callback = TimeStopping(seconds=60, verbose=1)
                for epoch in range(some_epochs):
                    self.model.fit(self.generator, epochs=epoch, callbacks=[time_stopping_callback])
        finally:
            # Never leave the flag stuck on True when training raises.
            self.modelIsTraining = False

        return self.model

    def LoadCachedModel(self):
        """Rebuild the LSTM forecaster from the ObjectStore without training it.

        A default stacked-LSTM network is built for the current feature count.
        When ``self.ObjectStore`` holds an architecture under ``self.key`` (the
        feature count as a string), that architecture replaces the default one
        and every weight tensor stored under
        ``'<key>_<layer name>_<tensor index>'`` is loaded into it. The model is
        compiled with Adam and a Huber loss in both cases.

        Side effects: sets ``self.generator`` (batch size 1), ``self.key`` and
        ``self.model``.

        Returns:
            The compiled model, also stored in ``self.model``.
        """
        features_set = self.dataset_train[0:]
        # Inputs and targets are the same series.
        self.generator = TimeseriesGenerator(features_set, features_set, length=self.generator_length, batch_size=1)
        feature_count = features_set.shape[1]
        self.key = str(feature_count)
        key = self.key
        store = self.ObjectStore
        _activation = 'elu'

        # Every layer is called with training=True so dropout stays active
        # whenever the model is invoked.
        # NOTE(review): the time dimension is hard-coded to 3 while the
        # generator uses self.generator_length -- confirm they agree.
        inp = tf.keras.layers.Input(shape=(3, feature_count), name='input')
        x = tf.keras.layers.LSTM(256, name='lstm256', return_sequences=True, activation=_activation, kernel_constraint=tf.keras.constraints.max_norm(max_value=2, axis=0))(inp, training=True)
        x = tf.keras.layers.Dropout(0.30, name='dropout30')(x, training=True)
        x = tf.keras.layers.GaussianDropout(0.20, name='dropout20')(x, training=True)
        x = tf.keras.layers.LSTM(128, name='lstm128', dropout=0.3, recurrent_dropout=0.3, return_sequences=True, activation=_activation)(x, training=True)
        x = tf.keras.layers.Dropout(0.30, name='dropouttwo30')(x, training=True)
        x = tf.keras.layers.GaussianDropout(0.20, name='dropouttwo20')(x, training=True)
        x = tf.keras.layers.LSTM(16, name='lstm16', dropout=0.1, return_sequences=False, activation=_activation)(x, training=True)
        out = tf.keras.layers.Dense(feature_count, name='dense1')(x)

        model = tf.keras.models.Model(inputs=inp, outputs=out)
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005, beta_1=0.9,
            beta_2=0.999, epsilon=1e-07,
            amsgrad=False, name='Adam')
        huber = tf.keras.losses.Huber()

        # A stored architecture, when present, replaces the default network.
        has_stored_model = store.ContainsKey(key)
        if has_stored_model:
            model = tf.keras.models.model_from_json(store.Read(key))

        model.compile(optimizer=optimizer, loss=huber)

        if has_stored_model:
            # Overlay each stored tensor on the layer's current weights;
            # tensors without a stored entry keep their initial values.
            for layer in model.layers:
                layer_name = layer.name
                print(layer_name)
                layer_weights = layer.get_weights()
                for iter_num in range(len(layer_weights)):
                    weight_key = f'{key}_{layer_name}_{iter_num}'
                    if store.ContainsKey(weight_key):
                        decodedWeights = json.loads(store.Read(weight_key))
                        layer_weights[iter_num] = np.float32(decodedWeights)
                layer.set_weights(layer_weights)

        self.model = model
        return self.model



        
        
# https://github.com/AminHP/gym-anytrading/blob/master/gym_anytrading/envs/trading_env.py
import numpy as np

#from .trading_env import TradingEnv, Actions, Positions
#from tradingenv import TradingEnv, Actions, Positions

import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from enum import Enum
import matplotlib.pyplot as plt

#from stocksenv import StocksEnv

class Actions(Enum):
    """Discrete actions an agent can submit to the trading environment.

    The integer values are what ``step`` compares the incoming action with.
    """

    Sell = 0
    Buy = 1


class Positions(Enum):
    """Side of the market the agent currently holds."""

    Short = 0
    Long = 1

    def opposite(self):
        """Return the other position: ``Short`` for ``Long``, ``Long`` otherwise."""
        if self == Positions.Long:
            return Positions.Short
        return Positions.Long


class TradingEnv(gym.Env):
    """Base gym environment for a Buy/Sell agent that is always Short or Long.

    The class owns the episode bookkeeping (current tick, position, running
    reward and profit, history) and the matplotlib rendering. Subclasses
    provide the data and the economics by implementing ``_process_data``,
    ``_calculate_reward``, ``_update_profit`` and ``max_possible_profit``.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, df, window_size):
        """Prepare the data, the gym spaces and the (unset) episode state.

        Args:
            df: two-dimensional data object handed to ``_process_data``.
            window_size: number of consecutive rows in one observation.
        """
        assert df.ndim == 2

        self.seed()
        self.df = df
        self.window_size = window_size
        self.prices, self.signal_features = self._process_data()
        feature_count = self.signal_features.shape[1]
        self.shape = (window_size, feature_count)

        # Gym spaces: one discrete action per Actions member, and an unbounded
        # float32 box with one row per tick of the observation window.
        self.action_space = spaces.Discrete(len(Actions))
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32
        )

        # Episode bounds; everything else stays None until reset() is called.
        self._start_tick = self.window_size
        self._end_tick = len(self.prices) - 1
        self._done = self._current_tick = self._last_trade_tick = None
        self._position = self._position_history = None
        self._total_reward = self._total_profit = None
        self._first_rendering = None
        self.history = None

    def seed(self, seed=None):
        """Create the environment's random generator and return ``[seed]``."""
        generator, used_seed = seeding.np_random(seed)
        self.np_random = generator
        return [used_seed]

    def reset(self):
        """Start a new episode in a Short position and return the first observation."""
        self._done = False
        self._current_tick = self._start_tick
        self._last_trade_tick = self._current_tick - 1
        self._position = Positions.Short
        # One placeholder per warm-up tick, then the starting position.
        warmup = self.window_size * [None]
        self._position_history = warmup + [self._position]
        self._total_reward = 0.
        self._total_profit = 1.  # unit
        self._first_rendering = True
        self.history = {}
        return self._get_observation()

    def step(self, action):
        """Advance one tick and apply ``action``.

        Returns:
            ``(observation, step_reward, done, info)`` where ``info`` holds the
            running total reward, total profit and the position value.
        """
        self._done = False
        self._current_tick += 1

        if self._current_tick == self._end_tick:
            self._done = True

        # Reward and profit are computed against the position held *before*
        # this action flips it.
        step_reward = self._calculate_reward(action)
        self._total_reward += step_reward

        self._update_profit(action)

        # Only an action that opposes the current position is a trade.
        flips_position = (
            (action == Actions.Buy.value and self._position == Positions.Short) or
            (action == Actions.Sell.value and self._position == Positions.Long)
        )
        if flips_position:
            self._position = self._position.opposite()
            self._last_trade_tick = self._current_tick

        self._position_history.append(self._position)
        observation = self._get_observation()
        info = {
            'total_reward': self._total_reward,
            'total_profit': self._total_profit,
            'position': self._position.value,
        }
        self._update_history(info)

        return observation, step_reward, self._done, info

    def _get_observation(self):
        """Return the ``window_size`` feature rows that precede the current tick."""
        window_start = self._current_tick - self.window_size
        return self.signal_features[window_start:self._current_tick]

    def _update_history(self, info):
        """Append every value of ``info`` to the per-key lists in ``self.history``."""
        if not self.history:
            self.history = {name: [] for name in info}

        for name, value in info.items():
            self.history[name].append(value)

    def render(self, mode='human'):
        """Draw the price line once, then mark the current position at the current tick."""
        marker_colors = {Positions.Short: 'red', Positions.Long: 'green'}

        def _plot_position(position, tick):
            # Positions without a colour (e.g. None) are not drawn.
            color = marker_colors.get(position)
            if color:
                plt.scatter(tick, self.prices[tick], color=color)

        if self._first_rendering:
            self._first_rendering = False
            plt.cla()
            plt.plot(self.prices)
            start_position = self._position_history[self._start_tick]
            _plot_position(start_position, self._start_tick)

        _plot_position(self._position, self._current_tick)

        title = (
            "Total Reward: %.6f" % self._total_reward + ' ~ ' +
            "Total Profit: %.6f" % self._total_profit
        )
        plt.suptitle(title)

        plt.pause(0.01)

    def render_all(self, mode='human'):
        """Plot the prices with a red dot per Short tick and a green dot per Long tick."""
        window_ticks = np.arange(len(self._position_history))
        plt.plot(self.prices)

        short_ticks = [
            tick for tick, position in zip(window_ticks, self._position_history)
            if position == Positions.Short
        ]
        long_ticks = [
            tick for tick, position in zip(window_ticks, self._position_history)
            if position == Positions.Long
        ]

        plt.plot(short_ticks, self.prices[short_ticks], 'ro')
        plt.plot(long_ticks, self.prices[long_ticks], 'go')

        title = (
            "Total Reward: %.6f" % self._total_reward + ' ~ ' +
            "Total Profit: %.6f" % self._total_profit
        )
        plt.suptitle(title)

    def close(self):
        """Close the current matplotlib figure."""
        plt.close()

    def save_rendering(self, filepath):
        """Save the current matplotlib figure to ``filepath``."""
        plt.savefig(filepath)

    def pause_rendering(self):
        """Show the current matplotlib figure."""
        plt.show()

    def _process_data(self):
        """Return ``(prices, signal_features)``; must be implemented by subclasses."""
        raise NotImplementedError

    def _calculate_reward(self, action):
        """Return the reward for ``action``; must be implemented by subclasses."""
        raise NotImplementedError

    def _update_profit(self, action):
        """Update ``self._total_profit`` for ``action``; must be implemented by subclasses."""
        raise NotImplementedError

    def max_possible_profit(self):  # trade fees are ignored
        """Return the best achievable profit; must be implemented by subclasses."""
        raise NotImplementedError

class StocksEnv(TradingEnv):
    """Trading environment driven by the ``'Close'`` column of a price frame.

    Observations are ``(price, price difference)`` rows. Reward and profit are
    only realised when a Long position is closed; nothing is booked for the
    Short side.
    """

    def __init__(self, df, window_size, frame_bound):
        """Create the environment.

        Args:
            df: frame with a ``'Close'`` column.
            window_size: number of rows in one observation.
            frame_bound: ``(start, end)`` row bounds of the episode. The
                ``window_size`` rows before ``start`` are included as warm-up,
                so ``start`` must be at least ``window_size``.
        """
        assert len(frame_bound) == 2

        # Must be set before the base constructor runs, because it calls
        # _process_data(), which reads frame_bound.
        self.frame_bound = frame_bound
        super().__init__(df, window_size)

        self.trade_fee_bid_percent = 0.01  # unit
        self.trade_fee_ask_percent = 0.005  # unit

    def _is_trade(self, action):
        """Return whether ``action`` opposes, and therefore flips, the current position."""
        return ((action == Actions.Buy.value and self._position == Positions.Short) or
                (action == Actions.Sell.value and self._position == Positions.Long))

    def _process_data(self):
        """Slice the close prices to the episode frame and derive the features.

        Returns:
            ``(prices, signal_features)`` where ``signal_features`` has two
            columns: the price and its difference to the previous price
            (0 for the first row).

        Raises:
            IndexError: if the warm-up start ``frame_bound[0] - window_size``
                lies outside the price series.
        """
        prices = self.df.loc[:, 'Close'].to_numpy()

        start = self.frame_bound[0] - self.window_size
        end = self.frame_bound[1]
        # A negative start would not fail on its own: NumPy would wrap it
        # around and the slice would silently come from the end of the series.
        if not 0 <= start < len(prices):
            raise IndexError(
                f"frame_bound[0] - window_size = {start} is outside the "
                f"price series of length {len(prices)}"
            )
        prices = prices[start:end]

        diff = np.insert(np.diff(prices), 0, 0)
        signal_features = np.column_stack((prices, diff))

        return prices, signal_features

    def _calculate_reward(self, action):
        """Return the price change since the last trade when a Long position is closed, else 0."""
        step_reward = 0

        if self._is_trade(action):
            current_price = self.prices[self._current_tick]
            last_trade_price = self.prices[self._last_trade_tick]
            price_diff = current_price - last_trade_price

            if self._position == Positions.Long:
                step_reward += price_diff

        return step_reward

    def _update_profit(self, action):
        """Compound ``self._total_profit`` when a Long position is closed or the episode ends."""
        if self._is_trade(action) or self._done:
            current_price = self.prices[self._current_tick]
            last_trade_price = self.prices[self._last_trade_tick]

            if self._position == Positions.Long:
                # The ask fee is taken off the capital at the entry price, the
                # bid fee off the shares at the exit price.
                shares = (self._total_profit * (1 - self.trade_fee_ask_percent)) / last_trade_price
                self._total_profit = (shares * (1 - self.trade_fee_bid_percent)) * current_price

    def max_possible_profit(self):
        """Return the profit multiple of a fee-free strategy with perfect hindsight.

        The price series is walked run by run: every run of non-decreasing
        prices is held Long from the end of the previous run to the end of
        this one; runs of decreasing prices contribute nothing.
        """
        current_tick = self._start_tick
        last_trade_tick = current_tick - 1
        profit = 1.

        while current_tick <= self._end_tick:
            position = None
            if self.prices[current_tick] < self.prices[current_tick - 1]:
                # Skip to the end of the falling run.
                while (current_tick <= self._end_tick and
                       self.prices[current_tick] < self.prices[current_tick - 1]):
                    current_tick += 1
                position = Positions.Short
            else:
                # Skip to the end of the rising (or flat) run.
                while (current_tick <= self._end_tick and
                       self.prices[current_tick] >= self.prices[current_tick - 1]):
                    current_tick += 1
                position = Positions.Long

            if position == Positions.Long:
                current_price = self.prices[current_tick - 1]
                last_trade_price = self.prices[last_trade_tick]
                shares = profit / last_trade_price
                profit = shares * current_price
            last_trade_tick = current_tick - 1

        return profit