Backtest

Overall Statistics
Total Trades 343 Average Win 0.30% Average Loss -0.17% Compounding Annual Return 9.961% Drawdown 5.100% Expectancy 0.363 Net Profit 10.843% Sharpe Ratio 1.04 Probabilistic Sharpe Ratio 50.278% Loss Rate 52% Win Rate 48% Profit-Loss Ratio 1.82 Alpha -0 Beta 0.288 Annual Standard Deviation 0.068 Annual Variance 0.005 Information Ratio -1.827 Tracking Error 0.096 Treynor Ratio 0.244 Total Fees $343.46 Estimated Strategy Capacity $1100000000.00 Lowest Capacity Asset SPY R735QTJ8XC9X

from sklearn.model_selection import train_test_split
from optimize import *
import numpy as np
import pandas as pd

class EnsembleModel():

    def __init__(self, algo):
        self.algo = algo
        self.models = None
        self.optimizer = None

    # Build a set of models
    def Train(self, X, y):
        self.optimizer = ModelOptimizer(self.algo)
        self.models = self.optimizer.BuildModels(X, y)
        

    # Return prediction as a +/- Z-Score of the prediction from the the threshold
    def Predict(self, X):
        
        total_weight = sum(model.test_score for model in self.models)
        
        z = 0
        for model in self.models:
            result = model.Predict(X)
            # Weight the result based on the models test score
            z = z + result * (model.test_score/total_weight)
            
        return z

import talib
from talib import func
import numpy as np
import pandas as pd

#
# This helper class allows us to easily add features to our dataset
# add_features(df, ['EMA_7'])
#
# Other examples
# EMA_7_28 gives you the ratio of these two EMA
# EMA_7_28_diff gives you the difference of the current ratio and the last ratio for those two EMA
#

def ichimoku_a(high, low, n1 = 9, n2 = 26, n3 = 52):
    conv = 0.5 * (high.rolling(n1, min_periods=0).max()
                  + low.rolling(n1, min_periods=0).min())
    base = 0.5 * (high.rolling(n2, min_periods=0).max()
                  + low.rolling(n2, min_periods=0).min())
    spana = 0.5 * (conv + base)
    return pd.Series(spana)

def ichimoku_b(high, low, n1 = 9, n2 = 26, n3 = 52):
    spanb = 0.5 * (high.rolling(n3, min_periods=0).max()
       + low.rolling(n3, min_periods=0).min())
    return pd.Series(spanb)

def obv_rolling_percent(close, volume, n, fillna=False):
    df = pd.DataFrame([close, volume]).transpose()
    df['OBV'] = 0
    c1 = close < close.shift(1)
    c2 = close > close.shift(1)
    if c1.any():
        df.loc[c1, 'OBV'] = - volume
    if c2.any():
        df.loc[c2, 'OBV'] = volume
    obv = df['OBV'].rolling(n).sum() / df[volume.name].rolling(n).sum()
    if fillna:
        obv = obv.replace([np.inf, -np.inf], np.nan).fillna(0)
    return pd.Series(obv, name='obv')

def kst(close, r1 = 3, r2 = 7, r3 = 14, r4 = 28, n1 = 10, n2 = 3, n3 = 7, n4 = 14):
    rocma1 = ((close - close.shift(r1))
            / close.shift(r1)).rolling(n1, min_periods=0).mean()
    rocma2 = ((close - close.shift(r2))
              / close.shift(r2)).rolling(n2, min_periods=0).mean()
    rocma3 = ((close - close.shift(r3))
              / close.shift(r3)).rolling(n3, min_periods=0).mean()
    rocma4 = ((close - close.shift(r4))
              / close.shift(r4)).rolling(n4, min_periods=0).mean()
    return 100 * (rocma1 + 2 * rocma2 + 3 * rocma3 + 4 * rocma4)
    
def kst_sig(close, r1 = 3, r2 = 7, r3 = 14, r4 = 28, n1 = 10, n2 = 3, n3 = 7, n4 = 14, nsig = 6):
    kst_val = kst(clse, r1, r2, r3, r4, n1, n2, n3, n4)
    kstsig = kst_val.rolling(nsig, minperiods=0).mean()
    return kstsig

def check_feature_cache(cache, name, f, args):
    if not name in cache:
        cache[name] = f(*args)
    return cache[name]

def add_features(df, features, close="close", high="high", low="low", volume="volume"):

    fmap = {'MFI':'money_flow_index','ICHA':'ichimoku_a','ICHB':'ichimoku_b','BOLLH':'bollinger_hband', \
            'BOLLL':'bollinger_lband','KCC':'keltner_channel_central','NVI':'negative_volume_index', \
            'OBVP' : 'obv_rolling_percent', 'KST' : 'kst', 'KST_SIG' : 'kst_sig'}

    cache = {}


    for feature in features:
        # parse string.  Style is func_period1_period2:COL=ColumnName1 (column name optional)

        col = None
        col_idx = feature.find(':')
        if col_idx > -1:
            col = feature[col_idx+1:]
            feature = feature[0:col_idx]

        p = feature.split('_')
        if not col is None:
            feature = feature + "_" + col

        fname = p[0].upper()

        # If DM, DI, or HT function will have underscore in name, need to add suffix back and shift params back
        if len(p) > 1 and (p[1] in ['DM','DI'] or p[0] in ['HT']):
            fname += '_' + p[1]
            for i in range(2,len(p)):
                p[i-1] = p[i]
            del p[-1]

        p1 = p[1].upper() if len(p) > 1 else None
        p2 = p[2].upper() if len(p) > 2 else None
        p3 = p[3].upper() if len(p) > 3 else None

        if fname in fmap:
            if fmap[fname].find('.') > -1:
                s = fmap[fname].split('.')
                cls = s[0]
                f = getattr(globals()[cls], s[1])
            else:
                f = globals()[fmap[fname].lower()]
        elif fname in talib.__TA_FUNCTION_NAMES__:
            f = getattr(func, fname)
        elif fname.lower() in globals():
            f = globals()[fname.lower()]
        else:
            raise Exception(f'Could not find function. fname: {fname} feature: {feature}')


        if fname.endswith('MA') or fname == 'T3':
            args = [df[close], int(p1)]
            if p2 is None:
                df[feature] = (check_feature_cache(cache, feature, f, args ))
            elif p2 == 'DIFF':
                ma1 = check_feature_cache(cache, fname + '_' + p1, f, args)
                df[feature] = ma1.pct_change()
                p2 = None # So it doesn't get diffed at end of method
            else:
                ma1 = check_feature_cache(cache, fname + '_' + p1, f, args)
                args = [df[close], int(p2)]
                ma2 = check_feature_cache(cache, fname + '_' + p2, f, args)
                df[feature] = (ma1 - ma2) / ma1
                df[feature].replace([np.inf, -np.inf], 0, inplace=True)


        elif fname in ['CMO','TRIX','ROCP','ROCR100','ROC','BOLLH','BOLLL', 'RSI','MOM']:
            df[feature] = f(df[close], int(p1))


        elif fname in ['MINUS_DM', 'PLUS_DM']:
            df[feature] = f(df[high], df[low], int(p1))

        elif fname in ['ADX','ATR','KCC','MINUS_DI','PLUS_DI','WILLR']:
            try:
                if df[close].isnull().all():
                    df[feature] = np.nan
                elif p2 is None or p2.upper() == 'DIFF':
                    df[feature] = f(df[high], df[low], df[close], int(p1))
                    # Change all to percent, except WILLR
                    if not fname in ['WILLR','ADX']:
                        df[feature] = df[feature]/df[close]
                else:
                    #params = {'close' : df[close], 'high' : df[high],'low' : df[low], 'timeperiod' : int(p1)}
                    args = [df[high], df[low], df[close], int(p1)]
                    f1 = check_feature_cache(cache, fname + '_' + p1, f, args)
                    args = [df[high], df[low], df[close], int(p2)]
                    f2 = check_feature_cache(cache, fname + '_' + p2, f, args)
                    df[feature] = (f1-f2)/f1
                    df[feature].replace([np.inf, -np.inf], 0, inplace=True)
            except Exception as ex:
                if str(ex) == 'inputs are all NaN':
                    df[feature] = np.nan
                else:
                    raise


        elif fname in ['MFI']:
            df[feature] = f(df[high], df[low], df[close], df[volume], int(p1))

        elif fname in ['MACD', 'PPO', 'TSI']:
             df[feature] = f(df[close], int(p1), int(p2))

        elif fname in ['ICHA', 'ICHB', 'AO']:
            # Normalize
            df[feature] = f(df[high], df[low], int(p1), int(p2))
#             if fname.startswith('ICH'):
#                 df[feature] = df[feature]/ df[close]

        elif fname in ['NVI']:
            df[feature] = f(df[close], df[volume])

        elif fname in ['SAR', 'SAREXT']:
            # Normalize
            df[feature] = f(df[high], df[low]) #/ df[close]

        elif fname == 'KST':
            df[feature] = f(df[close], r1=10, r2=15, r3=20, r4=30, n1=10, n2=10, n3=10, n4=15)

        elif fname == 'KST_SIG':
            df[feature] = f(df[close], r1=10, r2=15, r3=20, r4=30, n1=10, n2=10, n3=10, n4=15, nsig=9)

        elif fname == 'AROON':
            df[feature] = f(df[high], df[low], int(p1))[int(p2)-1]

        elif fname == 'ULTOSC':
            df[feature] = f(df[high], df[low], df[close])

        elif fname in ['OBVP']:
            df[feature] = f(df[close],df[volume],int(p1))

        elif fname in ['OBTP']:
            df[feature] = f(df[close],df["Trades"],int(p1))

        elif fname in ['HT_DCPERIOD','HT_DCPHASE','HT_PHASOR','HT_SINE', 'HT_TRENDMODE','STOCHRSI' ]:
            df[feature] = f(df[close])

        else:
            raise Exception('Feature not found: ' + feature + ' ' + fname )

        # Difference those that are 1 period diffs
        if p3 == 'DIFF' or p2 == 'DIFF':
            df[feature] = df[feature].diff()


    return df
    
def get_exhaustive_ma_features_list():

    features = []
    periods = [7, 14, 28, 42, 70]

    mas = ["SMA", "DEMA", "KAMA", "EMA", "TEMA", "T3", "TRIMA", "WMA"]
    for p in periods:
        for ma in mas:
            features.append(ma + "_" + str(p))

    for p1 in periods:
        for p2 in periods:
            if p2 <= p1:
                continue

            for ma in mas:
                ma1 = ma + str(p1)
                ma2 = ma + str(p2)
                features.append(ma + "_" + str(p1) + "_" + str(p2))
                features.append(features[-1] + "_diff")

    return features

def get_exhaustive_features_list(diffs=True):

    features = get_exhaustive_ma_features_list()

    periods = [3,7,14,20,30]
    band_periods = [7,14,21,28]
    hi_lo_periods = [[4,9],[7,14],[10,21],[12,26]]

    pfs = ['CMO','MINUS_DI','MINUS_DM','PLUS_DI','PLUS_DM','ROCP','ATR','ADX',
                       'TRIX','WILLR']
    # Left out: MOM, VIN, VIP, ROC,ROCR,OBVM,EOM,DPI,RSI,STOCH,STOCH_SIG,FI
    for p in periods:

        features.append('AROON_' + str(p) + '_1')
        features.append('AROON_' + str(p) + '_2')
        for pf in pfs:
            if pf == 'ADX' and p == 3:
                continue
            features.append(pf + '_' + str(p))
            if diffs:
                features.append(features[-1] + "_diff")
    if diffs:
        for p1 in periods:
            for p2 in periods:
                if p2 <= p1:
                    continue

                for pf in pfs:
                    pf1 = pf + str(p1)
                    pf2 = pf + str(p2)
                    features.append(pf + "_" + str(p1) + "_" + str(p2))
                    features.append(features[-1] + "_diff")


    hlfs = ['ICHA','ICHB']
    # Exclude PPO, same as our MA setups
    # hi lo Indicators
    for hl in hi_lo_periods:
        for hlf in hlfs:
            features.append(hlf + '_' + str(hl[0]) + '_' + str(hl[1]))

    # other indicators
    otfs = ['SAR','SAREXT','HT_DCPERIOD','ULTOSC','OBVP_10','OBVP_20','OBVP_40','OBVP_80']
    for otf in otfs:
        features.append(otf)

    return features

from AlgorithmImports import *
from feature import add_features
from ensemble import EnsembleModel
from datetime import timedelta
import pandas as pd


class TransdimensionalDynamicThrustAssembly(QCAlgorithm):

    def Initialize(self):
        self.SetStartDate(2020, 11, 1)  # Set Start Date
        self.SetEndDate(2021, 12, 1)  # Set Start Date
        self.SetCash(100000)  # Set Strategy Cash
        self.SetBrokerageModel(BrokerageName.InteractiveBrokersBrokerage)
        self.UniverseSettings.Resolution = Resolution.Minute
        self.SetWarmup(timedelta(days=15))
        self.SetBenchmark('SPY')

        # Manual selection of ETFs
        # self.symbols = ["SPY","DIA","IJR","MDY","IWM","QQQ","IYE","EEM","IYW","EFA","SLV","IEF","IYM","IYF","IYH","IYR","IYC","IBB","FEZ","USO","TLT"]
        self.symbols = ["SPY", "QQQ"]

        for symbol in self.symbols:
            self.AddEquity(symbol, Resolution.Minute)

        # Manually curated universe
        self.SetUniverseSelection(ManualUniverseSelectionModel())

        # Select the demonstration alpha model
        alpha = WeightedClassifierAlpha(self, self.symbols)
        self.SetAlpha(alpha)

        # Equally weigh securities in portfolio, based on insights
        self.SetPortfolioConstruction(InsightWeightingPortfolioConstructionModel())

        # Set Immediate Execution Model
        self.SetExecution(ImmediateExecutionModel())

        # Set Null Risk Management Model
        self.SetRiskManagement(NullRiskManagementModel())


class WeightedClassifierAlpha(AlphaModel):

    def __init__(self, algo, symbols):
        self.algo = algo
        self.symbols = symbols
        self.models = None
        self.month_count = 0

        # Train models monthly
        algo.Schedule.On(algo.DateRules.MonthStart(),
                         algo.TimeRules.At(1, 0),
                         self.Train)

        # Rebalance weekly
        algo.Schedule.On(algo.DateRules.WeekStart(),
                         algo.TimeRules.At(2, 0),
                         self.Rebalance)

        self.days_count = 5

    def Update(self, algo, data):
        return []

    # Predict values and create insights
    def Rebalance(self):

        insights = []

        if self.algo.IsWarmingUp:
            return insights

        if self.models is None:
            self.BuildModels()

        try:
            # Load History and Features
            end = self.algo.Time + timedelta(days=1)
            start = end - timedelta(days=450)
            history = self.GetHistoryAndFeatures(start, end)

            # Drop our Y columns before training
            idx = pd.IndexSlice
            X = history.drop(columns=['returns_1d', 'returns_1w', 'close','high','low','open','volume'])
            columns = X.columns[X.isna().any()].tolist()
            X = X.dropna()

            # Get last row of data
            time = X.index[-1][0]
            X = X.loc[idx[time, :], :]

            # Predict each symbols direction
            insights = []

            for symbol in self.symbols:
                symbol = SymbolCache.GetSymbol(symbol)
                idx = pd.IndexSlice
                X_symbol = X.loc[idx[:, [symbol.ID.ToString()]], :]

                model = self.models[symbol]
                result = model.Predict(X_symbol)
                band = 0
                direction = InsightDirection.Flat

                if result >= 0:
                    insights.append(
                        Insight.Price(symbol, timedelta(days=4), InsightDirection.Up, 1, None, None, result))

                elif result < 0:
                    # Notice the '-' before result.  Weight must be positive for this to work correctly
                    insights.append(
                        Insight.Price(symbol, timedelta(days=4), InsightDirection.Down, 1, None, None, -result))

            self.algo.EmitInsights(insights)

        except Exception as e:
            self.Log('Unexpected error:' + str(e))
            raise

    def Train(self):

        # To speed up backtest we just trained every 3 months
        self.month_count += 1
        if self.month_count == 3:
            self.month_count = 0

            self.BuildModels()

        # self.BuildModels()

    def BuildModels(self):

        try:
            self.models = {}
            # Load History and Features
            total_days = 365 * 4
            start = self.algo.Time - timedelta(days=total_days)
            end = self.algo.Time  # + timedelta(days=1)
            history = self.GetHistoryAndFeatures(start, end)

            # Set our Y to predict if symbols will be positive in 1 week
            history = history.dropna()
            y = history['returns_1w'] > 0

            # Drop Y and close
            X = history.drop(columns=['returns_1d', 'returns_1w', 'close','high','low','open','volume'])

            # Loop through symbols and build models for each
            for symbol in self.symbols:
                symbol = SymbolCache.GetSymbol(symbol)

                idx = pd.IndexSlice
                X_symbol = X.loc[idx[:, [symbol.ID.ToString()]], :]
                y_symbol = y.loc[X_symbol.index]

                model = EnsembleModel(self.algo)
                model.Train(X_symbol, y_symbol)
                self.models[symbol] = model

        except Exception as e:
            self.algo.Debug('Unexpected error:' + str(e))
            raise

    def GetHistoryAndFeatures(self, start, end):

        history = self.algo.History(self.symbols, start, end, Resolution.Daily)
        gb = history.groupby('symbol', group_keys=False)
        history['returns_1w'] = -gb.close.pct_change(-5)
        history['returns_1d'] = -gb.close.pct_change(-1)

        # Commented out a variety of different Y variables I tested
        # rgb = history.returns_1d.groupby('symbol', group_keys=False)
        # history['return_std'] = rgb.transform(lambda x: x.rolling(window=14).std())
        # history['return_std'] = rgb.transform(lambda x: x.rolling(window=200).std())
        # history['return_mean'] = rgb.transform(lambda x: x.rolling(window=200).mean())
        # history['return_last'] = gb.close.pct_change()
        # history['return_zscore'] = history.return_last/history.return_std;
        # history['zscore'] = (history.returns_1m-history.return_mean)/history.return_std
        # history['return_gap'] = gb.apply(lambda x: (x.shift(-1).open-x.close)/x.close)

        features = ['KAMA_50_200', 'KAMA_28_100', 'KAMA_7_25', 'KAMA_14_50',
                    'KAMA_50_200_diff', 'KAMA_14_50_diff', 'KAMA_28_100_diff', 'KAMA_7_25_diff']

        # Commented out a larger feature set.  Left as an example of what can be used.
        # features = ['ATR_7', 'ATR_21', 'ATR_3_7', 'ATR_3_7_diff', 'HT_DCPERIOD', 'HT_DCPHASE','ADX_7','ADX_14','ADX_20','ADX_30','ADX_50','ADX_60',
        #     'ADX_7_diff','ADX_14_diff','ADX_20_diff','ADX_30_diff','ADX_50_diff','ADX_60_diff','KAMA_50','KAMA_200',
        #         'KAMA_50_200','EMA_50_200','KAMA_28_100','KAMA_7_25','KAMA_14_50','EMA_14_50',
        #          'KAMA_50_200_diff','EMA_50_200_diff','KAMA_14_50_diff','EMA_14_50_diff','KAMA_28_100_diff','KAMA_7_25_diff',
        #         'EMA_7_28','EMA_7_28_diff', ]

        gb = history.groupby('symbol', group_keys=False)
        history = gb.apply(lambda x: add_features(x, features))

        # Commented out some advanced features applying HT_DCPERIOD to other features
        # gb = history.groupby('symbol', group_keys=False)
        # history['HT_DCPERIOD_KAMA_50_200'] = gb.apply(lambda x: add_features(x, ['HT_DCPERIOD'], close='KAMA_50_200'))['HT_DCPERIOD']
        # history['HT_DCPERIOD_KAMA_14_50'] = gb.apply(lambda x: add_features(x, ['HT_DCPERIOD'], close='KAMA_14_50'))['HT_DCPERIOD']

        # Convert datetime to date
        #history['datetime'] = history.index.get_level_values('time')
        history = history.reset_index().set_index('time').sort_index()
        history.index = history.index.normalize()
        history = history.set_index('symbol', append=True)

        return history

from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

class ClassifierModel():

    def __init__(self, algo):
        self.algo = algo
        self.model = None
        self.params = None
        self.test_score = None
        self.test_dist = None
        self.threshold = None

    # Train the model
    def Train(self, X, y, params):
        self.params = params
        self.params['class_weight'] = self.GetClassWeight(y)
        self.model = ExtraTreesClassifier(**self.params)
        self.model.fit(X, y)
        self.test_dist = self.model.predict_proba(X)[:, 1]
        

    # Return prediction as a +/- Z-Score of the prediction from the the threshold
    def Predict(self, X):
        #self.algo.Debug(f'Number of columns before pca: {X.shape[1]}')
        idx = X.index
        #X = np.nan_to_num(X, nan=0, posinf=0, neginf=0)
        probs = pd.Series(self.model.predict_proba(X)[:, 1])
        d = self.test_dist
        return (probs[0] - self.threshold) / d.std()
        
    def GetClassWeight(self, y_train):
        pos = y_train.sum()/(len(y_train))
        neg = 1 - pos
        pw = neg/pos
        nw = pos/neg

        return {0:nw,1:pw}

from sklearn.ensemble import ExtraTreesClassifier
from scipy.stats.mstats import gmean
import numpy as np
import pandas as pd
import math
from sklearn.metrics import auc,roc_curve,precision_recall_curve, roc_auc_score, accuracy_score
from model import ClassifierModel


class ModelOptimizer():

    def __init__(self, algo):
        self.algo = algo
        self.validation_score = 0
        self.test_score = 0
        self.test_dist = None
        self.threshold = None

    def BuildModels(self, X, y):
        
        # Need to continue to play with min samples split and estimators and maybe class weights
        params = {'random_state':0,'n_estimators': 10, 'min_samples_split': 200, 
                  'class_weight' : {0:1,1:1},
                  'max_features': 'auto', 'max_depth': 3, 'criterion': 'gini', 'bootstrap': True}
        
        # Train and validate using walk forward validation
        max_score = -1
        max_y = None
        max_probs = None
        num_iter = 10
        num_test_records = 60
        num_records = len(X)
        num_train_records = num_records - num_test_records
        batch_size = 4
        models = []
        
        results = []
        
        # Iterate ove the dataset and create a number (num_iter) of models.
        for i in range(0, num_iter):
            
            probs = []
            y_test_all = []
            end_idx = num_records  
            
            # This is important.  Copy params so each model keeps a distinct copy with it's own random seed
            model_params = params.copy()
            model_params['random_state']= i

            # Loop through and do walk forward validation
            j = num_train_records
            while j < end_idx:
                
                # Add the next segment of data
                idx = j + batch_size
                if idx >= end_idx:
                    idx = end_idx-1
                X_train, X_test = X[0:j], X[j:idx]
                y_train, y_test = y[0:j], y[j:idx]
                j = j + batch_size
        
                # Set the class weight based positives vs negatives
                model_params['class_weight'] = self.get_class_weights(y_train)
        
                # Train the model
                clf = ExtraTreesClassifier(**model_params)
                clf.fit(X_train, y_train)
                
                # Predict using the model
                p = pd.Series(clf.predict_proba(X_test)[:, 1])
                
                # Add the predicions
                probs.extend(p)
                y_test_all.extend(y_test)
            
            #precisions, recalls, thresholds =  precision_recall_curve(y_test_all, probs)
            #score = auc(recalls, precisions)
            
            # Build model on all the data
            model = ClassifierModel(self.algo)
            model.Train(X, y, model_params)
            
            # Since each models predictive results tend to skew, we calculate a threshold for what is a positive result
            model.threshold = self.get_threshold(pd.Series(y), model.test_dist)
            
            # Give the model a score for use in weighting models
            score = self.score_results(y_test_all, probs, model.threshold)
            model.test_score = score
            model.test_y = y_test_all
            
            models.append(model)
            #model.test_dist = pd.Series(probs)

        return models
        
    # Calculate the model threshold for predicting positives
    def get_threshold(self, y_true, probs):
        
        # calculate percent positive
        pos = y_true.sum()/(len(y_true))
        
        # use that percent as threshold cutoff
        return sorted(probs, reverse=True)[int(round(len(probs)*pos))]
        
    def weighted_score(self, y_true, probs):
        # There are two ways we want to weight a prediction
        # 1. More recent predictions are weighted more heavily
        # 2. The larger the prediction value is weighted more heavily (positive or negative)
        
        # score = 0
        # threshold = self.get_threshold(pd.Series(y_true), probs)
        # dist = pd.Series(probs)
        
        # # First entries are older, last entries newer
        # for i in range(0,len(y_true)):
        #     pred = probs[i]
        #     y = y_true[i]
        #     zscore = (pred - threshold) / dist.std()
        #     yp = zscore > 0
            
        #     if yp == y:
        #         score = score + 1 + i/len(y_true) #* abs(zscore)
            
        # return score
                # precision, recall, threshold =  precision_recall_curve(y_pred, probs)
        # s1 = auc(recall, precision)
        # s2 = roc_auc_score(y_pred, probs)
        # return gmean([s1,s2])
        y_test_all = np.asarray(y_true)
        probs_all = pd.Series(probs)
        splits = 6
        length = math.floor(len(probs_all)/splits)
        start = 0
        end = length-1
        scores = []
        for i in range(0,splits):
    
            y_test = y_test_all[start:end]
            if y_test.sum() != 0:
                probs = probs_all[start:end]
                precision, recall, threshold =  precision_recall_curve(y_test, probs)
                pauc = auc(recall, precision)
                if math.isnan(pauc):
                    pauc = 0
                    
                scores.append(pauc*(1+i/10))
    
            end = end + length
            start = start + length
            if (end >= len(probs_all)):
                end = len(probs_all)-1
            
        return gmean(scores)
            
            
            
        
    def score_results(self, y_true, probs, threshold):

        y_pred = probs > threshold
        #return roc_auc_score(y_true, y_pred)
        return accuracy_score(y_true, y_pred)

        # y_test_all = np.asarray(y_true)
        # probs_all = pd.Series(probs)
        # splits = 6
        # length = math.floor(len(probs_all)/splits)
        # start = 0
        # end = length-1
        # scores = []
        # for i in range(0,splits):
    
        #     y_test = y_test_all[start:end]
        #     if y_test.sum() != 0:
        #         probs = probs_all[start:end]
        #         precision, recall, threshold =  precision_recall_curve(y_test, probs)
        #         pauc = auc(recall, precision)
        #         if math.isnan(pauc):
        #             pauc = 0
                    
        #         scores.append(pauc)
    
        #     end = end + length
        #     start = start + length
        #     if (end >= len(probs_all)):
        #         end = len(probs_all)-1
            
        # return gmean(scores)

        
    def get_class_weights(self, y):
        # Calculate class weights
        pos = y.sum()/(len(y))
        neg = 1 - pos
        pw = neg/pos
        nw = pos/neg
        return {0:nw,1:pw}