Overall Statistics

| Statistic | Value |
| --- | --- |
| Total Orders | 549 |
| Average Win | 1.63% |
| Average Loss | -0.91% |
| Compounding Annual Return | 12.272% |
| Drawdown | 46.000% |
| Expectancy | 0.361 |
| Start Equity | 100000 |
| End Equity | 185479.40 |
| Net Profit | 85.479% |
| Sharpe Ratio | 0.387 |
| Sortino Ratio | 0.301 |
| Probabilistic Sharpe Ratio | 7.238% |
| Loss Rate | 51% |
| Win Rate | 49% |
| Profit-Loss Ratio | 1.79 |
| Alpha | -0.015 |
| Beta | 1.062 |
| Annual Standard Deviation | 0.231 |
| Annual Variance | 0.053 |
| Information Ratio | -0.065 |
| Tracking Error | 0.144 |
| Treynor Ratio | 0.084 |
| Total Fees | $712.05 |
| Estimated Strategy Capacity | $710000000.00 |
| Lowest Capacity Asset | SPY R735QTJ8XC9X |
| Portfolio Turnover | 11.79% |
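As a quick check, the reported Expectancy is roughly consistent with the Win Rate, Loss Rate, and Profit-Loss Ratio above under the common definition Expectancy = Win Rate × Profit-Loss Ratio − Loss Rate. This is only an approximate reconciliation, since the displayed rates are rounded:

```python
# Approximate reconciliation of the reported Expectancy from the rounded figures above,
# assuming the common definition: Expectancy = WinRate * ProfitLossRatio - LossRate.
win_rate, loss_rate, profit_loss_ratio = 0.49, 0.51, 1.79
expectancy = win_rate * profit_loss_ratio - loss_rate
print(round(expectancy, 3))  # ~0.367, close to the reported 0.361 (difference from rounding of the rates)
```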
# region imports
from AlgorithmImports import *
import gym
from stable_baselines3 import DQN
# endregion


class StableBaselinesExampleAlgorithm(QCAlgorithm):

    def Initialize(self):
        self.SetStartDate(2019, 1, 1)
        self.SetEndDate(2024, 5, 1)
        self.SetCash(100000)
        self.spy = self.AddEquity("SPY", Resolution.Daily).Symbol

        # Warm up a rolling window with two years of daily bars for training.
        training_length = 252 * 2
        self.training_data = RollingWindow[TradeBar](training_length)
        history = self.History[TradeBar](self.spy, training_length, Resolution.Daily)
        for trade_bar in history:
            self.training_data.Add(trade_bar)

        # Train the model once at start-up and then every Sunday at 8:00.
        self.Train(self.my_training_method)
        self.Train(self.DateRules.Every(DayOfWeek.Sunday), self.TimeRules.At(8, 0), self.my_training_method)

    def get_observations_and_rewards(self, n_step=5):
        # Convert the rolling window (stored newest first) into a chronological DataFrame.
        training_df = self.PandasConverter.GetDataFrame[TradeBar](list(self.training_data)[::-1])
        daily_pct_change = training_df['close'].pct_change().dropna()

        # Each observation is a 5-day window of OHLCV values; the reward is the next day's return.
        obs = []
        rewards = []
        for i in range(len(daily_pct_change) - n_step):
            obs.append(training_df.iloc[i:i + n_step].values)
            rewards.append(float(daily_pct_change.iloc[i + n_step]))

        return np.array(obs), np.array(rewards)

    def my_training_method(self):
        obs, rewards = self.get_observations_and_rewards()
        self.env = TradingEnv(obs, rewards)
        self.model = DQN("MlpPolicy", self.env)
        self.model.learn(total_timesteps=100)

    def OnData(self, data):
        features, _ = self.get_observations_and_rewards()
        action, _ = self.model.predict(features[-5:], deterministic=True)
        _, _, _, _ = self.env.step(action)

        # Map the predicted action to target holdings. Note that this mapping
        # (100% long / 200% long / flat) differs from the environment's
        # FLAT/LONG/SHORT labels.
        if action == 0:
            self.SetHoldings(self.spy, 1)
        elif action == 1:
            self.SetHoldings(self.spy, 2)
        elif action == 2:
            self.SetHoldings(self.spy, 0)


class TradingEnv(gym.Env):
    FLAT = 0
    LONG = 1
    SHORT = 2

    def __init__(self, ohlcv, ret):
        super(TradingEnv, self).__init__()

        self.ohlcv = ohlcv
        self.ret = ret
        self.trading_cost = 0.01
        # Cumulative return factor used as the reward (carried across episodes).
        self.reward = 1

        # The number of steps taken so far; starts at 5 because the previous 5 data points form the observation.
        self.current_step = 5
        # The last action taken.
        self.last_action = 0

        # Define the action and observation spaces.
        # Three discrete actions: FLAT, LONG and SHORT.
        n_actions = 3
        self.action_space = gym.spaces.Discrete(n_actions)
        # Each observation is a stack of the 5 most recent (5-day, OHLCV) windows, hence shape (5, 5, 5).
        self.observation_space = gym.spaces.Box(low=-2, high=2, shape=(5, 5, 5), dtype=np.float64)

    def reset(self):
        # Reset the step counter and the last action.
        self.current_step = 5
        self.last_action = 0
        # Must return an np.array matching the observation-space shape.
        return self.ohlcv[self.current_step - 5:self.current_step].astype(np.float32)

    def step(self, action):
        # Compound the return of the chosen position, charging the trading cost whenever the position changes.
        if action == self.LONG:
            self.reward *= 1 + self.ret[self.current_step] - (self.trading_cost if self.last_action != action else 0)
        elif action == self.SHORT:
            self.reward *= 1 - self.ret[self.current_step] - (self.trading_cost if self.last_action != action else 0)
        elif action == self.FLAT:
            self.reward *= 1 - (self.trading_cost if self.last_action != action else 0)
        else:
            raise ValueError(f"Received invalid action={action} which is not part of the action space")

        self.last_action = action
        self.current_step += 1

        # Have we iterated over all the data points?
        done = (self.current_step == self.ret.shape[0] - 1)

        # Return the next observation, the cumulative-return reward, the done flag and an empty info dict (classic gym API).
        return self.ohlcv[self.current_step - 5:self.current_step].astype(np.float32), self.reward, done, {}
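As a side note, the custom TradingEnv can be exercised outside of LEAN with synthetic data to confirm that the observation shapes, rewards, and DQN wiring line up. The sketch below is a hypothetical local sanity check, not part of the backtest above; it assumes a stable_baselines3 release that still accepts the classic gym API used by TradingEnv, that TradingEnv (with `gym` and `numpy as np` in scope) is defined in the same module, and that the random arrays merely mimic the (windows, 5 days, 5 OHLCV features) layout produced by get_observations_and_rewards.

```python
# Minimal local sanity check for TradingEnv (hypothetical; synthetic data only).
import numpy as np
from stable_baselines3 import DQN

n_windows, n_step, n_features = 295, 5, 5
ohlcv = np.random.randn(n_windows, n_step, n_features)  # stands in for stacked OHLCV windows
returns = np.random.randn(n_windows) * 0.01              # stands in for daily percentage changes

env = TradingEnv(ohlcv, returns)                         # TradingEnv from the listing above
obs = env.reset()
assert obs.shape == env.observation_space.shape          # (5, 5, 5)

model = DQN("MlpPolicy", env)
model.learn(total_timesteps=100)                         # short run, just to confirm the wiring

action, _ = model.predict(obs, deterministic=True)
print("predicted action:", action)                       # 0 = FLAT, 1 = LONG, 2 = SHORT in the environment
```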