| Overall Statistics | |
| --- | --- |
| Total Orders | 167 |
| Average Win | 1.37% |
| Average Loss | -0.53% |
| Compounding Annual Return | 5.662% |
| Drawdown | 11.800% |
| Expectancy | 0.236 |
| Start Equity | 100000 |
| End Equity | 110455.02 |
| Net Profit | 10.455% |
| Sharpe Ratio | -0.019 |
| Sortino Ratio | -0.02 |
| Probabilistic Sharpe Ratio | 16.749% |
| Loss Rate | 66% |
| Win Rate | 34% |
| Profit-Loss Ratio | 2.58 |
| Alpha | -0.001 |
| Beta | -0.01 |
| Annual Standard Deviation | 0.107 |
| Annual Variance | 0.011 |
| Information Ratio | -0.497 |
| Tracking Error | 0.174 |
| Treynor Ratio | 0.208 |
| Total Fees | $201.41 |
| Estimated Strategy Capacity | $810000000.00 |
| Lowest Capacity Asset | SPY R735QTJ8XC9X |
| Portfolio Turnover | 11.47% |
```python
# region imports
from AlgorithmImports import *
import gym
from stable_baselines3 import DQN
# endregion


class StableBaselinesExampleAlgorithm(QCAlgorithm):

    def initialize(self):
        self.set_start_date(2022, 7, 4)
        self.set_cash(100000)
        self.spy = self.add_equity("SPY", Resolution.DAILY).symbol

        training_length = 252*2
        self.training_data = RollingWindow[TradeBar](training_length)
        history = self.history[TradeBar](self.spy, training_length, Resolution.DAILY)
        for trade_bar in history:
            self.training_data.add(trade_bar)

        self.train(self.my_training_method)
        self.train(self.date_rules.every(DayOfWeek.SUNDAY), self.time_rules.at(8, 0), self.my_training_method)

    def get_observations_and_rewards(self, n_step=5):
        training_df = self.pandas_converter.get_data_frame[TradeBar](list(self.training_data)[::-1])
        daily_pct_change = training_df['close'].pct_change().dropna()

        obs = []
        rewards = []
        for i in range(len(daily_pct_change)-n_step):
            obs.append(training_df.iloc[i:i+n_step].values)
            rewards.append(float(daily_pct_change.iloc[i+n_step]))
        obs = np.array(obs)
        rewards = np.array(rewards)

        return obs, rewards

    def my_training_method(self):
        obs, rewards = self.get_observations_and_rewards()
        self.env = TradingEnv(obs, rewards)
        self.model = DQN("MlpPolicy", self.env)
        self.model.learn(total_timesteps=500)

    def on_data(self, data):
        features, _ = self.get_observations_and_rewards()
        action, _ = self.model.predict(features[-5:], deterministic=True)
        _, _, _, _ = self.env.step(action)

        if action == 0:
            self.liquidate(self.spy)
        elif action == 1:
            self.set_holdings(self.spy, 1)
        elif action == 2:
            self.set_holdings(self.spy, -1)


class TradingEnv(gym.Env):
    FLAT = 0
    LONG = 1
    SHORT = 2

    def __init__(self, ohlcv, ret):
        super(TradingEnv, self).__init__()

        self.ohlcv = ohlcv
        self.ret = ret
        self.trading_cost = 0.01
        self.reward = 1
        # The number of steps the training has taken; starts at 5 since we use the previous 5 data points for the observation.
        self.current_step = 5
        # The last action taken.
        self.last_action = 0

        # Define the action and observation spaces.
        # Using discrete actions, we have 3: LONG, SHORT, and FLAT.
        n_actions = 3
        self.action_space = gym.spaces.Discrete(n_actions)
        # The observation is a stack of the 5 most recent windows, each of shape (5 previous data points, OHLCV).
        self.observation_space = gym.spaces.Box(low=-2, high=2, shape=(5, 5, 5), dtype=np.float64)

    def reset(self):
        # Reset the number of steps the training has taken.
        self.current_step = 5
        # Reset the last action.
        self.last_action = 0
        # Must return an np.array type.
        return self.ohlcv[self.current_step-5:self.current_step].astype(np.float32)

    def step(self, action):
        if action == self.LONG:
            self.reward *= 1 + self.ret[self.current_step] - (self.trading_cost if self.last_action != action else 0)
        elif action == self.SHORT:
            self.reward *= 1 + -1 * self.ret[self.current_step] - (self.trading_cost if self.last_action != action else 0)
        elif action == self.FLAT:
            self.reward *= 1 - (self.trading_cost if self.last_action != action else 0)
        else:
            raise ValueError("Received invalid action={} which is not part of the action space".format(action))

        self.last_action = action
        self.current_step += 1

        # Have we iterated over all data points?
        done = (self.current_step == self.ret.shape[0]-1)

        # Reward is the cumulative return after costs.
        return self.ohlcv[self.current_step-5:self.current_step].astype(np.float32), self.reward, done, {}
```
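Because TradingEnv follows the classic gym interface (reset() returns only the observation and step() returns a four-element tuple), it can be exercised outside of LEAN before running a backtest. The snippet below is a minimal sketch of such a standalone sanity check, not part of the algorithm above: it assumes a stable_baselines3 release (pre-2.0) that still accepts classic gym environments, and the fake_obs and fake_rets arrays are hypothetical random placeholders for the OHLCV windows and daily returns that get_observations_and_rewards builds from the rolling window.

```python
# Minimal standalone sketch (assumptions: stable_baselines3 < 2.0 with classic
# gym support, and TradingEnv from the algorithm file available in this session).
import numpy as np
from stable_baselines3 import DQN
from stable_baselines3.common.env_checker import check_env

# Hypothetical placeholders for the rolling-window history:
# 100 observation windows of shape (5 bars, 5 OHLCV columns) and 100 daily returns.
n_samples = 100
fake_obs = np.random.uniform(-1, 1, size=(n_samples, 5, 5))
fake_rets = np.random.normal(0, 0.01, size=n_samples)

env = TradingEnv(fake_obs, fake_rets)
check_env(env, warn=True)  # flags observation/action space mismatches before any training

model = DQN("MlpPolicy", env)
model.learn(total_timesteps=500)

obs = env.reset()
action, _ = model.predict(obs, deterministic=True)
print(f"Predicted action: {action}")  # 0 = FLAT, 1 = LONG, 2 = SHORT
```

Running the checker locally is a quick way to catch shape or dtype mismatches between the Box observation space and the windows returned by reset() and step() before spending backtest time on training.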