Commit e4adbff8 authored by O'Reilly Media, Inc.

Initial commit
No work files for this lesson...
# MAZE maps each state (its reward) to the states reachable from it; 'End' marks a terminal state.
MAZE = {
    3: {
        8: {
            99: 'End'
        }
    },
    12: {
        6: {
            5: 'End'
        }
    }
}


def flat_map(array):
    """Recursively flatten a nested list into a single flat list."""
    new_array = []
    for a in array:
        if isinstance(a, list):
            new_array += flat_map(a)
        else:
            new_array.append(a)
    return new_array


def create_dict(flat_array):
    """Turn a flat list of states back into a nested dictionary path."""
    head, *tail = flat_array
    if len(tail) == 1:
        return {head: tail[0]}
    else:
        return {head: create_dict(tail)}


def invert_dict(dictionary, stack=None):
    """Walk the maze and collect each path with its ordering reversed."""
    if not stack:
        stack = []
    if not isinstance(dictionary, dict):
        return dictionary
    for k, v in dictionary.items():
        stack.append([invert_dict(v), k])
    return stack


def create_new_maze(dictionary):
    """Build the 'upside down' maze, keyed from the end of each path back to its start."""
    new_maze = {}
    for path in invert_dict(dictionary):
        new_maze.update(create_dict(flat_map(path)[1:]))
    return new_maze


def policy(current_state):
    """Work backwards from the largest terminal reward to recover the best path."""
    upside_down_maze = create_new_maze(current_state)
    states = []
    while isinstance(upside_down_maze, dict):
        new_state = max(upside_down_maze.keys())
        states = [new_state] + states
        upside_down_maze = upside_down_maze[new_state]
    states = [upside_down_maze] + states
    total_reward = 0
    for s in states:
        total_reward += s
        print("Taking action to get to state {}".format(s))
    print("Finished game with total reward of {}".format(total_reward))
MAZE = {
    3: {
        8: {
            99: 'End'
        }
    },
    12: {
        6: {
            5: 'End'
        }
    }
}

# MAZE2 is a trap for a purely greedy policy: the largest first step (4) leads
# to a -100 penalty, while the smaller first step (3) leads to a +100 reward.
MAZE2 = {
    4: {
        -100: {
            4: 'End'
        }
    },
    3: {
        100: {
            3: 'End'
        }
    }
}
# Bellman-style discounted value:
#   v_i = r_i + gamma * v_{i+1}
#       = r_i + gamma * (r_{i+1} + gamma * (r_{i+2} + ...))
def discounted_reward(current_state, gamma=0.9):
    """Sum the rewards reachable below this state, discounted by gamma per step."""
    if isinstance(current_state, dict):
        return sum([k + gamma * discounted_reward(v) for k, v in current_state.items()])
    else:
        return 0
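
# A hypothetical worked example (not in the original file), using MAZE above:
#   discounted_reward({99: 'End'})       = 99
#   discounted_reward({8: {99: 'End'}})  = 8 + 0.9 * 99 = 97.1
#   discounted_reward({6: {5: 'End'}})   = 6 + 0.9 * 5  = 10.5
#   discounted_reward(MAZE)              = (3 + 0.9 * 97.1) + (12 + 0.9 * 10.5) = 111.84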

def policy(current_state, total_reward=0, gamma=0.9):
    """Pick the action whose immediate reward plus discounted lookahead is largest."""
    if not isinstance(current_state, dict):
        print("Finished the game with a total reward of {}".format(total_reward))
    else:
        bellman_maze = {(k + gamma * discounted_reward(v), k): v for k, v in current_state.items()}
        new_state = max(bellman_maze.keys())
        print("Taking action to get to state {} ({})".format(new_state[1], new_state[0]))
        policy(bellman_maze[new_state], total_reward + new_state[1])
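
# A hypothetical usage sketch (not part of the original file): with the discounted
# lookahead above, the policy on MAZE2 avoids the greedy trap (4 -> -100 -> 4, total -92)
# and takes the 3 -> 100 -> 3 path instead.
if __name__ == '__main__':
    policy(MAZE2)
    # Expected output (lookahead values are approximate):
    #   Taking action to get to state 3 (95.43)
    #   Taking action to get to state 100 (102.7)
    #   Taking action to get to state 3 (3.0)
    #   Finished the game with a total reward of 106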
MAZE = {
    3: {
        8: {
            99: 'End'
        }
    },
    12: {
        6: {
            5: 'End'
        }
    }
}


def policy(current_state, total_reward=0):
    """Greedy policy: always take the action with the largest immediate reward."""
    if not isinstance(current_state, dict):
        print("Finished game with total reward of {}".format(total_reward))
    else:
        new_state = max(current_state.keys())
        print("Taking action to get to state {}".format(new_state))
        policy(current_state[new_state], total_reward + new_state)
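
# A hypothetical usage sketch (not in the original file): being purely greedy,
# this policy on MAZE follows 12 -> 6 -> 5 and misses the better 3 -> 8 -> 99
# path (total reward 110) that the lookahead version finds.
if __name__ == '__main__':
    policy(MAZE)
    # Expected output:
    #   Taking action to get to state 12
    #   Taking action to get to state 6
    #   Taking action to get to state 5
    #   Finished game with total reward of 23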
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random
from collections import defaultdict

# Data pulled on August 30th.
# Base case portfolios: show what happens with a buy-and-hold strategy versus
# the Q-learning trader.


class QTrader(object):
    def __init__(self, eta=1, transaction_cost=0.005, position=10000):
        # Weekly T-bill rates joined with S&P 500 prices, aligned on date.
        self.stock_data = pd.merge(
            pd.read_csv('./tbill.csv', index_col='Date'),
            pd.read_csv('./^GSPC.csv', index_col='Date'),
            right_index=True, left_index=True
        ).sort_index()

        # These are price returns and the weekly returns for T-bills (3 months).
        self.returns = pd.DataFrame({
            'stocks': self.stock_data['Adj Close'].rolling(window=2, center=False).apply(lambda x: x[1] / x[0] - 1),
            'tbills': (self.stock_data['tbill_rate'] / 100 + 1) ** (1 / 52) - 1,
        }, index=self.stock_data.index)

        self.returns['risk_adjusted'] = self.returns.stocks - self.returns.tbills
        self.returns['risk_adjusted_moving'] = self.returns.risk_adjusted.rolling(window=12).apply(lambda x: x.mean())
        self.returns['risk_adjusted_stdev'] = self.returns.risk_adjusted.rolling(window=12).apply(lambda x: x.std())
        self.returns['risk_adjusted_high'] = self.returns.risk_adjusted_moving + 1.5 * self.returns.risk_adjusted_stdev
        self.returns['risk_adjusted_low'] = self.returns.risk_adjusted_moving - 1.5 * self.returns.risk_adjusted_stdev
        # State is +1 above the upper band, -1 below the lower band, and 0 in between.
        self.returns['state'] = (self.returns.risk_adjusted > self.returns.risk_adjusted_high).astype('int') - \
            (self.returns.risk_adjusted < self.returns.risk_adjusted_low).astype('int')  # pd.qcut(self.returns.sharpe_moving, 10, labels=range(10))

    def buy_and_hold(self, dates):
        return pd.Series(1, index=dates)

    def buy_tbills(self, dates):
        return pd.Series(0, index=dates)

    # This will go long or short or do nothing randomly
    def random(self, dates):
        return pd.Series(np.random.randint(-1, 2, size=len(dates)), index=dates)

    def calc_returns(self, holdings):
        return pd.Series(self.returns.tbills + holdings * self.returns.risk_adjusted, index=holdings.index)

    def evaluate(self, holdings):
        return (self.calc_returns(holdings) + 1).cumprod()

    def state(self, first_moment, second_moment):
        return first_moment + second_moment * 10

    def q_holdings(self, training_indexes, testing_indexes):
        factors = pd.DataFrame({'action': 0, 'reward': 0, 'state': 0}, index=training_indexes)

        # Initialize Q matrix: one entry per action (long, flat, short) for each state.
        q = {0: {1: 0, 0: 0, -1: 0}}

        # For Dyna-Q: transition counts T[s][a][s'] and expected rewards R[s][a].
        T = np.zeros((3, 3, 3)) + 0.00001
        R = np.zeros((3, 3))

        # Episodes
        for i in range(100):
            last_row, last_date = None, None

            for date, row in factors.iterrows():
                return_data = self.returns.loc[date]

                if return_data.state not in q:
                    q[return_data.state] = {1: 0, 0: 0, -1: 0}

                if last_row is None or np.isnan(return_data.state):
                    state = 0
                    reward = 0
                    action = 0
                else:
                    state = int(return_data.state)

                    # Epsilon-greedy action selection.
                    if random.random() > 0.001:
                        action = max(q[state], key=q[state].get)
                    else:
                        action = random.randint(-1, 1)

                    # Reward is the excess return earned by last week's position.
                    reward = last_row.action * (return_data.stocks - return_data.tbills)
                    alpha = 1
                    discount = 0.9

                    factors.loc[date, 'reward'] = reward
                    factors.loc[date, 'action'] = action
                    factors.loc[date, 'state'] = return_data.state

                    update = alpha * (factors.loc[date, 'reward'] + discount * max(q[row.state].values()) - q[state][action])
                    if not np.isnan(update):
                        q[state][action] += update

                    # Dyna: update the learned transition and reward models.
                    action_idx = int(last_row.action + 1)
                    state_idx = int(last_row.state + 1)
                    new_state_idx = int(state + 1)
                    T[state_idx][action_idx][new_state_idx] += 1
                    R[state_idx][action_idx] = (1 - alpha) * R[state_idx][action_idx] + alpha * reward

                last_date, last_row = date, factors.loc[date]

            # Dyna planning: replay simulated experience from the learned model.
            for j in range(100):
                state_idx = random.randint(0, 2)
                action_idx = random.randint(0, 2)
                new_state = np.random.choice([-1, 0, 1], 1, p=T[state_idx][action_idx] / T[state_idx][action_idx].sum())[0]
                r = R[state_idx][action_idx]
                q[state][action] += alpha * (r + discount * max(q[new_state].values()) - q[state][action])

            sharpe = self.sharpe(factors.action)
            if sharpe > 0.20:
                break

            print("For episode {} we get an internal sharpe ratio of {}".format(i, self.sharpe(factors.action)))

        # Apply the learned policy to the held-out testing period.
        testing = pd.DataFrame({'action': 0, 'state': 0}, index=testing_indexes)
        testing['state'] = self.returns.loc[testing_indexes, 'state']
        testing['action'] = testing['state'].apply(lambda state: max(q[state], key=q[state].get))

        print(self.sharpe(testing.action))
        return testing.action

    def discretize(self, number, steps):
        return 0

    def sharpe(self, holdings):
        returns = holdings * (self.returns.stocks - self.returns.tbills)
        return np.nanmean(returns) / np.nanstd(returns)

    def graph_portfolios(self):
        midpoint = int(len(self.returns.index) / 2)
        training_indexes = self.returns.index[:midpoint]
        testing_indexes = self.returns.index[midpoint:]

        portfolios = pd.DataFrame({
            'buy_and_hold': self.buy_and_hold(testing_indexes),
            'buy_tbills': self.buy_tbills(testing_indexes),
            'random': self.random(testing_indexes),
            'qtrader': self.q_holdings(training_indexes, testing_indexes)
        }, index=testing_indexes)

        portfolio_values = pd.DataFrame({
            'buy_and_hold': self.evaluate(portfolios.buy_and_hold),
            'buy_tbills': self.evaluate(portfolios.buy_tbills),
            'random': self.evaluate(portfolios.random),
            'qtrader': self.evaluate(portfolios.qtrader)
        }, index=testing_indexes)

        portfolio_values.plot()
        plt.annotate(
            "Buy and hold sharpe: {}\n QTrader sharpe: {}".format(
                self.sharpe(portfolios.buy_and_hold), self.sharpe(portfolios.qtrader)),
            xy=(0.25, 0.95), xycoords='axes fraction')
        plt.show()
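
# A hypothetical usage sketch (not part of the original file). It assumes the
# ./tbill.csv and ./^GSPC.csv data files referenced in __init__ are present in
# the working directory.
if __name__ == '__main__':
    trader = QTrader()
    trader.graph_portfolios()  # trains on the first half of the data, evaluates and plots the second half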
## Example files for the title:
# Reinforcement Learning (RL) in Python, by Matthew Kirk
[![Reinforcement Learning (RL) in Python, by Matthew Kirk](http://akamaicovers.oreilly.com/images/9781491996539/cat.gif)](https://www.safaribooksonline.com/)
The following applies to example files from material published by O’Reilly Media, Inc. Content from other publishers may include different rules of usage. Please refer to any additional usage rights explained in the actual example files or refer to the publisher’s website.
O'Reilly books are here to help you get your job done. In general, you may use the code in O'Reilly books in your programs and documentation. You do not need to contact us for permission unless you're reproducing a significant portion of the code. For example, writing a program that uses several chunks of code from our books does not require permission. Answering a question by citing our books and quoting example code does not require permission. On the other hand, selling or distributing a CD-ROM of examples from O'Reilly books does require permission. Incorporating a significant amount of example code from our books into your product's documentation does require permission.
We appreciate, but do not require, attribution. An attribution usually includes the title, author, publisher, and ISBN.
If you think your use of code examples falls outside fair use or the permission given here, feel free to contact us at <permissions@oreilly.com>.
Please note that the examples are not production code and have not been carefully tested. They are provided "as-is" and come with no warranty of any kind.
Due to ongoing efforts to ensure you have the best possible learning experience, we may update the working files for this course from time to time.
We recommend that you download the latest version of these files from:
http://www.infiniteskills.com/02557