import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from odo import odo
import talib
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import AverageDollarVolume, SimpleMovingAverage, RSI, SimpleBeta, AverageDollarVolume, AnnualizedVolatility, ExponentialWeightedMovingAverage
from quantopian.pipeline.filters import StaticAssets
from quantopian.pipeline import CustomFactor
class ATR(CustomFactor):
    """Mean true range over the trailing window, per asset.

    The true range of a day is the largest of: high - low,
    |high - previous close| and |low - previous close|.  The first row of
    the window has no previous close inside the window, so it contributes
    no true-range value; a 14-row window therefore averages 13 values.
    """
    inputs = [USEquityPricing.close, USEquityPricing.high, USEquityPricing.low]
    window_length = 14

    def compute(self, today, assets, out, close, high, low):
        # Align each day (rows 1..end) with the close of the day before it.
        prev_close = close[:-1]
        day_high = high[1:]
        day_low = low[1:]
        true_range = np.maximum.reduce([
            day_high - day_low,
            np.abs(day_high - prev_close),
            np.abs(day_low - prev_close),
        ])
        out[:] = true_range.mean(axis=0)
class ADX_Dir_Ind(CustomFactor):
    """Latest talib ADX value per asset.

    ``true_length`` is the ADX time period handed to talib; the pipeline
    window is twice that length.  Assets with any NaN close inside the
    window get NaN instead of an ADX value.
    """
    inputs = [USEquityPricing.high, USEquityPricing.low, USEquityPricing.close]
    true_length = 14
    window_length = 2 * true_length

    def compute(self, today, assets, out, high, low, close):
        close_has_nan = np.isnan(close).any(axis=0)
        # talib works on one 1-D series at a time, so go column by column.
        for col_ix, incomplete in enumerate(close_has_nan):
            if incomplete:
                out[col_ix] = np.nan
            else:
                adx_series = talib.ADX(
                    high[:, col_ix],
                    low[:, col_ix],
                    close[:, col_ix],
                    timeperiod=self.true_length,
                )
                # Only the most recent value is reported for the day.
                out[col_ix] = adx_series[-1]
# how to do static asset
# https://www.quantopian.com/posts/how-to-get-static-assets-in-research
# basically just picking some stocks from the DJIA, along with a few others
# Tickers of the fixed universe that the pipeline is screened down to.
universe_tickers = [
    'GDOT', 'CSCO', 'MSFT', 'BA', 'LMT', 'XOM', 'O',
    'MMM', 'CAT', 'INTC', 'CVX', 'HD', 'EXPE', 'UNH',
    'UTX', 'WMT', 'DIS', 'TRV', 'PG', 'GE', 'MCD',
]
my_assets = StaticAssets(symbols(universe_tickers))
# how to do factors:
# https://www.quantopian.com/tutorials/pipeline#lesson4
# list of factors = https://www.quantopian.com/help#built-in-factors
def make_pipeline():
    """Build the pipeline of moving averages, indicators and entry signals.

    Returns:
        A Pipeline screened to ``my_assets`` with one column per indicator
        plus the boolean ``buy_signal`` / ``sell_signal`` columns.
    """
    sma50 = SimpleMovingAverage(
        inputs=[USEquityPricing.close],
        window_length=50
    )
    sma100 = SimpleMovingAverage(
        inputs=[USEquityPricing.close],
        window_length=100
    )
    sma150 = SimpleMovingAverage(
        inputs=[USEquityPricing.close],
        window_length=150
    )
    adx = ADX_Dir_Ind()
    atr = ATR()
    close = USEquityPricing.close.latest
    atr_ratio = atr / close
    rsi = RSI()
    # Python expands `a > b > c` to `(a > b) and (b > c)`.  On pipeline terms
    # `and` cannot be evaluated element-wise, so the chained form silently
    # keeps only the second comparison.  Combine the two filters with `&`
    # so that all three averages must be stacked in order.
    buy_signal = (sma50 > sma100) & (sma100 > sma150)
    sell_signal = (sma50 < sma100) & (sma100 < sma150)
    sma_diff_1 = (sma50 - sma150) / sma150
    sma_diff_2 = (sma50 - sma100) / sma100
    ## (8554, 'SPY'), --> https://www.quantopian.com/posts/sid-slash-ticker-set
    spy_beta = SimpleBeta(target=symbols('SPY'), regression_length=150)
    # A one-day window makes this the latest day's dollar volume.
    avg_vol = AverageDollarVolume(window_length=1)
    volatility = AnnualizedVolatility()
    # NOTE(review): annualization_factor only changes the scaling constant,
    # not the lookback window, so this column is presumably a rescaled copy
    # of `volatility`.  If a shorter lookback was intended, window_length is
    # the argument to set -- confirm before changing.
    volatility_short = AnnualizedVolatility(annualization_factor=150)
    return Pipeline(
        columns={
            'sma50': sma50,
            'sma100': sma100,
            'sma150': sma150,
            'sma_diff_1': sma_diff_1,
            'sma_diff_2': sma_diff_2,
            'spy_beta': spy_beta,
            'adx': adx,
            'atr': atr,
            'atr_ratio': atr_ratio,
            'close': close,
            'rsi': rsi,
            'buy_signal': buy_signal,
            'sell_signal': sell_signal,
            'avg_vol': avg_vol,
            'volatility': volatility,
            'volatility_short': volatility_short
        },
        screen=my_assets
    )
# Build the pipeline once and evaluate it over the whole study period.
my_pipe = make_pipeline()
# `result` is indexed by (date, asset) with one column per pipeline column.
result = run_pipeline(my_pipe, '2011-01-01', '2017-01-01')
# Preview the first rows (only displayed when run as the last line of a notebook cell).
result.head()
Next I extract the symbols I want from the multi-index.
Pandas MultiIndex dataframes are a little bit confusing; see https://pandas.pydata.org/pandas-docs/stable/generated/pandas.MultiIndex.html
# Level 0 of the result's MultiIndex holds the dates, level 1 the assets.
result.index.get_level_values(0)
#result[['close']].values
# Collect every distinct asset in order of first appearance.  This replaces
# a hard-coded "first 21 rows" slice, which was only right while the
# universe had exactly 21 assets that all had data on the first date.
stock_ids = pd.unique(result.index.get_level_values(1))
print(stock_ids[0].symbol, stock_ids[0].sid)
print(len(stock_ids), stock_ids)
# Scratch notes on addressing rows of the (date, asset) MultiIndex.
#result[['close']].values
#result[['close']].at(1)
# Access a single result: pass the multi-index as a tuple, then the column name.
#result.loc[('2016-12-15', 24)]['adx']
# Another way to do the same thing:
#result.xs(['2016-12-15', 24])
# https://www.somebits.com/~nelson/pandas-multiindex-slice-demo.html
# To select on the second index level only, pass slice(None) for the first level.
#result.loc[(slice(None), slice(24, 24)), :]['close']
# Now do this to just get the values.
# .cumsum() seems to do something with plotting
#print(result.loc[(slice(None), slice(5061, 5061)), :]['close'])
# Close-price series for the single asset whose sid is 5061
# (presumably one of the assets in the universe above -- verify).
df = result.loc[(slice(None), slice(5061, 5061)), :]['close']
# The dates that series covers.
df.index.get_level_values(0)
# Draw every asset's closing price on one shared chart.
legend = []
every_date = slice(None)
for stock in stock_ids:
    this_sid = slice(stock.sid, stock.sid)
    close_tbl = result.loc[(every_date, this_sid), :]['close']
    idx = close_tbl.index.get_level_values(0)
    plt.plot(idx.values, close_tbl)
    legend.append(stock.symbol)

plt.ylabel('Asset price')
plt.legend(legend)
plt.show()
# Pull the per-asset indicator series (and their date axes).  The plotting
# calls are currently disabled, so this loop only leaves the series of the
# last asset behind in the variables below.
for stock in stock_ids:
    per_stock = result.loc[(slice(None), slice(stock.sid, stock.sid)), :]

    adx_tbl = per_stock['adx']
    adx_idx = adx_tbl.index.get_level_values(0)

    atr_tbl = per_stock['atr']
    atr_idx = atr_tbl.index.get_level_values(0)

    rsi_tbl = per_stock['rsi']
    rsi_idx = rsi_tbl.index.get_level_values(0)

    volatility_short = per_stock['volatility_short']
    volatility_short_idx = volatility_short.index.get_level_values(0)

    # plt.plot(volatility_short_idx.values, volatility_short)
    # plt.legend(['volatility'])
    # plt.ylabel('volatility indicator value')
    # plt.show()
    # plt.plot(adx_idx.values, adx_tbl)
    # plt.legend(['adx'])
    # plt.ylabel('adx indicator value')
    # plt.show()
    # plt.plot(atr_idx.values, atr_tbl)
    # plt.legend(['atr'])
    # plt.ylabel('atr indicator value')
    # plt.show()
    # plt.plot(rsi_idx.values, rsi_tbl)
    # plt.legend(['rsi'])
    # plt.ylabel('rsi indicator value')
    # plt.show()
def formatRow(row, signal):
    """Snapshot one pipeline row as a trade record.

    Args:
        row: mapping-like pipeline row, indexed by column name.
        signal: label stored under 'signal' (callers pass 'buy' or 'sell').

    Returns:
        A dict holding the signal, the close as 'price', placeholder
        'profitable' (None) and 'profits' (0.0) fields, and a copy of
        every indicator column listed below.
    """
    copied_columns = (
        'adx', 'atr', 'atr_ratio', 'avg_vol', 'volatility',
        'volatility_short', 'rsi', 'sma50', 'sma100', 'sma150',
        'spy_beta', 'sma_diff_1', 'sma_diff_2',
    )
    record = {
        'signal': signal,
        'price': row['close'],
        'profitable': None,
        'profits': 0.0,
    }
    for column in copied_columns:
        record[column] = row[column]
    return record
def recordSignals(dataframe):
    """Find the dates on which a buy or sell signal switches on.

    Walks the rows in order and records a trade whenever 'buy_signal'
    (or 'sell_signal') is true on a row but was not on the previous row.

    Args:
        dataframe: rows with 'buy_signal' and 'sell_signal' columns plus the
            columns formatRow reads; the first element of each index tuple
            is used as the trade key.

    Returns:
        A dict with 'buy_pts' and 'sell_pts' (lists of keys) and
        'trade_dict' (key -> record built by formatRow).
    """
    buy_pts, sell_pts = [], []
    trade_dict = {}
    prev_buy = prev_sell = False
    for index, row in dataframe.iterrows():
        buy_now = row['buy_signal']
        sell_now = row['sell_signal']
        if buy_now and not prev_buy:
            buy_pts.append(index[0])
            trade_dict[index[0]] = formatRow(row, 'buy')
        if sell_now and not prev_sell:
            sell_pts.append(index[0])
            trade_dict[index[0]] = formatRow(row, 'sell')
        prev_buy, prev_sell = buy_now, sell_now
    return {
        'buy_pts': buy_pts,
        'sell_pts': sell_pts,
        'trade_dict': trade_dict,
    }
# Map each ticker symbol to the signals recorded for that asset.
signals = {}
for stock in stock_ids:
    only_this_asset = (slice(None), slice(stock.sid, stock.sid))
    df = result.loc[only_this_asset, :]
    signals[stock.symbol] = recordSignals(df)

signals.keys()
I do this by plotting colored axvlines over the charts.
# One chart per asset: close price and the three moving averages, with a
# green vertical line at every buy signal and a red one at every sell signal.
for stock in stock_ids:
    per_stock = result.loc[(slice(None), slice(stock.sid, stock.sid)), :]
    close = per_stock['close']
    close_idx = close.index.get_level_values(0)
    sma50 = per_stock['sma50']
    sma50_idx = sma50.index.get_level_values(0)
    sma100 = per_stock['sma100']
    sma100_idx = sma100.index.get_level_values(0)
    sma150 = per_stock['sma150']
    sma150_idx = sma150.index.get_level_values(0)
    plt.plot(close_idx.values, close)
    plt.plot(sma50_idx.values, sma50)
    # sma100 was named in the legend but never drawn, which shifted the
    # labels so the sma150 curve was shown as 'sma100'.  Plot it so the
    # four legend entries match the four curves.
    plt.plot(sma100_idx.values, sma100)
    plt.plot(sma150_idx.values, sma150)
    for xc in signals[stock.symbol]['buy_pts']:
        plt.axvline(x=xc, color="green")
    for xc in signals[stock.symbol]['sell_pts']:
        plt.axvline(x=xc, color="red")
    plt.ylabel(stock.symbol + ' price')
    plt.legend([stock.symbol, 'sma50', 'sma100', 'sma150'])
    plt.show()
It's OK, we are not necessarily going to be overall profitable. More importantly I just want to know what other indicators are doing when I enter these positions.
# now I need to compute the profit of this algo.
def tradeCalc(signal_list, trading_size=10000.0):
    """Score an always-in-the-market long/short strategy over the signals.

    Signals are processed in key order.  A 'sell' that follows a 'buy'
    closes a long position, and a 'buy' that follows a 'sell' closes a
    short one; each closing record has its 'profitable' and 'profits'
    fields filled in place.  The first signal, and a signal repeating the
    previous direction, only (re)set the entry price.

    Args:
        signal_list: dict with a 'trade_dict' mapping sortable keys to
            records that carry 'signal', 'price', 'profitable', 'profits'.
        trading_size: cash committed to every position.

    Returns:
        None.  Results are printed and written back into 'trade_dict'.
    """
    trade_dict = signal_list['trade_dict']
    last_signal = None
    last_price = 0
    closed_trades = 0
    wins = 0
    total_profit = 0
    for key in sorted(trade_dict):
        value = trade_dict[key]
        signal = value['signal']
        price = value['price']
        closes_long = last_signal == 'buy' and signal == 'sell'
        closes_short = last_signal == 'sell' and signal == 'buy'
        if closes_long or closes_short:
            # Value now of the shares that trading_size bought/shorted at entry.
            this_val = (trading_size / last_price) * price
            if closes_long:
                value['profitable'] = price > last_price
                value['profits'] = this_val - trading_size
            else:
                value['profitable'] = price < last_price
                value['profits'] = -1 * (this_val - trading_size)
            # Only closed positions count toward the win rate; counting the
            # opening signal as well understated it.
            closed_trades += 1
            if value['profitable']:
                wins += 1
            total_profit += value['profits']
        last_signal = signal
        last_price = price
        print('on %s %s at %s was good: %s ->profit %s'
              % (key, signal, price, value['profitable'], value['profits']))
    if closed_trades:
        print('winrate:  %s' % (float(wins) / closed_trades))
    else:
        # No round trip happened, so a win rate is undefined (this used to
        # raise ZeroDivisionError for an empty trade list).
        print('winrate:  n/a (no closed trades)')
    print('total profitability %s  trading size:  %s' % (total_profit, trading_size))
    # NOTE(review): the "7 yrs" label is hard-coded and not derived from the data.
    print('total return 7 yrs: %% %s' % ((total_profit / float(trading_size)) * 100))
# Score the recorded signals of every symbol in turn.
for k, v in signals.items():
    heading = "Computing trades for " + k
    print(heading)
    tradeCalc(v)
Unfortunately, I don't see many obviously linear relationships between indicator values and magnitude of loss or profit!
If I see an rsquared that is high, I plot it. Otherwise I just print the rsquared.
Interestingly it seems that there is some negative correlation between volatility and profitable outcomes. I might have predicted the opposite.
from scipy.stats import linregress
def plotWithRegression(indicator, symbol, x, y):
    """Regress profit on an indicator and plot the fit when it is strong.

    Pairs in which either value is NaN or infinite are dropped first.  The
    r-value and r-squared are printed; the scatter and fitted line are only
    drawn when r-squared exceeds 0.4.

    Args:
        indicator: display name of the indicator (used in output and title).
        symbol: ticker symbol (used in output and title).
        x: indicator values, one per trade.
        y: profits, aligned with ``x``.

    Returns:
        The r-squared of the fit, or None when fewer than two usable points
        remain or all x values are equal (no line can be fitted).
    """
    xs = np.asarray(x, dtype=float)
    ys = np.asarray(y, dtype=float)
    usable = np.isfinite(xs) & np.isfinite(ys)
    xs = xs[usable]
    ys = ys[usable]
    print('doing regression on ', symbol, indicator)
    if len(xs) < 2 or np.ptp(xs) == 0:
        print('skipping regression: need at least two distinct x values')
        return None
    slope, intercept, r_value, p_value, std_err = linregress(xs, ys)
    rsquared = r_value**2
    print('rvalue, r-squared results: ', r_value, str(rsquared))
    if rsquared > 0.4:
        # The title previously ran "for" and the symbol together.
        plt.title(indicator + ' at entry of trade vs profit on trade for ' + symbol)
        # linregress already provides the least-squares line, so no second fit.
        plt.plot(xs, ys, 'yo', xs, slope * xs + intercept, '--k')
        plt.show()
    return rsquared
def plotIndicators(symbol, signal_list):
    """Regress trade profit against every recorded indicator for one symbol.

    The earliest record only opens the first position and always carries a
    profit of 0.0, so it is left out (the feature-building code further
    down skips it as well).

    NOTE(review): each record stores the indicator values of the row on
    which its profit was booked, i.e. the row that closed the trade.  The
    plot title says "at entry of trade" -- confirm which pairing is wanted.

    Args:
        symbol: ticker symbol, passed through to the plots.
        signal_list: dict with a 'trade_dict' of records keyed by sortable keys.
    """
    # (display label, record field), in the order the regressions are run.
    indicator_fields = (
        ('ATR', 'atr'),
        ('RSI', 'rsi'),
        ('ADX', 'adx'),
        ('SMA Difference 1', 'sma_diff_1'),
        ('SMA Difference 2', 'sma_diff_2'),
        ('SPY Beta', 'spy_beta'),
        ('ATR Ratio', 'atr_ratio'),
        ('Avg Vol', 'avg_vol'),
        ('Volatility', 'volatility'),
        ('Volatility Shorter Duration', 'volatility_short'),
    )
    trade_dict = signal_list['trade_dict']
    keylist = sorted(trade_dict)
    records = [trade_dict[key] for key in keylist[1:]]
    if len(records) < 2:
        # A regression line needs at least two points.
        print('not enough trades to regress for ' + symbol)
        return
    y_profit = [record['profits'] for record in records]
    for label, field in indicator_fields:
        x_values = [record[field] for record in records]
        plotWithRegression(label, symbol, x_values, y_profit)
# Run the indicator-vs-profit regressions for every symbol.
for k, v in signals.items():
    banner = "plotting indicators for " + k
    print(banner)
    plotIndicators(k, v)
I will use a simple classifier where there are 2 states:
I will use an algorithm that can sniff out nonlinear relationships. First I build a set of features then I scale the features using a min/max scaler. The learning algorithm I am using (SVC) works best with feature scaling.
from sklearn import preprocessing
# One feature row per recorded trade, taken from the fields listed below.
# Price-level fields (price, sma50, sma100, sma150, atr) are deliberately
# left out: only features that can be scaled together and are independent
# of a given security's price are used.
feature_fields = (
    'adx',
    'rsi',
    'spy_beta',
    'sma_diff_1',
    'sma_diff_2',
    'volatility',
    'volatility_short',
    'atr_ratio',
    'avg_vol',
)
features = []
labels = []  # 0 == not profitable, 1 == profitable
for k, v in signals.items():
    trade_dict = v['trade_dict']
    keylist = sorted(trade_dict.keys())
    # Skip the first row of every symbol.
    for key in keylist[1:]:
        value = trade_dict[key]
        features.append([value[name] for name in feature_fields])
        labels.append(1 if value['profitable'] else 0)
# Feature scaling should *probably* be done per security; for now only
# features that share a range across securities are used, so they are scaled
# together.  The exception is perhaps average volume.
print('Len trades:  %s' % len(features))
train_size = int(float(len(features)) * 0.85)
# Fit the scaler on the training rows only and reuse it for the test rows.
# Fitting on all rows before splitting lets the test set's min/max leak
# into the training data.
min_max_scaler = preprocessing.MinMaxScaler()
training_features = min_max_scaler.fit_transform(features[:train_size])
test_features = min_max_scaler.transform(features[train_size:])
training_labels = labels[:train_size]
test_labels = labels[train_size:]
# Keep `features` bound to the scaled rows, as before.
features = np.vstack([training_features, test_features])
print('len training %s  len test %s' % (len(training_features), len(test_features)))
from sklearn import svm
from sklearn.metrics import accuracy_score

# RBF-kernel support vector classifier trained on the scaled features.
clf = svm.SVC(kernel='rbf', C=1000)
clf.fit(training_features, training_labels)
pred = clf.predict(test_features)
print('predicted: ', pred)
print('test labels', test_labels)
# Share of held-out trades whose label was predicted correctly.
accuracy_score(test_labels, pred)
from sklearn.neighbors import KNeighborsClassifier

# Same experiment with a k-nearest-neighbours classifier (default settings).
neighbors = KNeighborsClassifier()
prediction = neighbors.fit(training_features, training_labels).predict(test_features)
print('predicted: ', prediction)
print('test labels', test_labels)
# Share of held-out trades whose label was predicted correctly.
accuracy_score(test_labels, prediction)
Unsurprisingly, because I did not find any obviously strong linear relationships between any of the indicators and the resulting profitability of my trades, I was not able to build a predictive system that would predict outcomes with a high degree of accuracy.
In the majority of the tests I ran, I got an accuracy only slightly better than 50%, which means there was not much success in the ML model fitting.
I think the one indicator that did seem to have some correlation with good entry points was volatility. The volatility indicator I am using is the one built into Quantopian Factors, which is based on annualized volatility calculated from the standard deviation of the stock price. I believe this should be related to the volatility gauge you will see in options pricing.
Given that volatility was useful, I would also like to look at options pricing as a way of learning information about the underlying value. At the very least it would be interesting to look at put/call ratio to see if there is some predictive power there.
Volume also seemed to have some correlation with good entry points in some tests. I want to try to refine that in the future.
In some tests ATR ratio also seemed to have some correlation with good entry points. I believe that because ATR to price ratio is essentially a measure of volatility that this makes sense, given that volatility also has some correlation with good entry points.