Step 1: Building a data pipeline with many indicators

In [108]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from odo import odo
import talib
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import AverageDollarVolume, SimpleMovingAverage, RSI, SimpleBeta, AverageDollarVolume, AnnualizedVolatility, ExponentialWeightedMovingAverage 
from quantopian.pipeline.filters import  StaticAssets
from quantopian.pipeline import  CustomFactor

class ATR(CustomFactor):
    """Mean true range over the lookback window, one value per asset.

    The true range of a bar is the largest of: high - low,
    |high - previous close| and |low - previous close|.  The first bar of the
    window has no previous close inside the window, so it is left out; with
    ``window_length = 14`` the result is the mean of 13 true-range values.
    """
    inputs = [USEquityPricing.close, USEquityPricing.high, USEquityPricing.low]
    window_length = 14

    def compute(self, today, assets, out, close, high, low):
        # Pair every bar (from the second one on) with the close of the bar
        # before it.  Rows are bars, columns are assets.
        prev_close = close[:-1]
        bar_high = high[1:]
        bar_low = low[1:]

        range_hl = bar_high - bar_low
        range_h_pc = np.abs(bar_high - prev_close)
        range_l_pc = np.abs(bar_low - prev_close)

        true_range = np.maximum(range_hl, np.maximum(range_h_pc, range_l_pc))
        # Plain mean: any NaN in an asset's column makes that asset's ATR NaN.
        out[:] = np.mean(true_range, axis=0)

class ADX_Dir_Ind(CustomFactor):
    """Latest ``talib.ADX`` value per asset.

    ``true_length`` is the ``timeperiod`` handed to talib; the pipeline window
    is twice that long (presumably so talib has enough bars to emit a value
    for the final row -- confirm against the talib lookback rules).
    """
    inputs = [USEquityPricing.high, USEquityPricing.low, USEquityPricing.close]
    true_length = 14
    window_length = 2 * true_length

    def compute(self, today, assets, out, high, low, close):
        # One flag per asset column: does its close series contain a NaN?
        close_has_nan = np.isnan(close).any(axis=0)
        for col_ix in range(len(close_has_nan)):
            if close_has_nan[col_ix]:
                # Do not call talib on incomplete close data; report NaN.
                out[col_ix] = np.nan
            else:
                adx_series = talib.ADX(
                    high[:, col_ix],
                    low[:, col_ix],
                    close[:, col_ix],
                    timeperiod=self.true_length)
                # Only the most recent ADX reading is kept.
                out[col_ix] = adx_series[-1]
# how to do static asset
# https://www.quantopian.com/posts/how-to-get-static-assets-in-research
# basically just picking some stocks from the DJIA, along with a few others
# `my_assets` is a StaticAssets filter over a fixed list of 21 tickers; it is
# passed as the pipeline `screen` in make_pipeline() so only these assets are
# returned.
# NOTE(review): `symbols` is not imported anywhere in this notebook --
# presumably it is a builtin of the Quantopian research environment; confirm.
my_assets = StaticAssets(symbols([
    'GDOT', 'CSCO', 'MSFT', 'BA', 'LMT', 'XOM', 'O', 'MMM', 'CAT', 'INTC',
    'CVX', 'HD', 'EXPE', 'UNH', 'UTX', 'WMT', 'DIS', 'TRV', 'PG', 'GE', 'MCD']
))

# how to do factors:
# https://www.quantopian.com/tutorials/pipeline#lesson4
# list of factors = https://www.quantopian.com/help#built-in-factors
def make_pipeline():
    """Build the indicator pipeline, screened down to ``my_assets``.

    Columns produced (one row per date/asset):
      * ``close``: latest close price
      * ``sma50`` / ``sma100`` / ``sma150``: simple moving averages of close
      * ``sma_diff_1``: (sma50 - sma150) / sma150
      * ``sma_diff_2``: (sma50 - sma100) / sma100
      * ``buy_signal``: sma50 > sma100 AND sma100 > sma150
      * ``sell_signal``: sma50 < sma100 AND sma100 < sma150
      * ``adx``, ``atr``, ``atr_ratio`` (atr / close), ``rsi``
      * ``spy_beta``: SimpleBeta against SPY over 150 bars
      * ``avg_vol``: AverageDollarVolume with a 1-bar window
      * ``volatility``, ``volatility_short``: AnnualizedVolatility variants

    Returns:
        Pipeline: the pipeline object to hand to ``run_pipeline``.
    """
    close = USEquityPricing.close.latest

    sma50 = SimpleMovingAverage(
        inputs=[USEquityPricing.close],
        window_length=50
    )
    sma100 = SimpleMovingAverage(
        inputs=[USEquityPricing.close],
        window_length=100
    )
    sma150 = SimpleMovingAverage(
        inputs=[USEquityPricing.close],
        window_length=150
    )

    adx = ADX_Dir_Ind()
    atr = ATR()
    atr_ratio = atr / close
    rsi = RSI()

    # Do NOT write `sma50 > sma100 > sma150`: Python expands a chained
    # comparison to `(sma50 > sma100) and (sma100 > sma150)`, and `and`
    # evaluates the truthiness of the first pipeline Filter object instead of
    # combining the two filters element-wise.  Filters must be combined
    # explicitly with `&`.
    buy_signal = (sma50 > sma100) & (sma100 > sma150)
    sell_signal = (sma50 < sma100) & (sma100 < sma150)

    sma_diff_1 = (sma50 - sma150) / sma150
    sma_diff_2 = (sma50 - sma100) / sma100

    ## (8554, 'SPY'), --> https://www.quantopian.com/posts/sid-slash-ticker-set
    spy_beta = SimpleBeta(target=symbols('SPY'), regression_length=150)
    avg_vol = AverageDollarVolume(window_length=1)
    volatility = AnnualizedVolatility()
    # NOTE(review): `annualization_factor` only rescales the result; it does
    # not shorten the lookback.  In the sample output volatility_short equals
    # volatility * sqrt(150 / 252) on every row, i.e. this column carries no
    # extra information.  If a shorter lookback was intended, pass
    # `window_length=150` instead -- confirm intent before changing.
    volatility_short = AnnualizedVolatility(annualization_factor=150)

    return Pipeline(
        columns={
            'sma50': sma50,
            'sma100': sma100,
            'sma150': sma150,
            'sma_diff_1': sma_diff_1,
            'sma_diff_2': sma_diff_2,
            'spy_beta': spy_beta,
            'adx': adx,
            'atr': atr,
            'atr_ratio': atr_ratio,
            'close': close,
            'rsi': rsi,
            'buy_signal': buy_signal,
            'sell_signal': sell_signal,
            'avg_vol': avg_vol,
            'volatility': volatility,
            'volatility_short': volatility_short
        },
        screen=my_assets
    )


# Build the pipeline once and run it over the date range below.
# `result` is indexed by (date, asset) -- see the cell output -- with one
# column per entry in the pipeline's `columns` dict; later cells read it.
my_pipe = make_pipeline()
result = run_pipeline(my_pipe, '2011-01-01', '2017-01-01')
# Show the first rows as a quick sanity check.
result.head()
Out[108]:
adx atr atr_ratio avg_vol buy_signal close rsi sell_signal sma100 sma150 sma50 sma_diff_1 sma_diff_2 spy_beta volatility volatility_short
2011-01-03 00:00:00+00:00 Equity(698 [BA]) 11.794604 0.960308 0.014713 1.190407e+08 True 65.269 58.636364 False 65.597119 65.325511 66.271223 0.014477 0.010276 1.326340 0.299217 0.230851
Equity(1267 [CAT]) 61.725952 1.115000 0.011901 2.162524e+08 True 93.690 77.021277 False 79.521566 74.382678 86.055260 0.156926 0.082163 1.408257 0.316105 0.243880
Equity(1900 [CSCO]) 35.117304 0.265385 0.013118 5.231631e+08 False 20.230 63.829787 True 21.317790 21.884060 20.848380 -0.047326 -0.022020 1.061885 0.305511 0.235707
Equity(2190 [DIS]) 23.090536 0.427692 0.011396 1.100276e+08 True 37.530 65.172414 False 34.990427 34.480844 36.584679 0.061015 0.045563 1.048521 0.233794 0.180376
Equity(3149 [GE]) 74.818316 0.258596 0.014123 4.259592e+08 True 18.310 71.256905 False 16.205797 15.879880 16.655275 0.048829 0.027736 1.218541 0.273774 0.211221

Next I do some sanity checks on the data

And I extract the symbols from the multi-index that I want.

Pandas dataframes with a MultiIndex are a little bit confusing; see https://pandas.pydata.org/pandas-docs/stable/generated/pandas.MultiIndex.html

In [109]:
# multiindex = 
# Level 0 of the result's MultiIndex is the date; it repeats once per asset
# row, which is why the output below shows the same date many times.
result.index.get_level_values(0)
#result[['close']].values
Out[109]:
DatetimeIndex(['2011-01-03', '2011-01-03', '2011-01-03', '2011-01-03',
               '2011-01-03', '2011-01-03', '2011-01-03', '2011-01-03',
               '2011-01-03', '2011-01-03',
               ...
               '2017-01-03', '2017-01-03', '2017-01-03', '2017-01-03',
               '2017-01-03', '2017-01-03', '2017-01-03', '2017-01-03',
               '2017-01-03', '2017-01-03'],
              dtype='datetime64[ns, UTC]', length=31731, freq=None)
In [110]:
# Level 1 of the index holds the Equity object of each row.  Take the distinct
# assets (in order of first appearance) instead of slicing the first 21 rows:
# a fixed slice silently breaks if the asset list changes or if an asset has
# no row on the first date.
stock_ids = result.index.get_level_values(1).unique()
print(stock_ids[0].symbol, stock_ids[0].sid)
print(len(stock_ids), stock_ids)

# Notes on addressing the (date, asset) MultiIndex:
#   single row, index given as a tuple, then a column name:
#       result.loc[('2016-12-15', 24)]['adx']
#   the same row via xs:
#       result.xs(['2016-12-15', 24])
#   all dates for one asset -- slice(None) on the date level, sid on the
#   asset level (https://www.somebits.com/~nelson/pandas-multiindex-slice-demo.html):
#       result.loc[(slice(None), slice(24, 24)), :]['close']

# Close-price series for sid 5061 (MSFT in the asset list printed above) and
# its dates, to confirm there is one row per trading day.
df = result.loc[(slice(None), slice(5061, 5061)), :]['close']
df.index.get_level_values(0)
(u'BA', 698)
(21, array([Equity(698 [BA]), Equity(1267 [CAT]), Equity(1900 [CSCO]),
       Equity(2190 [DIS]), Equity(3149 [GE]), Equity(3496 [HD]),
       Equity(3951 [INTC]), Equity(4707 [MCD]), Equity(4922 [MMM]),
       Equity(5061 [MSFT]), Equity(5938 [PG]), Equity(7041 [TRV]),
       Equity(7792 [UNH]), Equity(7883 [UTX]), Equity(8229 [WMT]),
       Equity(8347 [XOM]), Equity(12087 [O]), Equity(12691 [LMT]),
       Equity(23112 [CVX]), Equity(27543 [EXPE]), Equity(39932 [GDOT])], dtype=object))
Out[110]:
DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',
               '2011-01-07', '2011-01-10', '2011-01-11', '2011-01-12',
               '2011-01-13', '2011-01-14',
               ...
               '2016-12-19', '2016-12-20', '2016-12-21', '2016-12-22',
               '2016-12-23', '2016-12-27', '2016-12-28', '2016-12-29',
               '2016-12-30', '2017-01-03'],
              dtype='datetime64[ns, UTC]', length=1511, freq=None)

Plotting each security to verify that we see trends.

In [111]:
# Overlay the close-price history of every asset on one set of axes.
legend = [stock.symbol for stock in stock_ids]
for stock in stock_ids:
    # All dates (slice(None)) for exactly this sid on the asset level.
    close_tbl = result.loc[(slice(None), slice(stock.sid, stock.sid)), :]['close']
    idx = close_tbl.index.get_level_values(0)
    plt.plot(idx.values, close_tbl)

# Legend entries are in the same order the lines were drawn.
plt.legend(legend)
plt.ylabel('Asset price')
plt.show()

Plotting some of these indicators just for sanity check

In [116]:
# For each asset, pull the per-date series of four indicators out of `result`
# together with their date axis (level 0 of the index).
# NOTE: every plotting call below is commented out, so as written this cell
# only rebuilds the series on each iteration and draws nothing.  Uncomment one
# group at a time to get one figure per asset for that indicator.
for stock in stock_ids:
    adx_tbl = result.loc[(slice(None), slice(stock.sid, stock.sid)), :]['adx']
    adx_idx = adx_tbl.index.get_level_values(0)
    
    atr_tbl = result.loc[(slice(None), slice(stock.sid, stock.sid)), :]['atr']
    atr_idx = atr_tbl.index.get_level_values(0)
    
    rsi_tbl = result.loc[(slice(None), slice(stock.sid, stock.sid)), :]['rsi']
    rsi_idx = rsi_tbl.index.get_level_values(0)
    
    volatility_short = result.loc[(slice(None), slice(stock.sid, stock.sid)), :]['volatility_short']
    volatility_short_idx = volatility_short.index.get_level_values(0)
    
#     plt.plot(volatility_short_idx.values, volatility_short)
#     plt.legend(['volatility'])
#     plt.ylabel('volatility indicator value')
#     plt.show()
    
#     plt.plot(adx_idx.values, adx_tbl)
#     plt.legend(['adx'])
#     plt.ylabel('adx indicator value')
#     plt.show()
    
#     plt.plot(atr_idx.values, atr_tbl)
#     plt.legend(['atr'])
#     plt.ylabel('atr indicator value')
#     plt.show()
    
#     plt.plot(rsi_idx.values, rsi_tbl)
#     plt.legend(['rsi'])
#     plt.ylabel('rsi indicator value')
#     plt.show()

I record signals along with the indicator values at the given timestamp

In [117]:
def formatRow(row, signal):
    """Snapshot one pipeline row as a trade record.

    Args:
        row: mapping-like object (e.g. a DataFrame row) indexable by the
            pipeline column names.
        signal: label stored under ``'signal'`` (callers pass 'buy'/'sell').

    Returns:
        dict with the signal label, the close price under ``'price'``,
        placeholder ``'profitable'`` (None) and ``'profits'`` (0.0) fields,
        and a copy of every indicator column listed below.
    """
    record = {
        'signal': signal,
        'price': row['close'],
        'profitable': None,
        'profits': 0.0,
    }
    # Indicator columns copied through unchanged, under the same key.
    indicator_columns = (
        'adx', 'atr', 'atr_ratio', 'avg_vol',
        'volatility', 'volatility_short', 'rsi',
        'sma50', 'sma100', 'sma150',
        'spy_beta', 'sma_diff_1', 'sma_diff_2',
    )
    for column in indicator_columns:
        record[column] = row[column]
    return record

def recordSignals(dataframe):
    """Record the rows on which a buy or sell signal switches on.

    Walks ``dataframe`` in row order and, for each of the ``'buy_signal'`` and
    ``'sell_signal'`` columns, notes a row only when the signal is True there
    and was not True on the previous row (the very first row counts as
    "previously off").

    Args:
        dataframe: frame whose index entries are tuples with the timestamp in
            position 0, and which has the signal columns plus every column
            read by ``formatRow``.

    Returns:
        dict with
          * ``'buy_pts'``: timestamps where the buy signal switched on
          * ``'sell_pts'``: timestamps where the sell signal switched on
          * ``'trade_dict'``: timestamp -> ``formatRow`` record; if both
            signals fire on one timestamp the sell record is the one kept.
    """
    buy_pts, sell_pts = [], []
    trade_dict = {}
    prev_buy = prev_sell = False

    for index, row in dataframe.iterrows():
        timestamp = index[0]
        buy_now = row['buy_signal']
        sell_now = row['sell_signal']

        # Explicit == comparisons are kept on purpose: the cells are compared
        # by value against True/False rather than tested for truthiness.
        if prev_buy == False and buy_now == True:
            buy_pts.append(timestamp)
            trade_dict[timestamp] = formatRow(row, 'buy')
        if prev_sell == False and sell_now == True:
            sell_pts.append(timestamp)
            trade_dict[timestamp] = formatRow(row, 'sell')

        prev_buy, prev_sell = buy_now, sell_now

    return {
        'buy_pts': buy_pts,
        'sell_pts': sell_pts,
        'trade_dict': trade_dict
    }

# Build one signal record per asset, keyed by ticker symbol.
signals = {}
for stock in stock_ids:
    # All dates for this one sid (slice(None) on the date level).
    df = result.loc[(slice(None), slice(stock.sid, stock.sid)), :]
    signals[stock.symbol] = recordSignals(df)

# Quick check that every asset produced an entry.
signals.keys()
Out[117]:
[u'WMT',
 u'PG',
 u'XOM',
 u'GDOT',
 u'BA',
 u'INTC',
 u'CVX',
 u'LMT',
 u'MCD',
 u'CSCO',
 u'O',
 u'CAT',
 u'MMM',
 u'GE',
 u'MSFT',
 u'EXPE',
 u'UNH',
 u'UTX',
 u'TRV',
 u'HD',
 u'DIS']

Next, I visually validate the trades based on symbols

I do this by plotting colored vertical lines (`plt.axvline`) over the charts

In [118]:
# One figure per asset: close price and the three moving averages that drive
# the signals, with a green vertical line at every recorded buy point and a
# red one at every recorded sell point.
for stock in stock_ids:
    # All dates for this one sid; every series below shares this date axis.
    stock_rows = result.loc[(slice(None), slice(stock.sid, stock.sid)), :]
    close_idx = stock_rows.index.get_level_values(0)

    close = stock_rows['close']
    sma50 = stock_rows['sma50']
    sma100 = stock_rows['sma100']
    sma150 = stock_rows['sma150']

    # Labels are attached to the lines themselves so the legend cannot drift
    # out of step with what is drawn (previously sma100 was listed in the
    # legend but never plotted, which mislabelled the sma150 line).
    plt.plot(close_idx.values, close, label=stock.symbol)
    plt.plot(close_idx.values, sma50, label='sma50')
    plt.plot(close_idx.values, sma100, label='sma100')
    plt.plot(close_idx.values, sma150, label='sma150')

    # Unlabelled marker lines are left out of the legend.
    for xc in signals[stock.symbol]['buy_pts']:
        plt.axvline(x=xc, color="green")
    for xc in signals[stock.symbol]['sell_pts']:
        plt.axvline(x=xc, color="red")

    plt.ylabel(stock.symbol + ' price')
    plt.legend()
    plt.show()