Step 1: Building a data pipeline with many indicators¶

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from odo import odo
import talib
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import AverageDollarVolume, SimpleMovingAverage, RSI, SimpleBeta, AverageDollarVolume, AnnualizedVolatility, ExponentialWeightedMovingAverage 
from quantopian.pipeline.filters import  StaticAssets
from quantopian.pipeline import  CustomFactor

class ATR(CustomFactor):
    """Average true range over the trailing window, one value per asset.

    The true range of a bar is the largest of: high - low,
    |high - previous close| and |low - previous close|.  The first row of
    the window has no previous close inside the window, so it is left out:
    with window_length = 14 the factor averages 13 true-range values.

    Assumes the input arrays are laid out as (row in window, asset), which
    is what the axis-0 slicing and reduction below rely on.
    """
    inputs = [USEquityPricing.close, USEquityPricing.high, USEquityPricing.low]
    window_length = 14

    def compute(self, today, assets, out, close, high, low):
        # Pair every row from the second onward with the close of the row
        # before it.
        previous_close = close[:-1]
        bar_high = high[1:]
        bar_low = low[1:]

        true_range = np.maximum(
            bar_high - bar_low,
            np.maximum(
                np.abs(bar_high - previous_close),
                np.abs(bar_low - previous_close),
            ),
        )
        out[:] = np.mean(true_range, axis=0)

class ADX_Dir_Ind(CustomFactor):
    """Latest ADX reading per asset, computed column by column with TA-Lib.

    ``true_length`` is the ADX period handed to ``talib.ADX``; the pipeline
    window is twice that length (presumably so TA-Lib has enough history to
    produce a value on the last row -- confirm against the TA-Lib lookback).
    Assets with any NaN close inside the window get NaN instead of an ADX.
    """
    inputs = [USEquityPricing.high, USEquityPricing.low, USEquityPricing.close]
    true_length = 14
    window_length = 2 * true_length

    def compute(self, today, assets, out, high, low, close):
        # One flag per column: True when that asset has a NaN close
        # somewhere in the window.
        close_has_nan = np.isnan(close).any(axis=0)
        for col_ix in range(len(close_has_nan)):
            if close_has_nan[col_ix]:
                out[col_ix] = np.nan
            else:
                adx_series = talib.ADX(
                    high[:, col_ix],
                    low[:, col_ix],
                    close[:, col_ix],
                    timeperiod=self.true_length,
                )
                # Only the most recent value of the series is reported.
                out[col_ix] = adx_series[-1]
# Fixed universe for the study: a StaticAssets filter built from a
# hand-picked list of 21 tickers (mostly DJIA members plus a few others).
# How to build static assets in research:
# https://www.quantopian.com/posts/how-to-get-static-assets-in-research
# The filter is used as the pipeline screen in make_pipeline().
my_assets = StaticAssets(symbols([
    'GDOT', 'CSCO', 'MSFT', 'BA', 'LMT', 'XOM', 'O', 'MMM', 'CAT', 'INTC',
    'CVX', 'HD', 'EXPE', 'UNH', 'UTX', 'WMT', 'DIS', 'TRV', 'PG', 'GE', 'MCD']
))

# how to do factors:
# https://www.quantopian.com/tutorials/pipeline#lesson4
# list of factors = https://www.quantopian.com/help#built-in-factors
def make_pipeline():
    """Build the pipeline of price, trend and volatility indicators.

    Columns produced (one row per date and asset in ``my_assets``):
    three simple moving averages of the close (50/100/150), their relative
    differences, ADX, ATR and ATR/close, the latest close, RSI, beta to
    SPY, one-day average dollar volume, two annualized-volatility columns,
    and the boolean ``buy_signal`` / ``sell_signal`` columns.

    Returns:
        Pipeline: screened by ``my_assets``.
    """
    close_prices = [USEquityPricing.close]
    sma50 = SimpleMovingAverage(inputs=close_prices, window_length=50)
    sma100 = SimpleMovingAverage(inputs=close_prices, window_length=100)
    sma150 = SimpleMovingAverage(inputs=close_prices, window_length=150)

    adx = ADX_Dir_Ind()
    atr = ATR()
    close = USEquityPricing.close.latest
    atr_ratio = atr / close
    rsi = RSI()

    # The moving averages must be stacked in order: 50 above 100 above 150
    # for a buy, the reverse for a sell.  The two comparisons are combined
    # with `&` on purpose: Python expands `a > b > c` to
    # `(a > b) and (b > c)`, and `and` on pipeline terms just returns its
    # second operand, which silently dropped the sma50 leg.
    buy_signal = (sma50 > sma100) & (sma100 > sma150)
    sell_signal = (sma50 < sma100) & (sma100 < sma150)

    sma_diff_1 = (sma50 - sma150) / sma150
    sma_diff_2 = (sma50 - sma100) / sma100

    ## (8554, 'SPY'), --> https://www.quantopian.com/posts/sid-slash-ticker-set
    spy_beta = SimpleBeta(target=symbols('SPY'), regression_length=150)
    avg_vol = AverageDollarVolume(window_length=1)
    volatility = AnnualizedVolatility()
    # NOTE(review): annualization_factor looks like it only rescales the
    # result rather than shortening the lookback; if a 150-day window was
    # intended, window_length is probably the argument to set -- confirm.
    volatility_short = AnnualizedVolatility(annualization_factor=150)

    return Pipeline(
        columns={
            'sma50': sma50,
            'sma100': sma100,
            'sma150': sma150,
            'sma_diff_1': sma_diff_1,
            'sma_diff_2': sma_diff_2,
            'spy_beta': spy_beta,
            'adx': adx,
            'atr': atr,
            'atr_ratio': atr_ratio,
            'close': close,
            'rsi': rsi,
            'buy_signal': buy_signal,
            'sell_signal': sell_signal,
            'avg_vol': avg_vol,
            'volatility': volatility,
            'volatility_short': volatility_short
        },
        screen=my_assets
    )


# Build the pipeline and run it over the study period.  The cells below
# treat `result` as a DataFrame with a two-level index: level 0 is the
# date, level 1 is the asset.
my_pipe = make_pipeline()
result = run_pipeline(my_pipe, '2011-01-01', '2017-01-01')
# Show the first few rows as the cell output.
result.head()

Next I do some sanity checks on the data¶

And I extract the symbols from the multi-index that I want.

Pandas dataframes with a MultiIndex are a little bit confusing; see the MultiIndex documentation: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.MultiIndex.html

# Level 0 of the MultiIndex holds the dates (one entry per result row).
result.index.get_level_values(0)
# Alternative kept for reference: the raw close values as an array.
#result[['close']].values

DatetimeIndex(['2011-01-03', '2011-01-03', '2011-01-03', '2011-01-03',
               '2011-01-03', '2011-01-03', '2011-01-03', '2011-01-03',
               '2011-01-03', '2011-01-03',
               ...
               '2017-01-03', '2017-01-03', '2017-01-03', '2017-01-03',
               '2017-01-03', '2017-01-03', '2017-01-03', '2017-01-03',
               '2017-01-03', '2017-01-03'],
              dtype='datetime64[ns, UTC]', length=31731, freq=None)

# Level 1 of the MultiIndex holds the assets.  Taking the first 21 entries
# yields one entry per asset -- this assumes all 21 assets in `my_assets`
# have a row on the first date (TODO confirm if the universe changes).
stock_ids = result.index.get_level_values(1).values[:21]
print(stock_ids[0].symbol, stock_ids[0].sid)
print(len(stock_ids), stock_ids)
# Scratch notes on other ways to pull values out of `result`:
#result[['close']].values
#result[['close']].at(1)
# Access a single result: the multi-index key as a tuple, then the column name.
#result.loc[('2016-12-15', 24)]['adx']
# Another way to do the same thing.
#result.xs(['2016-12-15', 24])

# https://www.somebits.com/~nelson/pandas-multiindex-slice-demo.html
# Selecting one column for a single asset: slice(None) on the first (date)
# level, and a one-element slice on the second (asset) level.
#result.loc[(slice(None), slice(24, 24)), :]['close']
# Now do this to just get the values.
# .cumsum() seems to affect how the series is plotted (unverified).
#print(result.loc[(slice(None), slice(5061, 5061)), :]['close'])

# Close series for sid 5061 and its dates (level 0 of the remaining index),
# shown as the cell output.
df = result.loc[(slice(None), slice(5061, 5061)), :]['close']
df.index.get_level_values(0)

(u'BA', 698)
(21, array([Equity(698 [BA]), Equity(1267 [CAT]), Equity(1900 [CSCO]),
       Equity(2190 [DIS]), Equity(3149 [GE]), Equity(3496 [HD]),
       Equity(3951 [INTC]), Equity(4707 [MCD]), Equity(4922 [MMM]),
       Equity(5061 [MSFT]), Equity(5938 [PG]), Equity(7041 [TRV]),
       Equity(7792 [UNH]), Equity(7883 [UTX]), Equity(8229 [WMT]),
       Equity(8347 [XOM]), Equity(12087 [O]), Equity(12691 [LMT]),
       Equity(23112 [CVX]), Equity(27543 [EXPE]), Equity(39932 [GDOT])], dtype=object))

DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',
               '2011-01-07', '2011-01-10', '2011-01-11', '2011-01-12',
               '2011-01-13', '2011-01-14',
               ...
               '2016-12-19', '2016-12-20', '2016-12-21', '2016-12-22',
               '2016-12-23', '2016-12-27', '2016-12-28', '2016-12-29',
               '2016-12-30', '2017-01-03'],
              dtype='datetime64[ns, UTC]', length=1511, freq=None)

Plotting each security to verify that we see trends.¶

# Draw every asset's close series on one chart; `legend` collects the
# symbols in plotting order so the labels line up with the lines.
legend = []
for stock in stock_ids:
    # All dates, single asset: one-element slice on the asset level.
    close_tbl = result.loc[(slice(None), slice(stock.sid, stock.sid)), :]['close']
    idx = close_tbl.index.get_level_values(0)
    plt.plot(idx.values, close_tbl)
    legend.append(stock.symbol)

plt.ylabel('Asset price')
plt.legend(legend)
plt.show()

Plotting some of these indicators just for sanity check¶

# For each asset, pull the ADX, ATR, RSI and short-volatility series and
# their dates.  The plotting calls are currently commented out, so this
# loop only exercises the selections.
for stock in stock_ids:
    adx_tbl = result.loc[(slice(None), slice(stock.sid, stock.sid)), :]['adx']
    adx_idx = adx_tbl.index.get_level_values(0)
    
    atr_tbl = result.loc[(slice(None), slice(stock.sid, stock.sid)), :]['atr']
    atr_idx = atr_tbl.index.get_level_values(0)
    
    rsi_tbl = result.loc[(slice(None), slice(stock.sid, stock.sid)), :]['rsi']
    rsi_idx = rsi_tbl.index.get_level_values(0)
    
    volatility_short = result.loc[(slice(None), slice(stock.sid, stock.sid)), :]['volatility_short']
    volatility_short_idx = volatility_short.index.get_level_values(0)
    
#     plt.plot(volatility_short_idx.values, volatility_short)
#     plt.legend(['volatility'])
#     plt.ylabel('volatility indicator value')
#     plt.show()
    
#     plt.plot(adx_idx.values, adx_tbl)
#     plt.legend(['adx'])
#     plt.ylabel('adx indicator value')
#     plt.show()
    
#     plt.plot(atr_idx.values, atr_tbl)
#     plt.legend(['atr'])
#     plt.ylabel('atr indicator value')
#     plt.show()
    
#     plt.plot(rsi_idx.values, rsi_tbl)
#     plt.legend(['rsi'])
#     plt.ylabel('rsi indicator value')
#     plt.show()

I record signals along with the indicator values at the given timestamp¶

def formatRow(row, signal):
    """Snapshot one result row as a trade record.

    Args:
        row: mapping-like row holding 'close' and the indicator columns.
        signal: label stored under 'signal' (the callers pass 'buy' or
            'sell').

    Returns:
        dict with the signal, the entry price (the row's close),
        'profitable' = None and 'profits' = 0.0 as placeholders, and a copy
        of every indicator value from the row.
    """
    indicator_columns = (
        'adx', 'atr', 'atr_ratio', 'avg_vol', 'volatility',
        'volatility_short', 'rsi', 'sma50', 'sma100', 'sma150',
        'spy_beta', 'sma_diff_1', 'sma_diff_2',
    )
    record = {
        'signal': signal,
        'price': row['close'],
        'profitable': None,
        'profits': 0.0,
    }
    for column in indicator_columns:
        record[column] = row[column]
    return record

def recordSignals(dataframe):
    """Collect the rows where a buy or sell signal switches on.

    Walks the rows in order and records a signal only on the row where it
    turns on (off on the previous row, on for this one).  The first element
    of the row's index tuple is used as the timestamp and as the key into
    the trade dictionary.

    Args:
        dataframe: rows for a single asset with 'buy_signal' and
            'sell_signal' columns plus the columns read by formatRow.

    Returns:
        dict with 'buy_pts' and 'sell_pts' (lists of timestamps) and
        'trade_dict' (timestamp -> record built by formatRow).
    """
    buy_pts = []
    sell_pts = []
    trade_dict = {}
    prev_buy = False
    prev_sell = False

    for index, row in dataframe.iterrows():
        buy_now = row['buy_signal']
        sell_now = row['sell_signal']
        # Buy is handled before sell, so if both fire on the same row the
        # sell record is the one left in trade_dict.  The explicit
        # == False / == True comparisons are kept as-is on purpose.
        for side, previous, current, points in (
                ('buy', prev_buy, buy_now, buy_pts),
                ('sell', prev_sell, sell_now, sell_pts)):
            if previous == False and current == True:
                points.append(index[0])
                trade_dict[index[0]] = formatRow(row, side)

        prev_buy = buy_now
        prev_sell = sell_now

    return {
        'buy_pts': buy_pts,
        'sell_pts': sell_pts,
        'trade_dict': trade_dict
    }

# Record the signal entry points for every asset, keyed by ticker symbol.
signals = {}
for stock in stock_ids:
    # All rows (every date, every column) for this one asset.
    df = result.loc[(slice(None), slice(stock.sid, stock.sid)), :]
    signals[stock.symbol] = recordSignals(df)

# Show which symbols were processed as the cell output.
signals.keys()

[u'WMT',
 u'PG',
 u'XOM',
 u'GDOT',
 u'BA',
 u'INTC',
 u'CVX',
 u'LMT',
 u'MCD',
 u'CSCO',
 u'O',
 u'CAT',
 u'MMM',
 u'GE',
 u'MSFT',
 u'EXPE',
 u'UNH',
 u'UTX',
 u'TRV',
 u'HD',
 u'DIS']

Next, I visually validate the trades based on symbols¶

I do this by plotting colored vertical lines (`plt.axvline`) over the charts: green for buy signals, red for sell signals.

# One chart per asset: the close and the three moving averages, with a
# green vertical line at every recorded buy and a red one at every sell.
for stock in stock_ids:
    close = result.loc[(slice(None), slice(stock.sid, stock.sid)), :]['close']
    close_idx = close.index.get_level_values(0)
    
    sma50 = result.loc[(slice(None), slice(stock.sid, stock.sid)), :]['sma50']
    sma50_idx = sma50.index.get_level_values(0)
    
    # sma100 is named in the legend and drives the signals, so it has to be
    # drawn as well; otherwise the sma150 line is labelled 'sma100'.
    sma100 = result.loc[(slice(None), slice(stock.sid, stock.sid)), :]['sma100']
    sma100_idx = sma100.index.get_level_values(0)
    
    sma150 = result.loc[(slice(None), slice(stock.sid, stock.sid)), :]['sma150']
    sma150_idx = sma150.index.get_level_values(0)
    
    # Plot order must match the legend labels below.
    plt.plot(close_idx.values, close)
    plt.plot(sma50_idx.values, sma50)
    plt.plot(sma100_idx.values, sma100)
    plt.plot(sma150_idx.values, sma150)
    for xc in signals[stock.symbol]['buy_pts']:
        plt.axvline(x=xc, color="green")
    for xc in signals[stock.symbol]['sell_pts']:
        plt.axvline(x=xc, color="red")
    plt.ylabel(stock.symbol + ' price')
    plt.legend([stock.symbol, 'sma50', 'sma100', 'sma150'])
    plt.show()

I calculate the profitability of the signaled trades for each security¶

It's OK, we are not necessarily going to be overall profitable. More importantly I just want to know what other indicators are doing when I enter these positions.

# now I need to compute the profit of this algo.
def tradeCalc(signal_list):
    """Replay the recorded signals as fixed-size trades and print the results.

    Signals are processed in key (timestamp) order.  Every signal both
    closes the position opened by the previous signal and opens a new one
    of ``trading_size`` dollars in the opposite direction:

    * buy followed by sell  -> long trade, profit when the price rose;
    * sell followed by buy  -> short trade, profit when the price fell.

    The first signal only opens a position, so it keeps 'profitable' = None
    and 'profits' = 0.0.  The same holds for a signal that repeats the
    previous direction.  Such records still count towards the win-rate
    denominator.

    Args:
        signal_list: dict with a 'trade_dict' entry mapping timestamp ->
            trade record (needs 'signal', 'price', 'profitable', 'profits').
            The records are updated in place with the computed
            'profitable' and 'profits' values.

    Returns:
        None.  One line per signal and a three-line summary are printed.
    """
    trading_size = 10000.0
    trade_dict = signal_list['trade_dict']

    last_signal = None
    last_price = 0
    total = 0
    wins = 0
    total_profit = 0
    # sorted() works on both Python 2 and 3, unlike keys().sort().
    for key in sorted(trade_dict):
        value = trade_dict[key]
        signal = value['signal']
        price = value['price']

        if last_signal == 'buy' and signal == 'sell':
            # Closing a long: shares bought with trading_size, sold at price.
            value['profitable'] = price > last_price
            last_qty = trading_size / last_price
            this_val = last_qty * price
            value['profits'] = this_val - trading_size
        elif last_signal == 'sell' and signal == 'buy':
            # Closing a short: the gain is the mirror image of the long case.
            value['profitable'] = price < last_price
            last_qty = trading_size / last_price
            this_val = last_qty * price
            value['profits'] = -1 * (this_val - trading_size)

        last_signal = signal
        last_price = price

        print('on %s %s at %s was good: %s ->profit %s' % (
            key, signal, price, value['profitable'], value['profits']))
        if value['profitable']:
            wins += 1
        total += 1
        total_profit += value['profits']

    if total == 0:
        # Nothing was traded; a win rate would divide by zero.
        print('no trades recorded')
        return

    print('winrate:  %s' % (float(wins) / float(total)))
    print('total profitability %s  trading size:  %s' % (total_profit, trading_size))
    print('total return 7 yrs: %% %s' % (((total_profit) / trading_size) * 100))
    
# Print the trade-by-trade results and summary for every symbol in `signals`.
for k, v in signals.items():
    print("Computing trades for " + k)
    tradeCalc(v)

Computing trades for WMT
on 2011-01-03 00:00:00+00:00 buy at 53.97 was good: None ->profit 0.0
on 2011-06-20 00:00:00+00:00 sell at 52.85 was good: False ->profit -207.522697795
on 2011-08-23 00:00:00+00:00 buy at 52.2 was good: True ->profit 122.989593188
on 2011-10-07 00:00:00+00:00 sell at 52.74 was good: True ->profit 103.448275862
on 2011-11-22 00:00:00+00:00 buy at 56.66 was good: False ->profit -743.268866136
on 2013-01-31 00:00:00+00:00 sell at 69.74 was good: True ->profit 2308.50688316
on 2013-05-08 00:00:00+00:00 buy at 78.36 was good: False ->profit -1236.019501
on 2013-10-07 00:00:00+00:00 sell at 72.79 was good: False ->profit -710.821847882
on 2013-11-27 00:00:00+00:00 buy at 80.68 was good: False ->profit -1083.94010166
on 2014-05-05 00:00:00+00:00 sell at 79.18 was good: False ->profit -185.919682697
on 2014-07-09 00:00:00+00:00 buy at 76.67 was good: True ->profit 316.999242233
on 2014-10-01 00:00:00+00:00 sell at 76.49 was good: False ->profit -23.47724012
on 2014-11-21 00:00:00+00:00 buy at 84.58 was good: False ->profit -1057.65459537
on 2015-06-02 00:00:00+00:00 sell at 74.71 was good: False ->profit -1166.94253961
on 2016-03-28 00:00:00+00:00 buy at 68.01 was good: True ->profit 896.800963726
on 2016-12-19 00:00:00+00:00 sell at 70.94 was good: True ->profit 430.818997206
winrate:  0.375
total profitability -2236.0031169  trading size:  10000.0
total return 7 yrs: % -22.360031169
Computing trades for PG
on 2011-01-03 00:00:00+00:00 buy at 64.36 was good: None ->profit 0.0
on 2011-07-12 00:00:00+00:00 sell at 64.7 was good: True ->profit 52.827843381
on 2011-07-26 00:00:00+00:00 buy at 63.31 was good: True ->profit 214.837712519
on 2011-10-19 00:00:00+00:00 sell at 64.395 was good: True ->profit 171.378929079
on 2011-12-29 00:00:00+00:00 buy at 66.51 was good: False ->profit -328.441649196
on 2012-07-12 00:00:00+00:00 sell at 61.4 was good: False ->profit -768.305517967
on 2012-10-15 00:00:00+00:00 buy at 67.95 was good: False ->profit -1066.7752443
on 2014-04-29 00:00:00+00:00 sell at 82.95 was good: True ->profit 2207.50551876
on 2014-07-07 00:00:00+00:00 buy at 79.93 was good: True ->profit 364.074743822
on 2015-05-05 00:00:00+00:00 sell at 80.34 was good: True ->profit 51.2948830226
on 2016-02-03 00:00:00+00:00 buy at 80.23 was good: True ->profit 13.6918098083
winrate:  0.636363636364
total profitability 912.089028933  trading size:  10000.0
total return 7 yrs: % 9.12089028933
Computing trades for XOM
on 2011-01-03 00:00:00+00:00 buy at 73.18 was good: None ->profit 0.0
on 2011-08-16 00:00:00+00:00 sell at 74.29 was good: True ->profit 151.6807871
on 2012-01-19 00:00:00+00:00 buy at 86.42 was good: False ->profit -1632.79041594
on 2012-07-19 00:00:00+00:00 sell at 86.21 was good: False ->profit -24.2999305716
on 2012-09-17 00:00:00+00:00 buy at 92.27 was good: False ->profit -702.934694351
on 2013-03-14 00:00:00+00:00 sell at 89.27 was good: False ->profit -325.132762545
on 2013-05-10 00:00:00+00:00 buy at 91.03 was good: False ->profit -197.154699227
on 2013-11-29 00:00:00+00:00 sell at 93.8 was good: True ->profit 304.295287268
on 2014-01-06 00:00:00+00:00 buy at 99.48 was good: False ->profit -605.543710021
on 2014-10-24 00:00:00+00:00 sell at 94.11 was good: False ->profit -539.806996381
on 2016-02-01 00:00:00+00:00 buy at 77.58 was good: True ->profit 1756.45521199
on 2016-11-14 00:00:00+00:00 sell at 85.64 was good: True ->profit 1038.92755865
winrate:  0.333333333333
total profitability -776.304364031  trading size:  10000.0
total return 7 yrs: % -7.76304364031
Computing trades for GDOT
on 2011-01-03 00:00:00+00:00 buy at 56.74 was good: None ->profit 0.0
on 2011-05-02 00:00:00+00:00 sell at 43.18 was good: False ->profit -2389.84843144
on 2012-01-13 00:00:00+00:00 buy at 29.76 was good: True ->profit 3107.92033349
on 2012-02-10 00:00:00+00:00 sell at 31.5 was good: True ->profit 584.677419355
on 2013-02-28 00:00:00+00:00 buy at 14.0 was good: True ->profit 5555.55555556
on 2014-02-19 00:00:00+00:00 sell at 20.31 was good: True ->profit 4507.14285714
on 2014-10-10 00:00:00+00:00 buy at 21.01 was good: False ->profit -344.657804037
on 2015-03-16 00:00:00+00:00 sell at 16.34 was good: False ->profit -2222.75107092
on 2015-08-11 00:00:00+00:00 buy at 18.45 was good: False ->profit -1291.30966952
on 2015-12-23 00:00:00+00:00 sell at 16.6 was good: False ->profit -1002.7100271
on 2016-03-17 00:00:00+00:00 buy at 22.43 was good: False ->profit -3512.04819277
winrate:  0.363636363636
total profitability 2991.97096975  trading size:  10000.0
total return 7 yrs: % 29.9197096975
Computing trades for BA
on 2011-01-03 00:00:00+00:00 buy at 65.269 was good: None ->profit 0.0
on 2011-09-08 00:00:00+00:00 sell at 64.92 was good: False ->profit -53.4710199329
on 2012-01-26 00:00:00+00:00 buy at 75.8 was good: False ->profit -1675.90881084
on 2012-07-12 00:00:00+00:00 sell at 71.54 was good: False ->profit -562.005277045
on 2012-11-08 00:00:00+00:00 buy at 70.11 was good: True ->profit 199.888174448
on 2014-05-16 00:00:00+00:00 sell at 131.21 was good: True ->profit 8714.87662245
on 2014-08-19 00:00:00+00:00 buy at 124.98 was good: True ->profit 474.811371085
on 2014-10-21 00:00:00+00:00 sell at 124.36 was good: False ->profit -49.60793727
on 2015-01-07 00:00:00+00:00 buy at 127.51 was good: False ->profit -253.296880026
on 2015-08-14 00:00:00+00:00 sell at 144.84 was good: True ->profit 1359.10908948
on 2016-02-24 00:00:00+00:00 buy at 116.89 was good: True ->profit 1929.71554819
on 2016-03-18 00:00:00+00:00 sell at 130.699 was good: True ->profit 1181.36709727
on 2016-07-15 00:00:00+00:00 buy at 131.58 was good: False ->profit -67.4067896464
winrate:  0.461538461538
total profitability 11198.0711882  trading size:  10000.0
total return 7 yrs: % 111.980711882
Computing trades for INTC
on 2011-01-03 00:00:00+00:00 sell at 21.05 was good: None ->profit 0.0
on 2011-01-20 00:00:00+00:00 buy at 21.01 was good: True ->profit 19.0023752969
on 2012-08-21 00:00:00+00:00 sell at 26.24 was good: True ->profit 2489.2908139
on 2013-04-29 00:00:00+00:00 buy at 23.4 was good: True ->profit 1082.31707317
on 2013-11-05 00:00:00+00:00 sell at 24.03 was good: True ->profit 269.230769231
on 2013-12-31 00:00:00+00:00 buy at 25.84 was good: False ->profit -753.225135248
on 2015-05-05 00:00:00+00:00 sell at 33.19 was good: True ->profit 2844.42724458
on 2015-12-15 00:00:00+00:00 buy at 34.47 was good: False ->profit -385.658330823
on 2016-04-20 00:00:00+00:00 sell at 31.61 was good: False ->profit -829.706991587
on 2016-07-15 00:00:00+00:00 buy at 35.21 was good: False ->profit -1138.88010123
winrate:  0.5
total profitability 3596.79771729  trading size:  10000.0
total return 7 yrs: % 35.9679771729
Computing trades for CVX
on 2011-01-03 00:00:00+00:00 buy at 91.35 was good: None ->profit 0.0
on 2011-09-09 00:00:00+00:00 sell at 98.4 was good: True ->profit 771.756978654
on 2012-01-12 00:00:00+00:00 buy at 107.75 was good: False ->profit -950.203252033
on 2012-07-10 00:00:00+00:00 sell at 104.45 was good: False ->profit -306.26450116
on 2012-09-13 00:00:00+00:00 buy at 114.48 was good: False ->profit -960.268070847
on 2013-02-22 00:00:00+00:00 sell at 114.99 was good: True ->profit 44.5492662474
on 2013-04-08 00:00:00+00:00 buy at 117.52 was good: False ->profit -220.019132098
on 2013-12-17 00:00:00+00:00 sell at 120.21 was good: True ->profit 228.897208986
on 2014-01-03 00:00:00+00:00 buy at 124.14 was good: False ->profit -326.927876217
on 2014-02-03 00:00:00+00:00 sell at 111.62 was good: False ->profit -1008.53874658
on 2014-06-26 00:00:00+00:00 buy at 131.23 was good: False ->profit -1756.85361046
on 2014-11-17 00:00:00+00:00 sell at 116.34 was good: False ->profit -1134.64908939
on 2016-01-29 00:00:00+00:00 buy at 85.91 was good: True ->profit 2615.60942066
winrate:  0.307692307692
total profitability -3002.91140423  trading size:  10000.0
total return 7 yrs: % -30.0291140423
Computing trades for LMT
on 2011-01-03 00:00:00+00:00 sell at 69.93 was good: None ->profit 0.0
on 2011-02-10 00:00:00+00:00 buy at 82.25 was good: False ->profit -1761.76176176
on 2011-08-15 00:00:00+00:00 sell at 69.13 was good: False ->profit -1595.13677812
on 2012-01-13 00:00:00+00:00 buy at 81.69 was good: False ->profit -1816.86677275
on 2012-09-06 00:00:00+00:00 sell at 91.53 was good: True ->profit 1204.55380095
on 2012-10-05 00:00:00+00:00 buy at 94.51 was good: False ->profit -325.576313777
on 2013-03-19 00:00:00+00:00 sell at 92.36 was good: False ->profit -227.489154587
on 2013-04-10 00:00:00+00:00 buy at 95.42 was good: False ->profit -331.312256388
on 2015-08-21 00:00:00+00:00 sell at 209.21 was good: True ->profit 11925.1729197
on 2015-09-23 00:00:00+00:00 buy at 203.57 was good: True ->profit 269.585583863
winrate:  0.3
total profitability 7341.16926717  trading size:  10000.0
total return 7 yrs: % 73.4116926717
Computing trades for MCD
on 2011-01-03 00:00:00+00:00 buy at 76.76 was good: None ->profit 0.0
on 2011-04-14 00:00:00+00:00 sell at 76.89 was good: True ->profit 16.9359041167
on 2011-06-09 00:00:00+00:00 buy at 81.14 was good: False ->profit -552.737677201
on 2012-06-13 00:00:00+00:00 sell at 87.49 was good: True ->profit 782.597978802
on 2012-12-07 00:00:00+00:00 buy at 88.09 was good: False ->profit -68.5792662019
on 2013-09-25 00:00:00+00:00 sell at 97.81 was good: True ->profit 1103.41695993
on 2014-03-05 00:00:00+00:00 buy at 94.97 was good: True ->profit 290.358859012
on 2014-10-03 00:00:00+00:00 sell at 94.12 was good: False ->profit -89.5019479836
on 2015-02-20 00:00:00+00:00 buy at 94.17 was good: False ->profit -5.31236719082
on 2016-09-15 00:00:00+00:00 sell at 115.16 was good: True ->profit 2228.94764787
winrate:  0.5
total profitability 3706.12609115  trading size:  10000.0
total return 7 yrs: % 37.0612609115
Computing trades for CSCO
on 2011-01-03 00:00:00+00:00 sell at 20.23 was good: None ->profit 0.0
on 2011-11-17 00:00:00+00:00 buy at 18.8 was good: True ->profit 706.870983688
on 2012-07-03 00:00:00+00:00 sell at 17.07 was good: False ->profit -920.212765957
on 2012-11-15 00:00:00+00:00 buy at 17.66 was good: False ->profit -345.635618043
on 2013-12-06 00:00:00+00:00 sell at 20.91 was good: True ->profit 1840.31710079
on 2014-05-07 00:00:00+00:00 buy at 22.72 was good: False ->profit -865.614538498
on 2015-09-14 00:00:00+00:00 sell at 26.02 was good: True ->profit 1452.46478873
on 2016-02-29 00:00:00+00:00 buy at 26.4 was good: False ->profit -146.041506533
on 2016-03-17 00:00:00+00:00 sell at 27.88 was good: True ->profit 560.606060606
on 2016-06-09 00:00:00+00:00 buy at 29.14 was good: False ->profit -451.93687231
winrate:  0.4
total profitability 1830.81763248  trading size:  10000.0
total return 7 yrs: % 18.3081763248
Computing trades for O
on 2011-01-03 00:00:00+00:00 buy at 34.2 was good: None ->profit 0.0
on 2011-08-11 00:00:00+00:00 sell at 30.76 was good: False ->profit -1005.84795322
on 2011-12-28 00:00:00+00:00 buy at 35.4145 was good: False ->profit -1513.16644993
on 2013-01-25 00:00:00+00:00 sell at 43.71 was good: True ->profit 2342.40212342
on 2013-02-22 00:00:00+00:00 buy at 44.31 was good: False ->profit -137.268359643
on 2013-09-16 00:00:00+00:00 sell at 39.21 was good: False ->profit -1150.9817197
on 2014-03-05 00:00:00+00:00 buy at 44.33 was good: False ->profit -1305.78933945
on 2015-06-30 00:00:00+00:00 sell at 44.39 was good: True ->profit 13.5348522445
on 2015-11-04 00:00:00+00:00 buy at 50.13 was good: False ->profit -1293.08402793
on 2016-12-06 00:00:00+00:00 sell at 54.71 was good: True ->profit 913.624576102
winrate:  0.3
total profitability -3136.57629811  trading size:  10000.0
total return 7 yrs: % -31.3657629811
Computing trades for CAT
on 2011-01-03 00:00:00+00:00 buy at 93.69 was good: None ->profit 0.0
on 2011-08-29 00:00:00+00:00 sell at 85.1 was good: False ->profit -916.853452877
on 2012-02-13 00:00:00+00:00 buy at 111.72 was good: False ->profit -3128.08460635
on 2012-07-10 00:00:00+00:00 sell at 83.14 was good: False ->profit -2558.1811672
on 2012-12-14 00:00:00+00:00 buy at 88.38 was good: False ->profit -630.262208323
on 2013-06-19 00:00:00+00:00 sell at 84.56 was good: False ->profit -432.224485178
on 2013-11-26 00:00:00+00:00 buy at 84.4 was good: True ->profit 18.9214758751
on 2013-12-04 00:00:00+00:00 sell at 83.86 was good: False ->profit -63.981042654
on 2013-12-11 00:00:00+00:00 buy at 86.43 was good: False ->profit -306.463152874
on 2014-11-07 00:00:00+00:00 sell at 100.89 was good: True ->profit 1673.03019785
on 2015-08-03 00:00:00+00:00 buy at 78.66 was good: True ->profit 2203.38983051
on 2015-09-25 00:00:00+00:00 sell at 65.79 was good: False ->profit -1636.15560641
on 2016-05-09 00:00:00+00:00 buy at 73.37 was good: False ->profit -1152.15078279
winrate:  0.230769230769
total profitability -6929.01500042  trading size:  10000.0
total return 7 yrs: % -69.2901500042
Computing trades for MMM
on 2011-01-03 00:00:00+00:00 buy at 86.35 was good: None ->profit 0.0
on 2011-09-06 00:00:00+00:00 sell at 79.36 was good: False ->profit -809.496236248
on 2012-02-23 00:00:00+00:00 buy at 87.77 was good: False ->profit -1059.72782258
on 2015-07-13 00:00:00+00:00 sell at 154.95 was good: True ->profit 7654.09593255
on 2016-01-28 00:00:00+00:00 buy at 145.52 was good: True ->profit 608.583414005
winrate:  0.4
total profitability 6393.45528773  trading size:  10000.0
total return 7 yrs: % 63.9345528773
Computing trades for GE
on 2011-01-03 00:00:00+00:00 buy at 18.31 was good: None ->profit 0.0
on 2011-07-26 00:00:00+00:00 sell at 18.96 was good: True ->profit 354.997269252
on 2012-02-06 00:00:00+00:00 buy at 19.02 was good: False ->profit -31.6455696203
on 2014-05-19 00:00:00+00:00 sell at 26.66 was good: True ->profit 4016.82439537
on 2014-07-09 00:00:00+00:00 buy at 26.37 was good: True ->profit 108.777194299
on 2014-10-21 00:00:00+00:00 sell at 25.03 was good: False ->profit -508.153204399
on 2015-03-17 00:00:00+00:00 buy at 25.45 was good: False ->profit -167.79864163
on 2015-04-08 00:00:00+00:00 sell at 25.01 was good: False ->profit -172.888015717
on 2015-05-11 00:00:00+00:00 buy at 27.35 was good: False ->profit -935.6257497
on 2015-10-15 00:00:00+00:00 sell at 27.59 was good: True ->profit 87.7513711152
on 2015-11-27 00:00:00+00:00 buy at 30.37 was good: False ->profit -1007.61145343
on 2016-05-27 00:00:00+00:00 sell at 30.01 was good: False ->profit -118.538030952
on 2016-06-07 00:00:00+00:00 buy at 30.13 was good: False ->profit -39.9866711096
on 2016-12-09 00:00:00+00:00 sell at 31.51 was good: True ->profit 458.015267176
winrate:  0.357142857143
total profitability 2044.11816066  trading size:  10000.0
total return 7 yrs: % 20.4411816066
Computing trades for MSFT
on 2011-01-03 00:00:00+00:00 buy at 27.91 was good: None ->profit 0.0
on 2011-05-25 00:00:00+00:00 sell at 24.15 was good: False ->profit -1347.18738803
on 2011-09-28 00:00:00+00:00 buy at 25.67 was good: False ->profit -629.399585921
on 2012-08-14 00:00:00+00:00 sell at 30.2 was good: True ->profit 1764.70588235
on 2013-05-03 00:00:00+00:00 buy at 33.16 was good: False ->profit -980.132450331
on 2013-12-04 00:00:00+00:00 sell at 38.31 was good: True ->profit 1553.07599517
on 2013-12-12 00:00:00+00:00 buy at 37.6 was good: True ->profit 185.330200992
on 2015-03-31 00:00:00+00:00 sell at 40.94 was good: True ->profit 888.29787234
on 2015-07-20 00:00:00+00:00 buy at 46.61 was good: False ->profit -1384.95359062
on 2016-05-18 00:00:00+00:00 sell at 50.51 was good: True ->profit 836.730315383
on 2016-07-21 00:00:00+00:00 buy at 55.94 was good: False ->profit -1075.0346466
winrate:  0.454545454545
total profitability -188.567395267  trading size:  10000.0
total return 7 yrs: % -1.88567395267
Computing trades for EXPE
on 2011-01-03 00:00:00+00:00 buy at 25.1 was good: None ->profit 0.0
on 2011-03-08 00:00:00+00:00 sell at 21.01 was good: False ->profit -1629.48207171
on 2011-07-19 00:00:00+00:00 buy at 29.74 was good: False ->profit -4155.16420752
on 2011-12-16 00:00:00+00:00 sell at 27.59 was good: False ->profit -722.932078009
on 2012-02-09 00:00:00+00:00 buy at 33.66 was good: False ->profit -2200.07249003
on 2013-06-26 00:00:00+00:00 sell at 57.12 was good: True ->profit 6969.6969697
on 2014-01-13 00:00:00+00:00 buy at 69.6 was good: False ->profit -2184.87394958
on 2016-03-03 00:00:00+00:00 sell at 106.69 was good: True ->profit 5329.02298851
on 2016-07-25 00:00:00+00:00 buy at 117.2 was good: False ->profit -985.097010029
winrate:  0.222222222222
total profitability 421.098151318  trading size:  10000.0
total return 7 yrs: % 4.21098151318
Computing trades for UNH
on 2011-01-03 00:00:00+00:00 buy at 36.12 was good: None ->profit 0.0
on 2011-11-09 00:00:00+00:00 sell at 46.18 was good: True ->profit 2785.16057586
on 2012-01-31 00:00:00+00:00 buy at 51.08 was good: False ->profit -1061.06539628
on 2012-09-11 00:00:00+00:00 sell at 53.76 was good: True ->profit 524.667188724
on 2013-01-24 00:00:00+00:00 buy at 55.91 was good: False ->profit -399.925595238
on 2013-03-15 00:00:00+00:00 sell at 54.74 was good: False ->profit -209.264890002
on 2013-04-12 00:00:00+00:00 buy at 62.98 was good: False ->profit -1505.29777128
on 2014-02-20 00:00:00+00:00 sell at 73.55 was good: True ->profit 1678.31057479
on 2014-03-04 00:00:00+00:00 buy at 77.11 was good: False ->profit -484.024473148
on 2015-12-03 00:00:00+00:00 sell at 117.68 was good: True ->profit 5261.31500454
on 2016-04-05 00:00:00+00:00 buy at 129.495 was good: False ->profit -1003.99388171
winrate:  0.363636363636
total profitability 5585.88133625  trading size:  10000.0
total return 7 yrs: % 55.8588133625
Computing trades for UTX
on 2011-01-03 00:00:00+00:00 buy at 78.73 was good: None ->profit 0.0
on 2011-09-09 00:00:00+00:00 sell at 72.62 was good: False ->profit -776.070113045
on 2012-02-16 00:00:00+00:00 buy at 82.6 was good: False ->profit -1374.27705866
on 2012-07-25 00:00:00+00:00 sell at 71.95 was good: False ->profit -1289.34624697
on 2012-11-06 00:00:00+00:00 buy at 77.91 was good: False ->profit -828.353022933
on 2014-08-28 00:00:00+00:00 sell at 109.08 was good: True ->profit 4000.77011937
on 2015-01-29 00:00:00+00:00 buy at 116.97 was good: False ->profit -723.322332233
on 2015-07-21 00:00:00+00:00 sell at 110.48 was good: False ->profit -554.843122168
on 2016-02-29 00:00:00+00:00 buy at 97.69 was good: True ->profit 1157.67559739
on 2016-04-28 00:00:00+00:00 sell at 105.95 was good: True ->profit 845.531784215
on 2016-06-01 00:00:00+00:00 buy at 100.61 was good: True ->profit 504.011326097
winrate:  0.363636363636
total profitability 961.776931061  trading size:  10000.0
total return 7 yrs: % 9.61776931061
Computing trades for TRV
on 2011-01-03 00:00:00+00:00 buy at 55.71 was good: None ->profit 0.0
on 2011-08-29 00:00:00+00:00 sell at 48.29 was good: False ->profit -1331.89732544
on 2012-01-23 00:00:00+00:00 buy at 61.58 was good: False ->profit -2752.12259267
on 2013-09-26 00:00:00+00:00 sell at 86.01 was good: True ->profit 3967.19714193
on 2013-11-15 00:00:00+00:00 buy at 88.24 was good: False ->profit -259.272177654
on 2014-04-21 00:00:00+00:00 sell at 86.68 was good: False ->profit -176.79057117
on 2014-06-23 00:00:00+00:00 buy at 94.6 was good: False ->profit -913.705583756
on 2015-06-30 00:00:00+00:00 sell at 96.17 was good: True ->profit 165.961945032
on 2015-10-30 00:00:00+00:00 buy at 114.3 was good: False ->profit -1885.20328585
on 2016-05-12 00:00:00+00:00 sell at 111.99 was good: False ->profit -202.099737533
on 2016-06-13 00:00:00+00:00 buy at 113.83 was good: False ->profit -164.300383963
on 2016-12-30 00:00:00+00:00 sell at 122.3 was good: True ->profit 744.092067118
winrate:  0.25
total profitability -2808.14050395  trading size:  10000.0
total return 7 yrs: % -28.0814050395
Computing trades for HD
on 2011-01-03 00:00:00+00:00 buy at 35.06 was good: None ->profit 0.0
on 2011-08-05 00:00:00+00:00 sell at 31.72 was good: False ->profit -952.65259555
on 2011-12-22 00:00:00+00:00 buy at 42.02 was good: False ->profit -3247.16267339
on 2013-12-09 00:00:00+00:00 sell at 79.84 was good: True ->profit 9000.47596383
on 2014-01-15 00:00:00+00:00 buy at 81.02 was good: False ->profit -147.795591182
on 2014-06-06 00:00:00+00:00 sell at 80.37 was good: False ->profit -80.2271044187
on 2014-07-03 00:00:00+00:00 buy at 81.97 was good: False ->profit -199.07925843
on 2016-05-27 00:00:00+00:00 sell at 134.055 was good: True ->profit 6354.15395877
on 2016-06-08 00:00:00+00:00 buy at 129.949 was good: True ->profit 306.292193503
on 2016-10-06 00:00:00+00:00 sell at 127.57 was good: False ->profit -183.071820483
on 2016-11-28 00:00:00+00:00 buy at 131.57 was good: False ->profit -313.553343263
on 2016-12-01 00:00:00+00:00 sell at 129.46 was good: False ->profit -160.370905222
winrate:  0.25
total profitability 10377.0088242  trading size:  10000.0
total return 7 yrs: % 103.770088242
Computing trades for DIS
on 2011-01-03 00:00:00+00:00 buy at 37.53 was good: None ->profit 0.0
on 2011-08-02 00:00:00+00:00 sell at 38.43 was good: True ->profit 239.808153477
on 2012-02-07 00:00:00+00:00 buy at 40.43 was good: False ->profit -520.426749935
on 2015-11-18 00:00:00+00:00 sell at 116.11 was good: True ->profit 18718.7731882
on 2016-02-29 00:00:00+00:00 buy at 95.31 was good: True ->profit 1791.40470244
on 2016-03-28 00:00:00+00:00 sell at 97.229 was good: True ->profit 201.342986046
on 2016-07-18 00:00:00+00:00 buy at 99.79 was good: False ->profit -263.398780199
on 2016-09-20 00:00:00+00:00 sell at 92.65 was good: False ->profit -715.502555366
winrate:  0.5
total profitability 19452.0009447  trading size:  10000.0
total return 7 yrs: % 194.520009447

Next, I am plotting regressions against trade profits vs indicator values.¶

Unfortunately, I don't see many obviously linear relationships between indicator values and magnitude of loss or profit!

If I see an rsquared that is high, I plot it. Otherwise I just print the rsquared.

Interestingly it seems that there is some negative correlation between volatility and profitable outcomes. I might have predicted the opposite.

from scipy.stats import linregress

def plotWithRegression(indicator, symbol, x, y, min_rsquared=0.4):
    """Regress trade profit on an indicator value and plot strong fits.

    A simple linear regression of ``y`` on ``x`` is computed and its
    r-value and r-squared are always printed.  Only when r-squared is
    greater than ``min_rsquared`` are the points and the fitted line drawn.

    Parameters
    ----------
    indicator : str
        Label of the indicator; used in the printout and the plot title.
    symbol : str
        Ticker the trades belong to; used in the printout and the title.
    x : sequence of numbers
        Indicator values, one per trade.
    y : sequence of numbers
        Profit of each trade, same length as ``x``.
    min_rsquared : float, optional
        r-squared must exceed this value for the plot to be drawn
        (default 0.4).

    Returns
    -------
    float
        The r-squared of the regression (NaN if linregress yields NaN).
    """
    slope, intercept, r_value, _p_value, _std_err = linregress(x, y)
    rsquared = r_value**2
    print('doing regression on ', symbol, indicator)
    print('rvalue, r-squared results: ', r_value, str(rsquared))
    if rsquared > min_rsquared:
        # linregress already produced the least-squares line, so reuse its
        # slope/intercept instead of fitting the same line again.
        fitted = slope * np.asarray(x, dtype=float) + intercept
        plt.title(indicator + ' at entry of trade vs profit on trade for ' + symbol)
        plt.plot(x, y, 'yo', x, fitted, '--k')
        plt.show()
    return rsquared

def plotIndicators(symbol, signal_list):
    """Run plotWithRegression for every indicator recorded on a symbol's trades.

    Parameters
    ----------
    symbol : str
        Ticker the trades belong to; passed through to plotWithRegression.
    signal_list : mapping
        Must contain a ``'trade_dict'`` entry mapping a sortable key
        (presumably the trade timestamp -- confirm against the code that
        builds it) to a dict holding ``'profits'`` plus one value per
        indicator key listed below.

    Raises
    ------
    KeyError
        If ``'trade_dict'`` or any per-trade key is missing.

    NOTE(review): every trade is used here, including the first one per
    symbol, whereas the feature-building cell skips the first row.
    """
    trade_dict = signal_list['trade_dict']
    # sorted() works for Python 2 lists and Python 3 dict views alike;
    # dict.keys() followed by .sort() only works on Python 2.
    trades = [trade_dict[key] for key in sorted(trade_dict)]

    # (label shown in the printout/plot, key in each trade record), in the
    # order the regressions are reported.
    indicators = (
        ('ATR', 'atr'),
        ('RSI', 'rsi'),
        ('ADX', 'adx'),
        ('SMA Difference 1', 'sma_diff_1'),
        ('SMA Difference 2', 'sma_diff_2'),
        ('SPY Beta', 'spy_beta'),
        ('ATR Ratio', 'atr_ratio'),
        ('Avg Vol', 'avg_vol'),
        ('Volatility', 'volatility'),
        ('Volatility Shorter Duration', 'volatility_short'),
    )

    y_profit = [trade['profits'] for trade in trades]
    # Extract every series before regressing so that a missing key fails
    # up front rather than after some plots were already produced.
    series = [
        (label, [trade[field] for trade in trades])
        for label, field in indicators
    ]
    for label, x_values in series:
        plotWithRegression(label, symbol, x_values, y_profit)

    
# Report the indicator regressions for each symbol held in `signals`.
for ticker, ticker_signals in signals.items():
    print("plotting indicators for %s" % (ticker,))
    plotIndicators(ticker, ticker_signals)

plotting indicators for WMT
('doing regression on ', u'WMT', 'ATR')
('rvalue, r-squared results: ', -0.25183612363745017, '0.0634214331687')
('doing regression on ', u'WMT', 'RSI')
('rvalue, r-squared results: ', 0.067699574272361313, '0.00458323235666')
('doing regression on ', u'WMT', 'ADX')
('rvalue, r-squared results: ', -0.57446647220786418, '0.330011727691')
('doing regression on ', u'WMT', 'SMA Difference 1')
('rvalue, r-squared results: ', -0.22810989825091438, '0.05203412568')
('doing regression on ', u'WMT', 'SMA Difference 2')
('rvalue, r-squared results: ', -0.24478574463070038, '0.0599200607744')
('doing regression on ', u'WMT', 'SPY Beta')
('rvalue, r-squared results: ', -0.26749215448555613, '0.0715520527113')
('doing regression on ', u'WMT', 'ATR Ratio')
('rvalue, r-squared results: ', -0.060739963543928348, '0.00368934317132')
('doing regression on ', u'WMT', 'Avg Vol')
('rvalue, r-squared results: ', 0.050565472652617155, '0.00255686702458')
('doing regression on ', u'WMT', 'Volatility')
('rvalue, r-squared results: ', 0.34913124527449751, '0.121892626427')
('doing regression on ', u'WMT', 'Volatility Shorter Duration')
('rvalue, r-squared results: ', 0.34913124527449757, '0.121892626427')
plotting indicators for PG
('doing regression on ', u'PG', 'ATR')
('rvalue, r-squared results: ', 0.18665984966424151, '0.0348418994767')
('doing regression on ', u'PG', 'RSI')
('rvalue, r-squared results: ', 0.49285125636522403, '0.242902360901')
('doing regression on ', u'PG', 'ADX')
('rvalue, r-squared results: ', 0.4724263453253037, '0.223186651757')
('doing regression on ', u'PG', 'SMA Difference 1')
('rvalue, r-squared results: ', -0.16673185051791994, '0.0277995099771')
('doing regression on ', u'PG', 'SMA Difference 2')
('rvalue, r-squared results: ', -0.16747283662850018, '0.0280471510084')
('doing regression on ', u'PG', 'SPY Beta')
('rvalue, r-squared results: ', 0.18750941729463702, '0.0351597815742')
('doing regression on ', u'PG', 'ATR Ratio')
('rvalue, r-squared results: ', 0.010094962954421464, '0.000101908277051')
('doing regression on ', u'PG', 'Avg Vol')
('rvalue, r-squared results: ', 0.60463211129711569, '0.365579990012')
('doing regression on ', u'PG', 'Volatility')
('rvalue, r-squared results: ', -0.063726305747107512, '0.00406104204417')
('doing regression on ', u'PG', 'Volatility Shorter Duration')
('rvalue, r-squared results: ', -0.063726305747107234, '0.00406104204417')
plotting indicators for XOM
('doing regression on ', u'XOM', 'ATR')
('rvalue, r-squared results: ', 0.45741751481140769, '0.209230782856')
('doing regression on ', u'XOM', 'RSI')
('rvalue, r-squared results: ', -0.26384578627468142, '0.0696145989349')
('doing regression on ', u'XOM', 'ADX')
('rvalue, r-squared results: ', -0.081647763519759803, '0.00666635728778')
('doing regression on ', u'XOM', 'SMA Difference 1')
('rvalue, r-squared results: ', -0.33825205719489548, '0.114414454197')
('doing regression on ', u'XOM', 'SMA Difference 2')
('rvalue, r-squared results: ', -0.42579954178290019, '0.181305249783')
('doing regression on ', u'XOM', 'SPY Beta')
('rvalue, r-squared results: ', 0.29737653260122865, '0.0884328021419')
('doing regression on ', u'XOM', 'ATR Ratio')
('rvalue, r-squared results: ', 0.49694393174052665, '0.246953271294')
('doing regression on ', u'XOM', 'Avg Vol')
('rvalue, r-squared results: ', 0.10869107172598103, '0.0118137490729')
('doing regression on ', u'XOM', 'Volatility')
('rvalue, r-squared results: ', 0.14161783133154424, '0.020055610151')
('doing regression on ', u'XOM', 'Volatility Shorter Duration')
('rvalue, r-squared results: ', 0.1416178313315441, '0.020055610151')
plotting indicators for GDOT
('doing regression on ', u'GDOT', 'ATR')
('rvalue, r-squared results: ', 0.094731084039506003, '0.0089739782833')
('doing regression on ', u'GDOT', 'RSI')
('rvalue, r-squared results: ', -0.6218030539499062, '0.386639037901')
('doing regression on ', u'GDOT', 'ADX')
('rvalue, r-squared results: ', 0.091859679282278422, '0.00843820067784')
('doing regression on ', u'GDOT', 'SMA Difference 1')
('rvalue, r-squared results: ', 0.37039629048394374, '0.137193412004')
('doing regression on ', u'GDOT', 'SMA Difference 2')
('rvalue, r-squared results: ', 0.37150553428851241, '0.138016362007')
('doing regression on ', u'GDOT', 'SPY Beta')
('rvalue, r-squared results: ', -0.51435447336554774, '0.264560524271')
('doing regression on ', u'GDOT', 'ATR Ratio')
('rvalue, r-squared results: ', 0.5030544447788623, '0.253063774412')
('doing regression on ', u'GDOT', 'Avg Vol')
('rvalue, r-squared results: ', -0.35713731460980641, '0.127547061487')
('doing regression on ', u'GDOT', 'Volatility')
('rvalue, r-squared results: ', 0.42974615420769857, '0.184681757056')
('doing regression on ', u'GDOT', 'Volatility Shorter Duration')
('rvalue, r-squared results: ', 0.42974615420769846, '0.184681757056')
plotting indicators for BA
('doing regression on ', u'BA', 'ATR')
('rvalue, r-squared results: ', 0.13651504655121985, '0.0186363579349')
('doing regression on ', u'BA', 'RSI')
('rvalue, r-squared results: ', 0.14639495462885474, '0.0214314827408')
('doing regression on ', u'BA', 'ADX')
('rvalue, r-squared results: ', -0.046891670533307624, '0.0021988287654')
('doing regression on ', u'BA', 'SMA Difference 1')
('rvalue, r-squared results: ', -0.32036066407789648, '0.102630955088')
('doing regression on ', u'BA', 'SMA Difference 2')
('rvalue, r-squared results: ', -0.31725782179892764, '0.100652525493')
('doing regression on ', u'BA', 'SPY Beta')
('rvalue, r-squared results: ', -0.089063500171309568, '0.00793230706276')
('doing regression on ', u'BA', 'ATR Ratio')
('rvalue, r-squared results: ', -0.13136513405092409, '0.0172567984442')
('doing regression on ', u'BA', 'Avg Vol')
('rvalue, r-squared results: ', -0.2010120493539789, '0.0404058439855')
('doing regression on ', u'BA', 'Volatility')
('rvalue, r-squared results: ', -0.30514340760493092, '0.0931124992047')
('doing regression on ', u'BA', 'Volatility Shorter Duration')
('rvalue, r-squared results: ', -0.30514340760493086, '0.0931124992047')
plotting indicators for INTC
('doing regression on ', u'INTC', 'ATR')
('rvalue, r-squared results: ', 0.0486539882731752, '0.00236721057489')
('doing regression on ', u'INTC', 'RSI')
('rvalue, r-squared results: ', 0.36473975661203861, '0.133035090053')
('doing regression on ', u'INTC', 'ADX')
('rvalue, r-squared results: ', 0.41648472400121161, '0.173459525326')
('doing regression on ', u'INTC', 'SMA Difference 1')
('rvalue, r-squared results: ', -0.53162561775193928, '0.28262579745')
('doing regression on ', u'INTC', 'SMA Difference 2')
('rvalue, r-squared results: ', -0.52176680609175607, '0.272240599939')
('doing regression on ', u'INTC', 'SPY Beta')
('rvalue, r-squared results: ', 0.23123665192112172, '0.0534703891917')
('doing regression on ', u'INTC', 'ATR Ratio')
('rvalue, r-squared results: ', 0.2086958885580254, '0.043553973901')
('doing regression on ', u'INTC', 'Avg Vol')
('rvalue, r-squared results: ', -0.076843023287429285, '0.00590485022795')
('doing regression on ', u'INTC', 'Volatility')
('rvalue, r-squared results: ', 0.26358136518177966, '0.0694751360711')
('doing regression on ', u'INTC', 'Volatility Shorter Duration')
('rvalue, r-squared results: ', 0.26358136518177944, '0.0694751360711')
plotting indicators for CVX
('doing regression on ', u'CVX', 'ATR')
('rvalue, r-squared results: ', 0.70800056312931914, '0.501264797391')

('doing regression on ', u'CVX', 'RSI')
('rvalue, r-squared results: ', -0.10419074864962694, '0.0108557121042')
('doing regression on ', u'CVX', 'ADX')
('rvalue, r-squared results: ', -0.065363536367873551, '0.00427239188651')
('doing regression on ', u'CVX', 'SMA Difference 1')
('rvalue, r-squared results: ', -0.034782454531546748, '0.00120981914324')
('doing regression on ', u'CVX', 'SMA Difference 2')
('rvalue, r-squared results: ', -0.071635005700200566, '0.00513157404167')
('doing regression on ', u'CVX', 'SPY Beta')
('rvalue, r-squared results: ', 0.58975135263848655, '0.347806657939')
('doing regression on ', u'CVX', 'ATR Ratio')
('rvalue, r-squared results: ', 0.78242902992175445, '0.612195186864')

('doing regression on ', u'CVX', 'Avg Vol')
('rvalue, r-squared results: ', 0.1267719594920014, '0.0160711297134')
('doing regression on ', u'CVX', 'Volatility')
('rvalue, r-squared results: ', 0.4390334559111701, '0.192750375409')
('doing regression on ', u'CVX', 'Volatility Shorter Duration')
('rvalue, r-squared results: ', 0.4390334559111701, '0.192750375409')
plotting indicators for LMT
('doing regression on ', u'LMT', 'ATR')
('rvalue, r-squared results: ', 0.21230177588232807, '0.0450720440428')
('doing regression on ', u'LMT', 'RSI')
('rvalue, r-squared results: ', -0.027094768256027028, '0.000734126466848')
('doing regression on ', u'LMT', 'ADX')
('rvalue, r-squared results: ', 0.039588461814015763, '0.0015672463088')
('doing regression on ', u'LMT', 'SMA Difference 1')
('rvalue, r-squared results: ', 0.067464728191048359, '0.00455148954989')
('doing regression on ', u'LMT', 'SMA Difference 2')
('rvalue, r-squared results: ', 0.063729575598863095, '0.00406145880601')
('doing regression on ', u'LMT', 'SPY Beta')
('rvalue, r-squared results: ', 0.81531095867425174, '0.664731959334')

('doing regression on ', u'LMT', 'ATR Ratio')
('rvalue, r-squared results: ', -0.31081335771535096, '0.0966049433343')
('doing regression on ', u'LMT', 'Avg Vol')
('rvalue, r-squared results: ', 0.41734796363600807, '0.174179322751')
('doing regression on ', u'LMT', 'Volatility')
('rvalue, r-squared results: ', -0.43865565132236622, '0.192418780437')
('doing regression on ', u'LMT', 'Volatility Shorter Duration')
('rvalue, r-squared results: ', -0.43865565132236634, '0.192418780437')
plotting indicators for MCD
('doing regression on ', u'MCD', 'ATR')
('rvalue, r-squared results: ', 0.51434587358603368, '0.264551677675')
('doing regression on ', u'MCD', 'RSI')
('rvalue, r-squared results: ', -0.055506604548737522, '0.00308098314853')
('doing regression on ', u'MCD', 'ADX')
('rvalue, r-squared results: ', 0.25019541488651181, '0.0625977456302')
('doing regression on ', u'MCD', 'SMA Difference 1')
('rvalue, r-squared results: ', -0.41313480212718606, '0.170680364729')
('doing regression on ', u'MCD', 'SMA Difference 2')
('rvalue, r-squared results: ', -0.48251491144969277, '0.232820639771')
('doing regression on ', u'MCD', 'SPY Beta')
('rvalue, r-squared results: ', 0.064263619757735826, '0.00412981282437')
('doing regression on ', u'MCD', 'ATR Ratio')
('rvalue, r-squared results: ', 0.19779070836087795, '0.0391211643139')
('doing regression on ', u'MCD', 'Avg Vol')
('rvalue, r-squared results: ', 0.021044825035469585, '0.000442884660774')
('doing regression on ', u'MCD', 'Volatility')
('rvalue, r-squared results: ', 0.46779625413368375, '0.218833335382')
('doing regression on ', u'MCD', 'Volatility Shorter Duration')
('rvalue, r-squared results: ', 0.46779625413368353, '0.218833335382')
plotting indicators for CSCO
('doing regression on ', u'CSCO', 'ATR')
('rvalue, r-squared results: ', 0.23411642837394414, '0.0548105020346')
('doing regression on ', u'CSCO', 'RSI')
('rvalue, r-squared results: ', -0.36258753615133171, '0.131469721372')
('doing regression on ', u'CSCO', 'ADX')
('rvalue, r-squared results: ', 0.20389663674160011, '0.0415738384745')
('doing regression on ', u'CSCO', 'SMA Difference 1')
('rvalue, r-squared results: ', -0.1473055390785489, '0.0216989218432')
('doing regression on ', u'CSCO', 'SMA Difference 2')
('rvalue, r-squared results: ', -0.1625784162664152, '0.0264317414357')
('doing regression on ', u'CSCO', 'SPY Beta')
('rvalue, r-squared results: ', -0.29602075888947299, '0.0876282896935')
('doing regression on ', u'CSCO', 'ATR Ratio')
('rvalue, r-squared results: ', 0.10489334509740664, '0.0110026138457')
('doing regression on ', u'CSCO', 'Avg Vol')
('rvalue, r-squared results: ', 0.28268571076253768, '0.0799112110693')
('doing regression on ', u'CSCO', 'Volatility')
('rvalue, r-squared results: ', -0.29018641712339666, '0.0842081566829')
('doing regression on ', u'CSCO', 'Volatility Shorter Duration')
('rvalue, r-squared results: ', -0.2901864171233966, '0.0842081566829')
plotting indicators for O
('doing regression on ', u'O', 'ATR')
('rvalue, r-squared results: ', 0.025208760242137411, '0.000635481592946')
('doing regression on ', u'O', 'RSI')
('rvalue, r-squared results: ', 0.17741385830085477, '0.0314756771172')
('doing regression on ', u'O', 'ADX')
('rvalue, r-squared results: ', 0.42514229474146054, '0.180745970778')
('doing regression on ', u'O', 'SMA Difference 1')
('rvalue, r-squared results: ', -0.11474847494097312, '0.0131672125013')
('doing regression on ', u'O', 'SMA Difference 2')
('rvalue, r-squared results: ', -0.14808851959380809, '0.0219302096355')
('doing regression on ', u'O', 'SPY Beta')
('rvalue, r-squared results: ', -0.63070609960378898, '0.397790184077')
('doing regression on ', u'O', 'ATR Ratio')
('rvalue, r-squared results: ', -0.18032354684897786, '0.0325165815482')
('doing regression on ', u'O', 'Avg Vol')
('rvalue, r-squared results: ', 0.59478888104279581, '0.353773813012')
('doing regression on ', u'O', 'Volatility')
('rvalue, r-squared results: ', -0.48046015222820859, '0.230841957879')
('doing regression on ', u'O', 'Volatility Shorter Duration')
('rvalue, r-squared results: ', -0.48046015222820848, '0.230841957879')
plotting indicators for CAT
('doing regression on ', u'CAT', 'ATR')
('rvalue, r-squared results: ', -0.29028913346369101, '0.0842677810071')
('doing regression on ', u'CAT', 'RSI')
('rvalue, r-squared results: ', 0.061777416553830754, '0.00381644919607')
('doing regression on ', u'CAT', 'ADX')
('rvalue, r-squared results: ', -0.22497823868241099, '0.0506152078806')
('doing regression on ', u'CAT', 'SMA Difference 1')
('rvalue, r-squared results: ', 0.056280516955308169, '0.00316749658876')
('doing regression on ', u'CAT', 'SMA Difference 2')
('rvalue, r-squared results: ', 0.029646823584729715, '0.000878934148664')
('doing regression on ', u'CAT', 'SPY Beta')
('rvalue, r-squared results: ', -0.33346908997621011, '0.11120163397')
('doing regression on ', u'CAT', 'ATR Ratio')
('rvalue, r-squared results: ', -0.2736892085475004, '0.0749057828754')
('doing regression on ', u'CAT', 'Avg Vol')
('rvalue, r-squared results: ', -0.495543299356421, '0.245563161537')
('doing regression on ', u'CAT', 'Volatility')
('rvalue, r-squared results: ', -0.72560164479482936, '0.526497746929')

('doing regression on ', u'CAT', 'Volatility Shorter Duration')
('rvalue, r-squared results: ', -0.7256016447948298, '0.526497746929')

plotting indicators for MMM
('doing regression on ', u'MMM', 'ATR')
('rvalue, r-squared results: ', 0.17699966847337828, '0.0313288826397')
('doing regression on ', u'MMM', 'RSI')
('rvalue, r-squared results: ', -0.60473321743844577, '0.365702264273')
('doing regression on ', u'MMM', 'ADX')
('rvalue, r-squared results: ', -0.18511233187173748, '0.034266575411')
('doing regression on ', u'MMM', 'SMA Difference 1')
('rvalue, r-squared results: ', -0.43278144018100584, '0.187299774965')
('doing regression on ', u'MMM', 'SMA Difference 2')
('rvalue, r-squared results: ', -0.40911145444792796, '0.16737218216')
('doing regression on ', u'MMM', 'SPY Beta')
('rvalue, r-squared results: ', 0.12138755508445012, '0.0147349385294')
('doing regression on ', u'MMM', 'ATR Ratio')
('rvalue, r-squared results: ', -0.24698802298069422, '0.0610030834959')
('doing regression on ', u'MMM', 'Avg Vol')
('rvalue, r-squared results: ', 0.05814670169035157, '0.00338103891747')
('doing regression on ', u'MMM', 'Volatility')
('rvalue, r-squared results: ', -0.78893933097659286, '0.622425267962')

('doing regression on ', u'MMM', 'Volatility Shorter Duration')
('rvalue, r-squared results: ', -0.78893933097659297, '0.622425267962')

plotting indicators for GE
('doing regression on ', u'GE', 'ATR')
('rvalue, r-squared results: ', -0.38783857902074542, '0.150418763377')
('doing regression on ', u'GE', 'RSI')
('rvalue, r-squared results: ', -0.17005113548647249, '0.0289173886802')
('doing regression on ', u'GE', 'ADX')
('rvalue, r-squared results: ', -0.22464027146938439, '0.0504632515658')
('doing regression on ', u'GE', 'SMA Difference 1')
('rvalue, r-squared results: ', -0.24233198871546544, '0.0587247927548')
('doing regression on ', u'GE', 'SMA Difference 2')
('rvalue, r-squared results: ', -0.24398733947422327, '0.0595298218237')
('doing regression on ', u'GE', 'SPY Beta')
('rvalue, r-squared results: ', 0.16616917093898578, '0.0276121933705')
('doing regression on ', u'GE', 'ATR Ratio')
('rvalue, r-squared results: ', -0.37628601939148493, '0.141591168389')
('doing regression on ', u'GE', 'Avg Vol')
('rvalue, r-squared results: ', -0.34093003664428473, '0.116233289886')
('doing regression on ', u'GE', 'Volatility')
('rvalue, r-squared results: ', -0.16427699921723291, '0.0269869324718')
('doing regression on ', u'GE', 'Volatility Shorter Duration')
('rvalue, r-squared results: ', -0.16427699921723296, '0.0269869324718')
plotting indicators for MSFT
('doing regression on ', u'MSFT', 'ATR')
('rvalue, r-squared results: ', -0.31247607625654622, '0.0976412982327')
('doing regression on ', u'MSFT', 'RSI')
('rvalue, r-squared results: ', 0.0058419999659967449, '3.41289636027e-05')
('doing regression on ', u'MSFT', 'ADX')
('rvalue, r-squared results: ', 0.34206346976490332, '0.117007417348')
('doing regression on ', u'MSFT', 'SMA Difference 1')
('rvalue, r-squared results: ', -0.040521607076207292, '0.00164200064004')
('doing regression on ', u'MSFT', 'SMA Difference 2')
('rvalue, r-squared results: ', -0.039068972103983787, '0.00152638458126')
('doing regression on ', u'MSFT', 'SPY Beta')
('rvalue, r-squared results: ', -0.016557088823342177, '0.000274137190304')
('doing regression on ', u'MSFT', 'ATR Ratio')
('rvalue, r-squared results: ', -0.4369513354579142, '0.190926469558')
('doing regression on ', u'MSFT', 'Avg Vol')
('rvalue, r-squared results: ', -0.30937859116779043, '0.095715112673')
('doing regression on ', u'MSFT', 'Volatility')
('rvalue, r-squared results: ', 0.10923725734579028, '0.0119327783924')
('doing regression on ', u'MSFT', 'Volatility Shorter Duration')
('rvalue, r-squared results: ', 0.10923725734579023, '0.0119327783924')
plotting indicators for EXPE
('doing regression on ', u'EXPE', 'ATR')
('rvalue, r-squared results: ', 0.61484747955496577, '0.378037423115')
('doing regression on ', u'EXPE', 'RSI')
('rvalue, r-squared results: ', -0.080305101087510627, '0.00644890926068')
('doing regression on ', u'EXPE', 'ADX')
('rvalue, r-squared results: ', -0.56108219329648046, '0.314813227634')
('doing regression on ', u'EXPE', 'SMA Difference 1')
('rvalue, r-squared results: ', -0.65101012562072635, '0.423814183661')

('doing regression on ', u'EXPE', 'SMA Difference 2')
('rvalue, r-squared results: ', -0.69474683049360897, '0.482673158481')

('doing regression on ', u'EXPE', 'SPY Beta')
('rvalue, r-squared results: ', 0.69835838763270219, '0.487704437577')

I now prepare the features to attempt machine learning on.¶

I will use a simple classifier where there are 2 states:

0 = non profitable trade resulting given the entry signal
1 = profitable trade resulting given the entry signal

I will use an algorithm that can sniff out nonlinear relationships. First I build a set of features then I scale the features using a min/max scaler. The learning algorithm I am using (SVC) works best with feature scaling.

from sklearn import preprocessing
# Build one feature row and one label per trade.
# Price-level values (price, sma50, sma100, sma150, atr) are deliberately
# left out: only features that are independent of a given security's price
# can be scaled together across symbols.
# NOTE(review): the recorded regressions report identical r-values for
# 'Volatility' and 'Volatility Shorter Duration' on every symbol, which
# suggests the two columns are linearly dependent -- confirm whether
# 'volatility_short' adds any information.
FEATURE_KEYS = (
    'adx',
    'rsi',
    'spy_beta',
    'sma_diff_1',
    'sma_diff_2',
    'volatility',
    'volatility_short',
    'atr_ratio',
    'avg_vol',
)
features = []
labels = []  # 0 == not profitable, 1 == profitable

for k, v in signals.items():
    trade_dict = v['trade_dict']
    # sorted() is portable; dict.keys() followed by .sort() is Python 2 only.
    keylist = sorted(trade_dict)
    # Skip the first row of every symbol (the recorded trade log shows the
    # first trade with "was good: None" and profit 0.0, so presumably it has
    # no outcome to learn from).
    for key in keylist[1:]:
        value = trade_dict[key]
        features.append([value[name] for name in FEATURE_KEYS])
        labels.append(1 if value['profitable'] else 0)

print("Len trades:  %d" % len(features))
# Rows stay in `signals` iteration order, so the last 15% (the test set)
# come from whichever symbols are iterated last.
train_size = int(float(len(features)) * 0.85)

# Feature scaling should *probably* be done per security; for now only
# features that are in the same range for every security are used, so they
# are scaled all together.  The exception is perhaps average volume.
# The scaler is fitted on the training rows only, so the min/max of the
# test rows do not leak into training; the same transform is then applied
# to every row (test values may therefore fall slightly outside [0, 1]).
min_max_scaler = preprocessing.MinMaxScaler()
min_max_scaler.fit(features[:train_size])
features = min_max_scaler.transform(features)

training_features = features[:train_size]
training_labels = labels[:train_size]
test_features = features[train_size:]
test_labels = labels[train_size:]
print("len training %d  len test %d" % (len(training_features), len(test_features)))

Len trades:  211
len training 179  len test 32

I will try SVC and compute accuracy of my test set vs the predicted output.¶

from sklearn import svm
# Support-vector classifier with an RBF kernel and C=1000, trained on the
# scaled training rows and their 0/1 profitability labels.
clf = svm.SVC(C=1000, kernel='rbf')
# Left as a bare expression so the notebook shows the fitted estimator.
clf.fit(training_features, training_labels)

SVC(C=1000, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

# Predict a 0/1 label for each held-out test row and print the predictions
# next to the true labels for a quick visual comparison.
pred = clf.predict(test_features)
print('predicted: ', pred)
print('test labels', test_labels)

('predicted: ', array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 0, 0, 1, 1, 1, 0, 1]))
('test labels', [1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0])

from sklearn.metrics import accuracy_score
# Accuracy of the SVC predictions against the held-out test labels; left as
# a bare expression so the notebook displays the value.
accuracy_score(test_labels, pred)

0.65625

I will try K Nearest Neighbors and compute accuracy of my test set vs the predicted output.¶

from sklearn.neighbors import KNeighborsClassifier
# K-nearest-neighbours classifier with library defaults (the recorded repr
# below shows n_neighbors=5 and uniform weights), trained on the same
# training rows and labels as the SVC.
neighbors = KNeighborsClassifier()
# Left as a bare expression so the notebook shows the fitted estimator.
neighbors.fit(training_features, training_labels)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_neighbors=5, p=2, weights='uniform')

# Predict labels for the held-out test rows with the KNN model, print them
# next to the true labels, then display the accuracy as the cell's value.
prediction = neighbors.predict(test_features)
print('predicted: ', prediction)
print('test labels', test_labels)
accuracy_score(test_labels, prediction)

('predicted: ', array([1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
       1, 1, 1, 0, 1, 0, 0, 0, 0]))
('test labels', [1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0])

0.5625

Closing thoughts¶

Unsurprisingly, because I did not find any obviously strong linear relationships between any of the indicators and the resulting profitability of my trades, I was not able to build a predictive system that would predict outcomes with a high degree of accuracy.

In the majority of the tests I ran, I got an accuracy only slightly better than 50%, which means the ML model fitting was not very successful.

In the future¶

I think the one indicator that did seem to have some correlation with good entry points was volatility. The volatility indicator I am using is the one built into Quantopian Factors, which is based on annualized volatility calculated from the standard deviation of the stock price. I believe this should be related to the volatility gauge you will see in options pricing.

Given that volatility was useful, I would also like to look at options pricing as a way of learning information about the underlying value. At the very least it would be interesting to look at put/call ratio to see if there is some predictive power there.

Volume also seemed to have some correlation with good entry points in some tests. I want to try to refine that in the future.

In some tests ATR ratio also seemed to have some correlation with good entry points. I believe that because ATR to price ratio is essentially a measure of volatility that this makes sense, given that volatility also has some correlation with good entry points.

		adx	atr	atr_ratio	avg_vol	buy_signal	close	rsi	sell_signal	sma100	sma150	sma50	sma_diff_1	sma_diff_2	spy_beta	volatility	volatility_short
2011-01-03 00:00:00+00:00	Equity(698 [BA])	11.794604	0.960308	0.014713	1.190407e+08	True	65.269	58.636364	False	65.597119	65.325511	66.271223	0.014477	0.010276	1.326340	0.299217	0.230851
	Equity(1267 [CAT])	61.725952	1.115000	0.011901	2.162524e+08	True	93.690	77.021277	False	79.521566	74.382678	86.055260	0.156926	0.082163	1.408257	0.316105	0.243880
	Equity(1900 [CSCO])	35.117304	0.265385	0.013118	5.231631e+08	False	20.230	63.829787	True	21.317790	21.884060	20.848380	-0.047326	-0.022020	1.061885	0.305511	0.235707
	Equity(2190 [DIS])	23.090536	0.427692	0.011396	1.100276e+08	True	37.530	65.172414	False	34.990427	34.480844	36.584679	0.061015	0.045563	1.048521	0.233794	0.180376
	Equity(3149 [GE])	74.818316	0.258596	0.014123	4.259592e+08	True	18.310	71.256905	False	16.205797	15.879880	16.655275	0.048829	0.027736	1.218541	0.273774	0.211221