- Linear regression-based strategies
- Machine learning-based strategies
- Deep learning-based strategies
Using Linear Regression for Market Movement Prediction
import os
import random

import numpy as np
import pandas as pd  # added: `pd` is used below (DataFrame construction) but was never imported
from pylab import mpl, plt
# Global plotting configuration and a sample x-axis grid.
# The 'seaborn' style name was removed in matplotlib 3.6; fall back to the
# renamed 'seaborn-v0_8' style so the script works on both old and new versions.
try:
    plt.style.use('seaborn')
except OSError:
    plt.style.use('seaborn-v0_8')
mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['font.family'] = 'serif'
os.environ['PYTHONHASHSEED'] = '0'  # fix hash seed for reproducibility
x = np.linspace(0, 10)  # 50 evenly spaced points on [0, 10]
def set_seeds(seed=100):
    """Seed Python's and NumPy's random number generators for reproducibility.

    Parameters
    ----------
    seed : int
        Seed value applied to both `random` and `numpy.random` (default 100).
    """
    random.seed(seed)
    np.random.seed(seed)


set_seeds()
# Noisy linear data: unit-slope trend plus standard-normal noise.
y = x + np.random.standard_normal(len(x))
# First-degree polynomial fit -> [slope, intercept].
reg = np.polyfit(x, y, deg=1)
trend = np.poly1d(reg)  # callable form of the fitted line

plt.figure(figsize=(10, 6))
plt.plot(x, y, 'bo', label='data')
plt.plot(x, trend(x), 'r', lw=2.5, label='linear regression')
plt.legend(loc=0)
Using logistic regression to predict market direction
# Prepare the GLD price series: log returns plus three lagged-return features.
# NOTE(review): `raw` is assumed to be a DataFrame of price columns loaded
# earlier in the original workflow — confirm it is in scope before running.
symbol = 'GLD'
data = pd.DataFrame(raw[symbol])
data.rename(columns={symbol: 'price'}, inplace=True)
data['return'] = np.log(data['price'] / data['price'].shift(1))
data.dropna(inplace=True)
lags = 3
cols = []
for lag in range(1, lags + 1):  # fixed typo: loop variable was `lages`
    col = 'lag_{}'.format(lag)
    data[col] = data['return'].shift(lag)  # lagged log returns as features
    cols.append(col)
data.dropna(inplace=True)  # drop rows made incomplete by the shifts
from sklearn.metrics import accuracy_score
from sklearn import linear_model  # added: `linear_model` was used below but never imported

# Very large C effectively disables regularization, as in the original example.
# `multi_class='auto'` was removed (it is the default and is deprecated in
# recent scikit-learn releases).
lm = linear_model.LogisticRegression(C=1e7, solver='lbfgs', max_iter=1000)
# Classify the sign of the next return from the lagged returns.
lm.fit(data[cols], np.sign(data['return']))
data['prediction'] = lm.predict(data[cols])
data['prediction'].value_counts()
# Hit ratio: +1 where predicted and realized direction agree.
hits = np.sign(data['return'].iloc[lags:] * data['prediction'].iloc[lags:]).value_counts()
# accuracy_score convention is (y_true, y_pred); the metric is symmetric but
# the canonical order is clearer.
accuracy_score(np.sign(data['return']), data['prediction'])
# Vectorized strategy returns: position (+/-1) times realized log return.
data['strategy'] = data['prediction'] * data['return']
data[['return', 'strategy']].sum().apply(np.exp)  # gross performance
data[['return', 'strategy']].cumsum().apply(np.exp).plot(figsize=(10, 6))
# Variant: five lagged PRICE levels (not returns) as features.
data = pd.DataFrame(raw[symbol])
data.rename(columns={symbol: 'price'}, inplace=True)
data['return'] = np.log(data['price'] / data['price'].shift(1))
lags = 5
cols = []
for lag in range(1, lags + 1):
    col = 'lag_%d' % lag
    data[col] = data['price'].shift(lag)  # lagged price levels as features
    cols.append(col)
data.dropna(inplace=True)
lm.fit(data[cols], np.sign(data['return']))
data['prediction'] = lm.predict(data[cols])
data['prediction'].value_counts()  # fixed typo: was `vlaue_counts`
# fixed syntax: `iloc[lags:'` -> `iloc[lags:]`
hits = np.sign(data['return'].iloc[lags:] * data['prediction'].iloc[lags:]).value_counts()
# fixed unterminated string literal: `'return]` -> `'return'`
accuracy_score(np.sign(data['return']), data['prediction'])
data['strategy'] = data['prediction'] * data['return']
data[['return', 'strategy']].sum().apply(np.exp)
data[['return', 'strategy']].cumsum().apply(np.exp).plot(figsize=(10, 6))
Without considering transaction costs
import ScikitVectorBacktester as SCI

# fixed class name: was `ScikitVectorBacktest` (missing 'er'), which would
# raise AttributeError on the imported module
scibt = SCI.ScikitVectorBacktester('EUR=', '2010-1-1', '2019-12-31', 10000, 0.0, 'logistic')
# In-sample: train and test on the same 2015-2019 window.
scibt.run_strategy('2015-1-1', '2019-12-31', '2015-1-1', '2019-12-31', lags=15)
# Out-of-sample: train 2016-2018, test on 2019.
scibt.run_strategy('2016-1-1', '2018-12-31', '2019-1-1', '2019-12-31', lags=15)
scibt.plot_results()
Consider the same strategy applied to the GDX ETF, for which an out-of-sample outperformance (over the year 2018) is observed.
# fixed class name typo: was `ScikitVecotrBacktester`
scibt = SCI.ScikitVectorBacktester('GDX', '2010-1-1', '2019-12-31', 10000, 0.00, 'logistic')
scibt.run_strategy('2013-1-1', '2017-12-31', '2018-1-1', '2018-12-31', lags=10)
scibt.plot_results()
Taking transaction costs into account
# Same GDX backtest with 25 bps proportional transaction costs.
# fixed class name typo: was `ScibitVectorBacktester`
scibt = SCI.ScikitVectorBacktester('GDX', '2010-1-1', '2019-12-31', 10000, 0.0025, 'logistic')
scibt.run_strategy('2013-1-1', '2017-12-31', '2018-1-1', '2018-12-31', lags=10)
scibt.plot_results()
Using deep learning for market movement prediction
The simple classification problem revisited
# Toy classification data set: study hours vs. pass/fail outcome.
hours = np.array([0.5, 0.75, 1., 1.25, 1.5, 1.75, 1.75, 2., 2.25, 2.5, 2.75, 3., 3.25, 3.5, 4., 4.25, 4.5, 4.75, 5., 5.5])
success = np.array([0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1])
# fixed mismatched bracket: was `...success})]`
data = pd.DataFrame({'hours': hours, 'success': success})
data.info()
“MLP” stands for multi-layer perceptron, which is another expression for a dense neural network.
# fixed module name typo: was `sklearn.neural_netowrk`
from sklearn.neural_network import MLPClassifier

# Single hidden layer of 32 units; fixed random_state for reproducibility.
model = MLPClassifier(hidden_layer_sizes=[32], max_iter=1000, random_state=100)
# Generate the predictions and plot the results.
model.fit(data['hours'].values.reshape(-1, 1), data['success'])
data['prediction'] = model.predict(data['hours'].values.reshape(-1, 1))
data.tail()
# fixed: missing opening quote on 'b-' and missing closing parenthesis
data.plot(x='hours', y=['success', 'prediction'], style=['ro', 'b-'], ylim=[-.1, 1.1], figsize=(10, 6))
Using deep neural networks to predict market direction
Apply the approach to stock market data in the form of log returns from a financial time series
The data needs to be retrieved and prepared
# Prepare EUR= data: log returns, binary direction label, and five lagged
# return features for the DNN classifier.
# NOTE(review): `raw` is assumed to be loaded earlier — confirm it is in scope.
symbol = 'EUR='
data = pd.DataFrame(raw[symbol])
data.rename(columns={symbol: 'price'}, inplace=True)
data['return'] = np.log(data['price'] / data['price'].shift(1))
data['direction'] = np.where(data['return'] > 0, 1, 0)  # 1 = up day, 0 = down day
lags = 5  # fixed typo: was `lages =5`, but the loop below reads `lags`
cols = []
for lag in range(1, lags + 1):
    col = f'lag_{lag}'
    data[col] = data['return'].shift(lag)
    cols.append(col)
data.dropna(inplace=True)
data.round(4).tail()
Use a dense neural network (DNN) with the Keras package: define the training and test data subsets, define the feature columns and labels, and fit the classifier.
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam, RMSprop

optimizer = Adam(learning_rate=0.0001)


def set_seeds(seed=100):
    """Seed Python, NumPy, and TensorFlow RNGs for reproducible training.

    Parameters
    ----------
    seed : int
        Seed value for all three generators (default 100).
    """
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)  # fixed: was hard-coded 100, ignoring the parameter


set_seeds()
# Two hidden ReLU layers; sigmoid output for binary direction classification.
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(lags,)))
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
# Chronological train/test split at the cutoff date; z-score normalization
# uses the TRAINING statistics only (no look-ahead into the test period).
cutoff = '2017-12-31'
training_data = data.loc[data.index < cutoff].copy()
mu = training_data.mean()
std = training_data.std()
training_data_ = (training_data - mu) / std
test_data = data.loc[data.index >= cutoff].copy()
test_data_ = (test_data - mu) / std
# %%time  -- IPython cell magic; not valid outside a notebook cell
# fixed: train on the NORMALIZED features (training_data_) — the evaluation
# below uses training_data_, so fitting on raw features was inconsistent
model.fit(training_data_[cols], training_data['direction'], epochs=50, verbose=False, validation_split=0.2, shuffle=False)
res = pd.DataFrame(model.history.history)
res[['accuracy', 'val_accuracy']].plot(figsize=(10, 6), style='--')
Equipped with the fitted classifier, the model can generate predictions on the training data set
model.evaluate(training_data_[cols], training_data['direction'])
# fixed unbalanced parentheses: `predict(...[cols] > 0.5, 1, 0)` mixed the
# np.where arguments into the predict call
pred = np.where(model.predict(training_data_[cols]) > 0.5, 1, 0)
pred[:30].flatten()
# Map the {0, 1} class prediction to a {-1, +1} trading position.
training_data['prediction'] = np.where(pred > 0, 1, -1)
training_data['strategy'] = (training_data['prediction'] * training_data['return'])
training_data[['return', 'strategy']].sum().apply(np.exp)  # gross performance
training_data[['return', 'strategy']].cumsum().apply(np.exp).plot(figsize=(10, 6))
How the strategy performs on the test data set (out-of-sample)
model.evaluate(test_data_[cols], test_data['direction'])  # fixed: missing closing parenthesis
# fixed: predict on the NORMALIZED features (test_data_), matching training
pred = np.where(model.predict(test_data_[cols]) > 0.5, 1, 0)
test_data['prediction'] = np.where(pred > 0, 1, -1)
test_data['prediction'].value_counts()
test_data['strategy'] = (test_data['prediction'] * test_data['return'])
test_data[['return', 'strategy']].sum().apply(np.exp)
test_data[['return', 'strategy']].cumsum().apply(np.exp).plot(figsize=(10, 6))
Adding different types of features
Add more classes/categories and add other types of features to the mix, such as ones based on momentum, volatility, or distance measures.
# Additional feature types: momentum, volatility, and distance from the
# 50-day moving average; all shifted by one day to avoid look-ahead bias.
data['momentum'] = data['return'].rolling(5).mean().shift(1)  # fixed typo: was `rolloing`
data['volatility'] = data['return'].rolling(20).std().shift(1)
data['distance'] = (data['price'] - data['price'].rolling(50).mean()).shift(1)
data.dropna(inplace=True)
# fixed typo: 'volatillity' did not match the 'volatility' column created above,
# which would raise a KeyError when the features are selected later
cols.extend(['momentum', 'volatility', 'distance'])
print(data.round(4).tail())
Redefine the training and test data sets, normalize the feature data, and update the model to reflect the new feature columns.
# Re-split and re-normalize after adding the new feature columns; statistics
# again come from the training window only.
training_data = data.loc[data.index < cutoff].copy()
mu = training_data.mean()
std = training_data.std()
training_data_ = (training_data - mu) / std
test_data = data.loc[data.index >= cutoff].copy()
test_data_ = (test_data - mu) / std
set_seeds()
# Fresh network sized to the enriched feature set (len(cols) inputs).
model = Sequential([
    Dense(32, activation='relu', input_shape=(len(cols),)),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
Based on the enriched feature set, the classifier can be trained.
The in-sample performance of the strategy is quite a bit better than before.
# %%time  -- IPython cell magic; not valid outside a notebook cell
# fixed syntax: was `verbose=[False, epochs=25)`
model.fit(training_data_[cols], training_data['direction'], verbose=False, epochs=25)
model.evaluate(training_data_[cols], training_data['direction'])
# fixed: method is `predict`, not `prediction`, and the parentheses were
# unbalanced (np.where arguments were inside the predict call)
pred = np.where(model.predict(training_data_[cols]) > 0.5, 1, 0)
training_data['prediction'] = np.where(pred > 0, 1, -1)
training_data['strategy'] = (training_data['prediction'] * training_data['return'])
training_data[['return', 'strategy']].sum().apply(np.exp)
training_data[['return', 'strategy']].cumsum().apply(np.exp).plot(figsize=(10, 6))
The final step is the evaluation of the classifier and the derivation of the strategy performance out-of-sample.
model.evaluate(test_data_[cols], test_data['direction'])
# fixed: predict on the NORMALIZED features (test_data_), matching training
pred = np.where(model.predict(test_data_[cols]) > 0.5, 1, 0)
test_data['prediction'] = np.where(pred > 0, 1, -1)
test_data['prediction'].value_counts()
test_data['strategy'] = (test_data['prediction'] * test_data['return'])
test_data[['return', 'strategy']].sum().apply(np.exp)
test_data[['return', 'strategy']].cumsum().apply(np.exp).plot(figsize=(10, 6))









