Random Forest Classifier

Explanation & Rationale

The Random Forest Classifier strategy leverages an ensemble of decision trees to analyze past market data and predict future price movements. By incorporating technical indicators as features, the model captures complex patterns and relationships while reducing the risk of overfitting through averaging multiple decision trees. This approach enhances robustness and adaptability, making it well-suited for noisy financial markets where single-model predictions may be less reliable.

Code

'''
Random Forest Classifier.

Trains a Random Forest Classifier using the past 30 days of data to predict the next day's action.
The model leverages basic technical indicators as features to make predictions.
Learn more @ docs.ubacktest.com/examples/machine-learning/rfclassifier
'''

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler

def create_features(data, indicator_window=14):

    data[f'SMA_{indicator_window}'] = data['close'].rolling(window=indicator_window).mean()
    data[f'volume_{indicator_window}'] = data['volume'].rolling(window=indicator_window).mean()
    delta = data['close'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=indicator_window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=indicator_window).mean()
    data[f'RSI_{indicator_window}'] = 100 - (100 / (1 + gain / loss))
    data[f'EMA_{indicator_window}'] = data['close'].ewm(span=indicator_window, adjust=False).mean()
    data['EMA_12'] = data['close'].ewm(span=12, adjust=False).mean()
    data['EMA_26'] = data['close'].ewm(span=26, adjust=False).mean()
    data['MACD'] = data['EMA_12'] - data['EMA_26']
    data['MACD_signal'] = data['MACD'].ewm(span=9, adjust=False).mean()  # Signal line for MACD
    data['bollinger_upper'] = data[f'SMA_{indicator_window}'] + (data['close'].rolling(window=indicator_window).std() * 2)
    data['bollinger_lower'] = data[f'SMA_{indicator_window}'] - (data['close'].rolling(window=indicator_window).std() * 2)

    return data

def random_forest_classifier(data, training_window=30, indicator_window=14, n_estimators=100, max_depth=None):

    # Create features
    data = create_features(data, indicator_window)

    # Create the target variable: 1 if the next day's price is higher, -1 if it is lower
    data['target'] = (data['close'].shift(-1) > data['close']).astype(int) * 2 - 1

    features = [
        'close',
        'volume',
        f'SMA_{indicator_window}',
        f'volume_{indicator_window}',
        f'RSI_{indicator_window}',
        f'EMA_{indicator_window}',
        'MACD',
        'MACD_signal',
        'bollinger_upper',
        'bollinger_lower',
    ]

    scaler = StandardScaler()  # Standardize features for better Random Forest performance
    predictions = []

    for i in range(training_window+indicator_window, len(data)):
        train_data = data.iloc[i-training_window:i]  # Rolling window for training
        test_data = data.iloc[[i]]  # Single test point (next day)

        X_train, y_train = train_data[features], train_data['target']
        X_test = test_data[features]

        # Scale the data
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Train the Random Forest model
        model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
        model.fit(X_train_scaled, y_train)

        # Make prediction
        pred = model.predict(X_test_scaled)[0]
        predictions.append(pred)

    # Assign predictions back to the data
    data.loc[data.index[training_window+indicator_window:], 'signal'] = predictions

    return data

def strategy(data):

    # Call the random_forest_classifier function to get signals
    data = random_forest_classifier(data)

    return data