Using WSL
Installing Python extensions
Installing dependencies
sudo apt update
sudo apt upgrade
sudo apt install python3-pip
sudo apt install python3-pandas
pip3 install torch
pip3 install numpy
pip3 install scikit-learn
pip3 install matplotlib
Importing
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from copy import deepcopy as dc
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import Dataset
Loading data
data = pd.read_csv('AMZN.csv')
Extracting date and closing price
data = data[['Date', 'Close']]
Selecting whether to use CPU or GPU
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
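To confirm which device was selected, a quick print helps; on a machine without a CUDA-capable GPU this should report cpu.
print(f'Using device: {device}')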
Preparing a dataframe to predict the closing price of the next day using the closing prices of the previous seven days
def prepare_dataframe_for_lstm(df, n_steps):
    df = dc(df)  # deep copy so the original dataframe is left untouched
    df.set_index('Date', inplace=True)
    # Add one lagged-close column per step: Close(t-1) through Close(t-n_steps)
    for i in range(1, n_steps + 1):
        df[f'Close(t-{i})'] = df['Close'].shift(i)
    # The first n_steps rows have incomplete history, so drop them
    df.dropna(inplace=True)
    return df
lookback = 7
shifted_df = prepare_dataframe_for_lstm(data, lookback)
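As a quick sanity check, the first few rows should show the current Close followed by Close(t-1) through Close(t-7), with the first seven rows of the original data dropped:
print(shifted_df.head())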
Converting to numpy
shifted_df_as_np = shifted_df.to_numpy()
Scaling data between -1 and 1
scaler = MinMaxScaler(feature_range=(-1, 1))
shifted_df_as_np = scaler.fit_transform(shifted_df_as_np)
Splitting the matrix into inputs and outputs, where y is the target value (the first column, the current closing price) and X holds the inputs (the remaining seven columns, the lagged closing prices).
X = shifted_df_as_np[:, 1:]
y = shifted_df_as_np[:, 0]
Flipping X horizontally so each input sequence runs from the oldest lag to the most recent
X = dc(np.flip(X, axis=1))
Splitting, where 95% is used for training and 5% is used for testing
split_index = int(len(X) * 0.95)
X_train = X[:split_index]
X_test = X[split_index:]
y_train = y[:split_index]
y_test = y[split_index:]
Reshaping the arrays to the dimensions the LSTM expects: (samples, timesteps, features) for X and (samples, 1) for y
X_train = X_train.reshape((-1, lookback, 1))
X_test = X_test.reshape((-1, lookback, 1))
y_train = y_train.reshape((-1, 1))
y_test = y_test.reshape((-1, 1))
Converting the NumPy arrays to PyTorch tensors
X_train = torch.tensor(X_train).float()
y_train = torch.tensor(y_train).float()
X_test = torch.tensor(X_test).float()
y_test = torch.tensor(y_test).float()
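It is worth checking the shapes before going further; the X tensors should be (samples, 7, 1) and the y tensors (samples, 1):
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)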
Creating a dataset
class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]
train_dataset = TimeSeriesDataset(X_train, y_train)
test_dataset = TimeSeriesDataset(X_test, y_test)
Wrapping the datasets in data loaders
from torch.utils.data import DataLoader
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
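To verify that the loaders yield correctly shaped batches, you can pull a single batch; x_batch should be (16, 7, 1) and y_batch (16, 1), except possibly for the final partial batch:
for x_batch, y_batch in train_loader:
    print(x_batch.shape, y_batch.shape)
    break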
Building the LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_stacked_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_stacked_layers = num_stacked_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_stacked_layers,
                            batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        batch_size = x.size(0)
        # Zero-initialize the hidden and cell states for each new batch
        h0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)
        c0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)
        out, _ = self.lstm(x, (h0, c0))
        # Feed only the last timestep's output to the linear layer
        out = self.fc(out[:, -1, :])
        return out
model = LSTM(1, 4, 1)  # input_size=1, hidden_size=4, one stacked layer
model.to(device)
Defining the hyperparameters, loss function, and optimizer
learning_rate = 0.001
num_epochs = 10
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
Creating a training function
def train_one_epoch(epoch):
    model.train(True)
    print(f'Epoch: {epoch + 1}')
    running_loss = 0.0
    for batch_index, batch in enumerate(train_loader):
        x_batch, y_batch = batch[0].to(device), batch[1].to(device)

        output = model(x_batch)
        loss = loss_function(output, y_batch)
        running_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_index % 100 == 99:  # print every 100 batches
            avg_loss_across_batches = running_loss / 100
            print('Batch {0}, Loss: {1:.3f}'.format(batch_index + 1,
                                                    avg_loss_across_batches))
            running_loss = 0.0
    print()
Creating a validation function
def validate_one_epoch():
    model.train(False)
    running_loss = 0.0
    for batch_index, batch in enumerate(test_loader):
        x_batch, y_batch = batch[0].to(device), batch[1].to(device)

        with torch.no_grad():
            output = model(x_batch)
            loss = loss_function(output, y_batch)
            running_loss += loss.item()

    avg_loss_across_batches = running_loss / len(test_loader)
    print('Val Loss: {0:.3f}'.format(avg_loss_across_batches))
    print('***************************************************')
    print()
Running the training loop
for epoch in range(num_epochs):
    train_one_epoch(epoch)
    validate_one_epoch()
Visualizing predictions on the training set
with torch.no_grad():
    predicted = model(X_train.to(device)).to('cpu').numpy()

plt.plot(y_train, label='Actual Close')
plt.plot(predicted, label='Predicted Close')
plt.xlabel('Day')
plt.ylabel('Close')
plt.legend()
plt.show()
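Note that both series in this plot are still in the scaled [-1, 1] range. Below is a minimal sketch of undoing the scaling so the plot shows dollar prices, assuming the scaler fit above: since MinMaxScaler inverts per column, the single predicted column is padded into a dummy matrix matching the lookback + 1 columns it was fit on.
# Pad predictions into a dummy matrix so inverse_transform sees the
# same (lookback + 1)-column layout the scaler was fit on
dummies = np.zeros((X_train.shape[0], lookback + 1))
dummies[:, 0] = predicted.flatten()
train_predictions = scaler.inverse_transform(dummies)[:, 0]

# Do the same for the actual training targets
dummies = np.zeros((X_train.shape[0], lookback + 1))
dummies[:, 0] = y_train.flatten().numpy()
actual_close = scaler.inverse_transform(dummies)[:, 0]

plt.plot(actual_close, label='Actual Close')
plt.plot(train_predictions, label='Predicted Close')
plt.xlabel('Day')
plt.ylabel('Close')
plt.legend()
plt.show()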