Data science is an interdisciplinary study of data whose central focus is the data life cycle and how data is applied to the decision-making process.
An abstract class is a Python class that declares methods we must implement, so we can create a custom dataset by writing a subclass that extends the functionality of the Dataset class.
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# print format
torch.set_printoptions(linewidth=120)
# custom dataset class that extends Dataset and implements the required methods
class OHLC(Dataset):
    def __init__(self, csv_file):
        self.data = pd.read_csv(csv_file)

    def __getitem__(self, index):
        r = self.data.iloc[index]
        label = torch.tensor(r.is_up_day, dtype=torch.long)
        # self.normalize is assumed to be a scaling helper defined on this class
        sample = self.normalize(torch.tensor([r.open, r.high, r.low, r.close]))
        return sample, label

    def __len__(self):
        return len(self.data)
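As a quick usage sketch (the CSV filename here is hypothetical, and normalize is assumed to be defined on the class), an instance of this custom dataset can be handed to a DataLoader just like a built-in one:
# hypothetical usage of the custom dataset above
ohlc_set = OHLC('ohlc_prices.csv')  # assumes a CSV with open, high, low, close and is_up_day columns
ohlc_loader = torch.utils.data.DataLoader(ohlc_set, batch_size=32)
samples, labels = next(iter(ohlc_loader))  # one batch of OHLC samples and their labels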
# Download the FashionMNIST dataset
# train set variable
train_set = torchvision.datasets.FashionMNIST(
    root='./Documents/data'
    ,train=True
    ,download=True  # downloads it locally (checks for existence beforehand)
    ,transform=transforms.Compose([
        transforms.ToTensor()  # built-in tensor transform
    ])
)
# data loader variable with batch size = 40
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=40  # the default batch size is 1
)
print('Balanced Dataset. \nDistribution ->', train_set.targets.bincount())
batch = next(iter(train_loader))
images, labels = batch
grid = torchvision.utils.make_grid(images,nrow=10)
plt.figure(figsize=(15,15))
plt.imshow(np.transpose(grid, (1,2,0)))
print('Labels: ', labels)
Objects are defined in code using classes. A class is the object's specification, or spec: it determines what data and what code each object of that class will have.
When we pass a tensor to our network as input, the tensor flows forward through each layer transformation until the tensor reaches the output layer. This process of a tensor flowing forward through the network is known as a forward pass.
Each layer has its own transformation and the tensor passes forward through each layer's transformation. The composition of all the individual layer forward passes defines the overall forward pass transformation for the network itself.
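As a minimal sketch of that composition idea (the layer sizes here are arbitrary), the overall transformation is just nested function application:
# a forward pass is a composition of per-layer transformations (arbitrary sizes)
import torch.nn as nn
layer1 = nn.Linear(in_features=4, out_features=3)
layer2 = nn.Linear(in_features=3, out_features=2)
x = torch.rand(1, 4)     # a single sample with 4 features
out = layer2(layer1(x))  # the network transformation is layer2 composed with layer1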
Every PyTorch nn.Module has a forward() method that represents the forward pass. The forward() method is what implements the network's transformation.
PyTorch also provides the torch.nn.functional API, which exposes many neural network operations as plain functions that we can use in our class extension implementation.
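For instance, ReLU and max pooling are available as plain functions through torch.nn.functional (imported above as F); a small sketch with made-up tensors:
# functional versions of common operations, no layer object needed
x = torch.tensor([[-1.0, 2.0], [3.0, -4.0]])
print(F.relu(x))  # negative values clamped to zero
img = torch.rand(1, 1, 4, 4)  # a fake 1-channel 4x4 image batch
print(F.max_pool2d(img, kernel_size=2, stride=2))  # spatial size halved to 2x2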
Two types of parameters:
Hyperparameters: values that are chosen manually and somewhat arbitrarily. These are things like kernel size, number of output channels, and number of output features.
Data-dependent parameters: parameters whose values depend on the data (for example, the in_channels of the first conv layer depends on the number of color channels in the input images).
import torch.nn as nn
# A simple class
class Network(nn.Module):  # extending the nn.Module base class
    def __init__(self):
        super(Network, self).__init__()  # initializing the base class
        # prebuilt layers
        # 1 input channel, convolved by 6 different filters
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        # fully connected, or dense, layers
        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        # placeholder; the real forward pass is implemented further down
        return t
When we say that a network is learning, we specifically mean that the network is learning the appropriate values for the learnable parameters. Appropriate values are values that minimize the loss function.
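As a toy illustration of that idea (the numbers are made up), a single gradient-descent step nudges a learnable parameter in the direction that lowers the loss:
# one gradient-descent step on a single learnable parameter w
w = torch.tensor(1.0, requires_grad=True)  # learnable parameter
loss = (w * 2.0 - 3.0) ** 2                # squared error of a tiny prediction
loss.backward()                            # compute d(loss)/dw
with torch.no_grad():
    w -= 0.1 * w.grad                      # move w towards the loss minimum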
If you have trouble understanding how something works, take a look at the source code. You might get a hint on how things work there. It's awesome.
network = Network()
print(network)
print()
print(network.conv2)
print('\nThese weight values are the learnable parameters\nShown partly:\n',network.conv2.weight[0][0])
print(network)
print()
print(network.conv2)
print(network.conv2.weight.shape)
print()
print('Shape of a single filter:')
print(network.conv2.weight[0].shape)
print()
print('Fully Connected 1\n\t',network.fc1.weight.shape)
print('Fully Connected 2\n\t',network.fc2.weight.shape)
print('Fully Connected Out\n\t',network.out.weight.shape)
for name, param in network.named_parameters():
    print(name, '\t\t', param.shape)
for param in network.parameters():
    print(param.shape)
in_features = torch.tensor([1,2,3,4], dtype=torch.float32)
weight_matrix = torch.tensor([
    [1,2,3,4],
    [2,3,4,5],
    [3,4,5,6]
], dtype=torch.float32)
print(weight_matrix.matmul(in_features))
# Same thing using a linear layer
fc = nn.Linear(in_features=4, out_features=3)  # bias=True by default
print(fc)
print(fc(in_features))
fc.weight = nn.Parameter(weight_matrix)
print('Bias:= True\t',fc(in_features))
fc = nn.Linear(in_features=4, out_features=3, bias=False)
fc.weight = nn.Parameter(weight_matrix)
print('Bias:= False\t',fc(in_features))
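What the two runs above show is that nn.Linear stores its weight with shape (out_features, in_features) and applies it as a matrix multiplication, plus a bias term when bias=True; a quick check using the fc and weight_matrix defined above:
# with bias disabled, the linear layer output is exactly the matrix product
print(fc.weight.shape)  # torch.Size([3, 4])
print(torch.allclose(fc(in_features), weight_matrix.matmul(in_features)))  # True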
The forward() method explicitly defines a network's transformation.
The input layer is the identity function -> f(x) = x and exists implicitly
# Implementing the forward method
class Network(nn.Module):  # extending the nn.Module base class
    def __init__(self):
        super(Network, self).__init__()  # initializing the base class
        # prebuilt layers
        # 1 input channel, convolved by 6 different filters
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        # fully connected, or dense, layers
        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        # (1) input layer
        t = t

        # (2) hidden conv layer
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (3) hidden conv layer
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (4) hidden layer reshape
        # 4*4 -> height * width after the conv/pool operations:
        # 28x28 -> conv1 -> 24x24 -> pool -> 12x12 -> conv2 -> 8x8 -> pool -> 4x4
        t = t.reshape(-1, 12*4*4)
        t = self.fc1(t)
        t = F.relu(t)

        # (5) hidden linear layer
        t = self.fc2(t)
        t = F.relu(t)

        # (6) output layer (10 classes)
        # softmax would return positive probabilities that sum to 1,
        # but we don't apply it here because the loss function applies it
        # implicitly (see the sketch after this class definition)
        t = self.out(t)
        # t = F.softmax(t, dim=1) -> done implicitly in the loss
        return t
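To make that comment concrete: F.cross_entropy combines log-softmax and negative log-likelihood in a single call, so applying F.softmax inside forward() would be redundant. A small sketch of the equivalence with made-up scores and labels:
# F.cross_entropy == log-softmax followed by negative log-likelihood
scores = torch.rand(3, 10)         # raw network outputs for 3 samples, 10 classes
targets = torch.tensor([0, 4, 9])  # made-up class labels
loss_a = F.cross_entropy(scores, targets)
loss_b = F.nll_loss(F.log_softmax(scores, dim=1), targets)
print(torch.allclose(loss_a, loss_b))  # True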
# turning off gradient calculation (we are not training yet)
torch.set_grad_enabled(False)
# initialize network
network = Network()
sample = next(iter(train_set))
image, label = sample
image.shape
# the network expects a batch input
# prediction
pred = network(image.unsqueeze(0))
print(label)
print(pred,
      pred.shape,
      pred.argmax(dim=1),
      sep='\n')
data_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=10
)
batch = next(iter(data_loader))
# now we are dealing with batches by default
images, labels = batch
# 10 images with one channel
print(images.shape, labels.shape, sep='\n')
preds = network(images)
print('\nPrediction Probabilities\n', preds)
# using the argmax function
print('\nPredictions: \n', preds.argmax(dim=1))
print('\nLabels: \n', labels)
print('\nCorrect? \n', preds.argmax(dim=1).eq(labels).sum())
def get_num_correct(preds, labels):
    res = preds.argmax(dim=1).eq(labels).sum().item()
    return str(res) + ' items predicted correctly'
get_num_correct(preds, labels)
print(network)
# let's check versions
print(torch.__version__)
print(torchvision.__version__)
# turn gradient calculation back on
torch.set_grad_enabled(True)
# redefine number of correct labels function
def get_num_correct(preds, labels):
    return preds.argmax(dim=1).eq(labels).sum().item()
# A training process for a single batch
network = Network()
print('First Gradient: ', network.conv1.weight.grad)
# data loader with batch size 100
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)
batch = next(iter(train_loader))
images, labels = batch
preds = network(images)
loss = F.cross_entropy(preds, labels)
loss.backward()
optimizer.step()
#------------------
print('Gradient shape after .backward() call ', network.conv1.weight.grad.shape)
print()
print('The First loss is :', loss.item())
preds = network(images)
loss2 = F.cross_entropy(preds, labels)
print('The Second loss is :', loss2.item())
network = Network()
num_epochs = 5
batch_size = 100
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)
optimizer = optim.Adam(network.parameters(), lr=0.01)
# loop over all epochs
for epoch in range(num_epochs):
    # variables to track
    total_loss = 0
    total_correct = 0
    # loop over all batches in the train loader
    for batch in train_loader:
        images, labels = batch
        preds = network(images)
        loss = F.cross_entropy(preds, labels)

        optimizer.zero_grad()  # zero the gradients because PyTorch accumulates them (see the sketch after this loop)
        loss.backward()        # calculate gradients
        optimizer.step()       # update weights

        # update tracking variables
        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)
    # print information
    print("Epoch: ", epoch+1, "\n\tAccuracy:", total_correct/len(train_set),
          "\n\tLoss ", total_loss)
print("\nNumber of optimizer steps per epoch:", len(train_set)/batch_size)
Think about this; it is important for the AI community and for engineers like yourself who are building AI systems. There are hypothetical scenarios that envision runaway AI systems: under this assumption, an AI system pursuing a goal might go rogue, by which I mean unintended consequences like those of the automobile, a terrible polluter that has shaped the layout of all of humanity's cities. Some people think this is the future. But what if I told you it is already the case, for things like the Facebook news feed and the YouTube recommendation system? These artificial intelligence systems are controlled by companies, and these companies are incentivised to make a large profit. If the AI systems are doing their jobs, then I ask you: is it possible for these companies to turn them off? Is it even legal for them to do so? Even if the AI systems work by hijacking the attention spans of humans at large, say 2 billion people more or less controlled by these systems, spending every waking moment at their devices. Well, I'd say the engineers of the future have much to consider.
# predictions for the entire training set
def get_all_preds(model, loader):
    """
    The training set is broken down into batches before
    being passed into the network. If it were passed in all at
    once, it would overwhelm the machine's memory.
    """
    all_preds = torch.tensor([])
    for batch in loader:
        images, labels = batch
        preds = model(images)
        # predictions for all samples in the batch:
        # for each image, a tensor with a prediction for every category
        all_preds = torch.cat(
            (all_preds, preds),
            dim=0
        )
    return all_preds
# turn off gradient tracking to save memory and computation overhead
with torch.no_grad():
    prediction_loader = torch.utils.data.DataLoader(train_set, batch_size=10000)
    train_preds = get_all_preds(network, prediction_loader)
print("Requires grad? ", train_preds.requires_grad)
print(train_preds.shape)
preds_correct = get_num_correct(train_preds, train_set.targets)
print('\nTotal Correct:', preds_correct)
print('Accuracy:', preds_correct/len(train_set))
# stack predictions with targets
stacked = torch.stack(
    (
        train_set.targets,
        train_preds.argmax(dim=1)
    ), dim=1
)
# create a confusion matrix of zeros using torch.zeros
cmt = torch.zeros(10, 10, dtype=torch.int32)
for predicted in stacked:
    true_label, predicted_label = predicted.tolist()
    #cmt[true_label, predicted_label] = cmt[true_label, predicted_label] + 1
    cmt[true_label, predicted_label] += 1
# sklearn method
from sklearn.metrics import confusion_matrix
cmt2 = confusion_matrix(train_set.targets, train_preds.argmax(dim=1))
print(cmt)
#--------------------------------------------------------------------------
# Ploting the confusion matrix
plt.figure(figsize=(10,10))
sns.heatmap(cmt,annot=True)