import torch

def activation(x, func=None):
    """
    Arguments
    ---------
    x: torch.Tensor
    func: None for the sigmoid (default), 'softmax' for a row-wise softmax.
    """
    if func == 'softmax':
        #dim=1 sums each row (across its columns); view(-1, 1) makes the row sums broadcastable.
        return torch.exp(x)/torch.sum(torch.exp(x), dim=1).view(-1, 1)
    #Sigmoid
    return 1/(1+torch.exp(-x))
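As a quick sanity check of both branches (my own illustrative tensor, not from the notebook): the sigmoid branch squashes every entry into (0, 1), while the softmax branch returns rows that sum to 1.
#Illustrative 2x3 input for the activation function defined above
t = torch.tensor([[1.0, 2.0, 3.0], [0.0, 0.0, 0.0]])
print(activation(t))                              # element-wise sigmoid, every value in (0, 1)
print(activation(t, func='softmax'))              # row-wise softmax
print(activation(t, func='softmax').sum(dim=1))   # tensor([1., 1.])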
Below explains view(): it only changes how the data is viewed (its shape), while the underlying memory is unchanged. A value of -1 means that dimension is inferred from the tensor's size.
In [21]: a
Out[21]:
tensor([[ 1, 0, 0],
[ 1, 1, 1],
[-1, 1, 1],
[ 2, 3, 4]])
In [22]: a.sum(dim=1).view(-1, 1)
Out[22]:
tensor([[1],
[3],
[1],
[9]])
In [23]: a/a.sum(dim=1).view(-1, 1)
Out[23]:
tensor([[ 1.0000, 0.0000, 0.0000],
[ 0.3333, 0.3333, 0.3333],
[-1.0000, 1.0000, 1.0000],
[ 0.2222, 0.3333, 0.4444]])
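As a side note, the same row-wise normalization can be written without view() by passing keepdim=True to sum(), which keeps the reduced dimension as size 1; a minimal equivalent sketch using the tensor from the session above:
a = torch.tensor([[1, 0, 0], [1, 1, 1], [-1, 1, 1], [2, 3, 4]])
#Same result as a/a.sum(dim=1).view(-1, 1)
a/a.sum(dim=1, keepdim=True)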
#Generate data for example.
torch.manual_seed(7)
#features
features = torch.randn((1, 5))
#weights
weights = torch.randn_like(features)
#bias
bias = torch.randn((1, 1))
print(f"F:{features},\nW:{weights},\nb:{bias}")
F:tensor([[-0.1468,  0.7861,  0.9468, -1.1143,  1.6908]]),
W:tensor([[-0.8948, -0.3556,  1.2324,  0.1382, -1.6822]]),
b:tensor([[0.3177]])
#For matrix multiplication, reshape the second tensor so it matches the number of columns of features.
#Functions that can be used: torch.reshape(), Tensor.resize_(), Tensor.view()
activation((torch.matmul(features, weights.T))+bias)
tensor([[0.1595]])
weights.shape
torch.Size([1, 5])
activation(torch.sum(features*weights)+bias)
tensor([[0.1595]])
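The comment above also mentions reshape() and view(); a minimal sketch (same tensors as above) showing that turning the weights into a (5, 1) column gives the same result as transposing:
#weights has shape (1, 5); either call turns it into shape (5, 1)
activation(torch.matmul(features, weights.view(5, 1))+bias)      # tensor([[0.1595]])
activation(torch.matmul(features, weights.reshape(5, 1))+bias)   # tensor([[0.1595]])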
#Torch has a function torch.from_numpy() to convert numpy array to tensors.
import numpy as np
np.random.seed(13)
torch.from_numpy(np.random.randint(10,size=(5, 2)))
tensor([[2, 0], [0, 6], [2, 4], [9, 3], [4, 2]])
NOTE: the NumPy array and the tensor share the same memory, so an in-place change to one is reflected in the other.
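A minimal sketch of that shared-memory behaviour (the array here is my own illustrative example):
arr = np.ones(3)
t = torch.from_numpy(arr)
arr[0] = 10      # in-place change to the NumPy array is visible in the tensor
print(t)         # tensor([10.,  1.,  1.], dtype=torch.float64)
t[1] = -1        # and an in-place change to the tensor is visible in the array
print(arr)       # [10. -1.  1.]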
We will use the MNIST dataset of handwritten digits, which comes pre-processed and formatted. The dataset can be loaded with torchvision in PyTorch.
from torchvision import datasets, transforms
#Normalizing the data using transform
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
#Download and load the MNIST dataset
trainset = datasets.MNIST('MNIST_data/',download=True, train=True, transform = transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
(output: the four MNIST archives (train/test images and labels) are downloaded from http://yann.lecun.com/exdb/mnist/ and extracted to MNIST_data/MNIST/raw; torchvision also prints a UserWarning about converting a non-writeable NumPy array.)
Each batch from the loader is a tensor of size (64, 1, 28, 28), that is, a batch of 64 images with 1 channel of 28 x 28 pixels. Note that a $(d_1, d_2, \dots, d_n)$-dimensional array/tensor can be read as nested lists:
each $d_i$ gives the length of one level of nesting, so we have a list of $d_1$ lists, each containing $d_2$ lists, each containing $d_3$ lists, and so on.
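For example, a hypothetical tensor of shape (2, 3, 4) is a list of 2 lists, each holding 3 lists of 4 numbers:
t = torch.zeros(2, 3, 4)
print(t.shape)     # torch.Size([2, 3, 4])
print(len(t))      # 2 outer lists
print(t[0].shape)  # each of them is a (3, 4) block: torch.Size([3, 4])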
iter_data = iter(trainloader)
images, labels = next(iter_data)
print(images.shape)
print(labels.shape)
torch.Size([64, 1, 28, 28])
torch.Size([64])
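Since ToTensor() scales pixels to [0, 1] and Normalize((0.5,), (0.5,)) then subtracts 0.5 and divides by 0.5, the batch values should land in [-1, 1]; a quick check (the exact extremes depend on the batch):
#ToTensor maps pixels to [0, 1]; Normalize((0.5,), (0.5,)) maps that range to [-1, 1]
print(images.min().item(), images.max().item())   # approximately -1.0 and 1.0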
import matplotlib.pyplot as plt
%matplotlib inline
plt.imshow(images[1].numpy().squeeze(), cmap='Greys_r')
<matplotlib.image.AxesImage at 0x7f8dbd63f510>
#Flatten the input images into a 64 x 784 matrix, where each row is a 28*28 = 784 pixel image.
torch.manual_seed(13)
img_input = torch.flatten(images, start_dim=1)
#Assigning weights to the first layer: 784 weights for each of the 256 hidden units.
W_1 = torch.randn((784, 256))
bias = torch.randn((256))
#Assigning weights to the output layer
W_2 = torch.randn((256, 10))
bias1 = torch.randn((10))
#Using sigmoid as activation and output function
out = activation(torch.matmul(activation(torch.matmul(img_input, W_1)+bias), W_2)+bias1)
#Using softmax for activation and output function
out1 = activation(torch.matmul(activation(torch.matmul(img_input, W_1)+bias, func='softmax'), W_2)+bias1, func='softmax')
#Using sigmoid for activation and softmax for the final output.
out2 = activation(torch.matmul(activation(torch.matmul(img_input, W_1)+bias), W_2)+bias1, func='softmax')
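The difference between these outputs shows up in how each row is normalized; a quick check (row sums of the pure-sigmoid output are arbitrary, row sums of the softmax output are 1):
print(out.sum(dim=1)[:3])    # sigmoid output: row sums are not 1
print(out2.sum(dim=1)[:3])   # softmax output: every row sums to 1 (see also the full check below)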
torch.sum(torch.matmul(img_input, W_1)+bias, dim=0).shape
torch.Size([256])
torch.flatten(images, start_dim=1).shape
torch.Size([64, 784])
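The same flattening can also be done with the reshaping functions mentioned earlier; a minimal sketch showing they all yield a (64, 784) matrix:
print(images.view(images.shape[0], -1).shape)   # torch.Size([64, 784])
print(images.reshape(64, -1).shape)             # torch.Size([64, 784])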
import seaborn as sns
Plotting the class probabilities produced by the network above, which has not been trained.
fig, ax = plt.subplots(nrows=1, ncols=4, figsize=(16, 8))
#One of the 64 input images (index 1, the same image shown above).
ax[0].imshow(img_input[1].numpy().reshape((28, 28)), cmap='Reds')
#Proba class of network without training
sns.barplot(y=list(range(10)), x=out[:1,:].numpy().squeeze(), ax=ax[1], orient='h')
ax[1].set_title('Class Probability --- Logistic')
ax[1].set_xlabel('Probability')
ax[1].set_ylabel('Class')
#Proba class using softmax.
sns.barplot(y=list(range(10)), x=out1[:1, :].numpy().squeeze(), ax=ax[2], orient='h')
ax[2].set_title('Class Probability --- Softmax')
ax[2].set_xlabel('Probability')
ax[2].set_ylabel('Class')
#Proba class using sigmoid activations with a softmax output.
sns.barplot(y=list(range(10)), x=out2[:1, :].numpy().squeeze(), ax=ax[3], orient='h')
ax[3].set_title('Class Probability --- Logistic-Softmax')
ax[3].set_xlabel('Probability')
ax[3].set_ylabel('Class')
Text(0, 0.5, 'Class')
out2.sum(dim=1)
tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000])
from torch import nn
#Build a feed-forward network.
#Note that the input is a batch of 64 flattened images, that is, a matrix of size (64, 784).
#Each column of a weight matrix holds the weights feeding one neuron of that layer.
#(64, 784) x (784, 128) x (128, 64) x (64, 10) = (64, 10)
#The result is a (log-)probability for each class, for each image in the batch.
model = nn.Sequential(nn.Linear(784, 128), nn.ReLU(),
                      nn.Linear(128, 64), nn.ReLU(),
                      nn.Linear(64, 10), nn.LogSoftmax(dim=1))
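For reference, the same architecture can also be written as an nn.Module subclass; this is a sketch of an equivalent model, not the notebook's code:
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        #log-softmax over the class dimension, as in the Sequential model above
        return torch.log_softmax(self.fc3(x), dim=1)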
#Loss function
criterion = nn.NLLLoss()
#data
images, labels = next(iter(trainloader))
#Flatten images
images = images.view(images.shape[0], -1)
logits = model(images)
#loss calculation
loss = criterion(logits, labels)
print(loss)
tensor(2.3218, grad_fn=<NllLossBackward>)
logits.shape
torch.Size([64, 10])
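As an aside, nn.CrossEntropyLoss combines LogSoftmax and NLLLoss in one step; a sketch of an equivalent setup (model_raw and criterion_ce are hypothetical names) that feeds the raw scores straight into the loss:
#Drop nn.LogSoftmax from the model and use CrossEntropyLoss instead
model_raw = nn.Sequential(nn.Linear(784, 128), nn.ReLU(),
                          nn.Linear(128, 64), nn.ReLU(),
                          nn.Linear(64, 10))
criterion_ce = nn.CrossEntropyLoss()
loss_ce = criterion_ce(model_raw(images), labels)   # same kind of loss, computed from raw scores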
Autograd is a reverse-mode automatic differentiation system, and a good introduction is provided on the PyTorch page.
We need the error associated with the output of each neuron, and to correct it we need certain partial derivatives; autograd simplifies this task by keeping track of the operations performed on tensors. In simple terms, autograd works with the Jacobian $J$, which is just the derivative (a linear transformation) of a multivariable function: given any vector $v$ (the direction of the derivative), it produces the product $Jv$.
Note that the chain of partial derivatives can be represented by a tree.
So if we want to calculate $\partial z/\partial s$, where $z$ depends on $s$ through intermediate variables $x$ and $y$, we add up the contributions along the paths of the tree: \begin{equation} \frac{\partial z}{\partial s}= \frac{\partial z}{\partial x} \frac{\partial x}{\partial s} + \frac{\partial z}{\partial y} \frac{\partial y}{\partial s} \end{equation} In a similar manner, autograd calculates the partial derivatives of the required function. From the documentation:
Autograd relies on the user to write thread safe C++ hooks. If you want the hook to be correctly applied in multithreading environment, you will need to write proper thread locking code to ensure the hooks are thread safe.
In Python we don't need to worry about this because of the GIL.
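Returning to the chain-rule sum above, here is a minimal sketch that verifies it with autograd; the concrete functions $x = s^2$, $y = 3s$ and $z = xy$ are my own illustrative choices:
s = torch.tensor(2.0, requires_grad=True)
x = s**2     # dx/ds = 2s
y = 3*s      # dy/ds = 3
z = x*y      # dz/dx = y, dz/dy = x
z.backward()
#Chain rule: dz/ds = y*2s + x*3 = 6*4 + 4*3 = 36
print(s.grad)   # tensor(36.)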
In the following example we use the flag requires_grad=True to keep track of the operations on the tensor.
#Autograd
#To keep a track of operations that created the tensor we have to set requires_grad = True.
x = torch.randn(2, 2, requires_grad=True)
x
tensor([[ 0.9017, -0.4343], [ 0.1861, 0.1828]], requires_grad=True)
y = x**2
y
tensor([[0.8131, 0.1886], [0.0346, 0.0334]], grad_fn=<PowBackward0>)
In the following we see that the grad_fn is <PowBackward0 at 0x7f8dadefa7d0>, the function recorded because we raised the original tensor to the power 2.
#The operation that created y can be seen as follows
y.grad_fn
<PowBackward0 at 0x7f8dadefa7d0>
The above shows that it is the PowBackward function.
z = torch.exp(y)
z.grad_fn
<ExpBackward at 0x7f8dadf0d390>
The above shows that it is the ExpBackward function.
z1 = z.mean()
z1
tensor(1.3829, grad_fn=<MeanBackward0>)
#No value has been assigned to the grad attribute yet, as we haven't called the backward method on the output.
print(x.grad)
None
z1.backward(retain_graph=True)
print(x.grad)
tensor([[ 1.0166, -0.2622], [ 0.0964, 0.0945]])
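We can verify this gradient by hand: z1 = mean(exp(x^2)), so each entry of the gradient is exp(x^2)*2x divided by the 4 elements of the mean; a quick check:
#Analytic gradient of z1 = mean(exp(x**2)) with respect to x
print(torch.allclose(x.grad, torch.exp(x**2)*2*x/4))   # True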
x = torch.tensor([1., 4.], requires_grad=True)
y = x**2
y.backward()
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-44-ab75bb780f4c> in <module>()
----> 1 y.backward()
...
RuntimeError: grad can be implicitly created only for scalar outputs
The above error was thrown because y is not a scalar, so in this case we need to supply the direction for the gradient evaluation. Suppose we want to evaluate it in the direction of $v = (1, 4)^{T}$; the result is $Jv$, where $J$ is the Jacobian. Hence, we have
\begin{equation} Jv = \begin{pmatrix} 2 & 0\\ 0 & 8 \end{pmatrix} \begin{pmatrix} 1\\ 4 \end{pmatrix} = \begin{pmatrix} 2\\ 32 \end{pmatrix} \end{equation}
y.backward(gradient=torch.tensor([1., 4.]), retain_graph=True)
print(x.grad)
tensor([ 2., 32.])
t = torch.tensor([1., 2.], requires_grad=True)
#function (x1, x2) --> (x1^3, x2^3)
z = t**3
#gradient in the direction of (1, 1)
z.backward(gradient=torch.tensor([1., 1.]), retain_graph=True)
#check that it is correct: the gradient must be (3, 12)
print(t.grad==torch.tensor([3., 12.]))
tensor([True, True])