Logistic Regression

Sigmoid Function

A sigmoid function is a bounded, differentiable, real function that is defined for all real input values, has a non-negative derivative at each point, and has exactly one inflection point. Some such functions are listed below; this Wikipedia page has good examples.

Logistic Function \begin{equation} f(x)=\frac{1}{1+e^{-x}} \end{equation}

Hyperbolic Tangent \begin{equation} f(x)=\tanh(x) \end{equation}

Generalized Logistic Function \begin{equation} f(x)=(1+e^{-x})^{-\alpha}, \quad \alpha > 0 \end{equation}
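As a quick illustration, here is a minimal NumPy sketch of these three functions (the function names and the `alpha` parameter are illustrative):

```python
import numpy as np

def logistic(x):
    # Standard logistic function: 1 / (1 + exp(-x))
    return 1.0 / (1.0 + np.exp(-x))

def hyperbolic_tangent(x):
    # tanh, a sigmoid bounded between -1 and 1
    return np.tanh(x)

def generalized_logistic(x, alpha=1.0):
    # Generalized logistic: (1 + exp(-x))^(-alpha), alpha > 0
    return (1.0 + np.exp(-x)) ** (-alpha)

x = np.linspace(-6, 6, 5)
print(logistic(x))
print(hyperbolic_tangent(x))
print(generalized_logistic(x, alpha=2.0))
```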

\begin{equation} \text{Error Function}=-\frac{1}{m}\sum_{i=1}^m\left[(1-y_i)\ln(1-p_i)+y_i\ln(p_i)\right] \end{equation}

Here we divide by m for convenience when taking derivatives, and m is the number of rows (training examples) in the dataset.
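A minimal NumPy sketch of this error computation, assuming `y` holds the labels and `p` the predicted probabilities:

```python
import numpy as np

def error_function(y, p):
    # -(1/m) * sum[(1 - y_i) ln(1 - p_i) + y_i ln(p_i)]
    return -np.mean((1 - y) * np.log(1 - p) + y * np.log(p))

y = np.array([0, 1, 1, 0])          # labels
p = np.array([0.1, 0.8, 0.6, 0.3])  # predicted probabilities
print(error_function(y, p))
```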

Gradient of the Error Function.

Since we use the sigmoid function to calculate the probabilities, $p_i=\text{sigmoid}(Wx^{(i)}+b)$, where $x^{(i)}$ is the $i^{\text{th}}$ row of the dataset and W is the corresponding vector of weights/coefficients in the linear equation.
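For concreteness, a small sketch of computing these probabilities for a whole toy dataset at once (the arrays `X`, `W`, `b` are illustrative; `X` holds one example per row):

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

# Toy dataset: 4 rows (examples), 2 columns (features)
X = np.array([[0.5, 1.2],
              [1.0, -0.3],
              [-0.7, 0.8],
              [0.2, 0.1]])
W = np.array([0.4, -0.6])  # one weight per feature
b = 0.1                    # bias

# p_i = sigmoid(W x^(i) + b), computed for all rows at once
p = sigmoid(X @ W + b)
print(p)
```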

To minimize the error we need the gradient of the error function, which is a straightforward application of derivatives.

\begin{align} \text{sigmoid}'(x)&=\frac{e^{-x}}{(1+e^{-x})^2}\\ &=\text{sigmoid}(x)\,(1-\text{sigmoid}(x)) \end{align}
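A quick numerical sanity check of this identity, using a central finite difference with an arbitrary small step `h`:

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

x = np.linspace(-4, 4, 9)
h = 1e-6

# Central finite-difference approximation of sigmoid'(x)
numeric = (sigmoid(x + h) - sigmoid(x - h)) / (2 * h)
# Closed form: sigmoid(x) * (1 - sigmoid(x))
analytic = sigmoid(x) * (1 - sigmoid(x))

print(np.max(np.abs(numeric - analytic)))  # close to zero
```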

The error function depends on the weights and the bias; that is, it is a scalar-valued function from $\mathbb{R}^{n+1}$ to $\mathbb{R}$. \begin{equation} E(W, b)=-\frac{1}{m}\sum_{i=1}^m\left[(1-y_i)\ln(1-p_i)+y_i\ln(p_i)\right] \end{equation} Where $W=(w_1, w_2, \cdots, w_n)$.

Note that, by the chain rule, $$\frac{\partial p_i}{\partial w_j}=\frac{\partial (\text{sigmoid}(Wx^{(i)}+b))}{\partial w_j}=\text{sigmoid}'(Wx^{(i)}+b)\cdot x^{(i)}_j=p_i(1-p_i)\cdot x^{(i)}_j$$

To evaluate the gradient of E we differentiate the term $(1-y_i)\ln(1-p_i)+y_i\ln(p_i)$, call it $E_i$, with respect to $w_j$. \begin{align} \frac{\partial E_i}{\partial w_j}&=\frac{\partial}{\partial w_j}\left[(1-y_i)\ln(1-p_i)+y_i\ln(p_i)\right]\\ &=-(1-y_i)p_i\cdot x^{(i)}_j+y_i(1-p_i)\cdot x^{(i)}_j\\ &= (y_i-p_i)\,x^{(i)}_j \end{align}

Note: $x^{(i)}_j$ is the entry in the $i^{\text{th}}$ row and $j^{\text{th}}$ column of the dataset.

Finally, the $j^{\text{th}}$ component of the gradient is \begin{equation} \frac{\partial E}{\partial w_j}=-\frac{1}{m}\sum_{i=1}^m(y_i-p_i)\cdot x^{(i)}_j \end{equation} and, by the same calculation, $\frac{\partial E}{\partial b}=-\frac{1}{m}\sum_{i=1}^m(y_i-p_i)$.

We use this in the gradient descent algorithm (which is also used in quadratic minimization problems).

The above gradient can be written in matrix form \begin{equation} \nabla_W E=-\frac{1}{m}X^{T}(Y-P) \end{equation} where $X$ is the $m\times n$ data matrix and $Y$, $P$ are the column vectors of labels and predicted probabilities.
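As a hedged NumPy sketch on random toy data (all variable names are illustrative), the matrix form and the explicit sum over rows give the same gradient:

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

rng = np.random.default_rng(0)
X = rng.normal(size=(5, 3))       # m = 5 examples, n = 3 features
Y = rng.integers(0, 2, size=5)    # binary labels
W = rng.normal(size=3)
b = 0.0

P = sigmoid(X @ W + b)            # predicted probabilities

# Matrix form: grad_W E = -(1/m) X^T (Y - P); bias derivative is the mean residual
grad_W = -X.T @ (Y - P) / len(Y)
grad_b = -np.mean(Y - P)

# The same gradient written as the sum over rows, for comparison
grad_W_sum = -sum((Y[i] - P[i]) * X[i] for i in range(len(Y))) / len(Y)
print(np.allclose(grad_W, grad_W_sum))  # True
```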

Let W denote the initialised weights.

Algorithm

W = W - learnrate*(grad(E))
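Written out with the gradient derived above and a learning rate $\alpha$, one such update step for each weight, and for the bias, is:

\begin{align} w_j &\leftarrow w_j - \alpha\frac{\partial E}{\partial w_j} = w_j + \frac{\alpha}{m}\sum_{i=1}^m(y_i-p_i)\,x^{(i)}_j\\ b &\leftarrow b + \frac{\alpha}{m}\sum_{i=1}^m(y_i-p_i) \end{align}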

Functions for regression.

Let us define the functions for the regression. We will require a sigmoid function, a probability function, an error function, and finally a function which updates the weights. We also add two functions for plotting the error and the boundary line.
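The original code cell is not reproduced here; the following is a minimal sketch of what such helpers could look like, assuming NumPy arrays, Matplotlib for the plots, and the batch gradient derived above (the names `output_formula`, `error_formula`, `update_weights`, `plot_error`, and `plot_boundary` are illustrative, not necessarily the author's):

```python
import numpy as np
import matplotlib.pyplot as plt

def sigmoid(x):
    # Logistic sigmoid
    return 1.0 / (1.0 + np.exp(-x))

def output_formula(X, weights, bias):
    # Predicted probabilities p_i = sigmoid(W x^(i) + b) for every row of X
    return sigmoid(np.dot(X, weights) + bias)

def error_formula(y, output):
    # Mean cross-entropy error
    return -np.mean(y * np.log(output) + (1 - y) * np.log(1 - output))

def update_weights(X, y, weights, bias, learnrate):
    # One batch gradient-descent step: W <- W - learnrate * grad_W(E), and similarly for b
    output = output_formula(X, weights, bias)
    weights = weights + learnrate * np.dot(X.T, y - output) / len(y)
    bias = bias + learnrate * np.mean(y - output)
    return weights, bias

def plot_error(errors):
    # Error curve over the training epochs
    plt.plot(errors)
    plt.xlabel("Epoch")
    plt.ylabel("Error")
    plt.show()

def plot_boundary(X, y, weights, bias):
    # Scatter the 2-D data and draw the line w1*x1 + w2*x2 + b = 0
    plt.scatter(X[:, 0], X[:, 1], c=y)
    x1 = np.linspace(X[:, 0].min(), X[:, 0].max(), 50)
    x2 = -(weights[0] * x1 + bias) / weights[1]
    plt.plot(x1, x2, "k-")
    plt.show()
```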

Training the dataset

The function below trains the model on the dataset and plots the error and the boundary line for two-dimensional data.
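Again as a sketch, reusing the helper functions defined above (the default epoch count and learning rate are arbitrary):

```python
import numpy as np

def train(X, y, learnrate=0.01, epochs=1000):
    # Uses output_formula, error_formula, update_weights,
    # plot_error and plot_boundary from the sketch above.
    n_features = X.shape[1]
    weights = np.random.normal(scale=1 / n_features ** 0.5, size=n_features)
    bias = 0.0
    errors = []
    for _ in range(epochs):
        weights, bias = update_weights(X, y, weights, bias, learnrate)
        out = output_formula(X, weights, bias)
        errors.append(error_formula(y, out))
    plot_error(errors)
    if n_features == 2:
        plot_boundary(X, y, weights, bias)
    return weights, bias
```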

Final Stage