I am writing a program for an assignment (in a course I am auditing). I am pasting it below:
# Assignment 2 skeleton code # This code shows you how to use the 'argparse' library to read in parameters import argparse import math import matplotlib.pyplot as plt import numpy as np import pandas as pd import random as rn from dispkernel import dispKernel # Command Line Arguments parser = argparse.ArgumentParser(description='generate training and validation data for assignment 2') parser.add_argument('trainingfile', help='name stub for training data and label output in csv format', default="train") parser.add_argument('validationfile', help='name stub for validation data and label output in csv format', default="valid") parser.add_argument('-numtrain', help='number of training samples', type= int, default=200) parser.add_argument('-numvalid', help='number of validation samples', type= int, default=20) parser.add_argument('seed', help='random seed', type= int, default=1) parser.add_argument('learningrate', help='learning rate', type= float, default=0.1) parser.add_argument('actfunction', help='activation functions', choices=['sigmoid', 'relu', 'linear'], default='linear') parser.add_argument('numepoch', help='number of epochs', type= int, default=50) args = parser.parse_args() traindataname = args.trainingfile + "data.csv" trainlabelname = args.trainingfile + "label.csv" print("training data file name: ", traindataname) print("training label file name: ", trainlabelname) validdataname = args.validationfile + "data.csv" validlabelname = args.validationfile + "label.csv" print("validation data file name: ", validdataname) print("validation label file name: ", validlabelname) print("number of training samples = ", args.numtrain) print("number of validation samples = ", args.numvalid) print("learning rate = ", args.learningrate) print("number of epoch = ", args.numepoch) print("activation function is ", args.actfunction) # read in training data t_data = pd.read_csv(args.trainingfile, ',', header=None).values t_label = pd.read_csv('trainlabel.csv', ',', 
header=None).values row_dim_t = t_data.shape[1] col_dim_t = t_data.shape[0] # read in validation data v_data = pd.read_csv(args.validationfile, ',', header=None).values v_label = pd.read_csv('validlabel.csv', ',', header=None).values row_dim_v = v_data.shape[1] col_dim_v = v_data.shape[0] np.random.seed(args.seed) # initialize weights w = np.random.rand(row_dim_t, 1) # initialize bias b = np.random.uniform(0, 1) n_epoch = [] loss_t = [] loss_v = [] accuracy_t = [] accuracy_v = [] Z_t = np.zeros([col_dim_t, 1]) Z_v = np.zeros([col_dim_v, 1]) guess_t_label = np.zeros([col_dim_t, 1]) guess_v_label = np.zeros([col_dim_v, 1]) accuracy_j_t = np.zeros([col_dim_t, 1]) accuracy_j_v = np.zeros([col_dim_v, 1]) Y_t = np.zeros([col_dim_t, 1]) Y_v = np.zeros([col_dim_v, 1]) loss_j_t = np.zeros([col_dim_t, 1]) loss_j_v = np.zeros([col_dim_v, 1]) grad_loss_w = np.zeros([col_dim_t, row_dim_v]) grad_loss_b = np.zeros([col_dim_t, 1]) class Linear: def __init__(self, Z, data, label): self.Z = Z self.data = data self.label = label # pass Z through an activation function def act_linear(self): return self.Z # gradient of the loss wrt weights def grad_loss_w(self, Y): return 2*(Y - self.label)*self.data # gradient of the loss wrt bias def grad_loss_b(self, Y): return 2*(Y - self.label) for i in range(0, args.numepoch): n_epoch.append(i) # calculate predictor for training data Z_t[:] = np.dot(t_data[:, :], w[:]) + b # predict training label based on output # of predictor guess_t_label[:] = (Z_t >= 0.5) # determine whether predicted label is # correct or not accuracy_j_t[:] = 1 - np.absolute(guess_t_label[:] - t_label[:]) # calculate accuracy for training data accuracy_t.append(np.sum(accuracy_j_t[:], axis=0)/col_dim_t) # calculate predictor for validation data Z_v[:] = np.dot(v_data[:, :], w[:]) + b # predict validation label based on output # of predictor guess_v_label[:] = (Z_v >= 0.5) # determine whether predicted label is # correct or not accuracy_j_v[:] = 1 - 
np.absolute(guess_v_label[:] - v_label[:]) # calculate accuracy for validation data accuracy_v.append(np.sum(accuracy_j_v[:], axis=0)/col_dim_v) l_t = Linear(Z_t, t_data, t_label) l_v = Linear(Z_v, v_data, v_label) # pass Z through an activation function Y_t[:] = l_t.act_linear() Y_v[:] = l_v.act_linear() # calculate loss across all training data loss_j_t[:] = (Y_t[:] - t_label)**2 loss_t.append(np.sum(loss_j_t[:], axis=0)/col_dim_t) # calculate loss across all validation data loss_j_v[:] = (Y_v[:] - v_label)**2 loss_v.append(np.sum(loss_j_v[:], axis=0)/col_dim_v) grad_loss_w[:] = l_t.grad_loss_w(Y_t) grad_loss_b[:] = l_t.grad_loss_b(Y_t) # average gradient across all inputs avg_loss_w = np.sum(grad_loss_w[:], axis=0)/col_dim_t avg_loss_b = np.sum(grad_loss_b[:], axis=0)/col_dim_t # update weights and bias w[:] -= np.reshape(avg_loss_w, (row_dim_t, 1))*args.learningrate b -= avg_loss_b*args.learningrate # plot loss vs number of epochs fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 6)) ax1.plot(n_epoch, loss_t, 'b.', label='Training Data') ax1.plot(n_epoch, loss_v, 'r.', label='Validation Data') ax1.set(xlabel='Epoch', ylabel='Loss', title='Loss vs Number of Epochs \n with Activation ' + str.capitalize(args.actfunction)) ax1.legend() # plot accuracy vs number of epochs ax2.plot(n_epoch, accuracy_t, 'b.', label='Training Data') ax2.plot(n_epoch, accuracy_v, 'r.', label='Validation Data') ax2.set(xlabel='Epoch', ylabel='Accuracy', title='Accuracy vs Number of Epochs \n with Activation ' + str.capitalize(args.actfunction)) ax2.legend() plt.savefig('loss_accuracy.png') The training file contains 200 rows of 9 columns. In each row, there's a series of 1s and 0s. If the arrangement of 1s and 0s forms an X pattern (ie [1, 0, 1, 0, 1, 0, 1, 0, 1], this is assigned a label 1. If the 1s and 0s don't form an X, the label is 0. I'm using a linear activation function, and a mean squared error loss function. 
I'm also using 15 for the seed, 0.3 for the learning rate, and 10 for the epoch number. However, I keep running into problems. At 0.3, the weights are correct — I plot the weights on a grid and get an X pattern. The plots of epoch vs. loss and accuracy give me rubbish — I get underfitted plots. At lower learning rates, the plots look OK (the accuracy still looks terrible no matter what I do), but the weights are wrong — the learning rate is too slow. Please help!! -- https://mail.python.org/mailman/listinfo/python-list