# -*- coding: utf-8 -*-
"""
Created on Tue Apr 21 16:49:53 2020

@author: AMAZING-GRACE

Binary logistic regression implemented from scratch with NumPy, using
SciPy's truncated Newton solver (``fmin_tnc``) to minimise the cost.
"""

import numpy as np
import pandas as pd
from scipy.optimize import fmin_tnc


class LogisticRegressionUsingGD:
    """Binary logistic regression trained by minimising the log-loss.

    After :meth:`fit` has been called, the learned weights are available
    as the 1-D array ``w_``.
    """

    @staticmethod
    def sigmoid(x):
        """Map any real value (or array of values) into the interval (0, 1).

        Computed as ``exp(-log(1 + exp(-x)))`` through ``np.logaddexp`` so
        that large negative inputs do not overflow ``np.exp``.
        """
        return np.exp(-np.logaddexp(0, -x))

    @staticmethod
    def net_input(theta, x):
        """Return the weighted sum of the inputs, ``x . theta``."""
        return np.dot(x, theta)

    def probability(self, theta, x):
        """Return the probability that each row of ``x`` belongs to class 1."""
        return self.sigmoid(self.net_input(theta, x))

    def cost_function(self, theta, x, y):
        """Compute the mean log-loss over all training samples.

        Parameters
        ----------
        theta: array-like, shape = [n_features] or [n_features, 1]
            Weights at which the cost is evaluated
        x: array-like, shape = [n_samples, n_features]
            Training samples
        y: array-like, shape = [n_samples] or [n_samples, 1]
            Target classes (0 or 1)
        Returns
        -------
        total_cost: scalar cost value
        """
        # Flatten both sides so a column-vector y and a flat theta (or the
        # reverse) cannot silently broadcast into an (n, n) matrix.
        z = np.ravel(self.net_input(theta, x))
        y = np.ravel(y)
        # log(p) = -logaddexp(0, -z) and log(1 - p) = -logaddexp(0, z).
        # Working with the log-probabilities directly keeps the cost finite
        # when the probabilities saturate to exactly 0 or 1.
        total_cost = np.mean(
            y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z))
        return total_cost

    def gradient(self, theta, x, y):
        """Compute the gradient of the cost function at the point theta.

        The result has the same shape as ``theta`` when ``y`` has the
        matching shape (flat with flat, column with column).
        """
        m = x.shape[0]
        return (1 / m) * np.dot(x.T, self.probability(theta, x) - y)

    def fit(self, x, y, theta):
        """Train the model from the training data.

        Uses scipy's ``fmin_tnc`` to minimise :meth:`cost_function`, with
        :meth:`gradient` supplied as its derivative.

        Parameters
        ----------
        x: array-like, shape = [n_samples, n_features]
            Training samples
        y: array-like, shape = [n_samples] or [n_samples, 1]
            Target classes
        theta: array-like, shape = [n_features] or [n_features, 1]
            Initial weights
        Returns
        -------
        self: An instance of self
        """
        # The solver works on flat parameter vectors, so the targets and
        # the initial weights are flattened to keep every shape consistent.
        targets = np.asarray(y).flatten()
        start = np.asarray(theta, dtype=float).flatten()
        opt_weights = fmin_tnc(func=self.cost_function, x0=start,
                               fprime=self.gradient, args=(x, targets))
        # fmin_tnc returns (solution, n_evaluations, return_code).
        self.w_ = opt_weights[0]
        return self

    def predict(self, x):
        """Predict the probability of class 1 for every sample.

        Parameters
        ----------
        x: array-like, shape = [n_samples, n_features]
            Test samples
        Returns
        -------
        probabilities: array, shape = [n_samples, 1]
            Values in (0, 1); compare against a threshold to get labels
        """
        theta = self.w_[:, np.newaxis]
        return self.probability(theta, x)

    def accuracy(self, x, actual_classes, probab_threshold=0.5):
        """Compute the accuracy of the classifier.

        Parameters
        ----------
        x: array-like, shape = [n_samples, n_features]
            Samples to classify
        actual_classes: array-like, shape = [n_samples] or [n_samples, 1]
            True class labels for ``x``
        probab_threshold: threshold/cutoff to categorize the samples into
            different classes
        Returns
        -------
        accuracy: accuracy of the model, as a percentage
        """
        predicted_classes = (self.predict(x) >= probab_threshold).astype(int)
        predicted_classes = predicted_classes.flatten()
        # Flatten the labels too: comparing a flat array with a column
        # vector would broadcast to (n, n) and report a wrong accuracy.
        actual_classes = np.asarray(actual_classes).flatten()
        accuracy = np.mean(predicted_classes == actual_classes)
        return accuracy * 100


################ TESTING OUR MODEL ###############################################

def main():
    """Train and evaluate the model on the exam-marks data in ``marks.txt``."""
    # Imported here so the class above stays importable when matplotlib is
    # not installed; plotting is only needed for this demo.
    import matplotlib.pyplot as plt

    # marks.txt has no header row; without header=None the first sample
    # would be consumed as column names and dropped from the data.
    data = pd.read_csv("marks.txt", header=None)

    # features = all the columns except the last column
    features = data.iloc[:, :-1]

    # labels = target values, last column of the data frame
    labels = data.iloc[:, -1]

    # filter out the applicants that got admitted
    admitted = data.loc[labels == 1]

    # filter out the applicants that didn't get admission
    not_admitted = data.loc[labels == 0]

    # plots
    plt.scatter(admitted.iloc[:, 0], admitted.iloc[:, 1], s=10,
                label='Admitted')
    plt.scatter(not_admitted.iloc[:, 0], not_admitted.iloc[:, 1], s=10,
                label='Not Admitted')
    plt.legend()

    # preparing the data for building the model: prepend the bias column
    X = np.c_[np.ones((features.shape[0], 1)), features]
    # Convert to NumPy first: multi-dimensional indexing directly on a
    # pandas Series is not supported by current pandas versions.
    y = labels.to_numpy()[:, np.newaxis]
    theta = np.zeros((X.shape[1], 1))

    model = LogisticRegressionUsingGD()
    model.fit(X, y, theta)
    accuracy = model.accuracy(X, y.flatten())
    parameters = model.w_
    print("The accuracy of the model is {}".format(accuracy))
    print("The model parameters using Gradient descent")
    print("\n")
    print(parameters)

    # Display the scatter plot last so the results are printed first.
    plt.show()


if __name__ == "__main__":
    main()
- ---- -For issues on this challenge kindly reach out to the AI+campus/city managers - -**Twitter**: [@DataScienceNIG](https://twitter.com/DataScienceNIG), [@elishatofunmi](https://twitter.com/Elishatofunmi), [@o_funminiyi](https://twitter.com/o_funminiyi), [@gbganalyst](https://twitter.com/gbganalyst) - -or - -**Call**: +2349062000119,+2349080564419. - -Good luck! +# Olutomilayo Amazing-Grace Logistics_Regression_from_Scratch +Implementing standard logistic regression from scratch + +Logistic Regression is a generalized Linear Regression in which we do not output the weighted +sum of inputs directly, but pass it through a function (sigmoid) that maps any real value to a value between 0 +and 1. + +In the Logistic regression class, the hypothesis (sigmoid, net_input, probability functions), the cost function, gradient descent, train (fit, predict functions) and accuracy were written. + +The created model was tested on marks.txt data +The numpy module was used for mathematical calculations +The matplotlib module was used for plotting generated data +The scipy module was used to minimize the cost function using its gradient + diff --git a/marks.txt b/marks.txt new file mode 100644 index 0000000..092e4c0 --- /dev/null +++ b/marks.txt @@ -0,0 +1,100 @@ +34.62365962451697,78.0246928153624,0 +30.28671076822607,43.89499752400101,0 +35.84740876993872,72.90219802708364,0 +60.18259938620976,86.30855209546826,1 +79.0327360507101,75.3443764369103,1 +45.08327747668339,56.3163717815305,0 +61.10666453684766,96.51142588489624,1 +75.02474556738889,46.55401354116538,1 +76.09878670226257,87.42056971926803,1 +84.43281996120035,43.53339331072109,1 +95.86155507093572,38.22527805795094,0 +75.01365838958247,30.60326323428011,0 +82.30705337399482,76.48196330235604,1 +69.36458875970939,97.71869196188608,1 +39.53833914367223,76.03681085115882,0 +53.9710521485623,89.20735013750205,1 +69.07014406283025,52.74046973016765,1 +67.94685547711617,46.67857410673128,0
+70.66150955499435,92.92713789364831,1 +76.97878372747498,47.57596364975532,1 +67.37202754570876,42.83843832029179,0 +89.67677575072079,65.79936592745237,1 +50.534788289883,48.85581152764205,0 +34.21206097786789,44.20952859866288,0 +77.9240914545704,68.9723599933059,1 +62.27101367004632,69.95445795447587,1 +80.1901807509566,44.82162893218353,1 +93.114388797442,38.80067033713209,0 +61.83020602312595,50.25610789244621,0 +38.78580379679423,64.99568095539578,0 +61.379289447425,72.80788731317097,1 +85.40451939411645,57.05198397627122,1 +52.10797973193984,63.12762376881715,0 +52.04540476831827,69.43286012045222,1 +40.23689373545111,71.16774802184875,0 +54.63510555424817,52.21388588061123,0 +33.91550010906887,98.86943574220611,0 +64.17698887494485,80.90806058670817,1 +74.78925295941542,41.57341522824434,0 +34.1836400264419,75.2377203360134,0 +83.90239366249155,56.30804621605327,1 +51.54772026906181,46.85629026349976,0 +94.44336776917852,65.56892160559052,1 +82.36875375713919,40.61825515970618,0 +51.04775177128865,45.82270145776001,0 +62.22267576120188,52.06099194836679,0 +77.19303492601364,70.45820000180959,1 +97.77159928000232,86.7278223300282,1 +62.07306379667647,96.76882412413983,1 +91.56497449807442,88.69629254546599,1 +79.94481794066932,74.16311935043758,1 +99.2725269292572,60.99903099844988,1 +90.54671411399852,43.39060180650027,1 +34.52451385320009,60.39634245837173,0 +50.2864961189907,49.80453881323059,0 +49.58667721632031,59.80895099453265,0 +97.64563396007767,68.86157272420604,1 +32.57720016809309,95.59854761387875,0 +74.24869136721598,69.82457122657193,1 +71.79646205863379,78.45356224515052,1 +75.3956114656803,85.75993667331619,1 +35.28611281526193,47.02051394723416,0 +56.25381749711624,39.26147251058019,0 +30.05882244669796,49.59297386723685,0 +44.66826172480893,66.45008614558913,0 +66.56089447242954,41.09209807936973,0 +40.45755098375164,97.53518548909936,1 +49.07256321908844,51.88321182073966,0 +80.27957401466998,92.11606081344084,1 
+66.74671856944039,60.99139402740988,1 +32.72283304060323,43.30717306430063,0 +64.0393204150601,78.03168802018232,1 +72.34649422579923,96.22759296761404,1 +60.45788573918959,73.09499809758037,1 +58.84095621726802,75.85844831279042,1 +99.82785779692128,72.36925193383885,1 +47.26426910848174,88.47586499559782,1 +50.45815980285988,75.80985952982456,1 +60.45555629271532,42.50840943572217,0 +82.22666157785568,42.71987853716458,0 +88.9138964166533,69.80378889835472,1 +94.83450672430196,45.69430680250754,1 +67.31925746917527,66.58935317747915,1 +57.23870631569862,59.51428198012956,1 +80.36675600171273,90.96014789746954,1 +68.46852178591112,85.59430710452014,1 +42.0754545384731,78.84478600148043,0 +75.47770200533905,90.42453899753964,1 +78.63542434898018,96.64742716885644,1 +52.34800398794107,60.76950525602592,0 +94.09433112516793,77.15910509073893,1 +90.44855097096364,87.50879176484702,1 +55.48216114069585,35.57070347228866,0 +74.49269241843041,84.84513684930135,1 +89.84580670720979,45.35828361091658,1 +83.48916274498238,48.38028579728175,1 +42.2617008099817,87.10385094025457,1 +99.31500880510394,68.77540947206617,1 +55.34001756003703,64.9319380069486,1 +74.77589300092767,89.52981289513276,1 \ No newline at end of file