135 changes: 135 additions & 0 deletions Olutomilayo_Amazing-Grace_lgd.py
@@ -0,0 +1,135 @@
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 21 16:49:53 2020

@author: AMAZING-GRACE
"""


import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.optimize import fmin_tnc

class LogisticRegressionUsingGD:

@staticmethod
def sigmoid(x):
        # Activation function that maps any real value into the interval (0, 1)
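        # e.g. sigmoid(0) = 0.5; sigmoid(z) -> 1 as z -> +inf and -> 0 as z -> -inf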
return 1 / (1 + np.exp(-x))

@staticmethod
def net_input(theta, x):
        # Computes the weighted sum of the inputs, as in linear regression

return np.dot(x, theta)

def probability(self, theta, x):
# Calculates the probability that an instance belongs to a particular class

return self.sigmoid(self.net_input(theta, x))

def cost_function(self, theta, x, y):
# Computes the cost function for all the training samples
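        # J(theta) = -(1/m) * sum(y * log(h) + (1 - y) * log(1 - h)), the binary cross-entropy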
m = x.shape[0]
total_cost = -(1 / m) * np.sum(
y * np.log(self.probability(theta, x)) + (1 - y) * np.log(
1 - self.probability(theta, x)))
return total_cost

def gradient(self, theta, x, y):
# Computes the gradient of the cost function at the point theta
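        # Vectorized form: grad = (1/m) * x^T (sigmoid(x theta) - y)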
m = x.shape[0]
return (1 / m) * np.dot(x.T, self.sigmoid(self.net_input(theta, x)) - y)

def fit(self, x, y, theta):
"""trains the model from the training data
Uses the fmin_tnc function that is used to find the minimum for any function
It takes arguments as
1) func : function to minimize
2) x0 : initial values for the parameters
3) fprime: gradient for the function defined by 'func'
4) args: arguments passed to the function
Parameters
----------
x: array-like, shape = [n_samples, n_features]
Training samples
y: array-like, shape = [n_samples, n_target_values]
Target classes
theta: initial weights
Returns
-------
self: An instance of self
"""

opt_weights = fmin_tnc(func=self.cost_function, x0=theta, fprime=self.gradient,
args=(x, y.flatten()))
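        # fmin_tnc returns (optimized parameters, number of function evaluations, return code)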
self.w_ = opt_weights[0]
return self

def predict(self, x):
""" Predicts the class labels
Parameters
----------
x: array-like, shape = [n_samples, n_features]
Test samples
Returns
-------
predicted class labels
"""
theta = self.w_[:, np.newaxis]
return self.probability(theta, x)

def accuracy(self, x, actual_classes, probab_threshold=0.5):
"""Computes the accuracy of the classifier
Parameters
----------
x: array-like, shape = [n_samples, n_features]
Training samples
actual_classes : class labels from the training data set
probab_threshold: threshold/cutoff to categorize the samples into different classes
Returns
-------
accuracy: accuracy of the model
"""
predicted_classes = (self.predict(x) >= probab_threshold).astype(int)
predicted_classes = predicted_classes.flatten()
accuracy = np.mean(predicted_classes == actual_classes)
return accuracy * 100


################ TESTING OUR MODEL ###############################################

data = pd.read_csv("marks.txt", header=None)  # the file has no header row

# X = feature values, all the columns except the last column
X = data.iloc[:, :-1]

# y = target values, last column of the data frame
y = data.iloc[:, -1]

# filter out the applicants that got admitted
admitted = data.loc[y == 1]

# filter out the applicants that didn't get admitted
not_admitted = data.loc[y == 0]

# plots
plt.scatter(admitted.iloc[:, 0], admitted.iloc[:, 1], s=10, label='Admitted')
plt.scatter(not_admitted.iloc[:, 0], not_admitted.iloc[:, 1], s=10,
            label='Not Admitted')
plt.legend()
plt.show()

# preparing the data for building the model

X = np.c_[np.ones((X.shape[0], 1)), X]  # prepend a column of ones for the bias term
y = y.values[:, np.newaxis]  # pandas Series -> column vector of shape [n_samples, 1]
theta = np.zeros((X.shape[1], 1))

model = LogisticRegressionUsingGD()
model.fit(X, y, theta)
accuracy = model.accuracy(X, y.flatten())
parameters = model.w_
print("The accuracy of the model is {}".format(accuracy))
print("The model parameters using Gradient descent")
print("\n")
print(parameters)
55 changes: 14 additions & 41 deletions README.md
@@ -1,41 +1,14 @@
# ML-Logistic-regression-algorithm-challenge


![DSN logo](DSN_logo.png)|DSN Algorithm Challenge|
|---|---|

A lot of data scientists and machine learning enthusiasts use machine learning algorithms as black boxes without knowing how they work or the mathematics behind them. The purpose of this challenge is to encourage the mathematical understanding of machine learning algorithms, their break and yield points.

In summary, participants are encouraged to understand the fundamental concepts behind machine learning algorithms/models.


The rules and guidelines for this challenge are as follows:

1. Ensure to register at https://bit.ly/dsnmlhack

2. The algorithm challenge is open to all.

3. Participants are expected to design and develop the Logistic Regression algorithm from scratch using Python or R programming.

4. For python developers (numpy is advisable).

5. To push your solution to us, make a [pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) to DSN's GitHub page at https://www.github.com/datasciencenigeria/ML-Logistic-regression-algorithm-challenge. Ensure to add your readme file to understand your code.

6. The top 3 optimized solutions will be compensated as follows:

- **1st position**: 20GB data plan.
- **2nd position**: 15GB data plan.
- **3rd position**: 10GB data plan.

7. Add your scripts and readme.MD file as a folder saved as your full name (surname_first_middle name) by making a pull request to the repository.

---
For issues on this challenge kindly reach out to the AI+campus/city managers

**Twitter**: [@DataScienceNIG](https://twitter.com/DataScienceNIG), [@elishatofunmi](https://twitter.com/Elishatofunmi), [@o_funminiyi](https://twitter.com/o_funminiyi), [@gbganalyst](https://twitter.com/gbganalyst)

or

**Call**: +2349062000119,+2349080564419.

Good luck!
# Olutomilayo Amazing-Grace Logistic_Regression_from_Scratch
Implementing standard logistic regression from scratch

Logistic regression is a generalization of linear regression in which we do not output the weighted
sum of the inputs directly; instead, the weighted sum is passed through the sigmoid function, which maps
any real value into the interval (0, 1).
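
For intuition, here is a minimal NumPy sketch of the hypothesis (illustrative names only; the actual methods live in the class described below):

```python
import numpy as np

def sigmoid(z):
    # maps any real value z into the open interval (0, 1)
    return 1.0 / (1.0 + np.exp(-z))

# For samples X (with a leading column of ones for the bias) and weights theta,
# the hypothesis h = sigmoid(X @ theta) is read as P(y = 1 | x).
def hypothesis(X, theta):
    return sigmoid(X @ theta)
```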

The logistic regression class implements the hypothesis (the sigmoid, net_input and probability methods), the cost function, the gradient, training (the fit and predict methods), and accuracy.

The created model was tested on the marks.txt data.

- The numpy module was used for the mathematical calculations.
- The matplotlib module was used to plot the data.
- The scipy module (fmin_tnc) was used to minimize the cost function with the help of the analytic gradient.
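
A minimal usage sketch, assuming the class definition from the script above is in scope and marks.txt sits in the working directory:

```python
import numpy as np
import pandas as pd

data = pd.read_csv("marks.txt", header=None)  # two mark columns + 0/1 admission label per row
X = np.c_[np.ones((data.shape[0], 1)), data.iloc[:, :-1]]  # bias column + features
y = data.iloc[:, -1].values[:, np.newaxis]                 # column vector of labels

model = LogisticRegressionUsingGD()
model.fit(X, y, np.zeros((X.shape[1], 1)))
print(model.accuracy(X, y.flatten()))  # accuracy as a percentage
```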

100 changes: 100 additions & 0 deletions marks.txt
@@ -0,0 +1,100 @@
34.62365962451697,78.0246928153624,0
30.28671076822607,43.89499752400101,0
35.84740876993872,72.90219802708364,0
60.18259938620976,86.30855209546826,1
79.0327360507101,75.3443764369103,1
45.08327747668339,56.3163717815305,0
61.10666453684766,96.51142588489624,1
75.02474556738889,46.55401354116538,1
76.09878670226257,87.42056971926803,1
84.43281996120035,43.53339331072109,1
95.86155507093572,38.22527805795094,0
75.01365838958247,30.60326323428011,0
82.30705337399482,76.48196330235604,1
69.36458875970939,97.71869196188608,1
39.53833914367223,76.03681085115882,0
53.9710521485623,89.20735013750205,1
69.07014406283025,52.74046973016765,1
67.94685547711617,46.67857410673128,0
70.66150955499435,92.92713789364831,1
76.97878372747498,47.57596364975532,1
67.37202754570876,42.83843832029179,0
89.67677575072079,65.79936592745237,1
50.534788289883,48.85581152764205,0
34.21206097786789,44.20952859866288,0
77.9240914545704,68.9723599933059,1
62.27101367004632,69.95445795447587,1
80.1901807509566,44.82162893218353,1
93.114388797442,38.80067033713209,0
61.83020602312595,50.25610789244621,0
38.78580379679423,64.99568095539578,0
61.379289447425,72.80788731317097,1
85.40451939411645,57.05198397627122,1
52.10797973193984,63.12762376881715,0
52.04540476831827,69.43286012045222,1
40.23689373545111,71.16774802184875,0
54.63510555424817,52.21388588061123,0
33.91550010906887,98.86943574220611,0
64.17698887494485,80.90806058670817,1
74.78925295941542,41.57341522824434,0
34.1836400264419,75.2377203360134,0
83.90239366249155,56.30804621605327,1
51.54772026906181,46.85629026349976,0
94.44336776917852,65.56892160559052,1
82.36875375713919,40.61825515970618,0
51.04775177128865,45.82270145776001,0
62.22267576120188,52.06099194836679,0
77.19303492601364,70.45820000180959,1
97.77159928000232,86.7278223300282,1
62.07306379667647,96.76882412413983,1
91.56497449807442,88.69629254546599,1
79.94481794066932,74.16311935043758,1
99.2725269292572,60.99903099844988,1
90.54671411399852,43.39060180650027,1
34.52451385320009,60.39634245837173,0
50.2864961189907,49.80453881323059,0
49.58667721632031,59.80895099453265,0
97.64563396007767,68.86157272420604,1
32.57720016809309,95.59854761387875,0
74.24869136721598,69.82457122657193,1
71.79646205863379,78.45356224515052,1
75.3956114656803,85.75993667331619,1
35.28611281526193,47.02051394723416,0
56.25381749711624,39.26147251058019,0
30.05882244669796,49.59297386723685,0
44.66826172480893,66.45008614558913,0
66.56089447242954,41.09209807936973,0
40.45755098375164,97.53518548909936,1
49.07256321908844,51.88321182073966,0
80.27957401466998,92.11606081344084,1
66.74671856944039,60.99139402740988,1
32.72283304060323,43.30717306430063,0
64.0393204150601,78.03168802018232,1
72.34649422579923,96.22759296761404,1
60.45788573918959,73.09499809758037,1
58.84095621726802,75.85844831279042,1
99.82785779692128,72.36925193383885,1
47.26426910848174,88.47586499559782,1
50.45815980285988,75.80985952982456,1
60.45555629271532,42.50840943572217,0
82.22666157785568,42.71987853716458,0
88.9138964166533,69.80378889835472,1
94.83450672430196,45.69430680250754,1
67.31925746917527,66.58935317747915,1
57.23870631569862,59.51428198012956,1
80.36675600171273,90.96014789746954,1
68.46852178591112,85.59430710452014,1
42.0754545384731,78.84478600148043,0
75.47770200533905,90.42453899753964,1
78.63542434898018,96.64742716885644,1
52.34800398794107,60.76950525602592,0
94.09433112516793,77.15910509073893,1
90.44855097096364,87.50879176484702,1
55.48216114069585,35.57070347228866,0
74.49269241843041,84.84513684930135,1
89.84580670720979,45.35828361091658,1
83.48916274498238,48.38028579728175,1
42.2617008099817,87.10385094025457,1
99.31500880510394,68.77540947206617,1
55.34001756003703,64.9319380069486,1
74.77589300092767,89.52981289513276,1