diff --git a/CostGradient.py b/CostGradient.py
new file mode 100644
index 0000000..e8c6d9b
--- /dev/null
+++ b/CostGradient.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[2]:
+
+
+import numpy as np
+from SigmoidFunction import sigmoid
+def cost(coefs, X, y, lmbda):
+    '''Regularised cross-entropy cost; the intercept term coefs[0] is not penalised.'''
+    m = len(y)
+    y_1 = np.multiply(y, np.log(sigmoid(np.dot(X, coefs))))
+    y_0 = np.multiply(1 - y, np.log(1 - sigmoid(np.dot(X, coefs))))
+    return np.sum(y_1 + y_0) / (-m) + np.sum(coefs[1:] ** 2) * lmbda / (2 * m)
+def gradient(coefs, X, y, lmbda):
+    '''Gradient of the regularised cost with respect to coefs.'''
+    m = len(y)
+    error = sigmoid(np.dot(X, coefs)) - y
+    grad_coefs = np.dot(X.T, error) / m + coefs * lmbda / m
+    grad_coefs[0] = grad_coefs[0] - coefs[0] * lmbda / m  # undo the penalty on the intercept
+    return grad_coefs
+
diff --git a/DSN_logo.png b/DSN_logo.png
deleted file mode 100644
index b88c2bc..0000000
Binary files a/DSN_logo.png and /dev/null differ
diff --git a/Ogbonna_Chibuike_Stephen b/Ogbonna_Chibuike_Stephen
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/Ogbonna_Chibuike_Stephen
@@ -0,0 +1 @@
+
diff --git a/README.md b/README.md
index d3696d5..b6bd2f2 100644
--- a/README.md
+++ b/README.md
@@ -1,41 +1,20 @@
 # ML-Logistic-regression-algorithm-challenge
-
-![DSN logo](DSN_logo.png)|DSN Algorithm Challenge|
-|---|---|
-
-A lot of data scientists or machine learning enthusiasts do use various machine learning algorithms as a black box without knowing how they work or the mathematics behind it. The purpose of this challenge is to encourage the mathematical understanding of machine learning algorithms, their break and yield point.
-
-In summary, participants are encouraged to understand the fundamental concepts behind machine learning algorithms/models.
-
-
-The rules and guidelines for this challenge are as follows:
-
-1. Ensure to register at https://bit.ly/dsnmlhack
-
-2. The algorithm challenge is open to all.
-
-3. Participants are expected to design and develop the Logistic Regression algorithm from scratch using Python or R programming.
-
-4. For python developers (numpy is advisable).
-
-5. To push your solution to us, make a [pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) to DSN's GitHub page at https://www.github.com/datasciencenigeria/ML-Logistic-regression-algorithm-challenge. Ensure to add your readme file to understand your code.
-
-6. The top 3 optimized code will be compensated as follows:
-
-- **1st position**: 20GB data plan.
-- **2nd position**: 15GB data plan.
-- **3rd position**: 10GB data plan.
-
-7. Add your scripts and readme.MD file as a folder saved as your full name (surname_first_middle name) by making a pull request to the repository.
-
----
-For issues on this challenge kindly reach out to the AI+campus/city managers
-
-**Twitter**: [@DataScienceNIG](https://twitter.com/DataScienceNIG), [@elishatofunmi](https://twitter.com/Elishatofunmi), [@o_funminiyi](https://twitter.com/o_funminiyi), [@gbganalyst](https://twitter.com/gbganalyst)
-
-or
-
-**Call**: +2349062000119,+2349080564419.
-
-Good luck!
-
+I created a regularized logistic regression classifier that performs both binary and multiclass classification, with conjugate gradient descent (SciPy's `fmin_cg`) as the optimizer.
+First, I created two Python scripts, SigmoidFunction.py and CostGradient.py. These two scripts are imported into the RegularizedLogisticRegression.ipynb notebook.
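+
+Below is a minimal usage sketch of the `LogisticRegression` class defined in the notebook. It assumes the class is already in scope (for example, after running the notebook cell), and the synthetic data is purely illustrative:
+
+```python
+import numpy as np
+
+# Toy binary problem: the sign of a single feature determines the class.
+rng = np.random.RandomState(0)
+X = rng.randn(100, 1)
+y = (X[:, 0] > 0).astype(int)
+
+clf = LogisticRegression(lmbda=0.1, num_iter=100)
+clf.fit(X, y)
+print(clf.predict_proba(X[:5]))  # predicted probabilities for the first five rows
+print(clf.score(X, y))           # accuracy as a percentage
+```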
+
diff --git a/RegularizedLogisticRegression.ipynb b/RegularizedLogisticRegression.ipynb
new file mode 100644
index 0000000..cf4d6d8
--- /dev/null
+++ b/RegularizedLogisticRegression.ipynb
@@ -0,0 +1,124 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import scipy.optimize as opt\n",
+    "from CostGradient import cost, gradient\n",
+    "from SigmoidFunction import sigmoid\n",
+    "\n",
+    "class LogisticRegression:\n",
+    "    '''\n",
+    "    Regularised logistic regression with conjugate gradient optimisation\n",
+    "    '''\n",
+    "    def __init__(self, lmbda=0.1, num_iter=100, fit_intercept=True, threshold=0.5):\n",
+    "        self.lmbda = lmbda\n",
+    "        self.num_iter = num_iter\n",
+    "        self.fit_intercept = fit_intercept\n",
+    "        self.threshold = threshold\n",
+    "\n",
+    "    def intercept(self, X):\n",
+    "        '''Prepend a column of ones so the first coefficient acts as the y-intercept'''\n",
+    "        ones = np.ones((len(X), 1))\n",
+    "        return np.c_[ones, X]\n",
+    "\n",
+    "    def fit(self, X, y):\n",
+    "        if self.fit_intercept:\n",
+    "            X = self.intercept(X)\n",
+    "        (m, n) = X.shape\n",
+    "        lmbda = self.lmbda\n",
+    "        self.num_class = np.unique(y).size\n",
+    "\n",
+    "        # binary logistic regression\n",
+    "        if self.num_class == 2:\n",
+    "            coefs = np.zeros(n)\n",
+    "            coefs = opt.fmin_cg(f=cost, x0=coefs, fprime=gradient,\n",
+    "                                args=(X, y.flatten(), lmbda), maxiter=self.num_iter, disp=False)\n",
+    "\n",
+    "        # one-vs-all for more than two classes: one binary fit per class\n",
+    "        elif self.num_class > 2:\n",
+    "            coefs = np.zeros((self.num_class, n))\n",
+    "            for i in range(self.num_class):\n",
+    "                coefs[i] = opt.fmin_cg(f=cost, x0=coefs[i], fprime=gradient,\n",
+    "                                       args=(X, (y == i).flatten(), lmbda), maxiter=self.num_iter, disp=False)\n",
+    "        self.coefs = coefs\n",
+    "\n",
+    "    def predict_proba(self, X):\n",
+    "        '''Sigmoid of the linear scores'''\n",
+    "        if self.fit_intercept:\n",
+    "            X = self.intercept(X)\n",
+    "        self.proba = sigmoid(np.dot(X, self.coefs.T))\n",
+    "        return self.proba\n",
+    "\n",
+    "    def predict(self, X):\n",
+    "        if self.num_class == 2:\n",
+    "            predict = (self.predict_proba(X) >= self.threshold).astype(int)\n",
+    "        elif self.num_class > 2:\n",
+    "            if self.fit_intercept:\n",
+    "                X = self.intercept(X)\n",
+    "            # sigmoid is monotonic, so argmax of the raw scores is the most probable class\n",
+    "            predict = np.argmax(np.dot(X, self.coefs.T), axis=1)\n",
+    "        return predict\n",
+    "\n",
+    "    def score(self, X, y):\n",
+    "        '''Accuracy as a percentage'''\n",
+    "        pred = self.predict(X)\n",
+    "        return np.mean(pred == y.flatten()) * 100\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.4"
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/SigmoidFunction.py b/SigmoidFunction.py
new file mode 100644
index 0000000..eaca743
--- /dev/null
+++ b/SigmoidFunction.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[1]:
+
+
+import numpy as np
+
+def sigmoid(z):
+    '''Logistic function: maps any real score into the interval (0, 1)'''
+    return 1/(1+np.exp(-z))
+
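
One caveat on the sigmoid above, noted here rather than inside the patch: for large negative `z`, `np.exp(-z)` overflows and `sigmoid(z)` underflows to exactly 0, so the `np.log(sigmoid(...))` and `np.log(1 - sigmoid(...))` terms in CostGradient.py can return `-inf`. A common workaround is SciPy's numerically stable `scipy.special.expit` plus clipping before the log; the sketch below is illustrative, and the helper name `safe_log_sigmoid` and the `eps` value are assumptions rather than anything the patch defines:

```python
import numpy as np
from scipy.special import expit  # numerically stable sigmoid shipped with SciPy

def safe_log_sigmoid(z, eps=1e-15):
    # Clip probabilities away from 0 and 1 so np.log never returns -inf.
    p = np.clip(expit(z), eps, 1 - eps)
    return np.log(p)
```

With a guard like this, the cost stays finite even when the optimiser wanders into a region where some predictions saturate.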
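
Since CostGradient.py derives the gradient by hand, it is also worth checking it numerically. Here is a small sketch using `scipy.optimize.check_grad`; the data, coefficient vector, and the regularisation value 0.1 are made up purely for illustration:

```python
import numpy as np
from scipy.optimize import check_grad
from CostGradient import cost, gradient

# Made-up data: 20 samples, an intercept column plus 3 features.
rng = np.random.RandomState(1)
X = np.c_[np.ones(20), rng.randn(20, 3)]
y = rng.randint(0, 2, size=20)
coefs = rng.randn(4)

# check_grad returns the norm of the difference between the analytic
# gradient and a finite-difference estimate; it should be close to zero.
print(check_grad(cost, gradient, coefs, X, y, 0.1))
```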