From c20cb040b391587f1edd5ff165c20897f6aac4b8 Mon Sep 17 00:00:00 2001 From: h2o4sure01 Date: Fri, 17 Apr 2020 15:09:16 +0100 Subject: [PATCH] Logistics Regression from sratch --- Untitled.ipynb | 276 +++++++++++++++++++++++++++++++++++++++++++++++++ exchange.py | 52 ++++++++++ 2 files changed, 328 insertions(+) create mode 100644 Untitled.ipynb create mode 100644 exchange.py diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 0000000..4d83f1a --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,276 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from sklearn.datasets import load_breast_cancer" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "data = load_breast_cancer()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,\n", + " 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,\n", + " 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,\n", + " 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,\n", + " 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,\n", + " 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,\n", + " 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,\n", + " 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 
0, 1, 0, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0,\n", + " 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,\n", + " 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0,\n", + " 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1,\n", + " 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1,\n", + " 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,\n", + " 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,\n", + " 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,\n", + " 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1])" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X = data.data\n", + "y = data.target\n", + "y" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# the logistic function or sigmod function\n", + "def sigmod(X,w):\n", + " z = np.dot(X,w)\n", + " return 1 / (1 + np.exp(-z))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# calculate for the loss \n", + "\n", + "def loss(h,y):\n", + " return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "def SGD(X, h, y):\n", + " return np.dot(X.T, (h - y)) / y.shape[0]\n", + "def update_w_l(w, l_rate, gradient):\n", + " return w - l_rate * gradient" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "num_iter = 100000\n", + "\n", + "intercept = np.ones((X.shape[0], 1)) \n", + "X = 
np.concatenate((intercept, X), axis=1)\n", + "theta = np.zeros(X.shape[1])\n", + "\n", + "for i in range(num_iter):\n", + " h = sigmod(X, theta)\n", + " gradient = SGD(X, h, y)\n", + " theta = update_w_l(theta, 0.1, gradient)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "result = sigmod(X, theta)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "result = pd.DataFrame(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
01.0
11.0
21.0
31.0
41.0
......
5641.0
5651.0
5661.0
5671.0
5681.0
\n", + "

569 rows × 1 columns

\n", + "
"""Logistic regression from scratch, demonstrated on the breast-cancer data.

The module defines a small batch-gradient-descent ``LogisticRegression``
class.  When executed as a script it fits the model on the first two
features of scikit-learn's breast-cancer dataset and prints the training
accuracy.
"""
import numpy as np


class LogisticRegression:
    """Binary logistic regression fitted with batch gradient descent.

    Parameters
    ----------
    lr : float
        Step size applied to the gradient on every iteration.
    num_iter : int
        Number of gradient-descent iterations run by ``fit``.
    fit_intercept : bool
        When true, a column of ones is prepended to the inputs so that
        ``theta[0]`` acts as the bias term.
    verbose : bool
        When true (the default, which keeps the original behaviour), the
        loss is printed every 100 iterations during ``fit``.
    """

    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logs so
    # that a saturated prediction cannot produce log(0).
    _EPS = 1e-15

    def __init__(self, lr=0.01, num_iter=10, fit_intercept=True,
                 verbose=True):
        self.lr = lr
        self.num_iter = num_iter
        self.fit_intercept = fit_intercept
        self.verbose = verbose

    def intercepts(self, X):
        """Return ``X`` with a leading column of ones (the bias feature)."""
        intercept = np.ones((X.shape[0], 1))
        return np.concatenate((intercept, X), axis=1)

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        The exponent is always taken of a non-positive number, so large
        negative ``z`` cannot overflow ``np.exp`` the way the naive
        formula does.
        """
        z = np.asarray(z, dtype=float)
        e = np.exp(-np.abs(z))
        # For z >= 0: 1 / (1 + exp(-z)); for z < 0: exp(z) / (1 + exp(z)).
        return np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    def loss(self, h, y):
        """Return the mean binary cross-entropy of probabilities ``h``.

        ``h`` is clipped away from exactly 0 and 1 first; without the
        clip a saturated prediction yields ``0 * log(0)`` which is NaN.
        """
        h = np.clip(np.asarray(h, dtype=float), self._EPS, 1.0 - self._EPS)
        y = np.asarray(y, dtype=float)
        return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

    def fit(self, X, y):
        """Estimate ``self.theta`` by batch gradient descent.

        Parameters
        ----------
        X : array-like, two-dimensional (samples x features)
        y : array-like of 0/1 labels, one per row of ``X``

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional, if there are no samples, or
            if ``X`` and ``y`` disagree on the number of samples.
        """
        X = np.asarray(X, dtype=float)
        # Flatten y so a column vector cannot broadcast (h - y) to (n, n).
        y = np.asarray(y, dtype=float).ravel()
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array (samples x features)")
        if y.size == 0 or X.shape[0] != y.size:
            raise ValueError(
                "X and y must contain the same, non-zero number of samples"
            )

        if self.fit_intercept:
            X = self.intercepts(X)

        # weights initialization
        self.theta = np.zeros(X.shape[1])

        for i in range(self.num_iter):
            h = self.sigmoid(np.dot(X, self.theta))
            gradient = np.dot(X.T, (h - y)) / y.size
            self.theta -= self.lr * gradient

            if self.verbose and i % 100 == 0:
                # Report the loss of the freshly updated weights.
                h = self.sigmoid(np.dot(X, self.theta))
                print(f'loss: {self.loss(h, y)} \t')

    def predict_prob(self, X):
        """Return the predicted probability of class 1 for each row of X."""
        X = np.asarray(X, dtype=float)
        if self.fit_intercept:
            X = self.intercepts(X)

        return self.sigmoid(np.dot(X, self.theta))

    def predict(self, X, threshold=0.5):
        """Return a boolean array: probability of class 1 >= ``threshold``.

        ``threshold`` is a probability, so only values in [0, 1] are
        meaningful; anything above 1 makes every prediction ``False``.
        """
        return self.predict_prob(X) >= threshold


def main():
    """Fit the model on two breast-cancer features and print accuracy."""
    # Imported here so the class above can be imported without
    # scikit-learn being installed.
    from sklearn.datasets import load_breast_cancer

    data = load_breast_cancer()
    X = data.data[:, :2]
    y = (data.target != 0) * 1

    model = LogisticRegression(lr=0.1, num_iter=30000)
    model.fit(X, y)
    # The original used threshold=10, which a probability can never reach.
    preds = model.predict(X, threshold=0.5)
    # accuracy: fraction of training samples classified correctly
    accuracy = (preds == y).mean()
    print(f'accuracy: {accuracy:.4f}')


if __name__ == "__main__":
    main()