From 6b7aecd13da55a62f6008e374af86f89219be375 Mon Sep 17 00:00:00 2001
From: Deekshant Kaul <44500641+dkaul1995@users.noreply.github.com>
Date: Sat, 11 Oct 2025 13:24:16 -0700
Subject: [PATCH] Add unit test for lightweight SVM sentiment analysis
---
...iment_Analysis_SVM_Scikit-checkpoint.ipynb | 6 +
...iment_analysis_svm_scikit-checkpoint.ipynb | 6 +
Sentiment_Analysis_SVM_Scikit.ipynb | 747 ++++++++++++++++++
logs.log | 184 +++++
...is_svm_scikit.cpython-310-pytest-7.4.4.pyc | Bin 0 -> 1549 bytes
.../test_sentiment_analysis_svm_scikit.ipynb | 24 +
tests/test_sentiment_analysis_svm_scikit.py | 24 +
tests_sentiment_analysis_svm_scikit.ipynb | 66 ++
8 files changed, 1057 insertions(+)
create mode 100644 .ipynb_checkpoints/Sentiment_Analysis_SVM_Scikit-checkpoint.ipynb
create mode 100644 .ipynb_checkpoints/tests_sentiment_analysis_svm_scikit-checkpoint.ipynb
create mode 100644 Sentiment_Analysis_SVM_Scikit.ipynb
create mode 100644 logs.log
create mode 100644 tests/__pycache__/test_sentiment_analysis_svm_scikit.cpython-310-pytest-7.4.4.pyc
create mode 100644 tests/test_sentiment_analysis_svm_scikit.ipynb
create mode 100644 tests/test_sentiment_analysis_svm_scikit.py
create mode 100644 tests_sentiment_analysis_svm_scikit.ipynb
diff --git a/.ipynb_checkpoints/Sentiment_Analysis_SVM_Scikit-checkpoint.ipynb b/.ipynb_checkpoints/Sentiment_Analysis_SVM_Scikit-checkpoint.ipynb
new file mode 100644
index 0000000..363fcab
--- /dev/null
+++ b/.ipynb_checkpoints/Sentiment_Analysis_SVM_Scikit-checkpoint.ipynb
@@ -0,0 +1,6 @@
+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/.ipynb_checkpoints/tests_sentiment_analysis_svm_scikit-checkpoint.ipynb b/.ipynb_checkpoints/tests_sentiment_analysis_svm_scikit-checkpoint.ipynb
new file mode 100644
index 0000000..363fcab
--- /dev/null
+++ b/.ipynb_checkpoints/tests_sentiment_analysis_svm_scikit-checkpoint.ipynb
@@ -0,0 +1,6 @@
+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Sentiment_Analysis_SVM_Scikit.ipynb b/Sentiment_Analysis_SVM_Scikit.ipynb
new file mode 100644
index 0000000..e01c7b0
--- /dev/null
+++ b/Sentiment_Analysis_SVM_Scikit.ipynb
@@ -0,0 +1,747 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "27794ed3-e7d7-4f0c-b597-9b18e50a2aa4",
+ "metadata": {},
+ "source": [
+ "# Sentiment Analysis with SVM using scikit-learn\n",
+ "\n",
+ "This example demonstrates how to perform **text sentiment classification** using **Support Vector Machines (SVM)** with [scikit-learn](https://scikit-learn.org/).\n",
+ "\n",
+ "We’ll use the **TweetEval** dataset (a benchmark dataset for Twitter sentiment: positive, negative, neutral).\n",
+ "\n",
+ "### Objectives\n",
+ "- Load text data using the Hugging Face `datasets` library \n",
+ "- Use scikit-learn’s `TfidfVectorizer` to turn raw text into TF-IDF features \n",
+ "- Train and evaluate a linear SVM classifier (`LinearSVC`) \n",
+ "- Test predictions on sample text\n",
+ "\n",
+ "> Labels: \n",
+ "> 0 → Negative, 1 → Neutral, 2 → Positive\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "53841238-e36c-4702-ae43-821580cb1ba6",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: scikit-learn in ./env/lib/python3.13/site-packages (1.7.2)\n",
+ "Requirement already satisfied: datasets in ./env/lib/python3.13/site-packages (4.2.0)\n",
+ "Requirement already satisfied: numpy>=1.22.0 in ./env/lib/python3.13/site-packages (from scikit-learn) (2.3.3)\n",
+ "Requirement already satisfied: scipy>=1.8.0 in ./env/lib/python3.13/site-packages (from scikit-learn) (1.16.2)\n",
+ "Requirement already satisfied: joblib>=1.2.0 in ./env/lib/python3.13/site-packages (from scikit-learn) (1.5.2)\n",
+ "Requirement already satisfied: threadpoolctl>=3.1.0 in ./env/lib/python3.13/site-packages (from scikit-learn) (3.6.0)\n",
+ "Requirement already satisfied: filelock in ./env/lib/python3.13/site-packages (from datasets) (3.20.0)\n",
+ "Requirement already satisfied: pyarrow>=21.0.0 in ./env/lib/python3.13/site-packages (from datasets) (21.0.0)\n",
+ "Requirement already satisfied: dill<0.4.1,>=0.3.0 in ./env/lib/python3.13/site-packages (from datasets) (0.4.0)\n",
+ "Requirement already satisfied: pandas in ./env/lib/python3.13/site-packages (from datasets) (2.3.3)\n",
+ "Requirement already satisfied: requests>=2.32.2 in ./env/lib/python3.13/site-packages (from datasets) (2.32.5)\n",
+ "Requirement already satisfied: httpx<1.0.0 in ./env/lib/python3.13/site-packages (from datasets) (0.28.1)\n",
+ "Requirement already satisfied: tqdm>=4.66.3 in ./env/lib/python3.13/site-packages (from datasets) (4.67.1)\n",
+ "Requirement already satisfied: xxhash in ./env/lib/python3.13/site-packages (from datasets) (3.6.0)\n",
+ "Requirement already satisfied: multiprocess<0.70.17 in ./env/lib/python3.13/site-packages (from datasets) (0.70.16)\n",
+ "Requirement already satisfied: fsspec<=2025.9.0,>=2023.1.0 in ./env/lib/python3.13/site-packages (from fsspec[http]<=2025.9.0,>=2023.1.0->datasets) (2025.9.0)\n",
+ "Requirement already satisfied: huggingface-hub<2.0,>=0.25.0 in ./env/lib/python3.13/site-packages (from datasets) (0.35.3)\n",
+ "Requirement already satisfied: packaging in ./env/lib/python3.13/site-packages (from datasets) (25.0)\n",
+ "Requirement already satisfied: pyyaml>=5.1 in ./env/lib/python3.13/site-packages (from datasets) (6.0.3)\n",
+ "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in ./env/lib/python3.13/site-packages (from fsspec[http]<=2025.9.0,>=2023.1.0->datasets) (3.13.0)\n",
+ "Requirement already satisfied: anyio in ./env/lib/python3.13/site-packages (from httpx<1.0.0->datasets) (4.11.0)\n",
+ "Requirement already satisfied: certifi in ./env/lib/python3.13/site-packages (from httpx<1.0.0->datasets) (2025.10.5)\n",
+ "Requirement already satisfied: httpcore==1.* in ./env/lib/python3.13/site-packages (from httpx<1.0.0->datasets) (1.0.9)\n",
+ "Requirement already satisfied: idna in ./env/lib/python3.13/site-packages (from httpx<1.0.0->datasets) (3.10)\n",
+ "Requirement already satisfied: h11>=0.16 in ./env/lib/python3.13/site-packages (from httpcore==1.*->httpx<1.0.0->datasets) (0.16.0)\n",
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in ./env/lib/python3.13/site-packages (from huggingface-hub<2.0,>=0.25.0->datasets) (4.15.0)\n",
+ "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in ./env/lib/python3.13/site-packages (from huggingface-hub<2.0,>=0.25.0->datasets) (1.1.10)\n",
+ "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in ./env/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets) (2.6.1)\n",
+ "Requirement already satisfied: aiosignal>=1.4.0 in ./env/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets) (1.4.0)\n",
+ "Requirement already satisfied: attrs>=17.3.0 in ./env/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets) (25.4.0)\n",
+ "Requirement already satisfied: frozenlist>=1.1.1 in ./env/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets) (1.8.0)\n",
+ "Requirement already satisfied: multidict<7.0,>=4.5 in ./env/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets) (6.7.0)\n",
+ "Requirement already satisfied: propcache>=0.2.0 in ./env/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets) (0.4.1)\n",
+ "Requirement already satisfied: yarl<2.0,>=1.17.0 in ./env/lib/python3.13/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets) (1.22.0)\n",
+ "Requirement already satisfied: charset_normalizer<4,>=2 in ./env/lib/python3.13/site-packages (from requests>=2.32.2->datasets) (3.4.3)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in ./env/lib/python3.13/site-packages (from requests>=2.32.2->datasets) (2.5.0)\n",
+ "Requirement already satisfied: sniffio>=1.1 in ./env/lib/python3.13/site-packages (from anyio->httpx<1.0.0->datasets) (1.3.1)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.2 in ./env/lib/python3.13/site-packages (from pandas->datasets) (2.9.0.post0)\n",
+ "Requirement already satisfied: pytz>=2020.1 in ./env/lib/python3.13/site-packages (from pandas->datasets) (2025.2)\n",
+ "Requirement already satisfied: tzdata>=2022.7 in ./env/lib/python3.13/site-packages (from pandas->datasets) (2025.2)\n",
+ "Requirement already satisfied: six>=1.5 in ./env/lib/python3.13/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\n",
+ "\n",
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.1.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.2\u001b[0m\n",
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install -U scikit-learn datasets\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "a177a2d0-3a86-44db-a730-7a9a33d6aff3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " text | \n",
+ " label | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 11449 | \n",
+ " I forgot all about Ice Cube being in the movie... | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 26433 | \n",
+ " playoffs are finally set. Chardon plays warren... | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 33669 | \n",
+ " Are we just going to ignore the fact that Ice ... | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 33013 | \n",
+ " If you live in the South Orlando area\\u002c be... | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 13399 | \n",
+ " First record of Colin Baker at the BBC: BBC2 s... | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " text label\n",
+ "11449 I forgot all about Ice Cube being in the movie... 0\n",
+ "26433 playoffs are finally set. Chardon plays warren... 1\n",
+ "33669 Are we just going to ignore the fact that Ice ... 1\n",
+ "33013 If you live in the South Orlando area\\u002c be... 1\n",
+ "13399 First record of Colin Baker at the BBC: BBC2 s... 1"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from datasets import load_dataset\n",
+ "import pandas as pd\n",
+ "\n",
+ "# Load TweetEval dataset (3 classes: 0=Negative, 1=Neutral, 2=Positive)\n",
+ "ds = load_dataset(\"cardiffnlp/tweet_eval\", \"sentiment\")\n",
+ "\n",
+ "# Convert to Pandas DataFrame and take small subset for speed\n",
+ "df = ds[\"train\"].to_pandas().sample(5000, random_state=42)\n",
+ "df.head()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "a4b37bf5-4bf6-4314-8cc1-04f071eeeb9f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
+ " df[\"text\"], df[\"label\"], test_size=0.2, random_state=42\n",
+ ")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "cb0f6c8e-89d5-4edf-817a-2fd9a86887a1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/miniconda3/envs/py310/lib/python3.10/site-packages/sklearn/svm/_classes.py:31: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "Pipeline(steps=[('tfidfvectorizer', TfidfVectorizer(max_features=5000)),\n",
+ " ('linearsvc', LinearSVC())])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
+ ],
+ "text/plain": [
+ "Pipeline(steps=[('tfidfvectorizer', TfidfVectorizer(max_features=5000)),\n",
+ " ('linearsvc', LinearSVC())])"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.pipeline import make_pipeline\n",
+ "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+ "from sklearn.svm import LinearSVC\n",
+ "\n",
+ "# TF-IDF + Linear SVM\n",
+ "model = make_pipeline(TfidfVectorizer(max_features=5000), LinearSVC())\n",
+ "\n",
+ "# Train\n",
+ "model.fit(X_train, y_train)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "1400ddc1-4188-4d08-8a5a-86d3644bc188",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " Negative 0.50 0.31 0.38 170\n",
+ " Neutral 0.62 0.64 0.63 443\n",
+ " Positive 0.62 0.70 0.66 387\n",
+ "\n",
+ " accuracy 0.61 1000\n",
+ " macro avg 0.58 0.55 0.56 1000\n",
+ "weighted avg 0.60 0.61 0.60 1000\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearn.metrics import classification_report\n",
+ "\n",
+ "preds = model.predict(X_test)\n",
+ "print(classification_report(y_test, preds, target_names=[\"Negative\",\"Neutral\",\"Positive\"]))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "5cf4f718-5f87-4a26-ab42-611c8b2ec289",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Positive', 'Negative', 'Positive']"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "def predict_sentiment(texts):\n",
+ " labels = [\"Negative\",\"Neutral\",\"Positive\"]\n",
+ " preds = model.predict(texts)\n",
+ " return [labels[p] for p in preds]\n",
+ "\n",
+ "predict_sentiment([\n",
+ " \"I absolutely love this!\",\n",
+ " \"This is terrible.\",\n",
+ " \"It's fine, nothing special.\"\n",
+ "])\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "1d30e38a-616a-41be-96db-67664c27bca6",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Model runs correctly\n"
+ ]
+ }
+ ],
+ "source": [
+ "def test_model_prediction():\n",
+ " sample = [\"Good work!\", \"Awful experience\", \"Not bad\"]\n",
+ " preds = model.predict(sample)\n",
+ " assert all(p in [0,1,2] for p in preds)\n",
+ " print(\"Model runs correctly\")\n",
+ "\n",
+ "test_model_prediction()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ade2929a-f09d-46cb-a85e-16fdb5b59d83",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python (py310)",
+ "language": "python",
+ "name": "py310"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.18"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/logs.log b/logs.log
new file mode 100644
index 0000000..e670218
--- /dev/null
+++ b/logs.log
@@ -0,0 +1,184 @@
+2025-10-10 22:54:20,400:WARNING:
+'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
+2025-10-10 22:54:20,400:WARNING:
+'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
+2025-10-10 22:54:20,400:WARNING:
+'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
+2025-10-10 22:54:20,400:WARNING:
+'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
+2025-10-10 22:54:41,537:INFO:PyCaret ClassificationExperiment
+2025-10-10 22:54:41,538:INFO:Logging name: clf-default-name
+2025-10-10 22:54:41,538:INFO:ML Usecase: MLUsecase.CLASSIFICATION
+2025-10-10 22:54:41,538:INFO:version 3.3.2
+2025-10-10 22:54:41,538:INFO:Initializing setup()
+2025-10-10 22:54:41,538:INFO:self.USI: b3fb
+2025-10-10 22:54:41,538:INFO:self._variable_keys: {'seed', 'y_train', 'X_train', 'y', 'X_test', 'exp_id', 'memory', 'target_param', 'fix_imbalance', 'gpu_param', 'X', 'is_multiclass', 'gpu_n_jobs_param', 'idx', 'pipeline', 'n_jobs_param', 'y_test', 'logging_param', '_available_plots', '_ml_usecase', 'fold_shuffle_param', 'log_plots_param', 'fold_groups_param', 'exp_name_log', 'fold_generator', 'html_param', 'data', 'USI'}
+2025-10-10 22:54:41,538:INFO:Checking environment
+2025-10-10 22:54:41,538:INFO:python_version: 3.10.18
+2025-10-10 22:54:41,538:INFO:python_build: ('main', 'Jun 5 2025 08:13:51')
+2025-10-10 22:54:41,538:INFO:machine: x86_64
+2025-10-10 22:54:41,539:INFO:platform: macOS-10.16-x86_64-i386-64bit
+2025-10-10 22:54:41,539:INFO:Memory: svmem(total=8589934592, available=2397630464, percent=72.1, used=4710584320, free=16334848, active=2383736832, inactive=2367266816, wired=2326847488)
+2025-10-10 22:54:41,540:INFO:Physical Core: 4
+2025-10-10 22:54:41,540:INFO:Logical Core: 8
+2025-10-10 22:54:41,540:INFO:Checking libraries
+2025-10-10 22:54:41,540:INFO:System:
+2025-10-10 22:54:41,540:INFO: python: 3.10.18 (main, Jun 5 2025, 08:13:51) [Clang 14.0.6 ]
+2025-10-10 22:54:41,540:INFO:executable: /opt/miniconda3/envs/py310/bin/python
+2025-10-10 22:54:41,540:INFO: machine: macOS-10.16-x86_64-i386-64bit
+2025-10-10 22:54:41,540:INFO:PyCaret required dependencies:
+2025-10-10 22:54:44,930:INFO: pip: 25.2
+2025-10-10 22:54:44,930:INFO: setuptools: 78.1.1
+2025-10-10 22:54:44,930:INFO: pycaret: 3.3.2
+2025-10-10 22:54:44,930:INFO: IPython: 8.30.0
+2025-10-10 22:54:44,930:INFO: ipywidgets: 8.1.7
+2025-10-10 22:54:44,930:INFO: tqdm: 4.67.1
+2025-10-10 22:54:44,930:INFO: numpy: 1.26.4
+2025-10-10 22:54:44,931:INFO: pandas: 2.1.4
+2025-10-10 22:54:44,931:INFO: jinja2: 3.1.6
+2025-10-10 22:54:44,931:INFO: scipy: 1.11.4
+2025-10-10 22:54:44,931:INFO: joblib: 1.3.2
+2025-10-10 22:54:44,931:INFO: sklearn: 1.4.2
+2025-10-10 22:54:44,931:INFO: pyod: 2.0.5
+2025-10-10 22:54:44,931:INFO: imblearn: 0.14.0
+2025-10-10 22:54:44,931:INFO: category_encoders: 2.7.0
+2025-10-10 22:54:44,931:INFO: lightgbm: 4.6.0
+2025-10-10 22:54:44,931:INFO: numba: 0.61.0
+2025-10-10 22:54:44,931:INFO: requests: 2.32.4
+2025-10-10 22:54:44,931:INFO: matplotlib: 3.7.5
+2025-10-10 22:54:44,931:INFO: scikitplot: 0.3.7
+2025-10-10 22:54:44,931:INFO: yellowbrick: 1.5
+2025-10-10 22:54:44,931:INFO: plotly: 5.24.1
+2025-10-10 22:54:44,931:INFO: plotly-resampler: Not installed
+2025-10-10 22:54:44,932:INFO: kaleido: 1.1.0
+2025-10-10 22:54:44,932:INFO: schemdraw: 0.15
+2025-10-10 22:54:44,932:INFO: statsmodels: 0.14.5
+2025-10-10 22:54:44,932:INFO: sktime: 0.26.0
+2025-10-10 22:54:44,932:INFO: tbats: 1.1.3
+2025-10-10 22:54:44,932:INFO: pmdarima: 2.0.4
+2025-10-10 22:54:44,932:INFO: psutil: 5.9.0
+2025-10-10 22:54:44,932:INFO: markupsafe: 3.0.2
+2025-10-10 22:54:44,932:INFO: pickle5: Not installed
+2025-10-10 22:54:44,932:INFO: cloudpickle: 3.1.1
+2025-10-10 22:54:44,932:INFO: deprecation: 2.1.0
+2025-10-10 22:54:44,932:INFO: xxhash: 3.6.0
+2025-10-10 22:54:44,932:INFO: wurlitzer: 3.1.1
+2025-10-10 22:54:44,932:INFO:PyCaret optional dependencies:
+2025-10-10 22:55:07,337:INFO: shap: 0.44.1
+2025-10-10 22:55:07,338:INFO: interpret: 0.7.2
+2025-10-10 22:55:07,338:INFO: umap: 0.5.7
+2025-10-10 22:55:07,338:INFO: ydata_profiling: 4.17.0
+2025-10-10 22:55:07,338:INFO: explainerdashboard: 0.5.1
+2025-10-10 22:55:07,338:INFO: autoviz: Not installed
+2025-10-10 22:55:07,338:INFO: fairlearn: 0.7.0
+2025-10-10 22:55:07,338:INFO: deepchecks: Not installed
+2025-10-10 22:55:07,338:INFO: xgboost: 3.0.5
+2025-10-10 22:55:07,338:INFO: catboost: 1.1.1
+2025-10-10 22:55:07,338:INFO: kmodes: 0.12.2
+2025-10-10 22:55:07,338:INFO: mlxtend: 0.23.4
+2025-10-10 22:55:07,339:INFO: statsforecast: 1.5.0
+2025-10-10 22:55:07,339:INFO: tune_sklearn: 0.5.0
+2025-10-10 22:55:07,339:INFO: ray: 2.49.2
+2025-10-10 22:55:07,339:INFO: hyperopt: 0.2.7
+2025-10-10 22:55:07,339:INFO: optuna: 4.5.0
+2025-10-10 22:55:07,339:INFO: skopt: 0.10.2
+2025-10-10 22:55:07,339:INFO: mlflow: 3.4.0
+2025-10-10 22:55:07,339:INFO: gradio: 5.49.1
+2025-10-10 22:55:07,339:INFO: fastapi: 0.118.3
+2025-10-10 22:55:07,339:INFO: uvicorn: 0.37.0
+2025-10-10 22:55:07,339:INFO: m2cgen: 0.10.0
+2025-10-10 22:55:07,339:INFO: evidently: 0.4.40
+2025-10-10 22:55:07,339:INFO: fugue: 0.8.7
+2025-10-10 22:55:07,339:INFO: streamlit: Not installed
+2025-10-10 22:55:07,339:INFO: prophet: Not installed
+2025-10-10 22:55:07,339:INFO:None
+2025-10-10 22:55:07,339:INFO:Set up data.
+2025-10-10 22:55:07,365:INFO:Set up folding strategy.
+2025-10-10 22:55:07,365:INFO:Set up train/test split.
+2025-10-10 22:55:07,380:INFO:Set up index.
+2025-10-10 22:55:07,380:INFO:Assigning column types.
+2025-10-10 22:55:07,384:INFO:Engine successfully changes for model 'lr' to 'sklearn'.
+2025-10-10 22:55:07,443:INFO:Engine for model 'knn' has not been set explicitly, hence returning None.
+2025-10-10 22:55:07,448:INFO:Engine for model 'rbfsvm' has not been set explicitly, hence returning None.
+2025-10-10 22:55:07,495:INFO:Soft dependency imported: xgboost: 3.0.5
+2025-10-10 22:55:07,499:INFO:Soft dependency imported: catboost: 1.1.1
+2025-10-10 22:55:08,587:INFO:Engine for model 'knn' has not been set explicitly, hence returning None.
+2025-10-10 22:55:08,588:INFO:Engine for model 'rbfsvm' has not been set explicitly, hence returning None.
+2025-10-10 22:55:08,625:INFO:Soft dependency imported: xgboost: 3.0.5
+2025-10-10 22:55:08,628:INFO:Soft dependency imported: catboost: 1.1.1
+2025-10-10 22:55:08,629:INFO:Engine successfully changes for model 'knn' to 'sklearn'.
+2025-10-10 22:55:08,688:INFO:Engine for model 'rbfsvm' has not been set explicitly, hence returning None.
+2025-10-10 22:55:08,724:INFO:Soft dependency imported: xgboost: 3.0.5
+2025-10-10 22:55:08,728:INFO:Soft dependency imported: catboost: 1.1.1
+2025-10-10 22:55:08,788:INFO:Engine for model 'rbfsvm' has not been set explicitly, hence returning None.
+2025-10-10 22:55:08,825:INFO:Soft dependency imported: xgboost: 3.0.5
+2025-10-10 22:55:08,828:INFO:Soft dependency imported: catboost: 1.1.1
+2025-10-10 22:55:08,829:INFO:Engine successfully changes for model 'rbfsvm' to 'sklearn'.
+2025-10-10 22:55:08,927:INFO:Soft dependency imported: xgboost: 3.0.5
+2025-10-10 22:55:08,930:INFO:Soft dependency imported: catboost: 1.1.1
+2025-10-10 22:55:09,026:INFO:Soft dependency imported: xgboost: 3.0.5
+2025-10-10 22:55:09,030:INFO:Soft dependency imported: catboost: 1.1.1
+2025-10-10 22:55:09,035:INFO:Preparing preprocessing pipeline...
+2025-10-10 22:55:09,037:INFO:Set up simple imputation.
+2025-10-10 22:55:09,037:INFO:Set up text embedding.
+2025-10-10 22:55:09,496:INFO:Finished creating preprocessing pipeline.
+2025-10-10 22:55:09,501:INFO:Pipeline: Pipeline(memory=FastMemory(location=/var/folders/yj/3b60s0r14sd5g_658xzzpr1r0000gn/T/joblib),
+ steps=[('numerical_imputer',
+ TransformerWrapper(exclude=None, include=[],
+ transformer=SimpleImputer(add_indicator=False,
+ copy=True,
+ fill_value=None,
+ keep_empty_features=False,
+ missing_values=nan,
+ strategy='mean'))),
+ ('categorical_imputer',
+ TransformerWrapper(exclude=None, include=[],
+ transformer=SimpleImputer(add_indicator=False,
+ copy=True,
+ fill_value=None,
+ keep_empty_features=False,
+ missing_values=nan,
+ strategy='most_frequent'))),
+ ('text_embedding',
+ TransformerWrapper(exclude=None, include=['text'],
+ transformer=EmbedTextFeatures(kwargs=None,
+ method='tf-idf')))],
+ verbose=False)
+2025-10-10 22:55:09,502:INFO:Creating final display dataframe.
+2025-10-10 22:55:21,714:INFO:Setup _display_container: Description Value
+0 Session id 42
+1 Target label
+2 Target type Multiclass
+3 Original data shape (9000, 2)
+4 Transformed data shape (9000, 14266)
+5 Transformed train set shape (6300, 14266)
+6 Transformed test set shape (2700, 14266)
+7 Text features 1
+8 Preprocess True
+9 Imputation type simple
+10 Numeric imputation mean
+11 Categorical imputation mode
+12 Text features embedding method tf-idf
+13 Fold Generator StratifiedKFold
+14 Fold Number 3
+15 CPU Jobs -1
+16 Use GPU False
+17 Log Experiment False
+18 Experiment Name clf-default-name
+19 USI b3fb
+2025-10-10 22:55:21,814:INFO:Soft dependency imported: xgboost: 3.0.5
+2025-10-10 22:55:21,818:INFO:Soft dependency imported: catboost: 1.1.1
+2025-10-10 22:55:21,915:INFO:Soft dependency imported: xgboost: 3.0.5
+2025-10-10 22:55:21,918:INFO:Soft dependency imported: catboost: 1.1.1
+2025-10-10 22:55:21,921:INFO:setup() successfully completed in 40.38s...............
+2025-10-10 22:55:40,926:INFO:Initializing create_model()
+2025-10-10 22:55:40,927:INFO:create_model(self=, estimator=svm, fold=None, round=4, cross_validation=True, predict=True, fit_kwargs=None, groups=None, refit=True, probability_threshold=None, experiment_custom_tags=None, verbose=True, system=True, add_to_model_list=True, metrics=None, display=None, model_only=True, return_train_score=False, error_score=0.0, kwargs={})
+2025-10-10 22:55:40,927:INFO:Checking exceptions
+2025-10-10 22:55:40,958:INFO:Importing libraries
+2025-10-10 22:55:40,958:INFO:Copying training dataset
+2025-10-10 22:55:40,969:INFO:Defining folds
+2025-10-10 22:55:40,970:INFO:Declaring metric variables
+2025-10-10 22:55:40,977:INFO:Importing untrained model
+2025-10-10 22:55:40,983:INFO:SVM - Linear Kernel Imported successfully
+2025-10-10 22:55:40,994:INFO:Starting cross validation
+2025-10-10 22:55:41,157:INFO:Cross validating with StratifiedKFold(n_splits=3, random_state=None, shuffle=False), n_jobs=-1
diff --git a/tests/__pycache__/test_sentiment_analysis_svm_scikit.cpython-310-pytest-7.4.4.pyc b/tests/__pycache__/test_sentiment_analysis_svm_scikit.cpython-310-pytest-7.4.4.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c4a9d04bea28f20dc0726cbdfd4b7d152cfcb676
GIT binary patch
literal 1549
zcmZux%WfMt6eZ`O$LL|BDU5j0cG93|1}MgElI|QMZjzz_TBCp+Ed~e%)R3|V;ygl9
zb~KfMw#sK@VP)43=)T{a@nJXOA8Qu(7o`0AYbCxajT6PBwq1MrV>!lyEoK6_2FlC#iz
zmdlA?vR<1NlyN^HxvvCCbD!kYF9oA%qWsHLofi}3Ga(8gL+=_KLB|2sy^M@mT&5-C
zY0iMyIvu5SG+>D;L|QQcRQm^*BI0E5Br%_DN&`BkK7lI#1W@P-pMe7J3|j9%fw$HY
z&yli}y+F!YpoKNZGkoRF5OsDDpu3Bf7Dm7|j2dTVy+vEJ{RYjfr)Yuawrb97Xq}lO
z2h^h-)w*gg(FKx^ly}up%_SD!La%FjIfi-Y&+H|xTW5)B_pQBy=C0}i)1SFZyuiO<
z+H=s{o;zxb_7@mp(SsOlRd-L<($&fv1(S1@j5j)_MJ~g@uDZ{CUR*GcJ(YK=?kO}r
z{D*{wmG@kIDg9zhrm*tbq>jJ<*9HPhJBqzl+T~=(xM-T$7C^Oqbjf6qF*vE9lSJLA
zZhc<}kq$Y7XqQZ+EKgWsHrW)jHDoK2rrrkl6%>8hGzwGDX8<|*3NjS@vI4udL6~p}
z7vT7JV~Z8;Yg~%9VGaEA)7}fogxsTyjpaGX)tF59UO7#OU|@HhbaKyR=n7-G$<3Io
zb2F9~SuB%uoT{*#>h{4I%fSJ0xB~}B2f+Ge2US76NnGb_yv|ZkH$31)LbyB(-+xzq
zMI;!d{4eg6(}#ideqML{M>Vw{$a{vlAIQL)YnyN$*xD|q`v&eCcz9fW`r&SsWP87yj7VWljJD;N!!whIo7bq0LG;;%9x)P;eM^iip}9-K1S#rlRzdpJ5v$F;
d;dPzlwd<_|&~L)op}9=bP!9Us?c(}p{RgrNy@UV&
literal 0
HcmV?d00001
diff --git a/tests/test_sentiment_analysis_svm_scikit.ipynb b/tests/test_sentiment_analysis_svm_scikit.ipynb
new file mode 100644
index 0000000..d0fa792
--- /dev/null
+++ b/tests/test_sentiment_analysis_svm_scikit.ipynb
@@ -0,0 +1,24 @@
+"""
+Unit test for the SVM Sentiment Analysis example.
+Ensures the model can train and predict without errors.
+"""
+
+import pandas as pd
+from sklearn.pipeline import make_pipeline
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.svm import LinearSVC
+
+def tests_model_prediction():
+    """Train a TF-IDF + LinearSVC pipeline on toy data and sanity-check predictions."""
+    df = pd.DataFrame({
+        "text": ["I love this!", "This is bad.", "It's okay."],
+        "label": [2, 0, 1]  # 2=Positive, 0=Negative, 1=Neutral
+    })
+
+    # Seed the solver so repeated runs of this test fit the same model.
+    model = make_pipeline(TfidfVectorizer(), LinearSVC(random_state=42))
+    model.fit(df["text"], df["label"])
+
+    preds = model.predict(["Awesome product!", "Horrible experience"])
+    assert len(preds) == 2  # exactly one prediction per input text
+    assert all(p in [0, 1, 2] for p in preds)  # only known class ids
\ No newline at end of file
diff --git a/tests/test_sentiment_analysis_svm_scikit.py b/tests/test_sentiment_analysis_svm_scikit.py
new file mode 100644
index 0000000..bb1c56d
--- /dev/null
+++ b/tests/test_sentiment_analysis_svm_scikit.py
@@ -0,0 +1,24 @@
+"""
+Unit test for the SVM Sentiment Analysis example.
+Ensures the model can train and predict without errors.
+"""
+
+import pandas as pd
+from sklearn.pipeline import make_pipeline
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.svm import LinearSVC
+
+def tests_model_prediction():
+    """Train a TF-IDF + LinearSVC pipeline on toy data and sanity-check predictions."""
+    df = pd.DataFrame({
+        "text": ["I love this!", "This is bad.", "It's okay."],
+        "label": [2, 0, 1]  # 2=Positive, 0=Negative, 1=Neutral
+    })
+
+    # Seed the solver so repeated runs of this test fit the same model.
+    model = make_pipeline(TfidfVectorizer(), LinearSVC(random_state=42))
+    model.fit(df["text"], df["label"])
+
+    preds = model.predict(["Awesome product!", "Horrible experience"])
+    assert len(preds) == 2  # exactly one prediction per input text
+    assert all(p in [0, 1, 2] for p in preds)  # only known class ids
diff --git a/tests_sentiment_analysis_svm_scikit.ipynb b/tests_sentiment_analysis_svm_scikit.ipynb
new file mode 100644
index 0000000..4b7f814
--- /dev/null
+++ b/tests_sentiment_analysis_svm_scikit.ipynb
@@ -0,0 +1,66 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "55acd39d-3a31-44c7-8cab-ffd0ec339bc9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\"\"\"\n",
+ "Unit test for the SVM Sentiment Analysis example.\n",
+ "Ensures the model can train and predict without errors.\n",
+ "\"\"\"\n",
+ "\n",
+ "import pandas as pd\n",
+ "from sklearn.pipeline import make_pipeline\n",
+ "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+ "from sklearn.svm import LinearSVC\n",
+ "\n",
+ "def tests_model_prediction():\n",
+ " \"\"\"Train an SVM and check predictions.\"\"\"\n",
+ " df = pd.DataFrame({\n",
+ " \"text\": [\"I love this!\", \"This is bad.\", \"It's okay.\"],\n",
+ " \"label\": [2, 0, 1] # 2=Positive, 0=Negative, 1=Neutral\n",
+ " })\n",
+ "\n",
+ " model = make_pipeline(TfidfVectorizer(), LinearSVC())\n",
+ " model.fit(df[\"text\"], df[\"label\"])\n",
+ "\n",
+ " preds = model.predict([\"Awesome product!\", \"Horrible experience\"])\n",
+ " assert all(p in [0, 1, 2] for p in preds)\n",
+ "\n",
+ " print(\"Unit test passed successfully.\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ac1fec51-4881-4997-8f32-f6f0e47c3daf",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python (py310)",
+ "language": "python",
+ "name": "py310"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.18"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}