pycaret · vedika0806 · Oct 11, 2025
diff --git a/.ipynb_checkpoints/Sentiment_Analysis_SVM_Scikit-checkpoint.ipynb b/.ipynb_checkpoints/Sentiment_Analysis_SVM_Scikit-checkpoint.ipynb
@@ -0,0 +1,6 @@
+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/.ipynb_checkpoints/tests_sentiment_analysis_svm_scikit-checkpoint.ipynb b/.ipynb_checkpoints/tests_sentiment_analysis_svm_scikit-checkpoint.ipynb
@@ -0,0 +1,6 @@
+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Sentiment_Analysis_SVM_Scikit.ipynb b/Sentiment_Analysis_SVM_Scikit.ipynb
diff --git a/logs.log b/logs.log
@@ -0,0 +1,184 @@
+2025-10-10 22:54:20,400:WARNING:
+'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
+2025-10-10 22:54:20,400:WARNING:
+'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
+2025-10-10 22:54:20,400:WARNING:
+'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
+2025-10-10 22:54:20,400:WARNING:
+'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
+2025-10-10 22:54:41,537:INFO:PyCaret ClassificationExperiment
+2025-10-10 22:54:41,538:INFO:Logging name: clf-default-name
+2025-10-10 22:54:41,538:INFO:ML Usecase: MLUsecase.CLASSIFICATION
+2025-10-10 22:54:41,538:INFO:version 3.3.2
+2025-10-10 22:54:41,538:INFO:Initializing setup()
+2025-10-10 22:54:41,538:INFO:self.USI: b3fb
+2025-10-10 22:54:41,538:INFO:self._variable_keys: {'seed', 'y_train', 'X_train', 'y', 'X_test', 'exp_id', 'memory', 'target_param', 'fix_imbalance', 'gpu_param', 'X', 'is_multiclass', 'gpu_n_jobs_param', 'idx', 'pipeline', 'n_jobs_param', 'y_test', 'logging_param', '_available_plots', '_ml_usecase', 'fold_shuffle_param', 'log_plots_param', 'fold_groups_param', 'exp_name_log', 'fold_generator', 'html_param', 'data', 'USI'}
+2025-10-10 22:54:41,538:INFO:Checking environment
+2025-10-10 22:54:41,538:INFO:python_version: 3.10.18
+2025-10-10 22:54:41,538:INFO:python_build: ('main', 'Jun  5 2025 08:13:51')
+2025-10-10 22:54:41,538:INFO:machine: x86_64
+2025-10-10 22:54:41,539:INFO:platform: macOS-10.16-x86_64-i386-64bit
+2025-10-10 22:54:41,539:INFO:Memory: svmem(total=8589934592, available=2397630464, percent=72.1, used=4710584320, free=16334848, active=2383736832, inactive=2367266816, wired=2326847488)
+2025-10-10 22:54:41,540:INFO:Physical Core: 4
+2025-10-10 22:54:41,540:INFO:Logical Core: 8
+2025-10-10 22:54:41,540:INFO:Checking libraries
+2025-10-10 22:54:41,540:INFO:System:
+2025-10-10 22:54:41,540:INFO:    python: 3.10.18 (main, Jun  5 2025, 08:13:51) [Clang 14.0.6 ]
+2025-10-10 22:54:41,540:INFO:executable: /opt/miniconda3/envs/py310/bin/python
+2025-10-10 22:54:41,540:INFO:   machine: macOS-10.16-x86_64-i386-64bit
+2025-10-10 22:54:41,540:INFO:PyCaret required dependencies:
+2025-10-10 22:54:44,930:INFO:                 pip: 25.2
+2025-10-10 22:54:44,930:INFO:          setuptools: 78.1.1
+2025-10-10 22:54:44,930:INFO:             pycaret: 3.3.2
+2025-10-10 22:54:44,930:INFO:             IPython: 8.30.0
+2025-10-10 22:54:44,930:INFO:          ipywidgets: 8.1.7
+2025-10-10 22:54:44,930:INFO:                tqdm: 4.67.1
+2025-10-10 22:54:44,930:INFO:               numpy: 1.26.4
+2025-10-10 22:54:44,931:INFO:              pandas: 2.1.4
+2025-10-10 22:54:44,931:INFO:              jinja2: 3.1.6
+2025-10-10 22:54:44,931:INFO:               scipy: 1.11.4
+2025-10-10 22:54:44,931:INFO:              joblib: 1.3.2
+2025-10-10 22:54:44,931:INFO:             sklearn: 1.4.2
+2025-10-10 22:54:44,931:INFO:                pyod: 2.0.5
+2025-10-10 22:54:44,931:INFO:            imblearn: 0.14.0
+2025-10-10 22:54:44,931:INFO:   category_encoders: 2.7.0
+2025-10-10 22:54:44,931:INFO:            lightgbm: 4.6.0
+2025-10-10 22:54:44,931:INFO:               numba: 0.61.0
+2025-10-10 22:54:44,931:INFO:            requests: 2.32.4
+2025-10-10 22:54:44,931:INFO:          matplotlib: 3.7.5
+2025-10-10 22:54:44,931:INFO:          scikitplot: 0.3.7
+2025-10-10 22:54:44,931:INFO:         yellowbrick: 1.5
+2025-10-10 22:54:44,931:INFO:              plotly: 5.24.1
+2025-10-10 22:54:44,931:INFO:    plotly-resampler: Not installed
+2025-10-10 22:54:44,932:INFO:             kaleido: 1.1.0
+2025-10-10 22:54:44,932:INFO:           schemdraw: 0.15
+2025-10-10 22:54:44,932:INFO:         statsmodels: 0.14.5
+2025-10-10 22:54:44,932:INFO:              sktime: 0.26.0
+2025-10-10 22:54:44,932:INFO:               tbats: 1.1.3
+2025-10-10 22:54:44,932:INFO:            pmdarima: 2.0.4
+2025-10-10 22:54:44,932:INFO:              psutil: 5.9.0
+2025-10-10 22:54:44,932:INFO:          markupsafe: 3.0.2
+2025-10-10 22:54:44,932:INFO:             pickle5: Not installed
+2025-10-10 22:54:44,932:INFO:         cloudpickle: 3.1.1
+2025-10-10 22:54:44,932:INFO:         deprecation: 2.1.0
+2025-10-10 22:54:44,932:INFO:              xxhash: 3.6.0
+2025-10-10 22:54:44,932:INFO:           wurlitzer: 3.1.1
+2025-10-10 22:54:44,932:INFO:PyCaret optional dependencies:
+2025-10-10 22:55:07,337:INFO:                shap: 0.44.1
+2025-10-10 22:55:07,338:INFO:           interpret: 0.7.2
+2025-10-10 22:55:07,338:INFO:                umap: 0.5.7
+2025-10-10 22:55:07,338:INFO:     ydata_profiling: 4.17.0
+2025-10-10 22:55:07,338:INFO:  explainerdashboard: 0.5.1
+2025-10-10 22:55:07,338:INFO:             autoviz: Not installed
+2025-10-10 22:55:07,338:INFO:           fairlearn: 0.7.0
+2025-10-10 22:55:07,338:INFO:          deepchecks: Not installed
+2025-10-10 22:55:07,338:INFO:             xgboost: 3.0.5
+2025-10-10 22:55:07,338:INFO:            catboost: 1.1.1
+2025-10-10 22:55:07,338:INFO:              kmodes: 0.12.2
+2025-10-10 22:55:07,338:INFO:             mlxtend: 0.23.4
+2025-10-10 22:55:07,339:INFO:       statsforecast: 1.5.0
+2025-10-10 22:55:07,339:INFO:        tune_sklearn: 0.5.0
+2025-10-10 22:55:07,339:INFO:                 ray: 2.49.2
+2025-10-10 22:55:07,339:INFO:            hyperopt: 0.2.7
+2025-10-10 22:55:07,339:INFO:              optuna: 4.5.0
+2025-10-10 22:55:07,339:INFO:               skopt: 0.10.2
+2025-10-10 22:55:07,339:INFO:              mlflow: 3.4.0
+2025-10-10 22:55:07,339:INFO:              gradio: 5.49.1
+2025-10-10 22:55:07,339:INFO:             fastapi: 0.118.3
+2025-10-10 22:55:07,339:INFO:             uvicorn: 0.37.0
+2025-10-10 22:55:07,339:INFO:              m2cgen: 0.10.0
+2025-10-10 22:55:07,339:INFO:           evidently: 0.4.40
+2025-10-10 22:55:07,339:INFO:               fugue: 0.8.7
+2025-10-10 22:55:07,339:INFO:           streamlit: Not installed
+2025-10-10 22:55:07,339:INFO:             prophet: Not installed
+2025-10-10 22:55:07,339:INFO:None
+2025-10-10 22:55:07,339:INFO:Set up data.
+2025-10-10 22:55:07,365:INFO:Set up folding strategy.
+2025-10-10 22:55:07,365:INFO:Set up train/test split.
+2025-10-10 22:55:07,380:INFO:Set up index.
+2025-10-10 22:55:07,380:INFO:Assigning column types.
+2025-10-10 22:55:07,384:INFO:Engine successfully changes for model 'lr' to 'sklearn'.
+2025-10-10 22:55:07,443:INFO:Engine for model 'knn' has not been set explicitly, hence returning None.
+2025-10-10 22:55:07,448:INFO:Engine for model 'rbfsvm' has not been set explicitly, hence returning None.
+2025-10-10 22:55:07,495:INFO:Soft dependency imported: xgboost: 3.0.5
+2025-10-10 22:55:07,499:INFO:Soft dependency imported: catboost: 1.1.1
+2025-10-10 22:55:08,587:INFO:Engine for model 'knn' has not been set explicitly, hence returning None.
+2025-10-10 22:55:08,588:INFO:Engine for model 'rbfsvm' has not been set explicitly, hence returning None.
+2025-10-10 22:55:08,625:INFO:Soft dependency imported: xgboost: 3.0.5
+2025-10-10 22:55:08,628:INFO:Soft dependency imported: catboost: 1.1.1
+2025-10-10 22:55:08,629:INFO:Engine successfully changes for model 'knn' to 'sklearn'.
+2025-10-10 22:55:08,688:INFO:Engine for model 'rbfsvm' has not been set explicitly, hence returning None.
+2025-10-10 22:55:08,724:INFO:Soft dependency imported: xgboost: 3.0.5
+2025-10-10 22:55:08,728:INFO:Soft dependency imported: catboost: 1.1.1
+2025-10-10 22:55:08,788:INFO:Engine for model 'rbfsvm' has not been set explicitly, hence returning None.
+2025-10-10 22:55:08,825:INFO:Soft dependency imported: xgboost: 3.0.5
+2025-10-10 22:55:08,828:INFO:Soft dependency imported: catboost: 1.1.1
+2025-10-10 22:55:08,829:INFO:Engine successfully changes for model 'rbfsvm' to 'sklearn'.
+2025-10-10 22:55:08,927:INFO:Soft dependency imported: xgboost: 3.0.5
+2025-10-10 22:55:08,930:INFO:Soft dependency imported: catboost: 1.1.1
+2025-10-10 22:55:09,026:INFO:Soft dependency imported: xgboost: 3.0.5
+2025-10-10 22:55:09,030:INFO:Soft dependency imported: catboost: 1.1.1
+2025-10-10 22:55:09,035:INFO:Preparing preprocessing pipeline...
+2025-10-10 22:55:09,037:INFO:Set up simple imputation.
+2025-10-10 22:55:09,037:INFO:Set up text embedding.
+2025-10-10 22:55:09,496:INFO:Finished creating preprocessing pipeline.
+2025-10-10 22:55:09,501:INFO:Pipeline: Pipeline(memory=FastMemory(location=/var/folders/yj/3b60s0r14sd5g_658xzzpr1r0000gn/T/joblib),
+         steps=[('numerical_imputer',
+                 TransformerWrapper(exclude=None, include=[],
+                                    transformer=SimpleImputer(add_indicator=False,
+                                                              copy=True,
+                                                              fill_value=None,
+                                                              keep_empty_features=False,
+                                                              missing_values=nan,
+                                                              strategy='mean'))),
+                ('categorical_imputer',
+                 TransformerWrapper(exclude=None, include=[],
+                                    transformer=SimpleImputer(add_indicator=False,
+                                                              copy=True,
+                                                              fill_value=None,
+                                                              keep_empty_features=False,
+                                                              missing_values=nan,
+                                                              strategy='most_frequent'))),
+                ('text_embedding',
+                 TransformerWrapper(exclude=None, include=['text'],
+                                    transformer=EmbedTextFeatures(kwargs=None,
+                                                                  method='tf-idf')))],
+         verbose=False)
+2025-10-10 22:55:09,502:INFO:Creating final display dataframe.
+2025-10-10 22:55:21,714:INFO:Setup _display_container:                        Description             Value
+0                       Session id                42
+1                           Target             label
+2                      Target type        Multiclass
+3              Original data shape         (9000, 2)
+4           Transformed data shape     (9000, 14266)
+5      Transformed train set shape     (6300, 14266)
+6       Transformed test set shape     (2700, 14266)
+7                    Text features                 1
+8                       Preprocess              True
+9                  Imputation type            simple
+10              Numeric imputation              mean
+11          Categorical imputation              mode
+12  Text features embedding method            tf-idf
+13                  Fold Generator   StratifiedKFold
+14                     Fold Number                 3
+15                        CPU Jobs                -1
+16                         Use GPU             False
+17                  Log Experiment             False
+18                 Experiment Name  clf-default-name
+19                             USI              b3fb
+2025-10-10 22:55:21,814:INFO:Soft dependency imported: xgboost: 3.0.5
+2025-10-10 22:55:21,818:INFO:Soft dependency imported: catboost: 1.1.1
+2025-10-10 22:55:21,915:INFO:Soft dependency imported: xgboost: 3.0.5
+2025-10-10 22:55:21,918:INFO:Soft dependency imported: catboost: 1.1.1
+2025-10-10 22:55:21,921:INFO:setup() successfully completed in 40.38s...............
+2025-10-10 22:55:40,926:INFO:Initializing create_model()
+2025-10-10 22:55:40,927:INFO:create_model(self=<pycaret.classification.oop.ClassificationExperiment object at 0x108503f40>, estimator=svm, fold=None, round=4, cross_validation=True, predict=True, fit_kwargs=None, groups=None, refit=True, probability_threshold=None, experiment_custom_tags=None, verbose=True, system=True, add_to_model_list=True, metrics=None, display=None, model_only=True, return_train_score=False, error_score=0.0, kwargs={})
+2025-10-10 22:55:40,927:INFO:Checking exceptions
+2025-10-10 22:55:40,958:INFO:Importing libraries
+2025-10-10 22:55:40,958:INFO:Copying training dataset
+2025-10-10 22:55:40,969:INFO:Defining folds
+2025-10-10 22:55:40,970:INFO:Declaring metric variables
+2025-10-10 22:55:40,977:INFO:Importing untrained model
+2025-10-10 22:55:40,983:INFO:SVM - Linear Kernel Imported successfully
+2025-10-10 22:55:40,994:INFO:Starting cross validation
+2025-10-10 22:55:41,157:INFO:Cross validating with StratifiedKFold(n_splits=3, random_state=None, shuffle=False), n_jobs=-1
diff --git a/tests/__pycache__/test_sentiment_analysis_svm_scikit.cpython-310-pytest-7.4.4.pyc b/tests/__pycache__/test_sentiment_analysis_svm_scikit.cpython-310-pytest-7.4.4.pyc
diff --git a/tests/test_sentiment_analysis_svm_scikit.ipynb b/tests/test_sentiment_analysis_svm_scikit.ipynb
@@ -0,0 +1,24 @@
+"""
+Unit test for the SVM Sentiment Analysis example.
+Ensures the model can train and predict without errors.
+"""
+
+import pandas as pd
+from sklearn.pipeline import make_pipeline
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.svm import LinearSVC
+
+def tests_model_prediction():  # smoke test: fit on a toy frame, then check the predicted labels are valid
+    """Train an SVM and check predictions."""
+    df = pd.DataFrame({
+        "text": ["I love this!", "This is bad.", "It's okay."],
+        "label": [2, 0, 1]  # 2=Positive, 0=Negative, 1=Neutral
+    })
+
+    model = make_pipeline(TfidfVectorizer(), LinearSVC())  # TF-IDF text features feed the linear SVM
+    model.fit(df["text"], df["label"])  # trains on the three labelled rows above, one per class
+
+    preds = model.predict(["Awesome product!", "Horrible experience"])  # NOTE(review): neither input shares a word with the training texts - confirm this is intended
+    assert all(p in [0, 1, 2] for p in preds)  # checks membership in the label set only, not which label is predicted
+
+    print("Unit test passed successfully.")  # informational; reached only when the assert above holds
diff --git a/tests/test_sentiment_analysis_svm_scikit.py b/tests/test_sentiment_analysis_svm_scikit.py
@@ -0,0 +1,24 @@
+"""
+Unit test for the SVM Sentiment Analysis example.
+Ensures the model can train and predict without errors.
+"""
+
+import pandas as pd
+from sklearn.pipeline import make_pipeline
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.svm import LinearSVC
+
+def tests_model_prediction():  # smoke test: fit on a toy frame, then check the predicted labels are valid
+    """Train an SVM and check predictions."""
+    df = pd.DataFrame({
+        "text": ["I love this!", "This is bad.", "It's okay."],
+        "label": [2, 0, 1]  # 2=Positive, 0=Negative, 1=Neutral
+    })
+
+    model = make_pipeline(TfidfVectorizer(), LinearSVC())  # TF-IDF text features feed the linear SVM
+    model.fit(df["text"], df["label"])  # trains on the three labelled rows above, one per class
+
+    preds = model.predict(["Awesome product!", "Horrible experience"])  # NOTE(review): neither input shares a word with the training texts - confirm this is intended
+    assert all(p in [0, 1, 2] for p in preds)  # checks membership in the label set only, not which label is predicted
+
+    print("Unit test passed successfully.")  # informational; reached only when the assert above holds
diff --git a/tests_sentiment_analysis_svm_scikit.ipynb b/tests_sentiment_analysis_svm_scikit.ipynb
@@ -0,0 +1,66 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "55acd39d-3a31-44c7-8cab-ffd0ec339bc9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"\"\"\n",
+    "Unit test for the SVM Sentiment Analysis example.\n",
+    "Ensures the model can train and predict without errors.\n",
+    "\"\"\"\n",
+    "\n",
+    "import pandas as pd\n",
+    "from sklearn.pipeline import make_pipeline\n",
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "from sklearn.svm import LinearSVC\n",
+    "\n",
+    "def tests_model_prediction():\n",
+    "    \"\"\"Train an SVM and check predictions.\"\"\"\n",
+    "    df = pd.DataFrame({\n",
+    "        \"text\": [\"I love this!\", \"This is bad.\", \"It's okay.\"],\n",
+    "        \"label\": [2, 0, 1]  # 2=Positive, 0=Negative, 1=Neutral\n",
+    "    })\n",
+    "\n",
+    "    model = make_pipeline(TfidfVectorizer(), LinearSVC())\n",
+    "    model.fit(df[\"text\"], df[\"label\"])\n",
+    "\n",
+    "    preds = model.predict([\"Awesome product!\", \"Horrible experience\"])\n",
+    "    assert all(p in [0, 1, 2] for p in preds)\n",
+    "\n",
+    "    print(\"Unit test passed successfully.\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ac1fec51-4881-4997-8f32-f6f0e47c3daf",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (py310)",
+   "language": "python",
+   "name": "py310"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}