Merge pull request #15 from msamsami/update-compatibility
Update compatibility, minor improvements
msamsami authored Oct 5, 2023
2 parents e7a0edc + f8c7963 commit dac5411
Showing 11 changed files with 62 additions and 70 deletions.
11 changes: 8 additions & 3 deletions README.md
@@ -1,7 +1,7 @@
# WNB: General and weighted naive Bayes classifiers

![](https://img.shields.io/badge/version-v0.1.15-green)
![](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9-blue)
![](https://img.shields.io/badge/version-v0.1.16-green)
![](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue)
![](https://github.com/msamsami/weighted-naive-bayes/actions/workflows/python-publish.yml/badge.svg)
[![](https://img.shields.io/pypi/v/wnb)](https://pypi.org/project/wnb/)
![](https://img.shields.io/pypi/dm/wnb)
@@ -34,7 +34,7 @@ of gaussian MLD-WNB.

## Install
The easiest way to install the wnb library is by using `pip`:
```commandline
```
pip install wnb
```
This library is shipped as an all-in-one module implementation with minimalistic dependencies and requirements.
@@ -97,6 +97,11 @@ To run the tests, install development requirements:
pip install -r requirements_dev.txt
```

Or, install the package with dev extras:
```
pip install wnb[dev]
```

Then, run pytest:
```
pytest
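
As a quick orientation for the README changes above, here is a minimal usage sketch of the package's public API. It relies only on names visible in this diff (`GeneralNB`, `Distribution`, and the scikit-learn-style `fit`/`predict` exercised in the tests below); the toy data is made up:

```python
import numpy as np
from wnb import GeneralNB, Distribution as D

# Toy data: 150 samples with 4 binary features and 2 classes
rng = np.random.RandomState(0)
X = rng.randint(2, size=(150, 4))
y = rng.randint(1, 3, size=(150,))

# One distribution per feature, mirroring tests/test_gnb.py in this diff
clf = GeneralNB(distributions=[D.BERNOULLI for _ in range(4)])
clf.fit(X, y)
print(clf.predict(X[:5]))
```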
3 changes: 2 additions & 1 deletion requirements_dev.txt
@@ -1,4 +1,5 @@
pandas>=1.4.1
scipy>=1.8.0
scikit-learn>=1.0.2
pytest==7.3.1
pytest==7.3.1
black>=23.9.0
10 changes: 6 additions & 4 deletions setup.py
@@ -1,11 +1,11 @@
import codecs
from os import path
from setuptools import setup
from setuptools import setup, find_packages


setup(
name="wnb",
version="0.1.15",
version="0.1.16",
description="Python library for the implementations of general and weighted naive Bayes (WNB) classifiers.",
keywords=["python", "bayes", "naivebayes", "classifier", "probabilistic"],
author="Mehdi Samsami",
@@ -15,7 +15,7 @@
path.join(path.abspath(path.dirname(__file__)), "README.md"), encoding="utf-8"
).read(),
long_description_content_type="text/markdown",
packages=["wnb"],
packages=find_packages(),
classifiers=[
"Intended Audience :: Science/Research",
"Intended Audience :: Developers",
@@ -26,9 +26,11 @@
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"License :: OSI Approved :: MIT License",
],
python_requires=">=3.7",
install_requires=["pandas==1.4.1", "scikit-learn>=1.0.2"],
extras_require={"dev": "pytest==7.3.1"},
extras_require={"dev": ["pytest==7.3.1", "black>=23.9.0"]},
)
12 changes: 7 additions & 5 deletions tests/test_gnb.py
@@ -71,7 +71,7 @@ def test_gnb_vs_sklearn_bernoulli():
"""
rng = np.random.RandomState(1)
X_ = rng.randint(2, size=(150, 100))
y_ = rng.randint(1, 5, size=(150, ))
y_ = rng.randint(1, 5, size=(150,))

clf1 = GeneralNB(distributions=[D.BERNOULLI for _ in range(100)])
clf1.fit(X_, y_)
@@ -101,18 +101,20 @@ def test_gnb_vs_sklearn_categorical():
["cat", "dog"],
["morning", "noon", "afternoon", "evening"],
["apple", "orange", "watermelon"],
["black", "white"]
["black", "white"],
]
rng = np.random.RandomState(24)
X_str_ = np.empty((150, 4)).astype("str")
X_ = np.zeros((150, 4))
for i, options in enumerate(categorical_values):
rnd_values = rng.randint(len(options), size=(150, ))
rnd_values = rng.randint(len(options), size=(150,))
X_str_[:, i] = np.array(options)[rnd_values]
X_[:, i] = rnd_values
y_ = rng.randint(1, 4, size=(150, ))
y_ = rng.randint(1, 4, size=(150,))

clf1 = GeneralNB(distributions=[D.CATEGORICAL for _ in range(len(categorical_values))])
clf1 = GeneralNB(
distributions=[D.CATEGORICAL for _ in range(len(categorical_values))]
)
clf1.fit(X_str_, y_)

clf2 = CategoricalNB(alpha=1e-10, force_alpha=True)
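
For comparison, a pared-down sketch of the scenario the categorical test above exercises: fitting `GeneralNB` directly on string-valued features. The single-column toy data and the `predict` call are illustrative assumptions modeled on that test:

```python
import numpy as np
from wnb import GeneralNB, Distribution as D

# A single string-valued categorical feature (illustrative toy data)
rng = np.random.RandomState(24)
options = np.array(["black", "white"])
X = options[rng.randint(2, size=(20, 1))]
y = rng.randint(1, 3, size=(20,))

clf = GeneralNB(distributions=[D.CATEGORICAL]).fit(X, y)
print(clf.predict(np.array([["black"]])))
```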
2 changes: 1 addition & 1 deletion tests/test_gwnb.py
@@ -79,7 +79,7 @@ def test_gwnb_priors():
clf = GaussianWNB(priors=np.array([0.3, 0.7])).fit(X, y)
assert_array_almost_equal(
clf.predict_proba([[-0.1, -0.1]]),
np.array([[0.823571, 0.176429]]),
np.array([[0.82357095, 0.17642905]]),
8,
)
assert_array_almost_equal(clf.class_prior_, np.array([0.3, 0.7]))
15 changes: 8 additions & 7 deletions wnb/__init__.py
@@ -1,16 +1,17 @@
__version__ = "0.1.15"
__version__ = "0.1.16"
__author__ = "Mehdi Samsami"


from ._base import ContinuousDistMixin, DiscreteDistMixin
from ._enums import Distribution
from .gnb import GeneralNB
from .gwnb import GaussianWNB


__all__ = [
"GeneralNB",
"GaussianWNB",
"Distribution",
"ContinuousDistMixin",
"DiscreteDistMixin",
]


from ._base import ContinuousDistMixin, DiscreteDistMixin
from ._enums import Distribution
from .gnb import GeneralNB
from .gwnb import GaussianWNB
15 changes: 6 additions & 9 deletions wnb/_base.py
@@ -53,8 +53,8 @@ def from_data(cls, data, **kwargs):

@classmethod
def _get_param_names(cls):
"""Gets parameter names for the distribution instance.
"""
Gets parameter names for the distribution instance.
"""
init = getattr(cls.__init__, "deprecated_original", cls.__init__)
if init is object.__init__:
@@ -97,7 +97,6 @@ def support(self) -> Union[List[float], Tuple[float, float]]:
If support is a list, it represents a limited number of discrete values.
If it is a tuple, it indicates a limited or unlimited range of continuous values.
"""
return self._support

@@ -110,8 +109,6 @@ def _check_support(self, x):
"Value doesn't lie within the support of the distribution",
RuntimeWarning,
)
else:
pass

def __repr__(self) -> str:
return "".join(
@@ -138,8 +135,8 @@ class ContinuousDistMixin(DistMixin, metaclass=ABCMeta):
_type = "continuous"

def __init__(self, **kwargs):
"""Initializes an instance of the continuous probability distribution with given parameters.
"""
Initializes an instance of the continuous probability distribution with given parameters.
"""
pass

@@ -168,8 +165,8 @@ class DiscreteDistMixin(DistMixin, metaclass=ABCMeta):
_type = "discrete"

def __init__(self, **kwargs):
"""Initializes an instance of the discrete probability distribution with given parameters.
"""
Initializes an instance of the discrete probability distribution with given parameters.
"""
pass

2 changes: 0 additions & 2 deletions wnb/_enums.py
@@ -1,6 +1,5 @@
from enum import Enum


__all__ = ["Distribution"]


@@ -17,6 +16,5 @@ class Distribution(str, Enum):
GAMMA = "Gamma"
BERNOULLI = "Bernoulli"
CATEGORICAL = "Categorical"
# MULTINOMIAL = "Multinomial"
GEOMETRIC = "Geometric"
POISSON = "Poisson"
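
Because `Distribution` derives from both `str` and `Enum` (per the class line above), its members compare equal to their string values; a small sketch of that behavior:

```python
from wnb import Distribution as D

# Members of this str-based Enum behave like plain strings in comparisons
assert D.BERNOULLI == "Bernoulli"
assert D.POISSON.value == "Poisson"
print([d.value for d in D])  # all registered distribution names
```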
44 changes: 13 additions & 31 deletions wnb/dist.py
@@ -1,12 +1,11 @@
from typing import Any, Mapping, Sequence
from typing import Any, Mapping

import numpy as np
from scipy.special import gamma

from ._base import ContinuousDistMixin, DiscreteDistMixin
from ._enums import Distribution as D


__all__ = [
"NormalDist",
"LognormalDist",
@@ -16,7 +15,6 @@
"GammaDist",
"BernoulliDist",
"CategoricalDist",
# 'MultinomialDist',
"GeometricDist",
"PoissonDist",
]
@@ -36,7 +34,7 @@ def from_data(cls, data: np.ndarray, **kwargs):
return cls(mu=np.average(data), sigma=np.std(data))

def pdf(self, x: float) -> float:
return (1.0 / np.sqrt(2 * np.pi * self.sigma ** 2)) * np.exp(
return (1.0 / np.sqrt(2 * np.pi * self.sigma**2)) * np.exp(
-0.5 * (((x - self.mu) / self.sigma) ** 2)
)

@@ -111,7 +109,7 @@ def from_data(cls, data, **kwargs):

def pdf(self, x: float) -> float:
return (
(self.alpha * self.x_m ** self.alpha) / x ** (self.alpha + 1)
(self.alpha * self.x_m**self.alpha) / x ** (self.alpha + 1)
if x >= self.x_m
else 0.0
)
@@ -136,12 +134,12 @@ def from_data(cls, data, **kwargs):
theta=(
n * np.sum(data * np.log(data)) - np.sum(data * np.sum(np.log(data)))
)
/ n ** 2,
/ n**2,
)

def pdf(self, x: float) -> float:
return (x ** (self.k - 1) * np.exp(-x / self.theta)) / (
gamma(self.k) * self.theta ** self.k
gamma(self.k) * self.theta**self.k
)


@@ -159,7 +157,13 @@ def from_data(cls, data, **kwargs):
return cls(p=((np.array(data) == 1).sum() + alpha) / len(data))

def pmf(self, x: int) -> float:
return 0.0 if x not in self._support else self.p if x == 1 else 1 - self.p
if x not in self._support:
return 0.0
else:
if x == 1:
return self.p
else:
return 1 - self.p


class CategoricalDist(DiscreteDistMixin):
@@ -180,28 +184,6 @@ def pmf(self, x: Any) -> float:
return self.prob.get(x, 0.0)


# class MultinomialDist(DiscreteDistMixin):
# name = D.MULTINOMIAL
#
# def __init__(self, n: int, prob: Mapping[int, float]):
# self.n = n
# self.prob = prob
# self._support = [i for i in range(self.n+1)]
# super().__init__()
#
# @classmethod
# def from_data(cls, data: Sequence[int], **kwargs):
# values, counts = np.unique(data, return_counts=True)
# return cls(n=int(np.sum(values)), prob={v: c / len(data) for v, c in zip(values, counts)})
#
# def pmf(self, x: Sequence[int]) -> float:
# if sum(x) != self.n:
# return 0.0
# else:
# return np.math.factorial(self.n) * np.product([self.prob.get(v, 0.0)**v for v in x]) / \
# np.product([np.math.factorial(v) for v in x])


class GeometricDist(DiscreteDistMixin):
name = D.GEOMETRIC
_support = (1, np.inf)
@@ -231,7 +213,7 @@ def from_data(cls, data, **kwargs):
return cls(rate=np.sum(data) / len(data))

def pmf(self, x: int) -> float:
return (np.exp(-self.rate) * self.rate ** x) / np.math.factorial(x)
return (np.exp(-self.rate) * self.rate**x) / np.math.factorial(x)


AllDistributions = {eval(cls).name: eval(cls) for cls in __all__}
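
A brief sketch of the distribution API this file defines, using `NormalDist`, whose `from_data` and `pdf` appear in the hunks above; the sample values are made up:

```python
import numpy as np
from wnb.dist import NormalDist

# Illustrative sample; from_data fits by moment matching (mu = mean, sigma = std)
data = np.array([1.2, 0.8, 1.1, 0.9, 1.0])
dist = NormalDist.from_data(data)
print(dist.pdf(1.0))  # density of the fitted normal at x = 1.0
```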
12 changes: 6 additions & 6 deletions wnb/gnb.py
@@ -15,6 +15,10 @@
from ._enums import Distribution
from .dist import AllDistributions, NonNumericDistributions

__all__ = [
"GeneralNB",
]


class GeneralNB(ClassifierMixin, BaseEstimator, metaclass=ABCMeta):
"""
@@ -58,7 +62,7 @@ def _get_distributions(self):
try:
if self.distributions_ is not None:
return self.distributions_
except:
except Exception:
return self.distributions or []

def _check_inputs(self, X, y):
@@ -218,10 +222,6 @@ def fit(

return self

def __predict(self, X):
p_hat = self.predict_log_proba(X)
return np.argmax(p_hat, axis=1)

def predict(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
"""Performs classification on an array of test vectors X.
@@ -261,7 +261,7 @@ def predict_log_proba(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
)

# Check if the number of input features matches the data seen during fit
if not X.shape[1] == self.n_features_in_:
if X.shape[1] != self.n_features_in_:
raise ValueError(
"Expected input with %d features, got %d instead."
% (self.n_features_in_, X.shape[1])
6 changes: 5 additions & 1 deletion wnb/gwnb.py
@@ -14,6 +14,10 @@
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.multiclass import type_of_target

__all__ = [
"GaussianWNB",
]


class GaussianWNB(ClassifierMixin, BaseEstimator, metaclass=ABCMeta):
"""
@@ -361,7 +365,7 @@ def predict_log_proba(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
)

# Check if the number of input features matches the data seen during fit
if not X.shape[1] == self.n_features_in_:
if X.shape[1] != self.n_features_in_:
raise ValueError(
"Expected input with %d features, got %d instead."
% (self.n_features_in_, X.shape[1])
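
To close, a minimal usage sketch of `GaussianWNB`, mirroring the calls exercised in `tests/test_gwnb.py` above; the two-blob toy data is illustrative:

```python
import numpy as np
from wnb import GaussianWNB

# Two-class toy data: one Gaussian blob per class
rng = np.random.RandomState(0)
X = np.vstack([rng.normal(-1.0, 1.0, size=(50, 2)),
               rng.normal(1.0, 1.0, size=(50, 2))])
y = np.array([0] * 50 + [1] * 50)

clf = GaussianWNB(priors=np.array([0.3, 0.7])).fit(X, y)
print(clf.predict_proba([[-0.1, -0.1]]))  # class membership probabilities
print(clf.class_prior_)                   # reflects the priors supplied above
```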
