Merge pull request #15 from msamsami/update-compatibility
Update compatibility, minor improvements
msamsami authored Oct 5, 2023
2 parents e7a0edc + f8c7963 commit dac5411
Showing 11 changed files with 62 additions and 70 deletions.
11 changes: 8 additions & 3 deletions README.md
@@ -1,7 +1,7 @@
# WNB: General and weighted naive Bayes classifiers

![](https://img.shields.io/badge/version-v0.1.15-green)
![](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9-blue)
![](https://img.shields.io/badge/version-v0.1.16-green)
![](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue)
![](https://github.com/msamsami/weighted-naive-bayes/actions/workflows/python-publish.yml/badge.svg)
[![](https://img.shields.io/pypi/v/wnb)](https://pypi.org/project/wnb/)
![](https://img.shields.io/pypi/dm/wnb)
@@ -34,7 +34,7 @@ of gaussian MLD-WNB.

## Install
The easiest way to install the wnb library is by using `pip`:
```commandline
```
pip install wnb
```
This library is shipped as an all-in-one module implementation with minimalistic dependencies and requirements.
@@ -97,6 +97,11 @@ To run the tests, install development requirements:
pip install -r requirements_dev.txt
```

Or, install the package with dev extras:
```
pip install wnb[dev]
```

Then, run pytest:
```
pytest
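
As a quick orientation for the README changes above, here is a minimal usage sketch of the package's public API. It relies only on names visible in this diff (`GeneralNB`, `Distribution`, and the scikit-learn-style `fit`/`predict` exercised in the tests below); the toy data is made up:

```python
import numpy as np
from wnb import GeneralNB, Distribution as D

# Toy data: 150 samples with 4 binary features and 2 classes
rng = np.random.RandomState(0)
X = rng.randint(2, size=(150, 4))
y = rng.randint(1, 3, size=(150,))

# One distribution per feature, mirroring tests/test_gnb.py in this diff
clf = GeneralNB(distributions=[D.BERNOULLI for _ in range(4)])
clf.fit(X, y)
print(clf.predict(X[:5]))
```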
3 changes: 2 additions & 1 deletion requirements_dev.txt
@@ -1,4 +1,5 @@
pandas>=1.4.1
scipy>=1.8.0
scikit-learn>=1.0.2
pytest==7.3.1
pytest==7.3.1
black>=23.9.0
10 changes: 6 additions & 4 deletions setup.py
@@ -1,11 +1,11 @@
import codecs
from os import path
from setuptools import setup
from setuptools import setup, find_packages


setup(
name="wnb",
version="0.1.15",
version="0.1.16",
description="Python library for the implementations of general and weighted naive Bayes (WNB) classifiers.",
keywords=["python", "bayes", "naivebayes", "classifier", "probabilistic"],
author="Mehdi Samsami",
@@ -15,7 +15,7 @@
path.join(path.abspath(path.dirname(__file__)), "README.md"), encoding="utf-8"
).read(),
long_description_content_type="text/markdown",
packages=["wnb"],
packages=find_packages(),
classifiers=[
"Intended Audience :: Science/Research",
"Intended Audience :: Developers",
@@ -26,9 +26,11 @@
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"License :: OSI Approved :: MIT License",
],
python_requires=">=3.7",
install_requires=["pandas==1.4.1", "scikit-learn>=1.0.2"],
extras_require={"dev": "pytest==7.3.1"},
extras_require={"dev": ["pytest==7.3.1", "black>=23.9.0"]},
)
12 changes: 7 additions & 5 deletions tests/test_gnb.py
@@ -71,7 +71,7 @@ def test_gnb_vs_sklearn_bernoulli():
"""
rng = np.random.RandomState(1)
X_ = rng.randint(2, size=(150, 100))
y_ = rng.randint(1, 5, size=(150, ))
y_ = rng.randint(1, 5, size=(150,))

clf1 = GeneralNB(distributions=[D.BERNOULLI for _ in range(100)])
clf1.fit(X_, y_)
@@ -101,18 +101,20 @@ def test_gnb_vs_sklearn_categorical():
["cat", "dog"],
["morning", "noon", "afternoon", "evening"],
["apple", "orange", "watermelon"],
["black", "white"]
["black", "white"],
]
rng = np.random.RandomState(24)
X_str_ = np.empty((150, 4)).astype("str")
X_ = np.zeros((150, 4))
for i, options in enumerate(categorical_values):
rnd_values = rng.randint(len(options), size=(150, ))
rnd_values = rng.randint(len(options), size=(150,))
X_str_[:, i] = np.array(options)[rnd_values]
X_[:, i] = rnd_values
y_ = rng.randint(1, 4, size=(150, ))
y_ = rng.randint(1, 4, size=(150,))

clf1 = GeneralNB(distributions=[D.CATEGORICAL for _ in range(len(categorical_values))])
clf1 = GeneralNB(
distributions=[D.CATEGORICAL for _ in range(len(categorical_values))]
)
clf1.fit(X_str_, y_)

clf2 = CategoricalNB(alpha=1e-10, force_alpha=True)
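
For comparison, a pared-down sketch of the scenario the categorical test above exercises: fitting `GeneralNB` directly on string-valued features. The single-column toy data and the `predict` call are illustrative assumptions modeled on that test:

```python
import numpy as np
from wnb import GeneralNB, Distribution as D

# A single string-valued categorical feature (illustrative toy data)
rng = np.random.RandomState(24)
options = np.array(["black", "white"])
X = options[rng.randint(2, size=(20, 1))]
y = rng.randint(1, 3, size=(20,))

clf = GeneralNB(distributions=[D.CATEGORICAL]).fit(X, y)
print(clf.predict(np.array([["black"]])))
```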
2 changes: 1 addition & 1 deletion tests/test_gwnb.py
@@ -79,7 +79,7 @@ def test_gwnb_priors():
clf = GaussianWNB(priors=np.array([0.3, 0.7])).fit(X, y)
assert_array_almost_equal(
clf.predict_proba([[-0.1, -0.1]]),
np.array([[0.823571, 0.176429]]),
np.array([[0.82357095, 0.17642905]]),
8,
)
assert_array_almost_equal(clf.class_prior_, np.array([0.3, 0.7]))
15 changes: 8 additions & 7 deletions wnb/__init__.py
@@ -1,16 +1,17 @@
__version__ = "0.1.15"
__version__ = "0.1.16"
__author__ = "Mehdi Samsami"


from ._base import ContinuousDistMixin, DiscreteDistMixin
from ._enums import Distribution
from .gnb import GeneralNB
from .gwnb import GaussianWNB


__all__ = [
"GeneralNB",
"GaussianWNB",
"Distribution",
"ContinuousDistMixin",
"DiscreteDistMixin",
]


from ._base import ContinuousDistMixin, DiscreteDistMixin
from ._enums import Distribution
from .gnb import GeneralNB
from .gwnb import GaussianWNB
15 changes: 6 additions & 9 deletions wnb/_base.py
@@ -53,8 +53,8 @@ def from_data(cls, data, **kwargs):

@classmethod
def _get_param_names(cls):
"""Gets parameter names for the distribution instance.
"""
Gets parameter names for the distribution instance.
"""
init = getattr(cls.__init__, "deprecated_original", cls.__init__)
if init is object.__init__:
@@ -97,7 +97,6 @@ def support(self) -> Union[List[float], Tuple[float, float]]:
If support is a list, it represents a limited number of discrete values.
If it is a tuple, it indicates a limited or unlimited range of continuous values.
"""
return self._support

@@ -110,8 +109,6 @@ def _check_support(self, x):
"Value doesn't lie within the support of the distribution",
RuntimeWarning,
)
else:
pass

def __repr__(self) -> str:
return "".join(
@@ -138,8 +135,8 @@ class ContinuousDistMixin(DistMixin, metaclass=ABCMeta):
_type = "continuous"

def __init__(self, **kwargs):
"""Initializes an instance of the continuous probability distribution with given parameters.
"""
Initializes an instance of the continuous probability distribution with given parameters.
"""
pass

@@ -168,8 +165,8 @@ class DiscreteDistMixin(DistMixin, metaclass=ABCMeta):
_type = "discrete"

def __init__(self, **kwargs):
"""Initializes an instance of the discrete probability distribution with given parameters.
"""
Initializes an instance of the discrete probability distribution with given parameters.
"""
pass

2 changes: 0 additions & 2 deletions wnb/_enums.py
@@ -1,6 +1,5 @@
from enum import Enum


__all__ = ["Distribution"]


@@ -17,6 +16,5 @@ class Distribution(str, Enum):
GAMMA = "Gamma"
BERNOULLI = "Bernoulli"
CATEGORICAL = "Categorical"
# MULTINOMIAL = "Multinomial"
GEOMETRIC = "Geometric"
POISSON = "Poisson"
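
Because `Distribution` derives from both `str` and `Enum` (per the class line above), its members compare equal to their string values; a small sketch of that behavior:

```python
from wnb import Distribution as D

# Members of this str-based Enum behave like plain strings in comparisons
assert D.BERNOULLI == "Bernoulli"
assert D.POISSON.value == "Poisson"
print([d.value for d in D])  # all registered distribution names
```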
44 changes: 13 additions & 31 deletions wnb/dist.py
@@ -1,12 +1,11 @@
from typing import Any, Mapping, Sequence
from typing import Any, Mapping

import numpy as np
from scipy.special import gamma

from ._base import ContinuousDistMixin, DiscreteDistMixin
from ._enums import Distribution as D


__all__ = [
"NormalDist",
"LognormalDist",
@@ -16,7 +15,6 @@
"GammaDist",
"BernoulliDist",
"CategoricalDist",
# 'MultinomialDist',
"GeometricDist",
"PoissonDist",
]
@@ -36,7 +34,7 @@ def from_data(cls, data: np.ndarray, **kwargs):
return cls(mu=np.average(data), sigma=np.std(data))

def pdf(self, x: float) -> float:
return (1.0 / np.sqrt(2 * np.pi * self.sigma ** 2)) * np.exp(
return (1.0 / np.sqrt(2 * np.pi * self.sigma**2)) * np.exp(
-0.5 * (((x - self.mu) / self.sigma) ** 2)
)

@@ -111,7 +109,7 @@ def from_data(cls, data, **kwargs):

def pdf(self, x: float) -> float:
return (
(self.alpha * self.x_m ** self.alpha) / x ** (self.alpha + 1)
(self.alpha * self.x_m**self.alpha) / x ** (self.alpha + 1)
if x >= self.x_m
else 0.0
)
@@ -136,12 +134,12 @@ def from_data(cls, data, **kwargs):
theta=(
n * np.sum(data * np.log(data)) - np.sum(data * np.sum(np.log(data)))
)
/ n ** 2,
/ n**2,
)

def pdf(self, x: float) -> float:
return (x ** (self.k - 1) * np.exp(-x / self.theta)) / (
gamma(self.k) * self.theta ** self.k
gamma(self.k) * self.theta**self.k
)


@@ -159,7 +157,13 @@ def from_data(cls, data, **kwargs):
return cls(p=((np.array(data) == 1).sum() + alpha) / len(data))

def pmf(self, x: int) -> float:
return 0.0 if x not in self._support else self.p if x == 1 else 1 - self.p
if x not in self._support:
return 0.0
else:
if x == 1:
return self.p
else:
return 1 - self.p


class CategoricalDist(DiscreteDistMixin):
@@ -180,28 +184,6 @@ def pmf(self, x: Any) -> float:
return self.prob.get(x, 0.0)


# class MultinomialDist(DiscreteDistMixin):
# name = D.MULTINOMIAL
#
# def __init__(self, n: int, prob: Mapping[int, float]):
# self.n = n
# self.prob = prob
# self._support = [i for i in range(self.n+1)]
# super().__init__()
#
# @classmethod
# def from_data(cls, data: Sequence[int], **kwargs):
# values, counts = np.unique(data, return_counts=True)
# return cls(n=int(np.sum(values)), prob={v: c / len(data) for v, c in zip(values, counts)})
#
# def pmf(self, x: Sequence[int]) -> float:
# if sum(x) != self.n:
# return 0.0
# else:
# return np.math.factorial(self.n) * np.product([self.prob.get(v, 0.0)**v for v in x]) / \
# np.product([np.math.factorial(v) for v in x])


class GeometricDist(DiscreteDistMixin):
name = D.GEOMETRIC
_support = (1, np.inf)
@@ -231,7 +213,7 @@ def from_data(cls, data, **kwargs):
return cls(rate=np.sum(data) / len(data))

def pmf(self, x: int) -> float:
return (np.exp(-self.rate) * self.rate ** x) / np.math.factorial(x)
return (np.exp(-self.rate) * self.rate**x) / np.math.factorial(x)


AllDistributions = {eval(cls).name: eval(cls) for cls in __all__}
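
A brief sketch of the distribution API this file defines, using `NormalDist`, whose `from_data` and `pdf` appear in the hunks above; the sample values are made up:

```python
import numpy as np
from wnb.dist import NormalDist

# Illustrative sample; from_data fits by moment matching (mu = mean, sigma = std)
data = np.array([1.2, 0.8, 1.1, 0.9, 1.0])
dist = NormalDist.from_data(data)
print(dist.pdf(1.0))  # density of the fitted normal at x = 1.0
```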
12 changes: 6 additions & 6 deletions wnb/gnb.py
@@ -15,6 +15,10 @@
from ._enums import Distribution
from .dist import AllDistributions, NonNumericDistributions

__all__ = [
"GeneralNB",
]


class GeneralNB(ClassifierMixin, BaseEstimator, metaclass=ABCMeta):
"""
@@ -58,7 +62,7 @@ def _get_distributions(self):
try:
if self.distributions_ is not None:
return self.distributions_
except:
except Exception:
return self.distributions or []

def _check_inputs(self, X, y):
@@ -218,10 +222,6 @@ def fit(

return self

def __predict(self, X):
p_hat = self.predict_log_proba(X)
return np.argmax(p_hat, axis=1)

def predict(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
"""Performs classification on an array of test vectors X.
@@ -261,7 +261,7 @@ def predict_log_proba(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
)

# Check if the number of input features matches the data seen during fit
if not X.shape[1] == self.n_features_in_:
if X.shape[1] != self.n_features_in_:
raise ValueError(
"Expected input with %d features, got %d instead."
% (self.n_features_in_, X.shape[1])
6 changes: 5 additions & 1 deletion wnb/gwnb.py
@@ -14,6 +14,10 @@
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.multiclass import type_of_target

__all__ = [
"GaussianWNB",
]


class GaussianWNB(ClassifierMixin, BaseEstimator, metaclass=ABCMeta):
"""
@@ -361,7 +365,7 @@ def predict_log_proba(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
)

# Check if the number of input features matches the data seen during fit
if not X.shape[1] == self.n_features_in_:
if X.shape[1] != self.n_features_in_:
raise ValueError(
"Expected input with %d features, got %d instead."
% (self.n_features_in_, X.shape[1])
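
To close, a minimal usage sketch of `GaussianWNB`, mirroring the calls exercised in `tests/test_gwnb.py` above; the two-blob toy data is illustrative:

```python
import numpy as np
from wnb import GaussianWNB

# Two-class toy data: one Gaussian blob per class
rng = np.random.RandomState(0)
X = np.vstack([rng.normal(-1.0, 1.0, size=(50, 2)),
               rng.normal(1.0, 1.0, size=(50, 2))])
y = np.array([0] * 50 + [1] * 50)

clf = GaussianWNB(priors=np.array([0.3, 0.7])).fit(X, y)
print(clf.predict_proba([[-0.1, -0.1]]))  # class membership probabilities
print(clf.class_prior_)                   # reflects the priors supplied above
```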
