Merge pull request #56 from vc1492a/dev
merge dev into feature branch
IroNEDR committed Jun 1, 2024
2 parents 600df44 + 2662adf commit e3af240
Showing 2 changed files with 220 additions and 30 deletions.
168 changes: 162 additions & 6 deletions .gitignore
@@ -6,12 +6,168 @@ nasaValve
rel_research
PyNomaly/loop_dev.py
/PyNomaly.egg-info/
.pytest_cache
build
htmlcov/
*.egg
*.pyc
.coverage
*.coverage.*
.coveragerc
venv/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

82 changes: 58 additions & 24 deletions tests/test_loop.py
@@ -51,6 +51,28 @@ def X_n8() -> np.ndarray:
return X


@pytest.fixture()
def X_n20_scores() -> tuple[np.ndarray, np.ndarray]:
"""
Fixture that returns a tuple containing a 20-element numpy array
and the precalculated LoOP scores based on that array.
:return: tuple(input_data, expected_scores)
"""
input_data = np.array([0.02059752, 0.32629926, 0.63036653, 0.94409321,
0.63251097, 0.47598494, 0.80204026, 0.34845067,
0.81556468, 0.89183, 0.25210317, 0.11460502,
0.19953434, 0.36955067, 0.06038041, 0.34527368,
0.56621582, 0.90533649, 0.33773613, 0.71573306])

expected_scores = np.array([0.6356276742921594, 0.0, 0.0,
0.48490790006974044, 0.0, 0.0, 0.0, 0.0,
0.021728288376168012, 0.28285086151683225,
0.0, 0.18881886507113213, 0.0, 0.0,
0.45350246469681843, 0.0, 0.07886635748113013,
0.3349068501560546, 0.0, 0.0])
return (input_data, expected_scores)


@pytest.fixture()
def X_n120() -> np.ndarray:
"""
@@ -122,6 +144,18 @@ def test_loop(X_n8) -> None:
assert np.min(score[-2:]) > np.max(score[:-2])


def test_regression(X_n20_scores) -> None:
"""
Tests for potential regression errors by comparing current results
to the expected results. Any changes to the code should still return
the same result given the same dataset.
"""
input_data, expected_scores = X_n20_scores
clf = loop.LocalOutlierProbability(input_data).fit()
scores = clf.local_outlier_probabilities
assert np.array_equal(scores, expected_scores)


def test_loop_performance(X_n120) -> None:
"""
Using a set of known anomalies (labels), tests the performance (using
@@ -170,7 +204,7 @@ def test_input_nodata(X_n140_outliers) -> None:
assert len(record) == 1
# check that the message matches
assert record[0].message.args[
0] == "Data or a distance matrix must be provided."
0] == "Data or a distance matrix must be provided."


def test_input_incorrect_type(X_n140_outliers) -> None:
@@ -192,8 +226,8 @@ def test_input_incorrect_type(X_n140_outliers) -> None:
assert len(record) == 1
# check that the message matches
assert record[0].message.args[
0] == "Argument 'n_neighbors' is not of type (<class 'int'>, " \
"<class 'numpy.integer'>)."
0] == "Argument 'n_neighbors' is not of type (<class 'int'>, " \
"<class 'numpy.integer'>)."


def test_input_neighbor_zero(X_n120) -> None:
@@ -213,7 +247,7 @@ def test_input_neighbor_zero(X_n120) -> None:
assert len(record) == 1
# check that the message matches
assert record[0].message.args[
0] == "n_neighbors must be greater than 0. Fit with 10 instead."
0] == "n_neighbors must be greater than 0. Fit with 10 instead."


def test_input_distonly(X_n120) -> None:
@@ -236,8 +270,8 @@ def test_input_distonly(X_n120) -> None:
assert len(record) == 1
# check that the message matches
assert record[0].message.args[
0] == "A neighbor index matrix and distance matrix must both " \
"be provided when not using raw input data."
0] == "A neighbor index matrix and distance matrix must both " \
"be provided when not using raw input data."


def test_input_neighboronly(X_n120) -> None:
@@ -260,7 +294,7 @@ def test_input_neighboronly(X_n120) -> None:
assert len(record) == 1
# check that the message matches
assert record[0].message.args[
0] == "Data or a distance matrix must be provided."
0] == "Data or a distance matrix must be provided."


def test_input_too_many(X_n120) -> None:
@@ -284,8 +318,8 @@ def test_input_too_many(X_n120) -> None:
assert len(record) == 1
# check that the message matches
assert record[0].message.args[
0] == "Only one of the following may be provided: data or a " \
"distance matrix (not both)."
0] == "Only one of the following may be provided: data or a " \
"distance matrix (not both)."


def test_distance_neighbor_shape_mismatch(X_n120) -> None:
@@ -318,8 +352,8 @@ def test_distance_neighbor_shape_mismatch(X_n120) -> None:
assert len(record) == 1
# check that the message matches
assert record[0].message.args[
0] == "The shape of the distance and neighbor " \
"index matrices must match."
0] == "The shape of the distance and neighbor " \
"index matrices must match."


def test_input_neighbor_mismatch(X_n120) -> None:
@@ -345,10 +379,10 @@ def test_input_neighbor_mismatch(X_n120) -> None:
assert len(record) == 1
# check that the message matches
assert record[0].message.args[
0] == "The shape of the distance or " \
"neighbor index matrix does not " \
"match the number of neighbors " \
"specified."
0] == "The shape of the distance or " \
"neighbor index matrix does not " \
"match the number of neighbors " \
"specified."


def test_loop_dist_matrix(X_n120) -> None:
@@ -509,13 +543,13 @@ def test_missing_values() -> None:
assert len(record_b) == 1
# check that the message matches
assert record_b[0].message.args[
0] == "Method does not support missing values in input data."
0] == "Method does not support missing values in input data."


def test_small_cluster_size(X_n140_outliers) -> None:
"""
Test to ensure that the program exits when the specified number of neighbors
is larger than the smallest cluster size in the input data.
Test to ensure that the program exits when the specified number of
neighbors is larger than the smallest cluster size in the input data.
:param X_n140_outliers: A pytest Fixture that generates 140 observations.
:return: None
"""
@@ -541,10 +575,10 @@ def test_small_cluster_size(X_n140_outliers) -> None:
assert len(record_b) == 1
# check that the message matches
assert record_b[0].message.args[
0] == "Number of neighbors specified larger than smallest " \
"cluster. Specify a number of neighbors smaller than " \
"the smallest cluster size (observations in smallest " \
"cluster minus one)."
0] == "Number of neighbors specified larger than smallest " \
"cluster. Specify a number of neighbors smaller than " \
"the smallest cluster size (observations in smallest " \
"cluster minus one)."


def test_stream_fit(X_n140_outliers) -> None:
Expand Down Expand Up @@ -634,8 +668,8 @@ def test_stream_cluster(X_n140_outliers) -> None:
assert len(record) == 1
# check that the message matches
assert record[0].message.args[
0] == "Stream approach does not support clustered data. " \
"Automatically refit using single cluster of points."
0] == "Stream approach does not support clustered data. " \
"Automatically refit using single cluster of points."


def test_stream_performance(X_n140_outliers) -> None:
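The new regression test pins the LoOP scores returned by PyNomaly to a fixed reference array, so any behavioral change in the scoring code surfaces as a test failure. A minimal sketch of the same call pattern outside of pytest, assuming the package is installed and imported as in the test suite (from PyNomaly import loop), and using hypothetical sample values rather than the fixture data:

import numpy as np
from PyNomaly import loop  # import path assumed to match the test suite

# Small 1-D sample, analogous to the X_n20_scores fixture above.
data = np.array([0.02, 0.33, 0.63, 0.94, 0.63, 0.48, 0.80, 0.35,
                 0.82, 0.89, 0.25, 0.11, 0.20, 0.37, 0.06, 0.35])

# Fit LoOP on the raw data and read back the local outlier probabilities,
# mirroring the calls made in test_regression.
clf = loop.LocalOutlierProbability(data).fit()
scores = clf.local_outlier_probabilities

# Scores lie in [0, 1]; larger values indicate likelier outliers.
print(scores)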
