From f0ff1ddab83940ee808f94e911398c65547e2b2e Mon Sep 17 00:00:00 2001 From: IroNEDR Date: Thu, 11 Apr 2024 19:32:33 +0200 Subject: [PATCH 1/2] extended gitignore --- .gitignore | 168 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 162 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index cc65586..b80b260 100644 --- a/.gitignore +++ b/.gitignore @@ -6,12 +6,168 @@ nasaValve rel_research PyNomaly/loop_dev.py /PyNomaly.egg-info/ -.pytest_cache -build -htmlcov/ -*.egg *.pyc -.coverage *.coverage.* .coveragerc -venv/ \ No newline at end of file + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + From e24cc980860360b350b2efc5040dd28ff7d68afe Mon Sep 17 00:00:00 2001 From: IroNEDR Date: Thu, 11 Apr 2024 23:15:59 +0200 Subject: [PATCH 2/2] added regression test and test data via fixture --- tests/test_loop.py | 82 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 58 insertions(+), 24 deletions(-) diff --git a/tests/test_loop.py b/tests/test_loop.py index 9e5850f..b7a880b 100644 --- a/tests/test_loop.py +++ b/tests/test_loop.py @@ -51,6 +51,28 @@ def X_n8() -> np.ndarray: return X +@pytest.fixture() +def X_n20_scores() -> tuple[np.ndarray, np.ndarray]: + """ + Fixture that returns a tuple containing a 20 element numpy array + and the precalculated LoOP scores based on that array. + :return: tuple(input_data,expected_scores) + """ + input_data = np.array([0.02059752, 0.32629926, 0.63036653, 0.94409321, + 0.63251097, 0.47598494, 0.80204026, 0.34845067, + 0.81556468, 0.89183, 0.25210317, 0.11460502, + 0.19953434, 0.36955067, 0.06038041, 0.34527368, + 0.56621582, 0.90533649, 0.33773613, 0.71573306]) + + expected_scores = np.array([0.6356276742921594, 0.0, 0.0, + 0.48490790006974044, 0.0, 0.0, 0.0, 0.0, + 0.021728288376168012, 0.28285086151683225, + 0.0, 0.18881886507113213, 0.0, 0.0, + 0.45350246469681843, 0.0, 0.07886635748113013, + 0.3349068501560546, 0.0, 0.0]) + return (input_data, expected_scores) + + @pytest.fixture() def X_n120() -> np.ndarray: """ @@ -122,6 +144,18 @@ def test_loop(X_n8) -> None: assert np.min(score[-2:]) > np.max(score[:-2]) +def test_regression(X_n20_scores) -> None: + """ + Tests for potential regression errors by comparing current results + to the expected results. 
Any changes to the code should still return + the same result given the same dataset + """ + input_data, expected_scores = X_n20_scores + clf = loop.LocalOutlierProbability(input_data).fit() + scores = clf.local_outlier_probabilities + assert np.array_equal(scores, expected_scores) + + def test_loop_performance(X_n120) -> None: """ Using a set of known anomalies (labels), tests the performance (using @@ -170,7 +204,7 @@ def test_input_nodata(X_n140_outliers) -> None: assert len(record) == 1 # check that the message matches assert record[0].message.args[ - 0] == "Data or a distance matrix must be provided." + 0] == "Data or a distance matrix must be provided." def test_input_incorrect_type(X_n140_outliers) -> None: @@ -192,8 +226,8 @@ def test_input_incorrect_type(X_n140_outliers) -> None: assert len(record) == 1 # check that the message matches assert record[0].message.args[ - 0] == "Argument 'n_neighbors' is not of type (, " \ - ")." + 0] == "Argument 'n_neighbors' is not of type (, " \ + ")." def test_input_neighbor_zero(X_n120) -> None: @@ -213,7 +247,7 @@ def test_input_neighbor_zero(X_n120) -> None: assert len(record) == 1 # check that the message matches assert record[0].message.args[ - 0] == "n_neighbors must be greater than 0. Fit with 10 instead." + 0] == "n_neighbors must be greater than 0. Fit with 10 instead." def test_input_distonly(X_n120) -> None: @@ -236,8 +270,8 @@ def test_input_distonly(X_n120) -> None: assert len(record) == 1 # check that the message matches assert record[0].message.args[ - 0] == "A neighbor index matrix and distance matrix must both " \ - "be provided when not using raw input data." + 0] == "A neighbor index matrix and distance matrix must both " \ + "be provided when not using raw input data." 
def test_input_neighboronly(X_n120) -> None: @@ -260,7 +294,7 @@ def test_input_neighboronly(X_n120) -> None: assert len(record) == 1 # check that the message matches assert record[0].message.args[ - 0] == "Data or a distance matrix must be provided." + 0] == "Data or a distance matrix must be provided." def test_input_too_many(X_n120) -> None: @@ -284,8 +318,8 @@ def test_input_too_many(X_n120) -> None: assert len(record) == 1 # check that the message matches assert record[0].message.args[ - 0] == "Only one of the following may be provided: data or a " \ - "distance matrix (not both)." + 0] == "Only one of the following may be provided: data or a " \ + "distance matrix (not both)." def test_distance_neighbor_shape_mismatch(X_n120) -> None: @@ -318,8 +352,8 @@ def test_distance_neighbor_shape_mismatch(X_n120) -> None: assert len(record) == 1 # check that the message matches assert record[0].message.args[ - 0] == "The shape of the distance and neighbor " \ - "index matrices must match." + 0] == "The shape of the distance and neighbor " \ + "index matrices must match." def test_input_neighbor_mismatch(X_n120) -> None: @@ -345,10 +379,10 @@ def test_input_neighbor_mismatch(X_n120) -> None: assert len(record) == 1 # check that the message matches assert record[0].message.args[ - 0] == "The shape of the distance or " \ - "neighbor index matrix does not " \ - "match the number of neighbors " \ - "specified." + 0] == "The shape of the distance or " \ + "neighbor index matrix does not " \ + "match the number of neighbors " \ + "specified." def test_loop_dist_matrix(X_n120) -> None: @@ -509,13 +543,13 @@ def test_missing_values() -> None: assert len(record_b) == 1 # check that the message matches assert record_b[0].message.args[ - 0] == "Method does not support missing values in input data." + 0] == "Method does not support missing values in input data." 
def test_small_cluster_size(X_n140_outliers) -> None: """ - Test to ensure that the program exits when the specified number of neighbors - is larger than the smallest cluster size in the input data. + Test to ensure that the program exits when the specified number of + neighbors is larger than the smallest cluster size in the input data. :param X_n140_outliers: A pytest Fixture that generates 140 observations. :return: None """ @@ -541,10 +575,10 @@ def test_small_cluster_size(X_n140_outliers) -> None: assert len(record_b) == 1 # check that the message matches assert record_b[0].message.args[ - 0] == "Number of neighbors specified larger than smallest " \ - "cluster. Specify a number of neighbors smaller than " \ - "the smallest cluster size (observations in smallest " \ - "cluster minus one)." + 0] == "Number of neighbors specified larger than smallest " \ + "cluster. Specify a number of neighbors smaller than " \ + "the smallest cluster size (observations in smallest " \ + "cluster minus one)." def test_stream_fit(X_n140_outliers) -> None: @@ -634,8 +668,8 @@ def test_stream_cluster(X_n140_outliers) -> None: assert len(record) == 1 # check that the message matches assert record[0].message.args[ - 0] == "Stream approach does not support clustered data. " \ - "Automatically refit using single cluster of points." + 0] == "Stream approach does not support clustered data. " \ + "Automatically refit using single cluster of points." def test_stream_performance(X_n140_outliers) -> None: