Skip to content

Automatic optimization of machine learning pipelines for tabular data

Notifications You must be signed in to change notification settings

Sara-Iftikhar/AutoTab

Repository files navigation

Documentation Status PyPI version DOI

autotab

Optimizes the pipeline for any machine learning model using a hierarchical optimization method for tabular datasets.

Installation

This package can be installed from PyPI with pip using the following command

pip install autotab

or from the GitHub repository for the latest code

python -m pip install git+https://github.com/Sara-Iftikhar/autotab.git

or using the setup file; go to the folder where this repository was downloaded

python setup.py install

Example

Click here to view the badge, or click here to open the example in Colab

from ai4water.datasets import busan_beach
from skopt.plots import plot_objective
from autotab import OptimizePipeline

# Load the example dataset; the last column is treated as the output/target
# and the remaining columns as inputs (see the splits below).
data = busan_beach()
input_features = data.columns.tolist()[0:-1]
output_features = data.columns.tolist()[-1:]

# Candidate feature transformations the optimizer may apply to inputs/outputs.
transformations = ['minmax', 'zscore', 'log', 'log10', 'sqrt', 'robust', 'quantile', 'none', 'scale']

# Configure hierarchical (parent/child) pipeline optimization: the parent
# loop searches transformations and model choices with Bayesian optimization,
# and each child loop tunes the chosen model's hyperparameters randomly.
# Reuses the input_features/output_features lists built above instead of
# recomputing data.columns.tolist() three times.
pl = OptimizePipeline(
    inputs_to_transform=input_features,
    parent_iterations=400,
    child_iterations=20,
    parent_algorithm='bayes',
    child_algorithm="random",
    cv_parent_hpo=True,            # cross-validate during parent HPO
    eval_metric='mse',             # objective the optimizer minimizes
    monitor=['r2', 'nse'],         # additional metrics tracked per iteration
    input_transformations=transformations,
    output_transformations=transformations,
    # Candidate regressors the parent optimizer may select between.
    models=[
        "LinearRegression",
        "LassoLars",
        "Lasso",
        "RandomForestRegressor",
        "HistGradientBoostingRegressor",
        "CatBoostRegressor",
        "XGBRegressor",
        "LGBMRegressor",
        "GradientBoostingRegressor",
        "ExtraTreeRegressor",
        "ExtraTreesRegressor",
    ],
    input_features=input_features,
    output_features=output_features,
    cross_validator={"KFold": {"n_splits": 5}},
    split_random=True,
)

get version information

pl._version_info()  # report version information (note: private API, may change)

perform optimization

# Run the optimization; process_results=False presumably skips per-model
# result post-processing — confirm against OptimizePipeline.fit docs.
results = pl.fit(data=data, process_results=False)

print optimization report

print(pl.report())  # textual summary of the optimization run

show convergence plot

# Diagnostic plots for the completed optimization run.
pl.optimizer_._plot_convergence(save=False)
pl.optimizer_._plot_parallel_coords(figsize=(16, 8), save=False)
_ = pl.optimizer_._plot_distributions(save=False)
pl.optimizer_.plot_importance(save=False)
pl.optimizer_.plot_importance(save=False, plot_type="bar")
_ = plot_objective(results)
# NOTE(review): the two calls below previously used ``pl.optimizer`` while
# every other call uses ``pl.optimizer_``; unified on ``optimizer_`` for
# consistency — confirm the attribute name against the AutoTab API.
pl.optimizer_._plot_evaluations(save=False)
pl.optimizer_._plot_edf(save=False)
# Per-model comparison plots.
pl.dumbbell_plot(data=data)
pl.dumbbell_plot(data=data, metric_name='r2')
pl.taylor_plot(data=data, save=False, figsize=(6,6))
pl.compare_models()
pl.compare_models(plot_type="bar_chart")
pl.compare_models("r2", plot_type="bar_chart")

get best pipeline with respect to evaluation metric

pl.get_best_pipeline_by_metric('r2')  # best pipeline ranked by the R^2 metric

build, fit, and evaluate the best pipeline

# Rebuild the overall best pipeline from scratch, then score it with the
# default metric followed by NSE and R^2.
best_model = pl.bfe_best_model_from_scratch(data=data)
pl.evaluate_model(best_model, data=data)
for metric in ('nse', 'r2'):
    pl.evaluate_model(best_model, data=data, metric_name=metric)

get best pipeline with respect to $R^2$

# Select, rebuild, and evaluate the pipeline that is best according to R^2.
pl.get_best_pipeline_by_metric('r2')
model = pl.bfe_best_model_from_scratch(data=data, metric_name='r2')
pl.evaluate_model(model, data=data, metric_name='r2')
# Fixed message typo: "are save in" -> "are saved in".
print(f"all results are saved in {pl.path} folder")