You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Problem: On a large training dataset with over 3 billion samples, CatBoost throws an exception because the object count no longer fits when it is cast from an unsigned int to a signed int. (On a side note, are there any plans to use 64-bit variables for dataset object counts?)
catboost version: 1.2.3
Operating System: Linux RHEL 8.6
CPU: Intel Xeon Platinum 8480+
GPU: A100-SXM4-80GB * 8
Stack trace:
---------------------------------------------------------------------------
CatBoostError Traceback (most recent call last)
Cell In[10], line 1
----> 1 model.fit(samples_df, targets_df, sample_weight=weights_df, verbose=True)
File /pythondir/lib/python3.10/site-packages/catboost/core.py:5807, in CatBoostRegressor.fit(self, X, y, cat_features, text_features, embedding_features, sample_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
5804 if 'loss_function' in params:
5805 CatBoostRegressor._check_is_compatible_loss(params['loss_function'])
-> 5807 return self._fit(X, y, cat_features, text_features, embedding_features, None, sample_weight, None, None, None, None, baseline,
5808 use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description,
5809 verbose_eval, metric_period, silent, early_stopping_rounds,
5810 save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
File /pythondir/lib/python3.10/site-packages/catboost/core.py:2381, in CatBoost._fit(self, X, y, cat_features, text_features, embedding_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
2378 if y is None and not isinstance(X, PATH_TYPES + (Pool,)):
2379 raise CatBoostError("y may be None only when X is an instance of catboost.Pool or string")
-> 2381 train_params = self._prepare_train_params(
2382 X=X, y=y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features,
2383 pairs=pairs, sample_weight=sample_weight, group_id=group_id, group_weight=group_weight,
2384 subgroup_id=subgroup_id, pairs_weight=pairs_weight, baseline=baseline, use_best_model=use_best_model,
2385 eval_set=eval_set, verbose=verbose, logging_level=logging_level, plot=plot, plot_file=plot_file,
2386 column_description=column_description, verbose_eval=verbose_eval, metric_period=metric_period,
2387 silent=silent, early_stopping_rounds=early_stopping_rounds, save_snapshot=save_snapshot,
2388 snapshot_file=snapshot_file, snapshot_interval=snapshot_interval, init_model=init_model,
2389 callbacks=callbacks
2390 )
2391 params = train_params["params"]
2392 train_pool = train_params["train_pool"]
File /pythondir/lib/python3.10/site-packages/catboost/core.py:2261, in CatBoost._prepare_train_params(self, X, y, cat_features, text_features, embedding_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks)
2258 text_features = _process_feature_indices(text_features, X, params, 'text_features')
2259 embedding_features = _process_feature_indices(embedding_features, X, params, 'embedding_features')
-> 2261 train_pool = _build_train_pool(X, y, cat_features, text_features, embedding_features, pairs,
2262 sample_weight, group_id, group_weight, subgroup_id, pairs_weight,
2263 baseline, column_description)
2264 if train_pool.is_empty_:
2265 raise CatBoostError("X is empty.")
File /pythondir/lib/python3.10/site-packages/catboost/core.py:1499, in _build_train_pool(X, y, cat_features, text_features, embedding_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, column_description)
1497 if y is None:
1498 raise CatBoostError("y has not initialized in fit(): X is not catboost.Pool object, y must be not None in fit().")
-> 1499 train_pool = Pool(X, y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features, pairs=pairs, weight=sample_weight, group_id=group_id,
1500 group_weight=group_weight, subgroup_id=subgroup_id, pairs_weight=pairs_weight, baseline=baseline)
1501 return train_pool
File /pythondir/lib/python3.10/site-packages/catboost/core.py:844, in Pool.__init__(self, data, label, cat_features, text_features, embedding_features, embedding_features_data, column_description, pairs, delimiter, has_header, ignore_csv_quoting, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count, log_cout, log_cerr, data_can_be_none)
838 if isinstance(feature_names, PATH_TYPES):
839 raise CatBoostError(
840 "feature_names must be None or have non-string type when the pool is created from "
841 "python objects."
842 )
--> 844 self._init(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
845 group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
846 elif not data_can_be_none:
847 raise CatBoostError("'data' parameter can't be None")
File /pythondir/lib/python3.10/site-packages/catboost/core.py:1477, in Pool._init(self, data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
1475 if feature_tags is not None:
1476 feature_tags = self._check_transform_tags(feature_tags, feature_names)
-> 1477 self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
1478 group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
File _catboost.pyx:4159, in _catboost._PoolBase._init_pool()
File _catboost.pyx:4209, in _catboost._PoolBase._init_pool()
File _catboost.pyx:4003, in _catboost._PoolBase._init_features_order_layout_pool()
CatBoostError: /src/catboost/util/generic/cast.h:131: Conversion 'unsigned int{3284861059}' to 'int', positive value converted to negative
Thank you in advance
The text was updated successfully, but these errors were encountered:
Problem: On a large training dataset with over 3 billion samples, CatBoost throws an exception because the object count no longer fits when it is cast from an unsigned int to a signed int. (On a side note, are there any plans to use 64-bit variables for dataset object counts?)
catboost version: 1.2.3
Operating System: Linux RHEL 8.6
CPU: Intel Xeon Platinum 8480+
GPU: A100-SXM4-80GB * 8
Stack trace:
Thank you in advance
The text was updated successfully, but these errors were encountered: