
[Bug]: When use_spark = True and mlflow_logging = True are set, an error is reported when logging the best model: 'NoneType' object has no attribute 'save' #1431

Closed
hexiang-x opened this issue May 14, 2025 · 0 comments
Labels
bug Something isn't working

Describe the bug

Here is the error detail:

---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[9], line 23
21 # Start training. Set the mlflow experiment name and run name; model parameters are logged to the experiment during training, and the best model artifact is recorded as well.
22 with mlflow.start_run(run_name="test_ts_2"):
---> 23 automl.fit(
24 X_train=X_train[:84], # a single column of timestamp
25 y_train=y_train, # value for each timestamp
26 period=12, # time horizon to forecast, e.g., 12 months
27 task="ts_forecast",
28 log_file_name="ts_forecast.log",
29 eval_method="holdout",
30 use_spark=True,
31 mlflow_logging=True
32 )

File /data/conda/env/dead/lib/python3.9/site-packages/flaml/automl/automl.py:1980, in AutoML.fit(self, X_train, y_train, dataframe, label, metric, task, n_jobs, log_file_name, estimator_list, time_budget, max_iter, sample, ensemble, eval_method, log_type, model_history, split_ratio, n_splits, log_training_metric, mem_thres, pred_time_limit, train_time_limit, X_val, y_val, sample_weight_val, groups_val, groups, verbose, retrain_full, split_type, learner_selector, hpo_method, starting_points, seed, n_concurrent_trials, keep_search_state, preserve_checkpoint, early_stop, force_cancel, append_log, auto_augment, min_sample_size, use_ray, use_spark, free_mem_ratio, metric_constraints, custom_hp, time_col, cv_score_agg_func, skip_transform, mlflow_logging, fit_kwargs_by_estimator, mlflow_exp_name, **fit_kwargs)
1978 with training_log_writer(log_file_name, append_log) as save_helper:
1979 self._training_log = save_helper
-> 1980 self._search()
1981 else:
1982 self._training_log = None

File /data/conda/env/dead/lib/python3.9/site-packages/flaml/automl/automl.py:2542, in AutoML._search(self)
2540 if self._best_estimator:
2541 if self.mlflow_integration:
-> 2542 self.mlflow_integration.log_automl(self)
2543 if mlflow.active_run() is None:
2544 if self.mlflow_integration.parent_run_id is not None and self.mlflow_integration.autolog:
2545 # ensure result of retrain autolog to parent run

File /data/conda/env/dead/lib/python3.9/site-packages/flaml/fabric/mlflow.py:525, in MLflowIntegration.log_automl(self, automl)
523 self.has_summary = True
524 if automl._trained_estimator is not None and not self.has_model:
--> 525 self.log_model(
526 automl._trained_estimator._model,
527 automl.best_estimator,
528 signature=automl.estimator_signature,
529 )
530 self.pickle_and_log_automl_artifacts(
531 automl, automl.model, automl.best_estimator, signature=automl.pipeline_signature
532 )
533 self.has_model = True

File /data/conda/env/dead/lib/python3.9/site-packages/flaml/fabric/mlflow.py:349, in MLflowIntegration.log_model(self, model, estimator, signature)
347 mlflow.transformers.log_model(model, estimator, signature=signature)
348 elif estimator in ["arima", "sarimax", "holt-winters", "snaive", "naive", "savg", "avg", "ets"]:
--> 349 mlflow.statsmodels.log_model(model, estimator, signature=signature)
350 elif estimator in ["tcn", "tft"]:
351 mlflow.pytorch.log_model(model, estimator, signature=signature)

File /data/conda/env/dead/lib/python3.9/site-packages/mlflow/statsmodels/__init__.py:260, in log_model(statsmodels_model, artifact_path, conda_env, code_paths, registered_model_name, remove_data, signature, input_example, await_registration_for, pip_requirements, extra_pip_requirements, metadata, **kwargs)
216 @format_docstring(LOG_MODEL_PARAM_DOCS.format(package_name=FLAVOR_NAME))
217 def log_model(
218 statsmodels_model,
(...)
230 **kwargs,
231 ):
232 """
233 Log a statsmodels model as an MLflow artifact for the current run.
234
(...)
258 of the logged model.
259 """
--> 260 return Model.log(
261 artifact_path=artifact_path,
262 flavor=mlflow.statsmodels,
263 registered_model_name=registered_model_name,
264 statsmodels_model=statsmodels_model,
265 conda_env=conda_env,
266 code_paths=code_paths,
267 signature=signature,
268 input_example=input_example,
269 await_registration_for=await_registration_for,
270 remove_data=remove_data,
271 pip_requirements=pip_requirements,
272 extra_pip_requirements=extra_pip_requirements,
273 metadata=metadata,
274 **kwargs,
275 )

File /data/conda/env/dead/lib/python3.9/site-packages/mlflow/models/model.py:726, in Model.log(cls, artifact_path, flavor, registered_model_name, await_registration_for, metadata, run_id, resources, **kwargs)
722 run_id = mlflow.tracking.fluent._get_or_start_run().info.run_id
723 mlflow_model = cls(
724 artifact_path=artifact_path, run_id=run_id, metadata=metadata, resources=resources
725 )
--> 726 flavor.save_model(path=local_path, mlflow_model=mlflow_model, **kwargs)
727 # save_model calls load_model to infer the model requirements, which may result in
728 # __pycache__ directories being created in the model directory.
729 for pycache in Path(local_path).rglob("__pycache__"):

File /data/conda/env/dead/lib/python3.9/site-packages/mlflow/statsmodels/__init__.py:151, in save_model(statsmodels_model, path, conda_env, code_paths, mlflow_model, remove_data, signature, input_example, pip_requirements, extra_pip_requirements, metadata)
148 mlflow_model.metadata = metadata
150 # Save a statsmodels model
--> 151 statsmodels_model.save(model_data_path, remove_data)
152 if _save_model_called_from_autolog and not remove_data:
153 saved_model_size = os.path.getsize(model_data_path)

AttributeError: 'NoneType' object has no attribute 'save'
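
From the traceback, MLflowIntegration.log_automl (flaml/fabric/mlflow.py, line 524) only checks that automl._trained_estimator is not None before calling log_model, while with use_spark=True the trained estimator wrapper exists but its internal _model attribute is still None; mlflow.statsmodels.save_model then calls .save on that None. Below is a minimal sketch of the kind of guard that appears to be missing, using only the private attribute names visible in the traceback; it is an illustration, not the actual FLAML fix.

def safe_log_best_model(automl, mlflow_integration):
    # Sketch only: _trained_estimator and _model are private FLAML attributes
    # taken from the traceback above, not a public API.
    trained = getattr(automl, "_trained_estimator", None)
    model = getattr(trained, "_model", None) if trained is not None else None
    if model is None:
        # With use_spark=True the wrapper is present but _model is None, which is
        # exactly what makes mlflow.statsmodels.log_model raise
        # "'NoneType' object has no attribute 'save'".
        return
    mlflow_integration.log_model(
        model,
        automl.best_estimator,
        signature=automl.estimator_signature,
    )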

Steps to reproduce

Here is my code:
import numpy as np
from wedata.ts_automl.flaml import WeDataTimeSeriesAutoML
import mlflow
import flaml
from flaml import AutoML

mlflow.set_tracking_uri("http://x.x.x.x:5000")

# Construct time series data

X_train = np.arange("2014-01", "2022-01", dtype="datetime64[M]")
y_train = np.random.random(size=84)

automl_settings = {
    "time_budget": 10,
    "metric": "accuracy",
    "n_concurrent_trials": 1,
    "use_spark": True,
    "force_cancel": False,  # Activating the force_cancel option can immediately halt Spark jobs once they exceed the allocated time_budget.
}
automl = AutoML(settings=automl_settings)
mlflow.set_experiment("wedata_demo")

# Start training. Please set the mlflow experiment name and task name. During training, model parameters will be logged to the experiment management system, and the best model artifact will also be recorded.

with mlflow.start_run(run_name="test_ts_2"):
    automl.fit(
        X_train=X_train,  # a single column of timestamp
        y_train=y_train,  # value for each timestamp
        period=12,  # time horizon to forecast, e.g., 12 months
        task="ts_forecast",
        log_file_name="ts_forecast.log",
        eval_method="holdout",
        use_spark=True,
        mlflow_logging=True,
    )

Model Used

AutoML

Expected Behavior

The best model can be logged correctly.

Screenshots and logs

No response

Additional Information

FLAML 2.3.4
Python 3.9.21
OS: macOS and Linux
