[Bug]: When use_spark = True and mlflow_logging = True are set, an error is reported when logging the best model: 'NoneType' object has no attribute 'save' #1431
Labels: bug (Something isn't working)
Describe the bug
Here is the error detail:

---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[9], line 23
21 # Start training. Set the mlflow experiment name and run name; model parameters are logged to the experiment tracker during training, and the best model artifact is recorded.
22 with mlflow.start_run(run_name="test_ts_2"):
---> 23 automl.fit(
24 X_train=X_train[:84], # a single column of timestamp
25 y_train=y_train, # value for each timestamp
26 period=12, # time horizon to forecast, e.g., 12 months
27 task="ts_forecast",
28 log_file_name="ts_forecast.log",
29 eval_method="holdout",
30 use_spark=True,
31 mlflow_logging=True
32 )
File /data/conda/env/dead/lib/python3.9/site-packages/flaml/automl/automl.py:1980, in AutoML.fit(self, X_train, y_train, dataframe, label, metric, task, n_jobs, log_file_name, estimator_list, time_budget, max_iter, sample, ensemble, eval_method, log_type, model_history, split_ratio, n_splits, log_training_metric, mem_thres, pred_time_limit, train_time_limit, X_val, y_val, sample_weight_val, groups_val, groups, verbose, retrain_full, split_type, learner_selector, hpo_method, starting_points, seed, n_concurrent_trials, keep_search_state, preserve_checkpoint, early_stop, force_cancel, append_log, auto_augment, min_sample_size, use_ray, use_spark, free_mem_ratio, metric_constraints, custom_hp, time_col, cv_score_agg_func, skip_transform, mlflow_logging, fit_kwargs_by_estimator, mlflow_exp_name, **fit_kwargs)
1978 with training_log_writer(log_file_name, append_log) as save_helper:
1979 self._training_log = save_helper
-> 1980 self._search()
1981 else:
1982 self._training_log = None
File /data/conda/env/dead/lib/python3.9/site-packages/flaml/automl/automl.py:2542, in AutoML._search(self)
2540 if self._best_estimator:
2541 if self.mlflow_integration:
-> 2542 self.mlflow_integration.log_automl(self)
2543 if mlflow.active_run() is None:
2544 if self.mlflow_integration.parent_run_id is not None and self.mlflow_integration.autolog:
2545 # ensure result of retrain autolog to parent run
File /data/conda/env/dead/lib/python3.9/site-packages/flaml/fabric/mlflow.py:525, in MLflowIntegration.log_automl(self, automl)
523 self.has_summary = True
524 if automl._trained_estimator is not None and not self.has_model:
--> 525 self.log_model(
526 automl._trained_estimator._model,
527 automl.best_estimator,
528 signature=automl.estimator_signature,
529 )
530 self.pickle_and_log_automl_artifacts(
531 automl, automl.model, automl.best_estimator, signature=automl.pipeline_signature
532 )
533 self.has_model = True
File /data/conda/env/dead/lib/python3.9/site-packages/flaml/fabric/mlflow.py:349, in MLflowIntegration.log_model(self, model, estimator, signature)
347 mlflow.transformers.log_model(model, estimator, signature=signature)
348 elif estimator in ["arima", "sarimax", "holt-winters", "snaive", "naive", "savg", "avg", "ets"]:
--> 349 mlflow.statsmodels.log_model(model, estimator, signature=signature)
350 elif estimator in ["tcn", "tft"]:
351 mlflow.pytorch.log_model(model, estimator, signature=signature)
File /data/conda/env/dead/lib/python3.9/site-packages/mlflow/statsmodels/__init__.py:260, in log_model(statsmodels_model, artifact_path, conda_env, code_paths, registered_model_name, remove_data, signature, input_example, await_registration_for, pip_requirements, extra_pip_requirements, metadata, **kwargs)
216 @format_docstring(LOG_MODEL_PARAM_DOCS.format(package_name=FLAVOR_NAME))
217 def log_model(
218 statsmodels_model,
(...)
230 **kwargs,
231 ):
232 """
233 Log a statsmodels model as an MLflow artifact for the current run.
234
(...)
258 of the logged model.
259 """
--> 260 return Model.log(
261 artifact_path=artifact_path,
262 flavor=mlflow.statsmodels,
263 registered_model_name=registered_model_name,
264 statsmodels_model=statsmodels_model,
265 conda_env=conda_env,
266 code_paths=code_paths,
267 signature=signature,
268 input_example=input_example,
269 await_registration_for=await_registration_for,
270 remove_data=remove_data,
271 pip_requirements=pip_requirements,
272 extra_pip_requirements=extra_pip_requirements,
273 metadata=metadata,
274 **kwargs,
275 )
File /data/conda/env/dead/lib/python3.9/site-packages/mlflow/models/model.py:726, in Model.log(cls, artifact_path, flavor, registered_model_name, await_registration_for, metadata, run_id, resources, **kwargs)
722 run_id = mlflow.tracking.fluent._get_or_start_run().info.run_id
723 mlflow_model = cls(
724 artifact_path=artifact_path, run_id=run_id, metadata=metadata, resources=resources
725 )
--> 726 flavor.save_model(path=local_path, mlflow_model=mlflow_model, **kwargs)
727 # `save_model` calls `load_model` to infer the model requirements, which may result in
728 # `__pycache__` directories being created in the model directory.
729 for pycache in Path(local_path).rglob("__pycache__"):
File /data/conda/env/dead/lib/python3.9/site-packages/mlflow/statsmodels/__init__.py:151, in save_model(statsmodels_model, path, conda_env, code_paths, mlflow_model, remove_data, signature, input_example, pip_requirements, extra_pip_requirements, metadata)
148 mlflow_model.metadata = metadata
150 # Save a statsmodels model
--> 151 statsmodels_model.save(model_data_path, remove_data)
152 if _save_model_called_from_autolog and not remove_data:
153 saved_model_size = os.path.getsize(model_data_path)
AttributeError: 'NoneType' object has no attribute 'save'
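The last frame shows that mlflow's save_model is handed None: by the time MLflowIntegration.log_model runs, automl._trained_estimator._model is empty, so statsmodels_model.save fails. A small diagnostic sketch to confirm this (a hypothetical helper, not part of FLAML; it assumes fit was run with mlflow_logging=False so the search finishes without this crash):

def inspect_trained_estimator(automl):
    # Private attributes, named as they appear in the traceback above.
    est = getattr(automl, "_trained_estimator", None)
    print("best_estimator:", automl.best_estimator)
    print("trained estimator wrapper:", est)
    # If this prints None, it is the exact object that
    # mlflow.statsmodels.log_model later tries to call .save() on.
    print("inner _model:", getattr(est, "_model", None))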
Steps to reproduce
Here is my code:
import numpy as np
from wedata.ts_automl.flaml import WeDataTimeSeriesAutoML
import mlflow
import flaml
from flaml import AutoML
mlflow.set_tracking_uri("http://x.x.x.x:5000")
# Construct time series data
X_train = np.arange("2014-01", "2022-01", dtype="datetime64[M]")
y_train = np.random.random(size=84)
automl_settings = {
"time_budget": 10,
"metric": 'accuracy',
"n_concurrent_trials": 1,
"use_spark": True,
"force_cancel": False, # Activating the force_cancel option can immediately halt Spark jobs once they exceed the allocated time_budget.
}
automl = AutoML(settings=automl_settings)
mlflow.set_experiment("wedata_demo")
# Start training. Set the mlflow experiment name and run name. During training, model parameters will be logged to the experiment tracker, and the best model artifact will also be recorded.
with mlflow.start_run(run_name="test_ts_2"):
automl.fit(
X_train=X_train, # a single column of timestamp
y_train=y_train, # value for each timestamp
period=12, # time horizon to forecast, e.g., 12 months
task="ts_forecast",
log_file_name="ts_forecast.log",
eval_method="holdout",
use_spark=True,
mlflow_logging=True
)
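A possible workaround until this is fixed (only a sketch, assuming the search itself completes and just the built-in best-model logging fails): run fit with mlflow_logging=False, then log the fitted FLAML wrapper yourself as a pickle artifact. It reuses X_train, y_train, and automl_settings from the code above.

import pickle

import mlflow
from flaml import AutoML

automl = AutoML(settings=automl_settings)
with mlflow.start_run(run_name="test_ts_2_manual_logging"):
    automl.fit(
        X_train=X_train,
        y_train=y_train,
        period=12,
        task="ts_forecast",
        log_file_name="ts_forecast.log",
        eval_method="holdout",
        use_spark=True,
        mlflow_logging=False,  # skip the code path that crashes
    )
    # automl.model is the best trained estimator wrapper; pickle it and
    # attach it to the run as a plain artifact instead of a model flavor.
    if automl.model is not None:
        with open("best_automl_model.pkl", "wb") as f:
            pickle.dump(automl.model, f)
        mlflow.log_artifact("best_automl_model.pkl")

This does not register a loadable MLflow model flavor, but it at least keeps the best model attached to the run.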
Model Used
AutoML
Expected Behavior
The Best Model can be logged correctly.
Screenshots and logs
No response
Additional Information
FLAML 2.3.4
Python 3.9.21
OS: MacOS and Linux