[原创]mlops测试

# -*- coding: utf-8 -*-

import pandas as pd
from flaml import AutoML
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score

# 1. Prepare the data.
# With a real dataset the features and label would be loaded like this:
#   df = pd.read_csv("financial_data.csv")
#   X = df.drop("default", axis=1)
#   y = df["default"]

# For this demo, generate a synthetic classification dataset with
# scikit-learn's make_classification as a stand-in for financial data.
from sklearn.datasets import make_classification

X, y = make_classification(
    n_samples=10000,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    n_clusters_per_class=1,
    random_state=42,  # fixed seed so the generated data is reproducible
)

# Hold out 20% of the samples as the test set; the fixed seed keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 2. Start the MLflow experiment.
mlflow.set_experiment("FLAML_cc1")

# Everything below runs inside the run context so that the parameters,
# metrics and model are all attached to this single MLflow run.
with mlflow.start_run(run_name="FLAML_cc1"):

    # 3. Configure FLAML.
    automl = AutoML()
    settings = {
        "time_budget": 60,  # search budget in seconds
        "metric": "roc_auc",  # optimise AUC, a common metric in finance
        "task": "classification",
        "log_file_name": "flaml.log",
        "seed": 42,
        # Restrict the search to model families commonly used in finance.
        "estimator_list": ["lgbm", "xgboost", "rf"],
    }

    # 4. Run the automatic search / training.
    automl.fit(X_train, y_train, **settings)

    # 5. Evaluate the best model on the held-out test set.
    # Column 1 of predict_proba is used as the positive-class score for AUC.
    y_pred_proba = automl.predict_proba(X_test)[:, 1]
    test_auc = roc_auc_score(y_test, y_pred_proba)
    test_acc = accuracy_score(y_test, automl.predict(X_test))

    # 6. Record the results in MLflow.
    # (a) Hyperparameters selected by FLAML.
    mlflow.log_params(automl.best_config)

    # (b) Evaluation metrics.
    mlflow.log_metric("test_roc_auc", test_auc)
    mlflow.log_metric("test_accuracy", test_acc)

    # (c) Which estimator type won the search.
    mlflow.log_param("best_estimator", automl.best_estimator)

    # (d) Number of input features.
    mlflow.log_param("n_features", X_train.shape[1])

    # (e) Persist the final model.
    # NOTE(review): automl.model is presumably FLAML's estimator wrapper rather
    # than a plain scikit-learn object; confirm it round-trips through the
    # mlflow.sklearn flavor in the target serving environment.
    mlflow.sklearn.log_model(automl.model, "flaml_best_model")

if hasattr(automl.model, "feature_importances_"):
fi = automl.model.feature_importances_
for i, imp in enumerate(fi):

回复或点赞可查看完整内容

---
来源: 看雪论坛
原文链接: https://bbs.kanxue.com/thread-290747.htm