模型评估与超参数调优

发布时间:2026/5/24 1:01:27

模型评估与超参数调优

# 模型评估与超参数调优

## 1. 技术分析

### 1.1 模型评估概述

模型评估是机器学习的关键步骤:

```
评估指标
    分类指标: 准确率、精确率、召回率、F1、AUC
    回归指标: MAE、MSE、RMSE、R²
    排序指标: MAP、NDCG

评估方法:
    交叉验证
    时间序列分割
    分层抽样
```

### 1.2 超参数调优

```
调优方法
    网格搜索: 穷举搜索
    随机搜索: 随机采样
    贝叶斯优化: 概率模型
    遗传算法: 进化优化

调优策略:
    粗调: 大范围搜索
    微调: 精细搜索
```

### 1.3 评估指标对比

| 指标 | 适用任务 | 特点 |
| --- | --- | --- |
| 准确率 | 分类 | 不平衡数据有偏差 |
| F1分数 | 分类 | 平衡精确率和召回率 |
| AUC-ROC | 分类 | 评估排序能力 |
| RMSE | 回归 | 对异常值敏感 |
| R² | 回归 | 解释方差比例 |

## 2. 核心功能实现

### 2.1 分类评估指标

```python
import numpy as np
from sklearn.metrics import confusion_matrix


class ClassificationEvaluator:
    def __init__(self, y_true, y_pred, y_proba=None):
        self.y_true = y_true
        self.y_pred = y_pred
        self.y_proba = y_proba
        self.confusion = confusion_matrix(y_true, y_pred)

    def accuracy(self):
        return np.mean(self.y_true == self.y_pred)

    def precision(self):
        tp = self.confusion[1, 1]
        fp = self.confusion[0, 1]
        return tp / (tp + fp) if (tp + fp) > 0 else 0

    def recall(self):
        tp = self.confusion[1, 1]
        fn = self.confusion[1, 0]
        return tp / (tp + fn) if (tp + fn) > 0 else 0

    def f1_score(self):
        p = self.precision()
        r = self.recall()
        return 2 * p * r / (p + r) if (p + r) > 0 else 0

    def specificity(self):
        tn = self.confusion[0, 0]
        fp = self.confusion[0, 1]
        return tn / (tn + fp) if (tn + fp) > 0 else 0

    def auc_roc(self):
        if self.y_proba is None:
            raise ValueError("需要提供预测概率")

        thresholds = np.sort(np.unique(self.y_proba))[::-1]
        tpr_list = []
        fpr_list = []

        for threshold in thresholds:
            pred = (self.y_proba >= threshold).astype(int)
            cm = confusion_matrix(self.y_true, pred)

            tp = cm[1, 1] if cm.shape[0] > 1 and cm.shape[1] > 1 else 0
            fn = cm[1, 0] if cm.shape[0] > 1 and cm.shape[1] > 0 else 0
            tn = cm[0, 0] if cm.shape[0] > 0 and cm.shape[1] > 0 else 0
            fp = cm[0, 1] if cm.shape[0] > 0 and cm.shape[1] > 1 else 0

            tpr = tp / (tp + fn) if (tp + fn) > 0 else 0
            fpr = fp / (tn + fp) if (tn + fp) > 0 else 0

            tpr_list.append(tpr)
            fpr_list.append(fpr)

        return np.trapz(tpr_list, fpr_list)

    def report(self):
        return {
            "accuracy": self.accuracy(),
            "precision": self.precision(),
            "recall": self.recall(),
            "f1_score": self.f1_score(),
            "specificity": self.specificity(),
            "auc_roc": self.auc_roc() if self.y_proba is not None else None,
            "confusion_matrix": self.confusion.tolist()
        }
```

### 2.2 回归评估指标

```python
class RegressionEvaluator:
    def __init__(self, y_true, y_pred):
        self.y_true = y_true
        self.y_pred = y_pred

    def mae(self):
        return np.mean(np.abs(self.y_true - self.y_pred))

    def mse(self):
        return np.mean((self.y_true - self.y_pred) ** 2)

    def rmse(self):
        return np.sqrt(self.mse())

    def mape(self):
        return np.mean(np.abs((self.y_true - self.y_pred) / self.y_true)) * 100

    def r2_score(self):
        ss_res = np.sum((self.y_true - self.y_pred) ** 2)
        ss_tot = np.sum((self.y_true - np.mean(self.y_true)) ** 2)
        return 1 - (ss_res / ss_tot) if ss_tot > 0 else 0

    def report(self):
        return {
            "mae": self.mae(),
            "mse": self.mse(),
            "rmse": self.rmse(),
            "mape": self.mape(),
            "r2_score": self.r2_score()
        }
```

### 2.3 交叉验证

```python
class CrossValidation:
    def __init__(self, n_folds=5, shuffle=True):
        self.n_folds = n_folds
        self.shuffle = shuffle

    def split(self, X, y):
        n_samples = len(y)
        indices = np.arange(n_samples)

        if self.shuffle:
            np.random.shuffle(indices)

        fold_size = n_samples // self.n_folds
        folds = []

        for i in range(self.n_folds):
            start = i * fold_size
            end = (i + 1) * fold_size if i < self.n_folds - 1 else n_samples

            val_indices = indices[start:end]
            train_indices = np.concatenate([indices[:start], indices[end:]])

            folds.append((train_indices, val_indices))

        return folds

    def evaluate(self, model, X, y, evaluator_func):
        scores = []

        for train_idx, val_idx in self.split(X, y):
            X_train, X_val = X[train_idx], X[val_idx]
            y_train, y_val = y[train_idx], y[val_idx]

            model.fit(X_train, y_train)
            y_pred = model.predict(X_val)

            score = evaluator_func(y_val, y_pred)
            scores.append(score)

        return {
            "mean": np.mean(scores),
            "std": np.std(scores),
            "scores": scores
        }
```

### 2.4 超参数调优

```python
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from scipy.stats import randint, uniform


class HyperparameterTuner:
    def __init__(self, model, param_grid, method="grid"):
        self.model = model
        self.param_grid = param_grid
        self.method = method
        self.best_model = None
        self.best_params = None

    def grid_search(self, X, y, cv=5):
        grid_search = GridSearchCV(
            self.model,
            self.param_grid,
            cv=cv,
            scoring="accuracy",
            n_jobs=-1
        )
        grid_search.fit(X, y)

        self.best_model = grid_search.best_estimator_
        self.best_params = grid_search.best_params_

        return {
            "best_score": grid_search.best_score_,
            "best_params": grid_search.best_params_,
            "cv_results": grid_search.cv_results_
        }

    def random_search(self, X, y, n_iter=100, cv=5):
        random_search = RandomizedSearchCV(
            self.model,
            self.param_grid,
            n_iter=n_iter,
            cv=cv,
            scoring="accuracy",
            n_jobs=-1,
            random_state=42
        )
        random_search.fit(X, y)

        self.best_model = random_search.best_estimator_
        self.best_params = random_search.best_params_

        return {
            "best_score": random_search.best_score_,
            "best_params": random_search.best_params_
        }

    def bayesian_optimization(self, X, y, n_iter=50):
        from bayes_opt import BayesianOptimization

        def objective(**params):
            model = self.model.__class__(**params)
            cv = CrossValidation(n_folds=5)
            result = cv.evaluate(model, X, y, lambda y_true, y_pred: np.mean(y_true == y_pred))
            return result["mean"]

        bounds = {}
        for param, values in self.param_grid.items():
            if isinstance(values, list):
                bounds[param] = (min(values), max(values))

        optimizer = BayesianOptimization(
            f=objective,
            pbounds=bounds,
            random_state=42
        )
        optimizer.maximize(n_iter=n_iter)

        self.best_params = optimizer.max["params"]
        self.best_model = self.model.__class__(**self.best_params)
        self.best_model.fit(X, y)

        return {
            "best_score": optimizer.max["target"],
            "best_params": optimizer.max["params"]
        }

    def tune(self, X, y):
        if self.method == "grid":
            return self.grid_search(X, y)
        elif self.method == "random":
            return self.random_search(X, y)
        elif self.method == "bayesian":
            return self.bayesian_optimization(X, y)
```

## 3. 性能对比

### 3.1 调优方法对比

| 方法 | 效率 | 效果 | 复杂度 |
| --- | --- | --- | --- |
| 网格搜索 | 低 | 中 | 低 |
| 随机搜索 | 中 | 中 | 低 |
| 贝叶斯优化 | 高 | 高 | 高 |
| 遗传算法 | 中 | 高 | 很高 |

### 3.2 评估指标对比

| 指标 | 用途 | 优点 | 缺点 |
| --- | --- | --- | --- |
| 准确率 | 整体评估 | 简单 | 不平衡数据有偏差 |
| F1分数 | 不平衡数据 | 平衡 | 只关注正类 |
| AUC-ROC | 排序能力 | 全面 | 需要概率输出 |

### 3.3 交叉验证策略对比

| 方法 | 适用场景 | 稳定性 |
| --- | --- | --- |
| K-fold | 通用 | 高 |
| Stratified K-fold | 不平衡数据 | 高 |
| TimeSeriesSplit | 时间序列 | 中 |

## 4. 最佳实践

### 4.1 模型评估流程

```python
def evaluate_model(model, X_train, y_train, X_test, y_test, task_type="classification"):
    model.fit(X_train, y_train)

    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)

    if task_type == "classification":
        if hasattr(model, "predict_proba"):
            y_proba_train = model.predict_proba(X_train)[:, 1]
            y_proba_test = model.predict_proba(X_test)[:, 1]
        else:
            y_proba_train = None
            y_proba_test = None

        train_eval = ClassificationEvaluator(y_train, y_pred_train, y_proba_train)
        test_eval = ClassificationEvaluator(y_test, y_pred_test, y_proba_test)

        print("训练集评估:")
        print(train_eval.report())
        print("\n测试集评估:")
        print(test_eval.report())
    else:
        train_eval = RegressionEvaluator(y_train, y_pred_train)
        test_eval = RegressionEvaluator(y_test, y_pred_test)

        print("训练集评估:")
        print(train_eval.report())
        print("\n测试集评估:")
        print(test_eval.report())
```

### 4.2 超参数调优流程

```python
def tune_hyperparameters(model, X, y, param_grid, method="random"):
    tuner = HyperparameterTuner(model, param_grid, method=method)
    result = tuner.tune(X, y)

    print(f"最佳分数: {result['best_score']:.4f}")
    print("最佳参数:")
    for param, value in result["best_params"].items():
        print(f"  {param}: {value}")

    return tuner.best_model
```

## 5. 总结

模型评估和调优是机器学习的关键环节:

- 分类评估:准确率、F1、AUC-ROC
- 回归评估:RMSE、MAE、R²
- 交叉验证:K-fold、Stratified K-fold
- 超参数调优:网格搜索、随机搜索、贝叶斯优化

对比数据如下:

- 贝叶斯优化效果最好
- 随机搜索性价比最高
- Stratified K-fold适合不平衡数据
- 推荐先随机搜索再贝叶斯优化

良好的评估和调优可以显著提升模型性能。

相关新闻