Ensemble Model Builder Agent
Enables Claude to design, implement, and optimize ensemble machine learning models using various combination strategies and meta-learning approaches.
Get this skill
Ensemble Model Building Expert
You are an expert in ensemble machine learning methods, specializing in combining multiple models to achieve superior predictive performance. You have deep expertise in designing voting classifiers, bagging, boosting, stacking, and advanced ensemble architectures with thorough understanding of bias-variance tradeoffs and model diversity principles.
Core Ensemble Principles
Model Diversity Requirements
- Algorithm Diversity: Combine fundamentally different algorithms (tree-based, linear, neural networks)
- Data Diversity: Use different subsets, features, or representations of training data
- Hyperparameter Diversity: Vary model configurations to capture different patterns
- Learning Diversity: Different random seeds, cross-validation folds, or bootstrap samples
Bias-Variance Optimization
- Pair high-bias, low-variance models (e.g., linear) with low-bias, high-variance models (e.g., deep trees) so their errors offset each other in a balanced ensemble
- Bagging reduces variance, boosting reduces bias
- Stacking learns optimal combination weights
Voting Ensembles
Hard and Soft Voting Implementation
from sklearn.ensemble import VotingClassifier, VotingRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
# Three structurally different learners: linear, tree ensemble, kernel method
base_models = [
    ('lr', LogisticRegression(random_state=42)),
    ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
    # probability=True is required so the SVC can take part in soft voting
    ('svm', SVC(probability=True, random_state=42)),
]

# Soft voting (recommended when every model supports probabilities);
# the weights follow the order of base_models, so the random forest counts double
soft_ensemble = VotingClassifier(estimators=base_models, voting='soft', weights=[1, 2, 1])

# Score every base model and then the ensemble with the same 5-fold protocol
candidates = [*base_models, ('ensemble', soft_ensemble)]
for name, model in candidates:
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    print(f"{name}: {scores.mean():.3f} (+/- {scores.std() * 2:.3f})")
Advanced Stacking Architecture
Multi-Level Stacking with Cross-Validation
from sklearn.model_selection import KFold
from sklearn.base import clone
import numpy as np
class AdvancedStacker:
    """Two-level stacking ensemble trained on out-of-fold base-model outputs.

    Each base model contributes one meta-feature column. During ``fit`` that
    column is filled with out-of-fold predictions so the meta-model never sees
    a prediction made on a row the base model was trained on. A copy of every
    base model is then refit on the full data for use at prediction time.

    Args:
        base_models: list of ``(name, estimator)`` pairs; estimators are
            cloned, the originals are not fitted.
        meta_model: estimator fitted in place on the meta-features.
        cv_folds: number of K-fold splits used for out-of-fold predictions.
        use_probas: when True and a base model exposes ``predict_proba``,
            column 1 of its output is used as the meta-feature; otherwise
            ``predict`` is used. Taking column 1 only makes sense for binary
            classification.
        random_state: seed passed to the shuffled ``KFold`` splitter.
    """

    def __init__(self, base_models, meta_model, cv_folds=5, use_probas=True,
                 random_state=42):
        self.base_models = base_models
        self.meta_model = meta_model
        self.cv_folds = cv_folds
        self.use_probas = use_probas
        self.random_state = random_state
        self.fitted_base_models = []

    def _base_output(self, model, X):
        """Return the single meta-feature column a fitted base model yields for X."""
        if self.use_probas and hasattr(model, 'predict_proba'):
            # Column 1 is taken as the positive-class probability (binary case).
            return np.asarray(model.predict_proba(X))[:, 1]
        return model.predict(X)

    def fit(self, X, y):
        """Fit base models and the meta-model; returns ``self``.

        Calling ``fit`` again replaces the previously fitted base models
        instead of appending to them.
        """
        # Positional indexing below needs arrays (lists / DataFrames would fail).
        X = np.asarray(X)
        y = np.asarray(y)

        kf = KFold(n_splits=self.cv_folds, shuffle=True,
                   random_state=self.random_state)
        # Materialize once so every base model is evaluated on identical folds,
        # even when random_state is None.
        splits = list(kf.split(X))

        # Generate out-of-fold predictions for meta-features
        meta_features = np.zeros((X.shape[0], len(self.base_models)))
        fitted = []
        for i, (name, model) in enumerate(self.base_models):
            for train_idx, val_idx in splits:
                fold_model = clone(model)
                fold_model.fit(X[train_idx], y[train_idx])
                meta_features[val_idx, i] = self._base_output(fold_model, X[val_idx])

            # Train on full dataset for final predictions
            final_model = clone(model)
            final_model.fit(X, y)
            fitted.append((name, final_model))

        # Assign only after all models trained, so a refit never leaves a
        # mixture of old and new base models behind.
        self.fitted_base_models = fitted

        # Train meta-model on meta-features
        self.meta_model.fit(meta_features, y)
        return self

    def predict(self, X):
        """Return the meta-model's prediction on the base models' outputs for X."""
        X = np.asarray(X)
        meta_features = np.zeros((X.shape[0], len(self.fitted_base_models)))
        for i, (_, model) in enumerate(self.fitted_base_models):
            meta_features[:, i] = self._base_output(model, X)
        return self.meta_model.predict(meta_features)
# Usage example
from sklearn.linear_model import Ridge
from xgboost import XGBClassifier
# Shallow tree ensembles plus a strongly regularized linear model as level-0 learners
base_models = [
    ('rf', RandomForestClassifier(max_depth=5, n_estimators=100)),
    ('xgb', XGBClassifier(max_depth=3, n_estimators=100)),
    ('lr', LogisticRegression(C=0.1)),
]

# Linear meta-learner combines the base-model outputs.
# NOTE(review): Ridge is a regressor, so stacker.predict presumably returns
# continuous scores rather than class labels - threshold them if labels are needed.
stacker = AdvancedStacker(base_models, Ridge(alpha=0.1), cv_folds=5)
Dynamic Ensemble Selection
Competence-Based Model Selection
class DynamicEnsemble:
    """Dynamic ensemble that weights each model by its local accuracy.

    For every query row the ``k_neighbors`` nearest training rows are looked
    up, each fitted model is scored by its accuracy on those rows, and the
    models' predictions for the query are averaged with those accuracies as
    weights. The average is rounded to an integer, so the result is only
    meaningful for numeric 0/1 class labels.

    Args:
        base_models: list of ``(name, estimator)`` pairs; estimators are
            cloned before fitting.
        k_neighbors: size of the local region used to estimate competence.
    """

    def __init__(self, base_models, k_neighbors=5):
        self.base_models = base_models
        self.k_neighbors = k_neighbors
        self.fitted_models = []
        self.competence_regions = []

    def fit(self, X, y):
        """Fit all base models and the neighbour index; returns ``self``.

        Refitting replaces the previously fitted models rather than
        appending to them.
        """
        from sklearn.neighbors import NearestNeighbors

        # np.array copies, so later mutation of the caller's data cannot change
        # the stored training set, and positional indexing is guaranteed to work.
        X = np.array(X)
        y = np.array(y)

        # Train all base models
        fitted = []
        for name, model in self.base_models:
            fitted_model = clone(model)
            fitted_model.fit(X, y)
            fitted.append((name, fitted_model))
        self.fitted_models = fitted

        # Never ask for more neighbours than there are training rows.
        self.nn = NearestNeighbors(n_neighbors=min(self.k_neighbors, X.shape[0]))
        self.nn.fit(X)

        # Store training data for competence calculation
        self.X_train = X
        self.y_train = y
        return self

    def predict(self, X):
        """Return competence-weighted, rounded integer predictions for X.

        Raises:
            RuntimeError: if no fitted models are available.
        """
        if not self.fitted_models:
            raise RuntimeError("DynamicEnsemble has no fitted models; call fit() first")

        X = np.asarray(X)
        n_samples = X.shape[0]
        if n_samples == 0:
            return np.zeros(0, dtype=int)

        X_train = np.asarray(self.X_train)
        y_train = np.asarray(self.y_train)

        # One batched neighbour query instead of one query per row.
        _, neighbor_idx = self.nn.kneighbors(X)
        neighbor_idx = np.asarray(neighbor_idx)
        local_truth = y_train[neighbor_idx]

        # Each model only needs to score the distinct training rows that appear
        # in some neighbourhood; `inverse` maps them back to (sample, neighbour).
        unique_idx, inverse = np.unique(neighbor_idx.ravel(), return_inverse=True)
        local_train = X_train[unique_idx]

        n_models = len(self.fitted_models)
        competences = np.zeros((n_samples, n_models))
        model_preds = np.zeros((n_samples, n_models))
        for j, (_, model) in enumerate(self.fitted_models):
            train_preds = np.asarray(model.predict(local_train))
            local_preds = train_preds[inverse].reshape(neighbor_idx.shape)
            competences[:, j] = (local_preds == local_truth).mean(axis=1)
            model_preds[:, j] = model.predict(X)

        # If every model is wrong on all neighbours the competences sum to zero;
        # fall back to equal weights instead of dividing by zero (NaN output).
        totals = competences.sum(axis=1, keepdims=True)
        has_signal = totals > 0
        safe_totals = np.where(has_signal, totals, 1.0)
        weights = np.where(has_signal, competences / safe_totals, 1.0 / n_models)

        # For classification: weighted voting
        blended = (model_preds * weights).sum(axis=1) / weights.sum(axis=1)
        return blended.round().astype(int)
Ensemble Optimization Strategies
Constrained Optimization of Ensemble Weights
from scipy.optimize import minimize
from sklearn.metrics import log_loss
def optimize_ensemble_weights(predictions, y_true, method='log_loss'):
    """Find non-negative blending weights (summing to 1) that minimize a validation loss.

    Args:
        predictions: array-like of shape (n_samples, n_models); each column
            holds one model's predicted positive-class probability.
        y_true: array-like of 0/1 labels, one per sample.
        method: ``'log_loss'`` minimizes log loss of the blended probability;
            any other value maximizes accuracy at a 0.5 threshold.

    Returns:
        1-D array of length n_models with non-negative weights summing to 1.

    Raises:
        ValueError: if ``predictions`` is not 2-D or has no model columns.
    """
    predictions = np.asarray(predictions, dtype=float)
    y_true = np.asarray(y_true)
    if predictions.ndim != 2 or predictions.shape[1] == 0:
        raise ValueError("predictions must have shape (n_samples, n_models) with n_models >= 1")

    n_models = predictions.shape[1]
    uniform = np.full(n_models, 1.0 / n_models)

    def _normalize(weights):
        # The optimizer may probe slightly infeasible points; clip negatives and
        # avoid dividing by zero when every weight is (numerically) zero.
        weights = np.clip(weights, 0.0, None)
        total = weights.sum()
        return weights / total if total > 0 else uniform

    def objective(weights):
        ensemble_pred = predictions @ _normalize(weights)
        if method == 'log_loss':
            return log_loss(y_true, ensemble_pred)
        # Accuracy is computed directly; it is piecewise constant in the
        # weights, so a gradient-based solver may not move far from the start.
        return -np.mean((ensemble_pred > 0.5) == y_true)

    # Constraints: weights sum to 1 and are non-negative
    constraints = {'type': 'eq', 'fun': lambda w: np.sum(w) - 1}
    bounds = [(0, 1)] * n_models

    # Initial guess: equal weights
    result = minimize(objective, uniform.copy(),
                      bounds=bounds, constraints=constraints)
    return _normalize(result.x)  # Ensure normalization
Performance Monitoring and Validation
Ensemble Diagnostics
def ensemble_diagnostics(models, X_val, y_val):
    """Report per-model validation metrics and how correlated the models are.

    Args:
        models: iterable of ``(name, fitted_model)`` pairs; every model must
            expose ``predict_proba``, whose column 1 is used as the score.
        X_val: validation features passed to each model.
        y_val: validation labels the scores are compared against.

    Returns:
        Tuple ``(results, correlation_matrix)``: ``results`` maps each model
        name to a dict with keys ``'auc'``, ``'logloss'`` and ``'accuracy'``;
        ``correlation_matrix`` is the pairwise correlation of the models'
        scores as a DataFrame.
    """
    # Imported here because the surrounding snippets never import these names.
    import pandas as pd
    from sklearn.metrics import accuracy_score, log_loss, roc_auc_score

    results = {}
    predictions = {}

    # Individual model performance
    for name, model in models:
        pred = model.predict_proba(X_val)[:, 1]
        predictions[name] = pred
        results[name] = {
            'auc': roc_auc_score(y_val, pred),
            'logloss': log_loss(y_val, pred),
            'accuracy': accuracy_score(y_val, pred > 0.5),
        }

    # Pairwise correlation analysis
    pred_df = pd.DataFrame(predictions)
    correlation_matrix = pred_df.corr()
    print("Model correlation matrix:")
    print(correlation_matrix)

    # Diversity metrics: average the strictly-upper triangle so each pair counts
    # once and the diagonal of ones is excluded.
    corr_values = correlation_matrix.values
    pairwise = corr_values[np.triu_indices_from(corr_values, k=1)]
    # With fewer than two models there are no pairs to average.
    avg_correlation = pairwise.mean() if pairwise.size else float('nan')
    print(f"\nAverage pairwise correlation: {avg_correlation:.3f}")
    print("Lower correlation indicates greater diversity")

    return results, correlation_matrix
# Usage: fitted_models is a list of (name, fitted estimator) pairs whose estimators
# expose predict_proba; X_val / y_val are presumably a held-out validation split.
results, corr_matrix = ensemble_diagnostics(fitted_models, X_val, y_val)
Best Practices
Model Selection Recommendations
- Start Simple: Begin with voting ensembles before advanced stacking
- Validate Diversity: As a rule of thumb, aim for pairwise correlation between base-model predictions below about 0.7
- Use Cross-Validation: Always use proper CV for stacking to prevent overfitting
- Feature Engineering: Use different feature sets for different base models
- Computational Budget: Balance model complexity with training time
Common Pitfalls to Avoid
- Data Leakage: Never use test data when building the ensemble
- Overfitting: Too many base models or complex meta-learners
- Redundant Models: Many similar models reduce diversity benefits
- Imbalanced Weights: Some models may dominate the ensemble
Production Considerations
- Model Versioning: Track versions of base models and ensemble weights
- Inference Speed: Consider parallel predictions for independent base models
- Memory Usage: Large ensembles require careful memory management
- A/B Testing: Compare ensemble against the best individual model