03 - Role Classification and Model Evaluation¶

Executive Summary¶

This analysis develops and evaluates supervised classification models to predict player roles (Lurker, Spacetaker, Anchor, Rotator, AWPer) from behavioural and positional statistics.

Key Findings:

  • Model Performance:
    • T-Side: Roles are linearly separable and highly distinct. Logistic Regression achieves an F1-Macro of ~0.92, significantly outperforming complex ensembles.
    • CT-Side: Roles require non-linear decision boundaries. Random Forest (F1 ~0.75) outperforms linear baselines, but performance is capped largely by the porous boundary between Rotators and AWPers.
  • Feature Selection: The Orthogonal feature set (which replaces ADAT with its residual, adat_residual) maintains predictive power and removes multicollinearity between positional features.
  • Role Ambiguity: Excluding ambiguous hybrid roles (Half-Lurker, Mixed) improves F1 scores by +0.25 (T) and +0.18 (CT), confirming that "Core" roles are distinct statistical archetypes.
  • Interpretability:
    • "IGL Confound": In-Game Leaders are frequently misclassified as AWPers or Spacetakers due to their tendency to play "central" positions for information gathering.
    • Diagnostic Value: High-confidence misclassifications successfully identified ground-truth labeling errors (e.g., Spinx as Anchor $\to$ Rotator).

Outcome: Identified champion models for both sides and a diagnostic framework for validating player roles.

Note: All analyses are performed separately for T-side and CT-side to preserve tactical context.

Data Note: The input dataset has been corrected, but for this classification analysis we deliberately inject the original mislabels (e.g., HooXi, Spinx) to demonstrate the model's diagnostic capabilities. We use these "controlled errors" to verify if the model can flag inconsistent labels.


Objectives¶

1. Experimental Design (Cross-Validation)
Establish a rigorous 4-split $\times$ 20-repeat Nested Cross-Validation strategy (80 total folds) to ensure stable performance estimates given the small sample size ($N=84$).

2. Baseline & Feature Ablation
Evaluate Logistic Regression baselines across four feature sets (Raw, Orthogonal, Residuals, Full) to manage multicollinearity and establish a performance floor.

3. Sensitivity Analysis
Quantify the impact of ambiguous roles (Half-Lurker, Mixed) on classification performance to determine if the model struggles with features or with fuzzy definitions.

4. Advanced Modelling
Benchmark non-linear models (SVM, Random Forest, XGBoost) against the linear baseline using nested cross-validation to identify the optimal classifier for each side.

5. Interpretation & Diagnosis
Use SHAP values, Waterfall plots, and Prediction Confidence analysis to explain model decisions and diagnose specific misclassification patterns (e.g., the IGL-Centrality phenomenon).

1. Setup and Experimental Design¶

TL;DR: Configure the analysis environment, load the processed dataset, define the cross-validation strategy, specify feature sets for ablation, and initialise the scaling pipeline.

Methodology Notes

Cross-Validation Strategy:

  • 4 splits × 20 repeats = 80 total folds provides robust performance estimates while maintaining reasonable computational cost
  • Stratified K-Fold ensures class balance is preserved in each fold, critical for small sample sizes
  • The same CV splits will be reused across all models to enable fair comparison

Feature Set Definitions:

  • Raw Features: All original behavioural (TAPD, OAP, PODT, POKT) and positional (ADNT, ADAT) metrics. Baseline with multicollinearity (r ≈ 0.92 between ADNT and ADAT on T-side, r ≈ 0.78 on CT-side).
  • Orthogonal Features: Uses ADNT + adat_residual (orthogonal pair, ρ ≈ 0.00). Replaces ADAT with its residual while keeping ADNT, eliminating multicollinearity between the positional features yet retaining both positioning dimensions (residualisation sketched after this list).
  • Residual Features: Uses only adat_residual (drops ADNT entirely). Tests whether the residual alone is sufficient for role discrimination.
  • Full Features: All behavioural features + all three positioning features (ADNT, ADAT, adat_residual). Tests whether tree-based models can leverage multicollinearity better than linear models.
  • Feature sets are side-specific: T-side feature sets use *_t suffix (e.g., tapd_t, adnt_t, adat_residual_t), CT-side feature sets use *_ct suffix (e.g., tapd_ct, adnt_ct, adat_residual_ct).
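For reference, a minimal sketch of how a residual such as adat_residual_t could be derived by regressing ADAT on ADNT (the actual feature engineering happens upstream of this notebook; column names match the dataset loaded below):

# Sketch: residualise ADAT against ADNT so the resulting pair is orthogonal.
# Assumes df holds the rank-scaled positional columns used in this notebook.
from sklearn.linear_model import LinearRegression

X_pos = df[['adnt_rank_t']].values   # distance to nearest teammate (predictor)
y_pos = df['adat_rank_t'].values     # distance to all teammates (response)

reg = LinearRegression().fit(X_pos, y_pos)
# OLS residuals are uncorrelated with the predictor by construction (rho ≈ 0.00)
df['adat_residual_t'] = y_pos - reg.predict(X_pos)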

Scaling Pipeline:

  • StandardScaler will be applied within cross-validation loops to prevent data leakage
  • Scaling is fit only on training folds and applied to validation folds
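In code, leakage-free scaling is the standard Pipeline pattern shown below (a sketch; the project helpers wrap the same idea, and X/y stand for the side-specific feature matrix and role labels built later):

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RepeatedStratifiedKFold, cross_val_score

pipe = Pipeline([
    ('scaler', StandardScaler()),                 # refit on each training fold only
    ('clf', LogisticRegression(max_iter=1000)),
])
cv = RepeatedStratifiedKFold(n_splits=4, n_repeats=20, random_state=42)

# cross_val_score refits the whole pipeline per fold, so the scaler
# never sees the validation samples (no leakage)
scores = cross_val_score(pipe, X, y, cv=cv, scoring='f1_macro')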

Side-Specific Modelling:

  • All models will be run for both T-side and CT-side separately
  • T-side models use T-side features (*_t) and predict role_t
  • CT-side models use CT-side features (*_ct) and predict role_ct
  • This preserves tactical context and enables side-specific feature set selection and model comparison
In [1]:
# === Setup: paths, imports, theme ===

from pathlib import Path
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import joblib

# sklearn imports
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline


# Dev convenience
%load_ext autoreload
%autoreload 2

# Display options
pd.set_option("display.max_columns", 120)
pd.set_option("display.width", 120)

# Resolve repository root (if in notebooks/, step up one level)
REPO_ROOT = Path.cwd().resolve().parent if Path.cwd().name.lower() == "notebooks" else Path.cwd().resolve()

# Canonical paths
DATA_DIR    = REPO_ROOT / "data"
RESULTS_DIR = REPO_ROOT / "results" / "classification"
FIG_DIR     = RESULTS_DIR / "figures"
TAB_DIR     = RESULTS_DIR / "tables"
MODEL_DIR   = RESULTS_DIR / "models"

# Ensure results dirs exist
FIG_DIR.mkdir(parents=True, exist_ok=True)
MODEL_DIR.mkdir(parents=True, exist_ok=True)
TAB_DIR.mkdir(parents=True, exist_ok=True)

# Dataset path
DATA_PATH = DATA_DIR / "processed" / "cs2_playstyles_2024_with_residuals.parquet"
assert DATA_PATH.exists(), f"Dataset not found at {DATA_PATH}"

# Local helpers
SRC_DIR = REPO_ROOT / "src"
if str(SRC_DIR) not in sys.path:
    sys.path.insert(0, str(SRC_DIR))

from style import (
    set_mpl_theme,
    set_seaborn_theme,
    ROLE_COLOURS,
    get_role_colour,
)

# Import classification utilities
from classification_utils import (
    get_feature_sets,
    evaluate_baseline_models,
    summarise_feature_set_results,
    fit_and_visualise_logreg,
    evaluate_per_class_metrics,
    evaluate_model_cv,
    plot_confusion_matrices_comparison,
    plot_model_stability_boxplots,
    prepare_classification_data,
    run_sensitivity_analysis,
    run_model_tuning,
    compile_model_leaderboard,
    save_champion_model,
    select_and_save_champion_models,
    compare_rf_feature_importance,
    plot_prediction_confidence,
    plot_shap_beeswarm_grid,
    prepare_champion_data,
    plot_single_player_waterfall,
    plot_comparison_waterfall,
    get_player_percentiles,
    get_repeated_cv_predictions,
    prepare_interpretation_model,
    plot_igl_feature_distribution,
)

# Themes
set_mpl_theme(mode="dark", preferred_font="Georgia")
set_seaborn_theme(mode="dark", preferred_font="Georgia")

# Echo key paths
REPO_ROOT, DATA_PATH, FIG_DIR, TAB_DIR
Out[1]:
(WindowsPath('P:/cs2-playstyle-analysis-2024'),
 WindowsPath('P:/cs2-playstyle-analysis-2024/data/processed/cs2_playstyles_2024_with_residuals.parquet'),
 WindowsPath('P:/cs2-playstyle-analysis-2024/results/classification/figures'),
 WindowsPath('P:/cs2-playstyle-analysis-2024/results/classification/tables'))

Load Dataset¶

Load the processed dataset with engineered residual features and verify role distributions.

In [2]:
# Load processed dataset
df = pd.read_parquet(DATA_PATH)
# Filter to players with sufficient map volume for stable statistics
MIN_MAPS = 40
df = df[df['map_count'] >= MIN_MAPS].copy()
print(f"After filtering (MIN_MAPS={MIN_MAPS}): {len(df)} players")

# --- DELIBERATE ERROR INJECTION FOR DIAGNOSTIC DEMONSTRATION ---
# We deliberately inject known mislabels for HooXi and Spinx to demonstrate the model's
# diagnostic capability (identifying "confident errors" in the analysis later).
# In the EDA notebook, these are correct (Mixed and Rotator respectively), but we revert
# them here to the original "noisy" labels found during the project.
mask_hooxi = df['player_name'] == 'HooXi'
mask_spinx = df['player_name'] == 'Spinx'
df.loc[mask_hooxi, 'role_ct'] = 'Anchor'  # True role: Mixed
df.loc[mask_spinx, 'role_ct'] = 'Anchor'  # True role: Rotator
print("\n> [DIAGNOSTIC SETUP] Injected deliberate CT-role errors for HooXi (Mixed->Anchor) and Spinx (Rotator->Anchor).")
# ---------------------------------------------------------------

# Structural checks
print("Shape:", df.shape)
display(df.head(3))

# Verify expected columns exist (especially residual features)
expected_residuals = ['adat_residual_t', 'adat_residual_ct']
assert all(col in df.columns for col in expected_residuals), "Missing residual features"

# Quick overview of role distribution
print("\nRole distribution (T-side):")
display(df['role_t'].value_counts())
print("\nRole distribution (CT-side):")
display(df['role_ct'].value_counts())
After filtering (MIN_MAPS=40): 84 players

> [DIAGNOSTIC SETUP] Injected deliberate CT-role errors for HooXi (Mixed->Anchor) and Spinx (Rotator->Anchor).
Shape: (84, 27)
steamid player_name team_clan_name map_count tapd_ct tapd_t tapd_overall oap_ct oap_t oap_overall podt_ct podt_t podt_overall pokt_ct pokt_t pokt_overall adnt_rank_ct adnt_rank_t adnt_rank_overall adat_rank_ct adat_rank_t adat_rank_overall role_overall role_t role_ct adat_residual_t adat_residual_ct
0 76561198041683378 NiKo G2 Esports 158 60.952893 59.136540 60.136000 24.745965 24.093423 24.424242 21.020276 24.857741 22.507740 17.051295 21.586555 19.197995 0.562493 0.621199 0.593089 0.547525 0.695336 0.616046 Lurker Spacetaker Rotator 0.074668 -0.014643
1 76561198012872053 huNter G2 Esports 158 62.048685 62.871661 62.589800 16.852540 14.807692 15.847511 21.585198 27.994772 24.696747 17.195516 27.180894 22.538284 0.480859 0.643004 0.571875 0.406082 0.615021 0.455108 Flex Lurker Rotator -0.026997 -0.073699
2 76561198074762801 m0NESY G2 Esports 155 62.786553 66.632594 64.362519 23.914373 17.754078 20.873335 19.122381 23.094640 21.473108 17.397469 26.423178 21.056274 0.577785 0.423733 0.453028 0.515617 0.409889 0.427645 AWPer AWPer AWPer -0.017438 -0.061984
Role distribution (T-side):
role_t
Spacetaker     31
Lurker         24
AWPer          17
Half-Lurker    12
Name: count, dtype: int64
Role distribution (CT-side):
role_ct
Rotator    27
Anchor     23
AWPer      17
Mixed      17
Name: count, dtype: int64

Cross-Validation Strategy¶

TL;DR: Define a 4-split, 20-repeat Stratified K-Fold strategy (80 total folds) that will be reused across all models for fair comparison.

Why This Strategy?

The small sample size (N=84) requires a rigorous CV strategy to obtain reliable performance estimates. Using 4 splits provides reasonable training set sizes (~63 samples in each training fold, ~21 samples in each test fold) while 20 repeats ensures statistical stability through multiple independent evaluations. Stratified sampling preserves class balance across all folds.

In [3]:
# === Cross-Validation Strategy ===

# Parameters
N_SPLITS = 4
N_REPEATS = 20
RANDOM_STATE = 42

# Create CV strategy (will be used for all models)
# Note: Will need to specify which side (T or CT) and target column when fitting
cv_strategy = RepeatedStratifiedKFold(
    n_splits=N_SPLITS,
    n_repeats=N_REPEATS,
    random_state=RANDOM_STATE
)
print(f"Cross-validation strategy: {N_SPLITS} splits × {N_REPEATS} repeats = {N_SPLITS * N_REPEATS} total folds per side")
Cross-validation strategy: 4 splits × 20 repeats = 80 total folds per side

Feature Sets¶

TL;DR: Define four feature sets for ablation testing: Raw (baseline), Orthogonal (ADNT + adat_residual, no positional multicollinearity), Residuals (adat_residual only), and Full (all positioning features).

Feature Set Definitions

Raw Features: All original behavioural and positional metrics. Includes both ADNT and ADAT despite their high correlation (r ≈ 0.92 on T-side, r ≈ 0.78 on CT-side). Baseline with multicollinearity among positional features (ADNT/ADAT).

Orthogonal Features: Uses ADNT + adat_residual (orthogonal pair, ρ ≈ 0.00). Eliminates multicollinearity while retaining both positioning dimensions. Tests whether removing positional multicollinearity improves model stability.

Residual Features: Uses only adat_residual (drops ADNT entirely). Tests whether the residual alone is sufficient, or if ADNT provides additional signal.

Full Features: All behavioural + all three positioning features (ADNT, ADAT, adat_residual). Tests whether including both ADAT and adat_residual produces a meaningful performance improvement. Tree-based models may handle multicollinearity better than linear models.

Side-Specific Implementation: Feature sets are generated separately for T-side (using *_t suffix) and CT-side (using *_ct suffix). All four sets will be evaluated for both sides.

In [4]:
# === Feature Sets for Ablation Study ===

# Generate feature sets for both sides using modelling utility
FEATURE_SETS_T = get_feature_sets('t')
FEATURE_SETS_CT = get_feature_sets('ct')

# Store in a nested dictionary for easy access
FEATURE_SETS = {
    't': FEATURE_SETS_T,
    'ct': FEATURE_SETS_CT
}

# Define ambiguous roles 
EXCLUDED_ROLES = {'t': 'Half-Lurker', 'ct': 'Mixed'}

# Display feature sets for both sides
print("=" * 60)
print("T-SIDE FEATURE SETS")
print("=" * 60)
for name, features in FEATURE_SETS_T.items():
    print(f"\n{name} Features ({len(features)} features):")
    print(features)

print("\n" + "=" * 60)
print("CT-SIDE FEATURE SETS")
print("=" * 60)
for name, features in FEATURE_SETS_CT.items():
    print(f"\n{name} Features ({len(features)} features):")
    print(features)
============================================================
T-SIDE FEATURE SETS
============================================================

Raw Features (6 features):
['tapd_t', 'oap_t', 'podt_t', 'pokt_t', 'adnt_rank_t', 'adat_rank_t']

Orthogonal Features (6 features):
['tapd_t', 'oap_t', 'podt_t', 'pokt_t', 'adnt_rank_t', 'adat_residual_t']

Residuals Features (5 features):
['tapd_t', 'oap_t', 'podt_t', 'pokt_t', 'adat_residual_t']

Full Features (7 features):
['tapd_t', 'oap_t', 'podt_t', 'pokt_t', 'adnt_rank_t', 'adat_rank_t', 'adat_residual_t']

============================================================
CT-SIDE FEATURE SETS
============================================================

Raw Features (6 features):
['tapd_ct', 'oap_ct', 'podt_ct', 'pokt_ct', 'adnt_rank_ct', 'adat_rank_ct']

Orthogonal Features (6 features):
['tapd_ct', 'oap_ct', 'podt_ct', 'pokt_ct', 'adnt_rank_ct', 'adat_residual_ct']

Residuals Features (5 features):
['tapd_ct', 'oap_ct', 'podt_ct', 'pokt_ct', 'adat_residual_ct']

Full Features (7 features):
['tapd_ct', 'oap_ct', 'podt_ct', 'pokt_ct', 'adnt_rank_ct', 'adat_rank_ct', 'adat_residual_ct']

Setup Complete

  • Dataset loaded: 84 players with all expected residual features
  • CV strategy defined: 80 total folds (4 splits × 20 repeats)
  • Four feature sets prepared for ablation (Raw, Orthogonal, Residuals, Full) for both sides
  • Scaling pipeline ready (applied within CV loops to prevent data leakage)

Ready for baseline modelling.

2. Baseline Models & Feature Selection¶

TL;DR: Establish baselines using Dummy and Logistic Regression across all feature sets. Select Orthogonal feature set for parsimony. Diagnose per-class performance, then exclude ambiguous roles to test core role separability. Visualise feature–role associations from the filtered dataset.

Methodology Details

Baseline Evaluation:

  • Models: Dummy Classifier (stratified baseline) vs. Logistic Regression (linear baseline)
  • Feature Sets: Four sets evaluated to handle multicollinearity between ADNT and ADAT:
    • Raw: All features (high multicollinearity)
    • Orthogonal: ADNT + adat_residual (uncorrelated positioning dimensions)
    • Residuals: adat_residual only (tests if ADNT is redundant)
    • Full: All features (tests if models handle multicollinearity well)
  • Metric: F1-Macro (mean of per-class F1 scores) to handle class imbalance

Sensitivity Analysis:

  • Per-class metrics reveal that ambiguous roles (Half-Lurker on T-side, Mixed on CT-side) have poor performance, suggesting they blur boundaries between core playstyles
  • We re-evaluate after excluding these roles to test whether the model struggles due to weak features or simply because these roles are inherently fuzzy
  • Coefficient Visualisation: We visualise feature weights from the filtered dataset to see the clearest signal of what defines each core role

Baseline Models & Feature Selection¶

Run baseline models across all feature sets for both sides, then select the optimal feature set.

In [5]:
# === Baseline Models: Run & Display Results ===

baseline_results = {}

for side in ['t', 'ct']:
    print(f"Running baselines for {side.upper()}-side...")
    baseline_res = evaluate_baseline_models(
        df=df,
        side=side,
        feature_sets_dict=FEATURE_SETS[side],
        cv_strategy=cv_strategy
    )
    baseline_results[side] = baseline_res
    
    # Save raw results
    baseline_res[['model', 'feature_set', 'mean_f1', 'std_f1']].to_csv(
        TAB_DIR / f"baseline_results_{side}.csv", index=False
    )

# Display full comparison tables for both sides
print("\n" + "=" * 60)
print("FEATURE SET COMPARISON (Logistic Regression F1-Macro)")
print("=" * 60)

for side in ['t', 'ct']:
    comparison = summarise_feature_set_results(baseline_results[side], side=side)
    print(f"\n{side.upper()}-Side:")
    display(comparison)

# Combined comparison for saving
comparison_t = summarise_feature_set_results(baseline_results['t'], side='t')
comparison_ct = summarise_feature_set_results(baseline_results['ct'], side='ct')
comparison_df = pd.concat([comparison_t, comparison_ct], ignore_index=True)
comparison_df.to_csv(TAB_DIR / "feature_set_comparison.csv", index=False)
print(f"\nComparison table saved to: {TAB_DIR / 'feature_set_comparison.csv'}")
Running baselines for T-side...
Running baselines for CT-side...

============================================================
FEATURE SET COMPARISON (Logistic Regression F1-Macro)
============================================================

T-Side:
Side Model Feature_Set Mean_F1_Macro Std_F1_Macro Mean_Accuracy Std_Accuracy
0 T-Side LogisticRegression Full 0.693830 0.088836 0.770833 0.068633
1 T-Side LogisticRegression Raw 0.685843 0.085449 0.767857 0.069160
2 T-Side LogisticRegression Orthogonal 0.674325 0.074047 0.763690 0.063640
3 T-Side LogisticRegression Residuals 0.466390 0.085475 0.579762 0.083427
4 T-Side Dummy Orthogonal 0.253056 0.110655 0.305357 0.110322
5 T-Side Dummy Raw 0.253056 0.110655 0.305357 0.110322
6 T-Side Dummy Residuals 0.253056 0.110655 0.305357 0.110322
7 T-Side Dummy Full 0.253056 0.110655 0.305357 0.110322
CT-Side:
Side Model Feature_Set Mean_F1_Macro Std_F1_Macro Mean_Accuracy Std_Accuracy
0 CT-Side LogisticRegression Raw 0.508231 0.082315 0.564881 0.075196
1 CT-Side LogisticRegression Full 0.501163 0.085292 0.556548 0.084714
2 CT-Side LogisticRegression Orthogonal 0.495041 0.078242 0.554167 0.079769
3 CT-Side LogisticRegression Residuals 0.400822 0.077293 0.454167 0.082115
4 CT-Side Dummy Orthogonal 0.255153 0.090947 0.283333 0.092490
5 CT-Side Dummy Raw 0.255153 0.090947 0.283333 0.092490
6 CT-Side Dummy Residuals 0.255153 0.090947 0.283333 0.092490
7 CT-Side Dummy Full 0.255153 0.090947 0.283333 0.092490
Comparison table saved to: P:\cs2-playstyle-analysis-2024\results\classification\tables\feature_set_comparison.csv

Feature Set Selection: Baseline Analysis¶

Decision: Use Orthogonal for interpretation (coefficient analysis, sensitivity) but carry Raw, Orthogonal, and Full forward to the model comparison phase.

Detailed Analysis

1. Baseline Comparison
Logistic Regression substantially outperforms the stratified dummy classifier across all feature sets:

  • T-Side: LogReg (0.674 F1) vs. Dummy (0.253 F1) — 166% improvement
  • CT-Side: LogReg (0.495 F1) vs. Dummy (0.255 F1) — 94% improvement

These effect sizes (Cohen's d ≈ 5.7 for T-side, ≈ 3.1 for CT-side) confirm the features contain strong discriminative signal.
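For transparency, a minimal sketch of one common (pooled-SD) formulation of Cohen's d over fold scores; the exact values reported above depend on the variant and the score arrays used:

import numpy as np

def cohens_d(scores_a, scores_b):
    """Effect size between two arrays of CV fold scores (pooled-SD variant)."""
    a, b = np.asarray(scores_a), np.asarray(scores_b)
    pooled_sd = np.sqrt((a.var(ddof=1) + b.var(ddof=1)) / 2.0)
    return (a.mean() - b.mean()) / pooled_sd

# e.g. cohens_d(logreg_fold_scores, dummy_fold_scores) over the 80 folds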

2. Feature Set Performance
Performance differences between Orthogonal, Raw, and Full are almost negligible:

  • T-Side: Full (0.694) > Raw (0.686) > Orthogonal (0.674) — differences within 0.02 F1
  • CT-Side: Raw (0.508) > Full (0.501) > Orthogonal (0.495) — differences within 0.013 F1

All three configurations show overlapping standard deviations, indicating no statistically meaningful advantage for Full or Raw.

3. Why Multiple Feature Sets for Model Comparison?
Although Orthogonal is preferred for interpretation, we retain all three sets for the advanced model phase because:

  • Multicollinearity affects LogReg more than trees/SVMs: The coefficient instability caused by correlated features (ADNT ↔ ADAT, r ≈ 0.92) is a LogReg-specific issue. Tree-based models (XGBoost, RF) and SVMs are robust to multicollinearity and may extract additional signal from Raw/Full.
  • Differences are within noise: With N=84, a 0.02 F1 difference corresponds to ~1–2 players. The sets are effectively tied, so committing to one risks leaving performance on the table.
  • Minimal cost: Testing three feature sets adds negligible compute overhead and ensures we identify the optimal configuration for each model class.

4. Interpretation vs. Prediction

  • Orthogonal will be used for the following sensitivity analysis and coefficient visualisation (Section 2) to ensure stable, interpretable feature–role associations.
  • All three sets will be benchmarked in the model comparison (Section 3) to maximise understanding of predictive performance.

5. CT-Side Signal Limitations
CT-side performance remains modest (F1 ≈ 0.50) across all feature sets, suggesting the CT role taxonomy is harder to discriminate with our playstyle metrics alone. This likely reflects the situational nature of CT roles (e.g., map-specific assignments, dynamic rotations). Predictions should be treated as exploratory pending richer CT-specific features.

In [6]:
# === Feature Set Selection ===
feature_set_selections = {'t': 'Orthogonal', 'ct': 'Orthogonal'}
BEST_FEATURE_SET_T = feature_set_selections['t']
BEST_FEATURE_SET_CT = feature_set_selections['ct']
print(f"Selected: {feature_set_selections}")
Selected: {'t': 'Orthogonal', 'ct': 'Orthogonal'}

Per-Class Performance Diagnosis¶

Evaluate precision, recall, and F1 per role to identify which roles are harder/easier to predict.

Why Per-Class Metrics?

F1-Macro provides an overall performance summary but masks per-class differences. Per-class metrics reveal whether the model struggles with specific roles (e.g., minority classes or ambiguous definitions) and help identify precision–recall trade-offs.
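evaluate_per_class_metrics is a project helper that averages these metrics over the repeated folds; the underlying idea, sketched with plain scikit-learn over a single CV pass:

import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, cross_val_predict
from sklearn.metrics import precision_recall_fscore_support
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

def per_class_metrics_single_pass(X, y, n_splits=4, seed=42):
    """Out-of-fold predictions scored per role (one CV pass, no repeats)."""
    pipe = Pipeline([('scaler', StandardScaler()),
                     ('clf', LogisticRegression(max_iter=1000, random_state=seed))])
    cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    y_pred = cross_val_predict(pipe, X, y, cv=cv)
    labels = np.unique(y)
    p, r, f1, _ = precision_recall_fscore_support(y, y_pred, labels=labels, zero_division=0)
    return pd.DataFrame({'role': labels, 'precision': p, 'recall': r, 'f1': f1})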

In [7]:
# === Per-Class Metrics: Full Dataset (T & CT) ===

per_class_dfs = {}
full_f1_scores = {}

for side in ['t', 'ct']:
    target_col = f'role_{side}'
    feats = FEATURE_SETS[side][feature_set_selections[side]]
    
    # Prepare Data
    df_clean = df.dropna(subset=[target_col]).copy()
    X = df_clean[feats].values
    y = df_clean[target_col].values
    
    # Evaluate
    clf = LogisticRegression(max_iter=1000, random_state=42)
    metrics = evaluate_per_class_metrics(clf, X, y, cv_strategy)
    
    # Store F1-Macro for later comparison
    cv_res = evaluate_model_cv(clf, X, y, cv_strategy)
    full_f1_scores[side] = cv_res['mean_score']
    
    # Store & Save
    per_class_dfs[side] = metrics
    metrics.to_csv(TAB_DIR / f"per_class_metrics_{side}.csv", index=False)
    
    print(f"\n{side.upper()}-Side Per-Class Metrics (Full Dataset):")
    # Show worst performing roles first to highlight ambiguous roles
    display(metrics.sort_values('f1_mean', ascending=True))
    print(f"Full table saved to: {TAB_DIR / f'per_class_metrics_{side}.csv'}")
T-Side Per-Class Metrics (Full Dataset):
role precision_mean precision_std recall_mean recall_std f1_mean f1_std
1 Half-Lurker 0.253542 0.335552 0.183333 0.240947 0.196806 0.240360
3 Spacetaker 0.763843 0.107101 0.843527 0.137287 0.793073 0.094453
2 Lurker 0.795965 0.141438 0.883333 0.127475 0.824978 0.094273
0 AWPer 0.930179 0.114774 0.861250 0.173561 0.882445 0.124662
Full table saved to: P:\cs2-playstyle-analysis-2024\results\classification\tables\per_class_metrics_t.csv

CT-Side Per-Class Metrics (Full Dataset):
role precision_mean precision_std recall_mean recall_std f1_mean f1_std
2 Mixed 0.186280 0.265918 0.106875 0.135264 0.126678 0.155462
0 AWPer 0.637991 0.214509 0.597500 0.245701 0.588312 0.190074
3 Rotator 0.604961 0.136919 0.689286 0.153239 0.631406 0.112602
1 Anchor 0.604894 0.142322 0.697083 0.182361 0.633769 0.122807
Full table saved to: P:\cs2-playstyle-analysis-2024\results\classification\tables\per_class_metrics_ct.csv

Key Finding

Ambiguous roles (Half-Lurker on T-side, Mixed on CT-side) show particularly poor F1 scores, suggesting they blur the boundaries between core playstyles rather than forming distinct archetypes. This justifies filtering them out to test whether the model can better separate clearly defined core roles.

Sensitivity Analysis: Core Role Separability¶

Exclude ambiguous roles to test whether the model struggles due to weak features or simply because these roles are inherently fuzzy. If performance improves substantially, it confirms these roles introduce systematic confusion rather than revealing feature limitations.
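run_sensitivity_analysis is a project helper; its core comparison can be sketched as follows (an assumed shape, not the exact implementation):

from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

def f1_with_and_without(df, side, excluded_role, features, cv):
    """Mean F1-Macro on the full cohort vs the cohort minus one ambiguous role."""
    pipe = Pipeline([('scaler', StandardScaler()),
                     ('clf', LogisticRegression(max_iter=1000, random_state=42))])
    target = f'role_{side}'
    full = df.dropna(subset=[target])
    filt = full[full[target] != excluded_role]
    f1_full = cross_val_score(pipe, full[features], full[target], cv=cv, scoring='f1_macro').mean()
    f1_filt = cross_val_score(pipe, filt[features], filt[target], cv=cv, scoring='f1_macro').mean()
    return f1_full, f1_filt, f1_filt - f1_full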

In [8]:
# === Sensitivity Analysis: Run & Display F1 Comparison ===

# Run sensitivity analysis using the helper function
sensitivity_results = {}
for side in ['t', 'ct']:
    sensitivity_results[side] = run_sensitivity_analysis(
        df=df,
        side=side,
        excluded_role=EXCLUDED_ROLES[side],
        feature_names=FEATURE_SETS[side][feature_set_selections[side]],
        cv_strategy=cv_strategy,
        tab_dir=TAB_DIR
    )

# Build and display F1 comparison table
comparison_rows = []
for side in ['t', 'ct']:
    res = sensitivity_results[side]
    comparison_rows.append({
        'Side': f"{side.upper()}-Side",
        'Excluded_Role': EXCLUDED_ROLES[side],
        'N_Full': res['n_full'],
        'N_Filtered': res['n_filtered'],
        'F1_Full': res['full_f1'],
        'F1_Filtered': res['filtered_f1'],
        'Delta': res['delta']
    })

print("=== Sensitivity Analysis: Impact of Excluding Ambiguous Roles ===\n")
impact_df = pd.DataFrame(comparison_rows)
display(impact_df)

# === Per-Class Metrics: Filtered Dataset ===

print("=== Per-Class Metrics (Core Roles Only) ===")
for side in ['t', 'ct']:
    print(f"\n{side.upper()}-Side (excluding {EXCLUDED_ROLES[side]}):")
    display(sensitivity_results[side]['per_class_filtered'].sort_values('f1_mean', ascending=False))
=== Sensitivity Analysis: Impact of Excluding Ambiguous Roles ===

Side Excluded_Role N_Full N_Filtered F1_Full F1_Filtered Delta
0 T-Side Half-Lurker 84 72 0.674325 0.921120 0.246794
1 CT-Side Mixed 84 67 0.495041 0.670192 0.175150
=== Per-Class Metrics (Core Roles Only) ===

T-Side (excluding Half-Lurker):
role precision_mean precision_std recall_mean recall_std f1_mean f1_std
1 Lurker 0.908631 0.097040 0.950000 0.080795 0.923538 0.061559
0 AWPer 0.962292 0.082653 0.899375 0.139305 0.922455 0.094451
2 Spacetaker 0.932222 0.075045 0.912723 0.099624 0.917367 0.064466
CT-Side (excluding Mixed):
role precision_mean precision_std recall_mean recall_std f1_mean f1_std
1 Anchor 0.807009 0.124657 0.760833 0.144316 0.769876 0.092408
2 Rotator 0.651553 0.158925 0.687202 0.186215 0.654556 0.137992
0 AWPer 0.655898 0.230986 0.575000 0.227211 0.586142 0.190224

Result

Excluding ambiguous roles improves F1-Macro substantially (Δ = +0.25 for T-side, +0.18 for CT-side), confirming these roles introduce systematic confusion. The selected features can effectively distinguish clearly defined core roles. Per-class performance generally improves, reaching up to ~0.92 F1 for T-side AWPers and Lurkers.

In [9]:
# === T-Side Confusion Matrices: Full vs Filtered ===

side = 't'
feats = FEATURE_SETS[side][feature_set_selections[side]]
clf = LogisticRegression(max_iter=1000, random_state=42)

plot_confusion_matrices_comparison(
    df=df,
    side=side,
    feature_names=feats,
    excluded_role=EXCLUDED_ROLES[side],
    model=clf,
    cv_strategy=cv_strategy,
    fig_dir=FIG_DIR
)
plt.show()
[Figure: T-side confusion matrices, full dataset vs core roles (Half-Lurker excluded)]
In [10]:
# === CT-Side Confusion Matrices: Full vs Filtered ===

side = 'ct'
feats = FEATURE_SETS[side][feature_set_selections[side]]
clf = LogisticRegression(max_iter=1000, random_state=42)

plot_confusion_matrices_comparison(
    df=df,
    side=side,
    feature_names=feats,
    excluded_role=EXCLUDED_ROLES[side],
    model=clf,
    cv_strategy=cv_strategy,
    fig_dir=FIG_DIR
)
plt.show()
[Figure: CT-side confusion matrices, full dataset vs core roles (Mixed excluded)]

Observations

  1. CT-side AWPers are the least distinct core role, with ~39% misclassified as Rotators even after excluding Mixed. This confusion is intrinsic—removing the ambiguous class barely changes AWPer recall (59%→57%). CT AWPers likely share rotational positioning behaviour that our features cannot separate.

  2. Ambiguous roles skew toward specific archetypes: Half-Lurkers are predicted as Spacetakers (49%) more often than Lurkers (30%), indicating more aggressive tendencies than their name implies. Mixed players lean toward Anchor (48%) over Rotator (29%).

  3. T-side benefits more from filtering: Lurker recall jumps from 88% to 95% when Half-Lurker is removed, indicating Half-Lurkers were "absorbing" correct Lurker predictions. CT-side gains are more modest.

  4. Confusion asymmetry reveals role distinctiveness: Rotators are rarely misclassified as AWPers (13%), but AWPers are often misclassified as Rotators (32%). This suggests Rotators have a more unique behavioural signature while some AWPers exhibit rotator-like flexibility.

Model Interpretation: Feature–Role Associations¶

Visualise Logistic Regression coefficients from the filtered dataset (core roles only) to reveal the clearest signal of what defines each archetype.

In [11]:
# === Coefficient Visualisation: Core Roles (Filtered Dataset) ===

# Generate coefficient plots for filtered dataset (core roles only)
for side in ['t', 'ct']:
    excluded_role = EXCLUDED_ROLES[side]
    df_filtered = df[df[f'role_{side}'] != excluded_role].copy()
    
    print(f"\nGenerating {side.upper()}-side coefficients (excluding {excluded_role})...")
    
    _, _, fig = fit_and_visualise_logreg(
        df=df_filtered,
        side=side,
        feature_set_name=feature_set_selections[side],
        feature_names=FEATURE_SETS[side][feature_set_selections[side]],
        fig_dir=FIG_DIR,
        tab_dir=TAB_DIR,
        suffix='_filtered'
    )
    plt.show()
    
    print(f"  Coefficients saved to: {TAB_DIR / f'logreg_coefficients_{side}_filtered.csv'}")
Generating T-side coefficients (excluding Half-Lurker)...
[Figure: T-side logistic regression coefficients per role (Orthogonal features, core roles)]
  Coefficients saved to: P:\cs2-playstyle-analysis-2024\results\classification\tables\logreg_coefficients_t_filtered.csv

Generating CT-side coefficients (excluding Mixed)...
[Figure: CT-side logistic regression coefficients per role (Orthogonal features, core roles)]
  Coefficients saved to: P:\cs2-playstyle-analysis-2024\results\classification\tables\logreg_coefficients_ct_filtered.csv

Key Findings

Coefficients reveal clear behavioural signatures:

  • Spacetakers (T): High OAP (opening attempts) and low POKT (trade kills)
  • Lurkers (T): High ADNT (isolation) and high POKT (trade kills)
  • AWPers (T): High TAPD (time alive) and low ADNT (positioned close to teammates)
  • Anchors (CT): High ADNT and high ADAT residual (positioned far from teammates)
  • Rotators (CT): Low ADNT (close to teammates) and low TAPD (time alive)
  • AWPers (CT): High TAPD (time alive) and relatively high OAP (driven by "pick" kills)

These associations generally align with domain knowledge and findings from the EDA, validating the selected features.

Summary & Interpretations¶

Key Takeaways:

  • Feature Selection: No clear winner among Raw, Orthogonal, and Full feature sets (differences < 0.02 F1). Orthogonal used for interpretation due to coefficient stability; all three carried forward for model comparison.
  • Sensitivity Analysis: Excluding ambiguous roles improves F1 by +0.25 (T) and +0.18 (CT), confirming core roles are relatively well separated. We continue excluding ambiguous roles.
  • Feature Importance: Coefficients reveal clear behavioural signatures (e.g. Lurkers = isolation, Spacetakers = opening attempts)
  • Data Constraints: With only 84 players (~4–5 per role per CV fold), results are exploratory and should be validated with larger cohorts

Detailed Interpretations

Baseline Performance & Feature Selection:

  • Logistic Regression substantially outperforms Dummy Classifier, indicating features contain meaningful signal
  • Performance differences between Raw, Orthogonal, and Full are negligible (within 0.02 F1, ~1 player difference at N=84)
  • Orthogonal used for interpretation (sensitivity analysis, coefficients) because multicollinearity destabilises LogReg coefficients
  • All three feature sets will be tested with advanced models, since tree-based methods (RF, XGBoost) and SVMs are robust to multicollinearity
  • CT-side results remain modest (F1 ≈ 0.50). Treat predictions as exploratory pending richer features

Core vs. Ambiguous Roles:

  • Per-class diagnosis revealed that ambiguous roles (Half-Lurker on T-side, Mixed on CT-side) are very hard to classify and blur boundaries between core playstyles
  • Sensitivity analysis confirms these roles introduce systematic confusion: excluding them improves F1-Macro, validating the model can effectively distinguish clearly-defined core roles
  • The performance improvement (Δ) quantifies the impact and confirms these are mixed states rather than distinct archetypes

Feature–Role Associations:

  • Coefficient visualisation reflects clear behavioural signatures, analogous to findings in EDA (01_eda.ipynb): e.g.
    • Spacetakers (T): High oap (entry attempts)
    • Lurkers (T): High adnt (isolation)
    • Anchors vs. Rotators (CT): Distinguished by positioning metrics (packing density and mobility)
  • These associations align with domain knowledge, validating the features and confirming the model captured genuine playstyle differences.

Next Steps: The logistic regression baseline is strong (~0.69 F1 on the full T-side dataset, rising to ~0.92 on core roles), but we proceed to Section 3 to test whether non-linear models (SVM, Random Forest, XGBoost) can improve performance. All three feature sets (Raw, Orthogonal, Full) will be benchmarked, since multicollinearity, which affects LogReg coefficient interpretation, is not a concern for tree-based models.

3. Advanced Model Comparison¶

TL;DR: Evaluate SVM, Random Forest, and XGBoost using Nested Cross-Validation. Compare performance across feature sets (Raw, Orthogonal, Full) to identify the champion model for further analysis.

Methodology: Nested Cross-Validation

Why Nested CV?
With only 84 players, we cannot afford a separate holdout set for hyperparameter tuning. If we tuned hyperparameters on the same data used for evaluation, we would overfit to noise. Nested CV solves this:

  • Outer Loop (Evaluation): Our existing 4-split × 20-repeat strategy (80 folds). This measures generalisation performance.
  • Inner Loop (Tuning): Inside each outer training fold (~63 players), we run a 3-fold GridSearchCV to select the best hyperparameters.

The Flow:

  1. Outer loop splits data into Train (63) and Test (21).
  2. Inner loop splits Train into Inner-Train (42) and Inner-Val (21), tests hyperparameter combinations.
  3. Best hyperparameters are selected, model is refit on full Train (63).
  4. Performance is evaluated on Test (21).
  5. Repeat 80 times; the averaged scores give an unbiased performance estimate (see the sketch below).
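In scikit-learn this flow is compact: a GridSearchCV estimator forms the inner loop and cross_val_score forms the outer loop. The project helper run_model_tuning wraps this pattern; a sketch with part of the SVM grid (parameters prefixed with clf__ because of the Pipeline wrapper; X, y are the core-role features and labels for one side):

from sklearn.model_selection import GridSearchCV, RepeatedStratifiedKFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

outer_cv = RepeatedStratifiedKFold(n_splits=4, n_repeats=20, random_state=42)
pipe = Pipeline([('scaler', StandardScaler()), ('clf', SVC(class_weight='balanced'))])
param_grid = {'clf__C': [0.1, 1, 10, 100], 'clf__kernel': ['linear', 'rbf']}

# Inner loop: a 3-fold grid search runs inside every outer training fold
inner = GridSearchCV(pipe, param_grid, cv=3, scoring='f1_macro')

# Outer loop: 80 test-fold scores, untouched by the tuning
nested_scores = cross_val_score(inner, X, y, cv=outer_cv, scoring='f1_macro')
print(nested_scores.mean(), nested_scores.std())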

Hyperparameter Strategy:
We search broad, sensible ranges (orders of magnitude) rather than fine-grained values. With small data, finding a "stable region" matters more than pinpointing an exact optimum.

Feature Sets:
We test Raw, Orthogonal, and Full for each model. Tree-based models (RF, XGBoost) and SVMs are robust to multicollinearity, so they may extract additional signal from correlated features.

Note on Reported Hyperparameters:
Performance metrics (F1-Macro, standard deviation) are derived from the 80-fold Nested CV to ensure unbiased evaluation. The "Best Parameters" reported for each configuration (model + feature set + side) are derived from a final grid search run on the full filtered dataset for that side. This is standard practice: Nested CV validates the search strategy and provides unbiased performance estimates, while the final fit on all available data identifies the optimal hyperparameters for each configuration.
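A sketch of that final selection step, reusing pipe and param_grid from the sketch above (cv=3 mirroring the inner loop is an assumption; X_core/y_core are the filtered core-role data for one side):

from sklearn.model_selection import GridSearchCV

final_search = GridSearchCV(pipe, param_grid, cv=3, scoring='f1_macro')
final_search.fit(X_core, y_core)   # full filtered dataset for this side
print(final_search.best_params_)   # the "Best Parameters" reported per configuration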

In [12]:
# === Section 3 Setup ===

# Feature sets to evaluate (excluding 'Residuals' as per plan)
FEATURE_SETS_TO_TEST = ['Raw', 'Orthogonal', 'Full']

print("Section 3 setup complete. Ready for model evaluation.")
Section 3 setup complete. Ready for model evaluation.

3.1 Support Vector Machine (SVM)¶

Why SVM? SVMs excel with small datasets and high-dimensional feature spaces. The kernel trick allows them to find non-linear decision boundaries without explicit feature engineering. With balanced class weights, they handle imbalanced classes gracefully.

Hyperparameter Grid:

  • C (Regularisation): [0.1, 1, 10, 100] — Controls the trade-off between margin width and misclassification. Lower C = smoother boundary.
  • kernel: ['linear', 'rbf'] — Linear for simple boundaries, RBF for complex non-linear patterns.
  • gamma: ['scale', 'auto'] — RBF kernel spread. 'scale' is generally preferred.
  • class_weight: ['balanced'] — Adjusts weights inversely proportional to class frequencies.
  • probability: [True] — Enables probability estimates (needed for later calibration/SHAP).
In [13]:
# === SVM: Define Grid & Run ===

svm_param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto'],
    'class_weight': ['balanced'],
    'probability': [True]
}

print("Running SVM with Nested CV...")
print(f"Grid size: {np.prod([len(v) for v in svm_param_grid.values()])} combinations per inner CV")

svm_results = run_model_tuning(
    model_class=SVC,
    param_grid=svm_param_grid,
    model_name='SVM',
    df=df,
    feature_sets=FEATURE_SETS,
    feature_set_names=FEATURE_SETS_TO_TEST,
    cv_strategy=cv_strategy,
    excluded_roles=EXCLUDED_ROLES
)

print("\nSVM Results:")
display(svm_results.round(3))

# Save results
svm_results.to_csv(TAB_DIR / "model_results_svm.csv", index=False)
Running SVM with Nested CV...
Grid size: 16 combinations per inner CV

SVM Results:
Side Model Feature_Set Mean_F1 Std_F1 Mean_Accuracy Std_Accuracy Mean_Train_F1 Std_Train_F1 Mean_Fit_Time Best_Params All_Scores
0 T-Side SVM Raw 0.874 0.066 0.872 0.065 0.961 0.025 0.144 {'C': 1, 'class_weight': 'balanced', 'gamma': ... [1.0, 0.8766884531590414, 0.8384208384208384, ...
1 T-Side SVM Orthogonal 0.913 0.053 0.910 0.054 0.962 0.017 0.060 {'C': 0.1, 'class_weight': 'balanced', 'gamma'... [0.9407407407407407, 0.9027777777777778, 0.889...
2 T-Side SVM Full 0.907 0.056 0.903 0.057 0.965 0.020 0.063 {'C': 0.1, 'class_weight': 'balanced', 'gamma'... [1.0, 0.8303872053872053, 0.8384208384208384, ...
3 CT-Side SVM Raw 0.704 0.098 0.711 0.095 0.867 0.060 0.068 {'C': 1, 'class_weight': 'balanced', 'gamma': ... [0.5285714285714286, 0.810966810966811, 0.7146...
4 CT-Side SVM Orthogonal 0.693 0.093 0.699 0.092 0.868 0.062 0.070 {'C': 1, 'class_weight': 'balanced', 'gamma': ... [0.6813186813186812, 0.7523809523809524, 0.714...
5 CT-Side SVM Full 0.708 0.091 0.715 0.089 0.875 0.058 0.069 {'C': 0.1, 'class_weight': 'balanced', 'gamma'... [0.6794871794871794, 0.810966810966811, 0.7146...

3.2 Random Forest¶

Why Random Forest? Ensemble of decision trees that reduces overfitting through bagging and feature randomisation. Robust to outliers and handles non-linear relationships well. Provides built-in feature importance estimates.

Hyperparameter Grid (Regularised for Small Data):

  • n_estimators: [100] — Fixed at 100 trees (sufficient for N=84).
  • max_depth: [2, 3, 5] — Shallow trees to prevent memorising the ~63 training samples per fold.
  • min_samples_leaf: [5, 8, 10] — Higher values force generalisation; with ~16 samples per class, leaves must represent broader patterns.
  • max_features: ['sqrt'] — Standard choice for classification.
  • class_weight: ['balanced'] — Adjusts for class imbalance.
In [14]:
# === Random Forest: Define Grid & Run (Regularised) ===

rf_param_grid = {
    'n_estimators': [100],
    'max_depth': [2, 3, 5],              # Shallower to prevent overfitting
    'min_samples_leaf': [5, 8, 10],   # Higher values force generalisation
    'max_features': ['sqrt'],
    'class_weight': ['balanced']
}

print("Running Random Forest with Nested CV (Regularised Grid)...")
print(f"Grid size: {np.prod([len(v) for v in rf_param_grid.values()])} combinations per inner CV")

rf_results = run_model_tuning(
    model_class=RandomForestClassifier,
    param_grid=rf_param_grid,
    model_name='RandomForest',
    df=df,
    feature_sets=FEATURE_SETS,
    feature_set_names=FEATURE_SETS_TO_TEST,
    cv_strategy=cv_strategy,
    excluded_roles=EXCLUDED_ROLES
)

print("\nRandom Forest Results:")
display(rf_results.round(3))

# Save results
rf_results.to_csv(TAB_DIR / "model_results_rf.csv", index=False)
Running Random Forest with Nested CV (Regularised Grid)...
Grid size: 9 combinations per inner CV

Random Forest Results:
Side Model Feature_Set Mean_F1 Std_F1 Mean_Accuracy Std_Accuracy Mean_Train_F1 Std_Train_F1 Mean_Fit_Time Best_Params All_Scores
0 T-Side RandomForest Raw 0.822 0.091 0.822 0.088 0.954 0.027 0.573 {'class_weight': 'balanced', 'max_depth': 3, '... [0.8857142857142857, 0.9500891265597149, 0.773...
1 T-Side RandomForest Orthogonal 0.813 0.103 0.813 0.101 0.957 0.035 0.547 {'class_weight': 'balanced', 'max_depth': 2, '... [0.8363636363636363, 0.8962962962962964, 0.824...
2 T-Side RandomForest Full 0.828 0.084 0.825 0.083 0.955 0.029 0.508 {'class_weight': 'balanced', 'max_depth': 2, '... [0.9407407407407407, 0.9500891265597149, 0.773...
3 CT-Side RandomForest Raw 0.756 0.084 0.758 0.080 0.870 0.029 0.523 {'class_weight': 'balanced', 'max_depth': 2, '... [0.7579365079365079, 0.6892736892736893, 0.776...
4 CT-Side RandomForest Orthogonal 0.751 0.090 0.755 0.087 0.861 0.034 0.523 {'class_weight': 'balanced', 'max_depth': 2, '... [0.6895104895104894, 0.7594405594405594, 0.714...
5 CT-Side RandomForest Full 0.764 0.084 0.767 0.081 0.863 0.033 0.517 {'class_weight': 'balanced', 'max_depth': 2, '... [0.6897546897546897, 0.8773892773892774, 0.776...

3.3 XGBoost¶

Why XGBoost? Gradient boosting builds trees sequentially, each correcting the errors of the previous. Often achieves state-of-the-art performance on tabular data. However, it is prone to overfitting on small datasets, so we use conservative hyperparameters with explicit regularisation.

Hyperparameter Grid (Regularised):

  • n_estimators: [100] — Number of boosting rounds. Kept modest to prevent overfitting.
  • learning_rate: [0.01, 0.1] — Step size shrinkage. Lower values require more trees but generalise better.
  • max_depth: [2, 3, 5] — Shallow trees to limit model complexity with small data.
  • min_child_weight: [5, 10] — Minimum sum of instance weight in a child node. Higher values prevent overly specific splits that fit individual samples. Critical regulariser for small datasets.
  • subsample: [0.8] — Fraction of samples used per tree. Introduces stochasticity for regularisation.
In [15]:
# === XGBoost: Define Grid & Run ===

xgb_param_grid = {
    'n_estimators': [100],
    'learning_rate': [0.01, 0.1],
    'max_depth': [2, 3, 5],              # Shallower to prevent overfitting
    'min_child_weight': [5, 10],      # Key regulariser for small data
    'subsample': [0.8],
    'eval_metric': ['mlogloss']
}

print("Running XGBoost with Nested CV...")
print(f"Grid size: {np.prod([len(v) for v in xgb_param_grid.values()])} combinations per inner CV")

xgb_results = run_model_tuning(
    model_class=XGBClassifier,
    param_grid=xgb_param_grid,
    model_name='XGBoost',
    df=df,
    feature_sets=FEATURE_SETS,
    feature_set_names=FEATURE_SETS_TO_TEST,
    cv_strategy=cv_strategy,
    excluded_roles=EXCLUDED_ROLES
)

print("\nXGBoost Results:")
display(xgb_results.round(3))

# Save results
xgb_results.to_csv(TAB_DIR / "model_results_xgb.csv", index=False)
Running XGBoost with Nested CV...
Grid size: 12 combinations per inner CV

XGBoost Results:
Side Model Feature_Set Mean_F1 Std_F1 Mean_Accuracy Std_Accuracy Mean_Train_F1 Std_Train_F1 Mean_Fit_Time Best_Params All_Scores
0 T-Side XGBoost Raw 0.794 0.084 0.798 0.079 0.942 0.023 0.181 {'eval_metric': 'mlogloss', 'learning_rate': 0... [0.8857142857142857, 0.8850408850408851, 0.824...
1 T-Side XGBoost Orthogonal 0.812 0.099 0.816 0.097 0.973 0.015 0.180 {'eval_metric': 'mlogloss', 'learning_rate': 0... [0.8169191919191919, 0.8962962962962964, 0.773...
2 T-Side XGBoost Full 0.819 0.080 0.820 0.078 0.965 0.019 0.165 {'eval_metric': 'mlogloss', 'learning_rate': 0... [0.8857142857142857, 0.8850408850408851, 0.824...
3 CT-Side XGBoost Raw 0.709 0.105 0.720 0.092 0.889 0.029 0.183 {'eval_metric': 'mlogloss', 'learning_rate': 0... [0.6813186813186812, 0.7633477633477633, 0.776...
4 CT-Side XGBoost Orthogonal 0.719 0.100 0.726 0.091 0.854 0.032 0.170 {'eval_metric': 'mlogloss', 'learning_rate': 0... [0.6190476190476191, 0.5712250712250713, 0.771...
5 CT-Side XGBoost Full 0.730 0.097 0.737 0.089 0.874 0.029 0.182 {'eval_metric': 'mlogloss', 'learning_rate': 0... [0.7579365079365079, 0.7594405594405594, 0.776...

3.4 Model Comparison & Champion Selection¶

Aggregate results from all models (including Logistic Regression baseline from Section 2) and identify the best-performing configuration for each side.

In [16]:
# === Aggregate Results: Build Leaderboard ===

# Compile leaderboard (includes LogReg baseline on core roles for fair comparison)
all_results_sorted = compile_model_leaderboard(
    df=df,
    feature_sets=FEATURE_SETS,
    excluded_roles=EXCLUDED_ROLES,
    cv_strategy=cv_strategy,
    model_results={
        'SVM': svm_results,
        'RandomForest': rf_results,
        'XGBoost': xgb_results
    },
    feature_sets_to_test=FEATURE_SETS_TO_TEST,
    tab_dir=TAB_DIR
)

print("=" * 70)
print("MODEL LEADERBOARD (Core Roles Only)")
print("=" * 70)
print("\nRanked by F1-Macro.")

# Display columns (excluding All_Scores for readability)
display_cols = ['Side', 'Model', 'Feature_Set', 'Mean_F1', 'Std_F1', 'Mean_Accuracy', 'Mean_Train_F1', 'Overfitting_Gap']
display(all_results_sorted[display_cols].round(3))
======================================================================
MODEL LEADERBOARD (Core Roles Only)
======================================================================

Ranked by F1-Macro.
Side Model Feature_Set Mean_F1 Std_F1 Mean_Accuracy Mean_Train_F1 Overfitting_Gap
0 CT-Side RandomForest Full 0.764 0.084 0.767 0.863 0.098
1 CT-Side RandomForest Raw 0.756 0.084 0.758 0.870 0.115
2 CT-Side RandomForest Orthogonal 0.751 0.090 0.755 0.861 0.109
3 CT-Side XGBoost Full 0.730 0.097 0.737 0.874 0.143
4 CT-Side XGBoost Orthogonal 0.719 0.100 0.726 0.854 0.135
5 CT-Side XGBoost Raw 0.709 0.105 0.720 0.889 0.180
6 CT-Side SVM Full 0.708 0.091 0.715 0.875 0.166
7 CT-Side SVM Raw 0.704 0.098 0.711 0.867 0.163
8 CT-Side SVM Orthogonal 0.693 0.093 0.699 0.868 0.175
9 CT-Side LogisticRegression Raw 0.688 0.110 0.705 0.811 0.123
10 CT-Side LogisticRegression Full 0.676 0.110 0.689 0.812 0.136
11 CT-Side LogisticRegression Orthogonal 0.670 0.110 0.683 0.806 0.136
12 T-Side LogisticRegression Orthogonal 0.921 0.057 0.922 0.974 0.053
13 T-Side LogisticRegression Full 0.919 0.059 0.919 0.977 0.058
14 T-Side SVM Orthogonal 0.913 0.053 0.910 0.962 0.049
15 T-Side SVM Full 0.907 0.056 0.903 0.965 0.058
16 T-Side LogisticRegression Raw 0.902 0.073 0.903 0.968 0.067
17 T-Side SVM Raw 0.874 0.066 0.872 0.961 0.088
18 T-Side RandomForest Full 0.828 0.084 0.825 0.955 0.127
19 T-Side RandomForest Raw 0.822 0.091 0.822 0.954 0.132
20 T-Side XGBoost Full 0.819 0.080 0.820 0.965 0.147
21 T-Side RandomForest Orthogonal 0.813 0.103 0.813 0.957 0.144
22 T-Side XGBoost Orthogonal 0.812 0.099 0.816 0.973 0.162
23 T-Side XGBoost Raw 0.794 0.084 0.798 0.942 0.148
In [17]:
# === Model Stability Visualisation ===
# Boxplots showing CV score distribution for top configurations per side
# Fixed 0.4-1 axis for honest comparison across sides

# T-Side stability
fig = plot_model_stability_boxplots(
    all_results=all_results_sorted,
    side='t',
    top_n=6,
    fig_dir=FIG_DIR,
    xlim=(0.4, 1.0)
)
plt.show()

# CT-Side stability
fig = plot_model_stability_boxplots(
    all_results=all_results_sorted,
    side='ct',
    top_n=6,
    fig_dir=FIG_DIR,
    xlim=(0.4, 1.0)
)
plt.show()
[Figure: T-side model stability boxplots, top 6 configurations]
[Figure: CT-side model stability boxplots, top 6 configurations]
In [18]:
# === Champion Selection: Best Model per Side ===

# Explicitly select champions (not just highest F1) for interpretability & pipeline consistency
CHAMPION_SELECTION = {
    'T-Side': {'Model': 'LogisticRegression', 'Feature_Set': 'Orthogonal'},
    'CT-Side': {'Model': 'RandomForest', 'Feature_Set': 'Orthogonal'}
}

champions = select_and_save_champion_models(
    leaderboard_df=all_results_sorted,
    df=df,
    feature_sets=FEATURE_SETS,
    excluded_roles=EXCLUDED_ROLES,
    model_dir=MODEL_DIR,
    champion_criteria=CHAMPION_SELECTION
)
======================================================================
CHAMPION MODELS (Best F1-Macro per Side)
======================================================================

T-Side:
  Model: LogisticRegression
  Feature Set: Orthogonal
  F1-Macro: 0.921 ± 0.057
  Accuracy: 0.922 ± 0.054
  Hyperparameters: Default (baseline, no tuning)
  Saved to: P:\cs2-playstyle-analysis-2024\results\classification\models\champion_t_LogisticRegression.joblib

CT-Side:
  Model: RandomForest
  Feature Set: Orthogonal
  F1-Macro: 0.751 ± 0.090
  Accuracy: 0.755 ± 0.087
  Hyperparameters: {'class_weight': 'balanced', 'max_depth': 2, 'max_features': 'sqrt', 'min_samples_leaf': 8, 'n_estimators': 100}
  Saved to: P:\cs2-playstyle-analysis-2024\results\classification\models\champion_ct_RandomForest.joblib

======================================================================

Section 3 Summary & Champion Selection¶

We evaluated four model families (Logistic Regression, SVM, Random Forest, XGBoost) using nested cross-validation to identify the optimal classifier for each side.

1. T-Side Champion: Logistic Regression (Orthogonal)¶

  • Performance: F1-Macro 0.921 ± 0.06 (Top of Leaderboard)
  • Justification:
    • Simplicity Wins: Linear models matched or outperformed complex ensembles (XGBoost F1 ~0.82), proving that T-side roles are linearly separable in our feature space.
    • Interpretability: Logistic Regression offers direct coefficient interpretability, allowing us to explain exactly why a player is classified as a "Lurker" (e.g., +1.75 coefficient on ADNT).
    • Stability: Lowest standard deviation (±0.06) indicates robust generalisation across different player splits.

2. CT-Side Champion: Random Forest (Orthogonal)¶

  • Performance: F1-Macro 0.751 ± 0.09
  • Justification:
    • Performance vs Interpretability Trade-off: Random Forest (Full) achieved marginally higher mean F1 (0.764) with slightly lower variance (±0.08). However, the 1.3% difference corresponds to ~1 player classification difference across our cohort and falls within confidence overlap.
    • Why Orthogonal? We accept the modest variance trade-off (±0.09 vs ±0.08) in exchange for:
      1. A unified feature set with T-side, simplifying cross-interpretation
      2. Avoiding correlated features (ADNT ↔ ADAT) that can cause "vote-splitting" in tree-based importance rankings
      3. Parsimony (6 features vs 7)
    • Complexity Required: Tree-based models substantially outperformed the linear baseline (0.75 vs 0.67), confirming CT roles require non-linear decision boundaries.

3. Methodological Note: Constraints for Small Data¶

Given the small sample size (N=84), we deliberately constrained our tree-based models to shallow depths (max_depth of 2–5 in the grid, with 2–3 selected in practice) and high leaf requirements.

  • Why: Preliminary tests showed that standard depths (unconstrained or deep trees) allowed models to memorise individual players, leading to train-test gaps of >20%.
  • Result: By strictly regularising the hyperparameters in our grid search, we maintained healthy train-test gaps (~10%), ensuring the selected champions are learning generalisable role archetypes rather than overfitting to noise.

Next Steps: We proceed to Section 4 to train these two champion models on the full (filtered) dataset. We will then perform a "post-mortem" analysis using Confusion Matrices to identify specific misclassification patterns and SHAP/Coefficient analysis to validate the behavioural drivers of each role.

4. Model Interpretation¶

TL;DR: We analyse Random Forest models for both T and CT sides to enable consistent non-linear feature interpretation. We use Gini importance for a global view and SHAP values for directional insights. Finally, we analyse prediction confidence to identify misclassification patterns.

Why Random Forest for Both Sides?

Although Logistic Regression was the statistical champion for T-side (F1=0.92), we also analyse the T-side Random Forest (Orthogonal) model here.

Reasons:

  1. Consistency: Using the same model architecture allows for direct comparison of feature importance dynamics between T and CT sides.
  2. Non-Linearity: Trees can capture complex interactions (e.g., "high aggression is good ONLY IF trading is also high") that linear models miss.
  3. Validation: If the RF finds similar patterns to the linear model, it reinforces our findings.

Interpretability Approaches:

  • Gini Importance: "Which features are used most often?" (Magnitude)
  • SHAP Values: "How does this feature value affect the prediction?" (Direction & Magnitude)
  • Confidence Analysis: "Is the model confused or confidently wrong?" (Error Diagnosis)

4.1 Setup & Model Retrieval¶

We load the existing CT-side champion. For T-side, we retrieve the best hyperparameters from the leaderboard and retrain a fresh Random Forest on the filtered dataset (Core Roles) to ensure fair analysis.

In [19]:
# === Section 4 Setup: Prepare Models ===

# Load/retrain models for interpretation
t_model = prepare_interpretation_model(
    side='t',
    model_name='RandomForest',
    feature_set_name='Orthogonal',
    df=df,
    feature_sets=FEATURE_SETS,
    excluded_roles=EXCLUDED_ROLES,
    model_dir=MODEL_DIR,
    tab_dir=TAB_DIR,
    load_if_saved=False  # Retrain T-side for interpretation
)

ct_model = prepare_interpretation_model(
    side='ct',
    model_name='RandomForest',
    feature_set_name='Orthogonal',
    df=df,
    feature_sets=FEATURE_SETS,
    excluded_roles=EXCLUDED_ROLES,
    model_dir=MODEL_DIR,
    tab_dir=TAB_DIR,
    load_if_saved=True  # Load saved CT champion
)

# Unpack for downstream compatibility
champion_t_rf = t_model['pipeline']
champion_ct = ct_model['pipeline']
X_t, y_t = t_model['X'], t_model['y']
X_ct, y_ct = ct_model['X'], ct_model['y']
feature_names_t = t_model['feature_names']
feature_names_ct = ct_model['feature_names']
champion_data = {'t': t_model, 'ct': ct_model}

print("\nModels ready for interpretation:")
print(f"  T-Side:  RandomForest (F1 ~{t_model['f1_score']:.3f})")
print(f"  CT-Side: {type(ct_model['pipeline'].named_steps['clf']).__name__} (F1 ~{ct_model['f1_score']:.3f})")
Retraining T-Side RandomForest with params: {'class_weight': 'balanced', 'max_depth': 2, 'max_features': 'sqrt', 'min_samples_leaf': 5, 'n_estimators': 100}
Loading saved model from P:\cs2-playstyle-analysis-2024\results\classification\models\champion_ct_RandomForest.joblib

Models ready for interpretation:
  T-Side:  RandomForest (F1 ~0.813)
  CT-Side: RandomForestClassifier (F1 ~0.751)

4.2 Global Feature Importance (Gini)¶

We compare which features drive the decision trees for each side. Gini importance measures how often a feature is used to split nodes and how much it reduces impurity.
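compare_rf_feature_importance is a project helper; pulling the raw Gini importances out of a fitted pipeline is a short operation (the 'clf' step name matches the pipeline usage later in this section):

import pandas as pd

rf = champion_ct.named_steps['clf']   # the RandomForestClassifier inside the pipeline
gini = pd.Series(rf.feature_importances_, index=feature_names_ct).sort_values(ascending=False)
print(gini)                           # impurity-based importances (they sum to 1.0)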

In [20]:
# === Global Feature Importance (Gini) ===

compare_rf_feature_importance(
    pipeline_t=champion_t_rf,
    feature_names_t=feature_names_t,
    pipeline_ct=champion_ct,
    feature_names_ct=feature_names_ct,
    fig_dir=FIG_DIR
)
plt.show()
[Figure: Gini feature importance comparison, T-side vs CT-side]

4.3 Directional Feature Analysis (SHAP)¶

Gini importance tells us what matters, but not how. We use SHAP beeswarm plots to reveal directionality (e.g., does high ADNT predict Lurker or Spacetaker?).

How to read these plots:

  • Each Dot: Represents a single player from the dataset.
  • Rows: Features (variables).
  • X-axis (SHAP value): Impact on model output.
    • Right (Positive): Pushes prediction towards this role.
    • Left (Negative): Pushes prediction away from this role.
  • Colour: Actual Feature Value.
    • Red: High value for that feature.
    • Blue: Low value for that feature.

Example: If the "Lurker" plot shows red dots (High ADNT) on the right (positive SHAP), it means "High isolation increases the probability of being classified as a Lurker".
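plot_shap_beeswarm_grid is project code; the sketch below shows the kind of shap calls it could plausibly wrap. It assumes the pre-model steps can be applied via pipeline slicing and handles both list- and array-shaped SHAP outputs (the return shape changed between shap versions):

import shap

def beeswarm_per_class(pipeline, X, feature_names):
    rf = pipeline.named_steps['clf']
    X_scaled = pipeline[:-1].transform(X)  # apply pre-model (scaling) steps only
    shap_vals = shap.TreeExplainer(rf).shap_values(X_scaled)
    for k, role in enumerate(rf.classes_):
        # Older shap returns a list (one array per class); newer versions
        # return an (n_samples, n_features, n_classes) array.
        vals = shap_vals[k] if isinstance(shap_vals, list) else shap_vals[..., k]
        print(f"Beeswarm for class: {role}")
        shap.summary_plot(vals, X_scaled, feature_names=feature_names)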

In [21]:
# === Separate SHAP Analysis (T & CT) ===

# T-Side SHAP
print("Generating T-Side SHAP Plot...")
fig_t = plot_shap_beeswarm_grid(
    pipeline=champion_t_rf,
    X=X_t,
    feature_names=feature_names_t,
    side='t',
    fig_dir=FIG_DIR
)
plt.show()

# CT-Side SHAP
print("Generating CT-Side SHAP Plot...")
fig_ct = plot_shap_beeswarm_grid(
    pipeline=champion_ct,
    X=X_ct,
    feature_names=feature_names_ct,
    side='ct',
    fig_dir=FIG_DIR
)
plt.show()
Generating T-Side SHAP Plot...
[Figure: T-side SHAP beeswarm grid]
Generating CT-Side SHAP Plot...
[Figure: CT-side SHAP beeswarm grid]

Observations from SHAP Analysis:

T-Side Roles:

  • Lurker: Strongly defined by High ADNT (Isolation). The red dots on the far right of the ADNT row confirm that isolation is the primary driver. Lurkers also show high ADAT Residuals, meaning they position further from the team centre than expected given their ADNT.
  • Spacetaker: Driven by High OAP (Opening Attempts) and Low TAPD (Time Alive). This confirms the "entry fragger" profile: aggressive first-contact seeking that often leads to earlier deaths.
  • AWPer: Characterised by Low ADNT (Pack play), Low OAP (Opening Attempts) and High TAPD (Survival). The blue dots on the ADNT row indicate that playing close to teammates is a key defining feature.

CT-Side Roles:

  • Anchor: The mirror of the Lurker, defined by High ADNT (Isolation) and High ADAT Residuals (Static/Peripheral holding).
  • Rotator: Defined by Low ADNT (Pack play) and Low TAPD (Active rotation/support leading to higher engagement risk).
  • AWPer (CT): Distinct from T-side AWPers, they show Higher OAP (getting more opening picks) but still maintain High TAPD (survival), reflecting the "posted up" nature of defensive sniping.

4.4 Probability Analysis (The "Discrimination Plot")¶

We visualise the model's confidence for every player to distinguish between "Clear Wins", "Near Misses", and "Confident Errors".

Methodology: Repeated Cross-Validation

To ensure robust and unbiased probability estimates, we use Repeated Stratified K-Fold (4 splits × 20 repeats).

  1. For each player, we generate 20 independent out-of-sample predictions (trained on the other 75% of data).
  2. We average these probabilities to get a stable estimate of the model's confidence.
  3. Error bars represent the standard deviation across repeats, quantifying the stability of the classification.

High standard deviation indicates that a player's classification is sensitive to the specific training data split (an "Unstable" classification).
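A minimal sketch of this procedure (the project's get_repeated_cv_predictions follows the same pattern; the helper name and defaults here are illustrative):

import numpy as np
from sklearn.base import clone
from sklearn.model_selection import StratifiedKFold

def repeated_cv_probas(model, X, y, n_splits=4, n_repeats=20, seed=42):
    X, y = np.asarray(X), np.asarray(y)
    n_classes = len(np.unique(y))
    probas = np.zeros((n_repeats, len(y), n_classes))
    for r in range(n_repeats):
        # A fresh shuffle per repeat; every player is out-of-sample exactly once.
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed + r)
        for train_idx, test_idx in skf.split(X, y):
            est = clone(model).fit(X[train_idx], y[train_idx])
            probas[r, test_idx] = est.predict_proba(X[test_idx])
    # Mean = stable confidence estimate; std = sensitivity to the split.
    return probas.mean(axis=0), probas.std(axis=0)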

In [22]:
# === T-Side Probability Analysis ===


# 1. Generate Repeated CV Predictions
print("Generating T-Side Predictions...")
mean_probas_t, std_probas_t = get_repeated_cv_predictions(
    model=champion_t_rf,
    X=X_t,
    y=y_t
)

# Get player names
player_names_t = champion_data['t']['df']['player_name'].tolist()

# 2. Plot with Stability
fig = plot_prediction_confidence(
    y_true=y_t,
    mean_probas=mean_probas_t,
    std_probas=std_probas_t,
    class_names=champion_t_rf.classes_,
    side='t',
    player_names=player_names_t,
    fig_dir=FIG_DIR
)
plt.show()

# === CT-Side Probability Analysis ===
print("Generating CT-Side Predictions...")
mean_probas_ct, std_probas_ct = get_repeated_cv_predictions(
    model=champion_ct,
    X=X_ct,
    y=y_ct
)

# Get player names
player_names_ct = champion_data['ct']['df']['player_name'].tolist()

fig = plot_prediction_confidence(
    y_true=y_ct,
    mean_probas=mean_probas_ct,
    std_probas=std_probas_ct,
    class_names=champion_ct.classes_,
    side='ct',
    player_names=player_names_ct,
    fig_dir=FIG_DIR
)
plt.show()
Generating T-Side Predictions...
[Figure: T-side prediction confidence plot]
Generating CT-Side Predictions...
[Figure: CT-side prediction confidence plot]

Observations: Probability & Misclassification Analysis¶

Overall Trends:

As anticipated from the F1 scores and previous analysis, the CT side exhibits a higher rate of misclassification compared to the T side. Specifically, the boundary between Rotators and AWPers appears the most porous, with the model frequently confusing these roles.

Interestingly, IGLs appear to be misclassified quite frequently (HooXi, apEX, chopper, bLitz, Snax, MAJ3R, biguzera). Although effort was made to use features that do not directly measure personal performance, perhaps there is a confounding effect between IGLs generally performing worse in-game and the distinctiveness of their roles?

T-Side Observations and theories:

  • Brollan (Lurker $\to$ Spacetaker): The model confidently misclassifies Brollan as a Spacetaker. Personally, I had the impression that he was a relatively aggressive player; this high-confidence error suggests a potential mismatch between his assigned label and his actual behavioural metrics this year.
  • Jabbi (Lurker $\to$ Spacetaker): Similarly misclassified but with higher prediction variance, suggesting a unique or hybrid playstyle that defies rigid categorisation.
  • Ultimate (AWPer $\to$ Spacetaker): His misclassification aligns with his reputation as a famously aggressive AWPer in 2024, exhibiting movement patterns closer to a rifler than a sniper.
  • ZywOo (AWPer $\to$ Rotator): Likely misclassified due to his hybrid profile; he is well-known for his proficiency with rifles and willingness to pick them up more frequently than "pure" AWPers.
  • cadiaN (Rotator $\to$ AWPer): Misclassified as an AWPer, which is factually grounded: he served as the primary AWPer for Liquid before transitioning to a rifling role with Astralis mid-year.

CT-Side Observations and theories:

  • Spinx (Anchor $\to$ Rotator): Spinx is the most confidently misclassified player on the CT side. This is a notable case where the model's prediction (Rotator) contradicts the label (Anchor). As discussed in Section 4.5, this likely points to a labelling error in our ground truth rather than a model failure.
  • Ultimate (AWPer $\to$ Rotator): Consistent with his T-side results, his unique, aggressive AWPing style registers as rifler-like behavior to the model.
  • biguzera (Rotator $\to$ AWPer): Confidently misclassified as an AWPer, potentially because, as an IGL, he takes the kind of "Star" positions an AWPer would typically occupy.

4.5 Individual Misclassification Analysis: Waterfall Plots (LOOCV)¶

Waterfall plots are generated with a Leave-One-Out (LOO) fit: for the player being explained, the model is retrained on all other core-role players (excluding ambiguous roles such as Half-Lurker/Mixed). This mirrors the "unseen player" condition of the CV predictions in Section 4.4 and avoids explaining a model that has already memorised the player. Because the LOO model is slightly better-specified (N-1 > 0.75N), SHAP values may be marginally more confident than any single CV fold, but they faithfully explain why the model would misclassify the player when treated as new data.
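Schematically, the LOO fit behind each waterfall looks like this (an illustrative helper, not the project's exact code; plot_comparison_waterfall adds the plotting on top):

import numpy as np
import shap
from sklearn.base import clone

def loo_shap_for_player(pipeline_template, X, y, player_names, player):
    # Refit on everyone except the target player, then explain the model's
    # view of that player as genuinely unseen data.
    train = np.asarray(player_names) != player
    model = clone(pipeline_template).fit(X[train], y[train])
    rf = model.named_steps['clf']
    x_held_out = model[:-1].transform(X[~train])
    shap_vals = shap.TreeExplainer(rf).shap_values(x_held_out)
    return model, shap_vals  # one SHAP block per class for the held-out row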

Also note that many of the remarks made here are speculative and would require individual analysis of the players' matches to be fully justified.

T side waterfall plots¶

We'll start with Brollan and jabbi, Lurkers who were misclassified as Spacetakers.

In [23]:
# Side-by-side comparison of predicted vs. true role for Brollan
plot_comparison_waterfall(
    pipeline_template=champion_t_rf,
    df=df,
    player_name="Brollan",
    feature_names=feature_names_t,
    side='t',
    excluded_role=EXCLUDED_ROLES['t'],
    fig_dir=FIG_DIR,
)
plt.show()
plot_comparison_waterfall(
    pipeline_template=champion_t_rf,
    df=df,
    player_name="jabbi",
    feature_names=feature_names_t,
    side='t',
    excluded_role=EXCLUDED_ROLES['t'],
    fig_dir=FIG_DIR,
)
plt.show()
[Figures: waterfall comparisons for Brollan and jabbi]

Brollan and jabbi have relatively similar playstyles according to the feature set. The main drivers towards the Spacetaker prediction were their high opening attempts (OAP) and low time alive per death (TAPD), both very typical of Spacetakers. Where they deviate is their high isolation from teammates (ADNT), the biggest (and virtually only) factor pulling them towards the Lurker role. This tells us that these players had an isolated but aggressive T-side playstyle. With such a high distance from their team and such high opening attempts, this could point to a problem with their teams keeping up early in rounds.

Next we'll look at Ultimate, an AWPer who was misclassified as a Spacetaker.

In [24]:
plot_comparison_waterfall(
    pipeline_template=champion_t_rf,
    df=df,
    player_name="ultimate",
    feature_names=feature_names_t,
    side='t',
    excluded_role=EXCLUDED_ROLES['t'],
    fig_dir=FIG_DIR,
)
plt.show()
[Figure: waterfall comparison for ultimate (T side)]

Ultimate was known for being an exceptionally aggressive AWPer in 2024, and his feature values corroborate this. He was misclassified as a Spacetaker largely due to dying quickly (low TAPD) and being traded relatively frequently (high PODT).
The table below compares his feature values to those of other AWPers; he consistently places in the tails of the distribution for every feature, indicating he was a significant outlier in his playstyle.

In [25]:
ultimate_t_percentiles = get_player_percentiles(
    df=df,
    player_name="ultimate",
    side='t',
    features=feature_names_t,
    excluded_role=EXCLUDED_ROLES['t'],
)
print("Ultimate vs. labelled role (AWPer):")
display(ultimate_t_percentiles)
Ultimate vs. labelled role (AWPer):
   player_name side role_group          feature      value  percentile   rank
0     ultimate    t      AWPer           tapd_t  59.090479    5.882353   1/17
1     ultimate    t      AWPer            oap_t  19.145299   94.117647  16/17
2     ultimate    t      AWPer           podt_t  25.097174   94.117647  16/17
3     ultimate    t      AWPer           pokt_t  22.911286   11.764706   2/17
4     ultimate    t      AWPer      adnt_rank_t   0.298120    5.882353   1/17
5     ultimate    t      AWPer  adat_residual_t   0.063184   88.235294  15/17
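The percentile column follows the "weak" definition (share of role peers at or below the player's value). A minimal sketch with scipy; the column names here are assumptions based on the table above:

from scipy.stats import percentileofscore

def role_percentile(df, player, role, feature, role_col='role_group'):
    # Percentile of the player's value among all players sharing the role label.
    peers = df.loc[df[role_col] == role, feature]
    value = df.loc[df['player_name'] == player, feature].iloc[0]
    return value, percentileofscore(peers, value, kind='weak')

# e.g. role_percentile(df, 'ultimate', 'AWPer', 'oap_t')  # ~94.1, as above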

CT side waterfall plots¶

Next we'll look at Spinx, an "Anchor" who was "misclassified" as a Rotator.

In [26]:
plot_comparison_waterfall(
    pipeline_template=champion_ct,
    df=df,
    player_name="Spinx",
    feature_names=feature_names_ct,
    side='ct',
    excluded_role=EXCLUDED_ROLES['ct'],
    fig_dir=FIG_DIR,
)
plt.show() 
[Figure: waterfall comparison for Spinx]

The waterfall plot shows that all of Spinx's features match what one would expect of a Rotator, and his positioning (ADNT) combined with his opening attempts (OAP) make him very unlikely to be an Anchor. There is a very good reason for this: he is actually mislabelled. Upon analysing many of his 2024 matches with Vitality, Spinx did appear to play a rotating role on the CT side. This is slightly embarrassing, as the error is mine: at the end of 2024 he switched teams, Harry Richards' positions data (which was used to collect most labels) did not include him, and I must have (wrongly) assumed his CT-side role. Whoops! On the plus side, this demonstrates a potential use case for a model like this: even if it isn't yet accurate enough to classify all players, it can still highlight potential miscategorisations such as this one.

Next we will look at HooXi, an Anchor misclassified as a Rotator.

In [27]:
plot_comparison_waterfall(
    pipeline_template=champion_ct,
    df=df,
    player_name="HooXi",
    feature_names=feature_names_ct,
    side='ct',
    excluded_role=EXCLUDED_ROLES['ct'],
    fig_dir=FIG_DIR,
)
plt.show() 
[Figure: waterfall comparison for HooXi]

HooXi is an interesting case: his closer proximity to teammates (ADNT) and lower time alive per death (TAPD) were the main causes of his classification as a Rotator. His low TAPD could be explained by his notoriously sacrificial playstyle* (also indicated by a very high PODT), but his low isolation (ADNT) is very atypical of an Anchor. Upon analysing a few of his games on G2 this year, it appeared that, depending on the map, HooXi played a mixture of "Anchor" positions (such as B-site on Inferno and Cave on Ancient) and more rotating positions (on maps such as Overpass and Anubis). This leads me to conclude that I (again due to a missing label in my version of the positions data) mislabelled HooXi as an Anchor, when he should have received the more ambiguous "Mixed" role. A second mislabelling indicates that I should have been far more careful when imputing missing labels into the dataset, not just relying on memory and assumptions (probably rushing to meet a university deadline).

*https://www.hltv.org/news/39326/stat-check-g2-throws-roles-out-the-window

Next we will look at Ultimate (just because he's fun), an AWPer who was misclassified as a Rotator.

In [28]:
plot_comparison_waterfall(
    pipeline_template=champion_ct,
    df=df,
    player_name="ultimate",
    feature_names=feature_names_ct,
    side='ct',
    excluded_role=EXCLUDED_ROLES['ct'],
    fig_dir=FIG_DIR,
)
plt.show() 
[Figure: waterfall comparison for ultimate (CT side)]

This is much the same story as on the T side for Ultimate: impressively aggressive, with high opening attempts (OAP > 30%!) and low time alive (TAPD) pushing him towards the Rotator role. His early deaths pull him away from an AWPer classification, along with positioning closer to the average teammate than his nearest-teammate distance would suggest (ADAT Residual), which was the most influential feature when classifying AWPers (see the beeswarm plot in Section 4.3).

Next we will look at malbsMd, a Rotator who was misclassified as an Anchor.

In [29]:
plot_comparison_waterfall(
    pipeline_template=champion_ct,
    df=df,
    player_name="malbsMd",
    feature_names=feature_names_ct,
    side='ct',
    excluded_role=EXCLUDED_ROLES['ct'],
    fig_dir=FIG_DIR,
)
plt.show() 
[Figure: waterfall comparison for malbsMd]

malbsMd presents a curious case. The drive towards Anchor and away from Rotator is his high early-round isolation from his team: he positions far from his nearest teammate (ADNT), and even further from his average teammate than one would expect (ADAT Residual). Even looking at the specific positions he played on each map, he did seem to occupy generally rotate-heavy spots* (e.g. Connector on Mirage, Middle on Ancient, Connector on Anubis). One would have to analyse his specific maps to get a better understanding of this anomaly. My guess would be that in early rounds, more players were stacked closer to the anchoring IGL (HooXi or Snax) to compensate for lower firepower, leaving malbsMd more isolated.

*https://public.tableau.com/app/profile/harry.richards4213/viz/OLDPositionsDatabaseArchived/PositionsDatabaseNER0cs

*https://www.hltv.org/news/39318/official-malbsmd-joins-g2

Next we'll take a look at biguzera, chopper, bLitz and apEX, all IGL-Rotators who were misclassified as AWPers.

In [30]:
plot_comparison_waterfall(
    pipeline_template=champion_ct,
    df=df,
    player_name="biguzera",
    feature_names=feature_names_ct,
    side='ct',
    excluded_role=EXCLUDED_ROLES['ct'],
    fig_dir=FIG_DIR,
)
plt.show() 
plot_comparison_waterfall(
    pipeline_template=champion_ct,
    df=df,
    player_name="chopper",
    feature_names=feature_names_ct,
    side='ct',
    excluded_role=EXCLUDED_ROLES['ct'],
    fig_dir=FIG_DIR,
)
plt.show() 
plot_comparison_waterfall(
    pipeline_template=champion_ct,
    df=df,
    player_name="bLitz",
    feature_names=feature_names_ct,
    side='ct',
    excluded_role=EXCLUDED_ROLES['ct'],
    fig_dir=FIG_DIR,
)
plt.show() 
plot_comparison_waterfall(
    pipeline_template=champion_ct,
    df=df,
    player_name="apEX",
    feature_names=feature_names_ct,
    side='ct',
    excluded_role=EXCLUDED_ROLES['ct'],
    fig_dir=FIG_DIR,
)
plt.show()
[Figures: waterfall comparisons for biguzera, chopper, bLitz and apEX]

These are the four players most confidently misclassified into the AWPer role (see Section 4.4). What makes this interesting is that they are all Rotators, and all in-game leaders (IGLs). This seems to be the smoking gun for what went wrong here. Observing their feature values, they all have a low ADAT Residual, pulling them away from the Rotator classification and towards the AWPer role.

Let's see if this low ADAT Residual value has something to do with their meta-role as IGL.

Visual diagnostic: IGL ADAT (Residual) vs CT role distribution

In [31]:
plot_igl_feature_distribution(
    df=df,
    feature_name="adat_residual_ct",
    side="ct",
    fig_dir=FIG_DIR,
)
plt.show()
[Figure: IGL ADAT (Residual) distribution vs CT role distribution]

This plot is rather telling: it shows two main things about IGLs. Firstly, IGLs are more likely to be Spacetakers; secondly, they tend to position closer to the average teammate than their nearest-teammate proximity would predict, regardless of role.

In other words, IGLs tend to play more "central" positions on the map, at least early in rounds.

This idea is not new; in fact, Harry "NER0cs" Richards points out this centralised-IGL phenomenon in his 2024 article "Why more and more IGLs are taking up the 'supportive rotator' role"*.

The reasoning boils down to how much information the IGL can process from these positions. Central positions allow IGLs to understand what is happening around the map without relying on communicated information or a secondary caller (another player designated to lead in specific circumstances). The trade-off, however, is that many of these positions are high-engagement areas where "Star" players typically excel, meaning an IGL there can become a liability, often not pulling their weight in firepower.

*https://www.hltv.org/news/38747/why-more-and-more-igls-are-taking-up-the-supportive-rotator-role

Section 4 Summary & Interpretation¶

Key Takeaways:

  • Feature Utility: CT-side classification relies on the four non-trade metrics, whereas T-side more widely leverages the feature set.
  • The Power of ADNT: Isolation (ADNT) proved to be the most critical feature for almost all roles (except CT AWPers and Spacetakers). While simple and slightly arbitrary, its interpretability and high signal strength make it a standout "bespoke" metric.
  • CT-Side Ambiguity: The boundary between Rotators and AWPers is the most porous in our model. This confusion is likely driven by the IGL-Centrality phenomenon, where In-Game Leaders play central positions that mimic the spacing profile of snipers.
  • Misclassification as Diagnostic: High-confidence errors often pointed to ground truth labelling issues (e.g., Spinx, HooXi) or unique outlier playstyles (e.g., Ultimate's aggressive AWPing). Despite the mislabels being "human error" (rushed university coursework!), the model's ability to flag them validates its potential as a viable diagnostic tool for role classification.

Detailed Interpretations

1. Signal Diversity & Feature Starvation

  • T-Side (Broad Utility): The model finds useful signal across the entire feature set. Even the lowest-ranked feature (POKT) contributes a meaningful 0.07 Gini importance, indicating that trade behavior is a valid discriminator for attacking roles.
  • CT-Side (More Concentrated Signal): Signal is concentrated in the four non-trade metrics (ADNT, ADAT (R), OAP, TAPD). The drop-off for trade metrics (POKT, PODT) reflects the nature of defensive play: Terrorists typically dictate the terms of engagement, making proactive trading behavior less useful for differentiating between specific CT roles compared to T roles.

2. Behavioral Drivers (SHAP Analysis)

  • T-Side:
    • Lurker: Strongly defined by High Isolation (ADNT).
    • Spacetaker: Defined principally by High Opening Attempts (OAP) and Low Survival (TAPD). Aggressive, as expected!
    • AWPer: Defined by Low Isolation (ADNT) and Low OAP. Plays passively and close to teammates (Unless their name is Ultimate).
  • CT-Side:
    • Anchor: The mirror of the Lurker, defined by High Isolation (ADNT).
    • Rotator: Defined by Low ADNT (Pack play) and Low Survival (TAPD), reflecting the high-engagement nature of rotation play.
    • AWPer: Less distinct than T-side. Defined by Low ADAT Residual ("Centrality") and High Survival (TAPD). Interestingly, the ADNT signal is mixed, suggesting most AWPers effectively play like Rotators while a minority isolate; AWPers may themselves be subcategorisable into rotating and anchoring variants.

3. The "Aggressive/Hybrid" Profiles

  • Ultimate (Aggressive): A statistical outlier across the board. His High OAP and Low TAPD make him an exceptionally aggressive sniper, confusing the model into seeing a Rifler.
  • ZywOo (Hybrid): Misclassified as a Rotator not due to aggression, but because he is a hybrid who picks up rifles more frequently than "pure" AWPers.

4. The IGL-Centrality Phenomenon

  • A cluster of IGLs (biguzera, chopper, bLitz, apEX) were misclassified as AWPers.
  • Root Cause: These players consistently show low ADAT Residuals (positioning closer to the team's centre of mass than expected).
  • Tactical Insight: I believe this reflects IGLs taking "middle-of-the-pack" positions to gather information. The model confuses this "central" behavior with the positioning of an AWPer.

5. Future Potential

  • This "first attempt" demonstrates that even with coarse metrics like OAP and ADNT, we can extract meaningful role signatures.
  • The model's ability to identify outliers (like Ultimate) and labelling errors (like Spinx) shows its potential. With further refinement, it could be used to classify a larger pool of players (potentially commercially, for casual players) or as a supplementary tool for scouting players.

5. Synthesis & Conclusion¶

This notebook has demonstrated that the playstyle features in the dataset contain sufficient signal to classify CS2 professional player roles with high accuracy, particularly on the T-side.

5.1 Methodology Recap & Feature Engineering¶

To ensure statistically robust feature values, we filtered for players with a minimum of 40 maps used to compute their statistics (as determined in the EDA notebook).

To enable this analysis on a small cohort (N=84), we employed a rigorous 4-split × 20-repeat Nested Cross-Validation strategy (80 total folds). This ensured that our performance estimates were stable and that our champion models learned generalisable role archetypes rather than memorising individual player data.

A key innovation was the ADAT (Residual) feature. By training a global linear regression ($ADNT \to ADAT$) on the stable cohort, we established a "Pro Standard" for how much a player should be isolated from the team centre given their distance from their nearest teammate. The residual measures deviation from this professional norm.
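A minimal sketch of this construction (the raw column names are assumptions; the stored feature in the notebook is adat_residual_*):

from sklearn.linear_model import LinearRegression

def adat_residual(df, side='t'):
    # Fit the global "Pro Standard" line ADNT -> ADAT on the stable cohort,
    # then keep each player's deviation from it.
    X = df[[f'adnt_{side}']].to_numpy()  # distance to nearest teammate
    y = df[f'adat_{side}'].to_numpy()    # distance to average teammate
    fit = LinearRegression().fit(X, y)
    return y - fit.predict(X)            # positive = more peripheral than expected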

Note on Feature Engineering & Data Leakage

Technically, input distribution leakage exists because the ADAT (Residual) feature is calculated using a global Linear Regression model fit on the entire (stable cohort) dataset (including samples that eventually become "Test" data in Cross-Validation). This means the specific slope and intercept used to transform the features were very slightly influenced by the test subjects, violating the strict separation of Train and Test environments.

Despite this theoretical impurity, the approach is retained and justified for three key reasons:

  1. The "Pro Standard" Baseline (Domain Justification): We treat the relationship between Isolation (ADNT) and Centrality (ADAT) as a fixed geometric constraint of high-level CS2, defined by the global population of elite professionals. By fitting on the full stable cohort, we establish a canonical "Ground Truth" for standard positioning. The residual, therefore, measures a player's deviation from the professional norm, not just a statistical deviation from a local training fold.

  2. Unsupervised Nature: The leakage is strictly limited to the relationship between independent variables ($X \to X$). No information regarding the target variable (Player Roles) is leaked. The model is not learning "the answer" from the test set, only the "scale" of the input features.

  3. Feature Stability: Given the small sample size ($N=84$), calculating regression coefficients within each small CV fold (~63 players) would introduce high variance, causing the definition of the feature to shift wildly between folds. A global fit ensures the feature definition remains consistent and interpretable across the analysis.
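Point 3 can be sanity-checked empirically by refitting the $ADNT \to ADAT$ regression inside each training fold and measuring how much the slope wanders; a sketch, taking 1-D feature arrays as in the residual sketch above:

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import RepeatedKFold

def slope_spread(adnt, adat, n_splits=4, n_repeats=20, seed=42):
    # Distribution of the ADNT -> ADAT slope when refit per training fold.
    slopes = []
    rkf = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=seed)
    for train_idx, _ in rkf.split(adnt):
        fit = LinearRegression().fit(adnt[train_idx].reshape(-1, 1), adat[train_idx])
        slopes.append(fit.coef_[0])
    return float(np.mean(slopes)), float(np.std(slopes))  # high std = unstable feature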

5.2 Results Synthesis¶

Baseline & Feature Sets:

  • Our baseline Logistic Regression significantly outperformed the stratified dummy classifier (Cohen's $d \approx 5.7$ for T-side, $\approx 3.1$ for CT-side; computed as sketched after this list), proving that these features capture real playstyle differences, not random noise.
  • No clear "winner" emerged among the feature sets (Raw, Orthogonal, Full) at the baseline stage; differences between these sets were within 0.02 F1 (~1-2 players). This justified retaining all three sets for advanced model testing.
  • The exclusion of "Ambiguous" roles (Half-Lurker on T-side, Mixed on CT-side) improved F1-Macro by +0.28 (T) and +0.21 (CT). This confirms that while "Core" roles (Lurker, Spacetaker, Anchor, Rotator, AWPer) are statistically distinct, the ambiguous roles represent hybrid states that blur the boundaries of our taxonomy.
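For reference, the effect size quoted in the first bullet is the pooled-standard-deviation Cohen's d over fold scores; a minimal sketch (the fold-score array names are illustrative):

import numpy as np

def cohens_d(a, b):
    a, b = np.asarray(a, dtype=float), np.asarray(b, dtype=float)
    pooled_sd = np.sqrt(((len(a) - 1) * a.var(ddof=1) + (len(b) - 1) * b.var(ddof=1))
                        / (len(a) + len(b) - 2))
    return (a.mean() - b.mean()) / pooled_sd

# e.g. cohens_d(f1_folds_logreg_t, f1_folds_dummy_t)  # ~5.7 reported above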

Model Performance & Nature:

  • T-Side: Best modeled by Logistic Regression (F1 ≈ 0.92). The roles are linearly separable, with all features (including trade metrics like POKT) contributing useful signal. Simplicity wins here; complex ensembles offered no improvement.
  • CT-Side: Best modeled by Random Forest (F1 ≈ 0.75). Defensive roles require non-linear decision boundaries, relying heavily on just four metrics (positioning and aggression: ADNT, ADAT_Residual, OAP, TAPD) while trade metrics provide little discriminatory value.

5.3 Key Insights & The "IGL Confound"¶

The misclassifications themselves provided some of the richest insights:

  1. The "IGL Centrality" Phenomenon: A cluster of Rotator-IGLs (biguzera, chopper, bLitz, apEX) were confidently misclassified as AWPers. Our analysis revealed this is due to Low ADAT Residuals: IGLs tend to play "central" positions to maximise information processing. The model confuses this centralised support-rotator positioning with the nature of defensive AWPing.

  2. Outliers vs Archetypes: Ultimate stands out as an exceptionally aggressive AWPer. His classification as a Spacetaker/Rotator (despite being an AWPer) highlights his statistical uniqueness: an aggressive sniper (OAP > 30%) who breaks the mould of the passive hold.

  3. Diagnostic Value: High-confidence errors often pointed to ground truth labelling issues (e.g., Spinx labelled as Anchor but exhibiting Rotator statistics). This validates the model's potential as a "sanity check" tool for manual labelling efforts.

5.4 Conclusion & Future Directions¶

Data Integrity Note: While the base dataset (data/raw/cs2_playstyle_roles_2024.csv) has been corrected, this notebook deliberately injected the original errors for HooXi and Spinx. This "controlled fault injection" allows us to demonstrate the model's diagnostic value: by flagging these players as confident misclassifications (e.g., Spinx confidently predicted as Rotator despite the injected "Anchor" label), the model successfully identified the data quality issues.

This notebook serves as a successful proof of concept: player roles in CS2 are not just theoretical labels but measurable statistical clusters. We have provided validation for:

  • A. The role labels (they correspond to distinct behavioral profiles).
  • B. The features (they contain discriminatory signal).
  • C. The modelling approach (ML classification is feasible even with limited data).

It's not perfect—we obviously got some misclassifications even with the best models—but as a first pass at ML role classification, it has served its purpose.

Limitations & Future Work:

  • Data Availability: We are constrained by the organisation of the professional scene. Lower-tier teams often lack the consistent role structures of elite teams (roles may be less distinct or more fluid), and parsing enough maps (>40) for stable metrics is a challenge for teams that don't attend many events. Furthermore, accurate role labels require manual curation (e.g., Harry Richards' lovely Positions Database), which cannot scale to all teams.
  • Feature Expansion: Future iterations could include weapon-specific features (e.g., "% Kills with AWP" would obviously significantly boost AWPer classification and resolve the Ultimate/IGL confusion) or more granular/creative data that could reveal subtler differences between rifler playstyles.

Commercial Application: Finally, this modelling approach has potential for broader application. Integrating role classification into commercial statistic services for casual players (e.g., Leetify, Scope.gg, CSStats) could satisfy the "personal identity" aspect of Blumler and Katz's Uses and Gratifications Theory. A feature like "What's my Role" bridges the gap between abstract statistics and personal narrative, giving players a richer understanding of their own gameplay.