Source code for aplusml.plot

"""
Plotting functions for visualizing simulation results.

Some quick notes:
    - For all functions, the `df_preds` dataframe must contain two columns -- 'y' (ground truth) and 'y_hat' (predicted labels).
    - For all functions, the `utilities` dictionary must contain four keys -- 'tp', 'fp', 'tn', 'fn' -- which map to the utility values for each class.
"""
from typing import Tuple
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.metrics
import sklearn.calibration
from sklearn.neighbors import KernelDensity
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import plotnine as p9
import warnings

plt.style.use('ggplot')
warnings.filterwarnings( "ignore", module = "plotnine\..*" )

################################################
# Simulation analysis
################################################
[docs] def plot_mean_utility_v_threshold(title: str, df: pd.DataFrame, label_sort_order: list[str] = None, label_names: list[str] = None, label_title: str = '') -> p9.ggplot: """Plot mean utility vs. threshold Args: title (str): Title of the plot df (pd.DataFrame): Output of `run.run_test()`. Columns: threshold, mean_utility, std_utility, sem_utility, mean_work_per_timestep, label label_sort_order (list[str], optional): Custom ordering for x-axis labels. Defaults to None. label_names (list[str], optional): Display names for labels. Defaults to None. label_title (str, optional): Title of label legend. Defaults to None. Returns: p9.ggplot: Plot object """ df = df.copy() df['label'].fillna('N/A', inplace=True) df['label'] = pd.Categorical(df['label'], categories=label_sort_order if label_sort_order is not None else pd.unique(df['label'])) if not label_names: label_names = df['label'].unique() # Reorder labels ordered from lowest to highest max(mean_utility)) p = ( p9.ggplot(df, p9.aes(y='mean_utility', x='threshold', color='label')) + p9.geom_ribbon(p9.aes(ymin='mean_utility - sem_utility', ymax='mean_utility + sem_utility', fill='label'), alpha=0.3, outline_type = None, color=None) + p9.geom_point(size=0.5, show_legend=False) + p9.labs(x="Model Cutoff Threshold", y="Achieved Utility Per Patient", title=f"{title}") + p9.scale_fill_discrete(name = label_title or 'Setting', labels=label_names) ) return p
[docs] def plot_dodged_bar_mean_utilities(title: str, df: pd.DataFrame, label_sort_order: list[str] = None, label_names: list[str] = None, color_sort_order: list[str] = None, color_names: list[str] = None, is_percent_of_optimistic: bool = False, x_label: str = None) -> p9.ggplot: """Plot relative utility using the optimistic model as a baseline. Creates a bar plot where bars are grouped by label (x-axis) and colored by a secondary category. Utility values are measured relative to a baseline, optionally as a percentage of the optimistic model's performance. Args: title (str): Title of the plot df (pd.DataFrame): DataFrame containing columns: y, label, color label_sort_order (list[str], optional): Custom ordering for x-axis labels. Defaults to None. label_names (list[str], optional): Display names for labels. Defaults to None. color_sort_order (list[str], optional): Custom ordering for color categories. Defaults to None. color_names (list[str], optional): Display names for color categories. Defaults to None. is_percent_of_optimistic (bool, optional): If True, show utilities as percentage of optimistic model. Defaults to False. x_label (str, optional): Custom x-axis label. Defaults to None. Returns: p9.ggplot: A plotnine plot object showing grouped bars of relative utilities """ df = df.copy() # Sort utilities baseline = df['y'].min() if is_percent_of_optimistic: # We scale everything by the 'optimistic' pathway that yields the max utility (since there may be multiple 'optimistic' rows) utility_optimistic = df[df['label'] == 'optimistic']['y'].max() df['y'] = (df['y'] - baseline) / (utility_optimistic - baseline) else: df['y'] = df['y'] - baseline df = df.sort_values('y') # NOTE: need to `fillna` b/c Pandas doesn't let you set None as a categorical value df['label'].fillna('N/A', inplace=True) df['label'] = pd.Categorical(df['label'], categories=label_sort_order if label_sort_order is not None else pd.unique(df['label'])) if not label_names: label_names = df['label'].unique() df['color'].fillna('N/A', inplace=True) df['color'] = pd.Categorical(df['color'], categories=color_sort_order if color_sort_order is not None else pd.unique(df['color'])) if not color_names: color_names = df['color'].unique() # Make plot p = ( p9.ggplot(df, p9.aes(x = 'label', y = 'y', fill = 'color')) + p9.geom_col(stat='identity', position='dodge') + p9.labs(x = f"{title}" if not x_label else x_label, y = f"Achieved Utility Per Patient Over 'Treat None'\n { 'as Fraction of Optimistic Pathway Utility' if is_percent_of_optimistic else ''}", title = f"{title}", color='') + p9.scale_fill_discrete(name = "Model", labels=color_names) ) return p
[docs] def plot_line_mean_utilities(title: str, df: pd.DataFrame, group_sort_order: list = None, groups_to_drop: list = None, label_sort_order: list = None, color_sort_order: list = None, color_names: list[str] = None, shape_sort_order: list = None, is_percent_of_optimistic: bool = False, color_title: str = None, shape_title: str = None, x_label: str = None) -> p9.ggplot: """Creates a line plot comparing mean utilities across different groups and settings. This function creates a line plot where each line represents a group of related points. Lines can be differentiated by color and points by shape. Unlike the dodged bar plot, this supports multiple grouping dimensions. Args: title (str): Title of the plot df (pd.DataFrame): DataFrame containing at minimum these columns: - y: numeric utility values - label: categories for x-axis - group: groups points that should be connected by lines Optional columns: - color: categories for line colors - shape: categories for point shapes group_sort_order (list, optional): Custom ordering for line groups. Defaults to None. groups_to_drop (list, optional): Groups to exclude from plot. Defaults to None. label_sort_order (list, optional): Custom ordering for x-axis labels. Defaults to None. color_sort_order (list, optional): Custom ordering for colors. Defaults to None. color_names (list[str], optional): Display names for color categories. Defaults to None. shape_sort_order (list, optional): Custom ordering for point shapes. Defaults to None. is_percent_of_optimistic (bool, optional): If True, show utilities as percentage of optimistic model. Defaults to False. color_title (str, optional): Title for color legend. Defaults to None. shape_title (str, optional): Title for shape legend. Defaults to None. x_label (str, optional): Custom x-axis label. Defaults to None. Returns: p9.ggplot: A plotnine plot object showing utility comparisons across groups """ df = df.copy() # Sort utilities baseline = df['y'].min() if is_percent_of_optimistic: # We scale everything by the 'optimistic' pathway that yields the max utility (since there may be multiple 'optimistic' rows) utility_optimistic = df[df['label'] == 'optimistic']['y'].max() df['y'] = 100*(df['y'] - baseline) / (utility_optimistic - baseline) else: df['y'] = df['y'] - baseline df = df.sort_values('y') # NOTE: need to `fillna` b/c Pandas doesn't let you set None as a categorical value df['label'] = pd.Categorical(df['label'], categories=label_sort_order if label_sort_order is not None else pd.unique(df['label'])) df['group'].fillna('N/A', inplace=True) df['group'] = pd.Categorical(df['group'], categories=group_sort_order if group_sort_order is not None else pd.unique(df['group'])) if 'color' in df: df['color'].fillna('N/A', inplace=True) df['color'] = pd.Categorical(df['color'], categories=color_sort_order if color_sort_order is not None else pd.unique(df['color'])) if 'shape' in df: df['shape'].fillna('N/A', inplace=True) df['shape'] = pd.Categorical(df['shape'], categories=shape_sort_order if shape_sort_order is not None else pd.unique(df['shape'])) # Drop certain groups (if specified) if groups_to_drop is not None: for g in groups_to_drop: df = df[df['group'] != g] if not color_names: color_names = df['color'].unique() # Make plot p = ( p9.ggplot(df, p9.aes(**{ 'x': 'label', 'y': 'y', 'group': 'group', **({ 'color': 'color' } if 'color' in df else {}), **({ 'shape': 'shape' } if 'shape' in df else {}), })) + p9.geom_point(size=2) + p9.geom_line(size=0.5) + p9.labs(x = f"{title}" if not x_label else x_label, y = f"Achieved Utility Over Baseline { ' v. Optimistic (%)' if is_percent_of_optimistic else ''}", title = f"{title}", color=color_title if color_title else '', shape=shape_title if shape_title else '') + p9.scale_color_hue(name = "Model", labels = color_names) ) return p
[docs] def plot_bar_mean_utilities(title: str, plot_avg_utilities: dict[float]) -> p9.ggplot: """Plots bar chart of mean utilities for different settings Args: title (str): Title of the plot plot_avg_utilities (dict[float]): Dictionary of mean utilities for different settings Returns: p9.ggplot: A plotnine plot object showing bar chart of mean utilities """ # Sort utilities sorted_plot_avg_utilities = sorted(list(plot_avg_utilities.items()), key = lambda x: x[1]) labels = [x[0] for x in sorted_plot_avg_utilities] values = np.array([x[1] for x in sorted_plot_avg_utilities]) baseline = np.min(list(plot_avg_utilities.values())) percents = 100*(values - baseline) / (plot_avg_utilities['optimistic'] - baseline) # Dataframe df = pd.DataFrame() df['labels'] = labels df['values'] = list(values) df['percents'] = list(percents) df['labels'] = pd.Categorical(df['labels'], categories=pd.unique(df['labels'])) df = df.sort_values('percents') # Make plot p = ( p9.ggplot(df, p9.aes(x = 'labels', y = 'percents')) + p9.geom_bar(stat='identity', fill="lightblue") + p9.labs(x = f"{title}", y = "Achieved Relative Utility Over Baseline v. Optimistic (%)", title = f"Impact of {title}") ) return p
[docs] def plot_line_compare_multiple_settings(title: str, df: list[pd.DataFrame], x_label: str = None, line_labels: list[str] = None) -> p9.ggplot: """Plots multiple lines on the same x- and y-axis. Used to generate Figure 2 from Jung et al. 2021 Args: title (str): Title of the plot df (list[pd.DataFrame]): List of DataFrames, each containing columns: x, y, line x_label (str, optional): Label for the x-axis. Defaults to None. line_labels (list[str], optional): Labels for the lines. Defaults to None. Returns: p9.ggplot: A plotnine plot object showing multiple lines """ p = ( p9.ggplot(df, p9.aes(x = 'x', y = 'y', color = 'line', group = 'line')) + p9.geom_point(size = 1) + p9.geom_line(size = 1) + (p9.scale_color_hue(labels=line_labels) if line_labels else ()) + p9.labs(x = f"{x_label if x_label else ''}", y = "Change in Achieved Utility v. Baseline", title = f"{title}", color = '') + p9.theme( legend_direction='vertical', legend_position = (0.75, 0.75), legend_background=p9.element_rect(fill='white'), legend_title=p9.element_blank(), ) ) return p
################################################ # Theoretical Utility Analysis ################################################ PADDING = 0.05 # How much padding to add to plots that get bounded by (0,1) - e.g. ROC Curve
[docs] def get_df_utility_from_df_preds(df_preds: pd.DataFrame, utilities: dict, thresholds: np.ndarray, is_add_0_and_1: bool = True ) -> pd.DataFrame: """Adds metrics (Utilities + TP/FP/TN/FN) to DataFrame of predictions Args: df_preds (pd.DataFrame): DataFrame containing 'y_hat' (predictions) and 'y' (ground truth) columns utilities (dict): Dictionary containing utility values for each class. Must contain four keys: 'tp', 'fp', 'tn', 'fn' thresholds (np.ndarray): Array of model thresholds to consider is_add_0_and_1 (bool): If TRUE, add 0 and 1 to the start/end of the `thresholds` array Returns: pd.DataFrame: DataFrame that mirrors "df_preds", but with additional columns """ utility_rows = [] thresholds = np.sort(np.array(list(set(thresholds).union(set([0,1] if is_add_0_and_1 else []))))) for t in thresholds: preds = df_preds['y_hat'] >= t tp = np.sum((df_preds['y'] == preds).values & (preds == 1).values) fp = np.sum((df_preds['y'] != preds).values & (preds == 1).values) tn = np.sum((df_preds['y'] == preds).values & (preds == 0).values) fn = np.sum((df_preds['y'] != preds).values & (preds == 0).values) ppv = tp / (tp + fp) if (tp + fp) > 0 else 0 tpr = tp / (tp + fn) if (tp + fn) > 0 else 0 fpr = fp / (fp + tn) if (fp + tn) > 0 else 0 utility_rows.append({ 'threshold' : t, 'prevalence' : (tp + fn) / (tp + fp + tn + fn) if (tp + fp + tn + fn) > 0 else 0, # Proportion of y's that are TRUE 'ppv' : ppv, # Same as precision 'precision' : ppv, # Same as PPV 'tpr' : tpr, # Same as recall 'recall' : tpr, # Same as TPR 'fpr' : fpr, 'tp' : tp, 'fp' : fp, 'tn' : tn, 'fn' : fn, 'total' : tp + fp + tn + fn, 'work' : tp + fp, 'unit_utility' : (utilities['tp'] * tp + utilities['fp'] * fp + utilities['tn'] * tn + utilities['fn'] * fn) / (tp + fp + tn + fn), }) df_utility = pd.DataFrame(utility_rows) return df_utility
[docs] def plot_hist_pred(df_preds: pd.DataFrame, ax: plt.Axes = None) -> plt.figure: """Generate Histogram of predictions - x-axis = predictions ('y_hat') - y-axis = density Args: df_preds (pd.DataFrame): DataFrame containing 'y_hat' (predictions) and 'y' (ground truth) columns ax (plt.Axes, optional): Matplotlib axes to plot on. If None, creates new figure. Defaults to None. Returns: plt.figure: Matplotlib figure object containing the plot """ fig, ax = plt.subplots() if ax is None else (None, ax) # Plot ax.hist(df_preds['y_hat'].values, bins=50) ax.set_title(f"Histogram of Predictions", fontdict={'fontsize' : 12}) ax.set_ylabel("Count", fontdict={'fontsize' : 10}) ax.set_xlabel("Prediction", fontdict={'fontsize' : 10}) return fig
[docs] def plot_pred_v_true(df_preds: pd.DataFrame, ax: plt.Axes = None) -> plt.figure: """Generate a scatter plot comparing predicted vs true values with density coloring. Args: df_preds (pd.DataFrame): DataFrame containing 'y_hat' (predictions) and 'y' (ground truth) columns ax (plt.Axes, optional): Matplotlib axes to plot on. If None, creates new figure. Defaults to None. Returns: plt.figure: Matplotlib figure object containing the plot """ fig, ax = plt.subplots() if ax is None else (None, ax) # Calculate Pearson Correlation pearson_corr = calc_pearsonr(df_preds['y_hat'], df_preds['y']) spearman_corr = calc_spearmanr(df_preds['y_hat'], df_preds['y']) # Plot # Source: https://stackoverflow.com/a/20107592/3015186 x = df_preds['y_hat'].values y = df_preds['y'].values xy = np.vstack([x, y]) kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(xy.T) z = np.exp(kde.score_samples(xy.T)) density = ax.scatter(x,y, c=z, s=10) ax.set_title(f"Predictions v. Ground Truth\nPearson $r$={round(pearson_corr, 3)}, Spearman $\\rho$={round(spearman_corr, 3)}", fontdict={'fontsize' : 12}) ax.set_ylabel("True Label", fontdict={'fontsize' : 10}) ax.set_xlabel("Prediction", fontdict={'fontsize' : 10}) ax.set_xlim(0 - PADDING, 1 + PADDING) ax.set_ylim(0 - PADDING, 1 + PADDING) return fig
[docs] def plot_calibration_curve(df_preds: pd.DataFrame, ax: plt.Axes = None) -> plt.figure: """Generate Calibration curve where: - x-axis = mean predicted probability for each bin - y-axis = fraction of positive cases in each bin The calibration curve shows how well the predicted probabilities match the actual observed frequencies. A perfectly calibrated model will follow the diagonal line y=x. Args: df_preds (pd.DataFrame): DataFrame containing 'y_hat' (predictions) and 'y' (ground truth) columns ax (plt.Axes, optional): Matplotlib axes to plot on. If None, creates new figure. Defaults to None. Returns: plt.figure: Matplotlib figure object containing the plot """ fig, ax = plt.subplots() if ax is None else (None, ax) x = np.linspace(0,1,1000) # Calibration Curve # Calculate fop, mpv = sklearn.calibration.calibration_curve(df_preds['y'], df_preds['y_hat'], n_bins=10) # Plot ax.plot(mpv, fop, '-bo', label='Model') ax.plot(x, x, linestyle='--', label='Perfect') ax.legend() ax.set_title(f"Calibration curve", fontdict={'fontsize' : 12}) ax.set_ylabel("Fraction of Positives", fontdict={'fontsize' : 10}) ax.set_xlabel("Predicted Probability of Positive", fontdict={'fontsize' : 10}) ax.set_xlim(0 - PADDING, 1 + PADDING) ax.set_ylim(0 - PADDING, 1 + PADDING) return fig
[docs] def plot_confusion_matrix(df_preds: pd.DataFrame, utilities: dict = None, threshold: float = None, ax: plt.Axes = None) -> plt.figure: """Generate Confusion Matrix @ highest utility threshold (if `threshold` is not specified) where: - x-axis = predictions ('y_hat') - y-axis = ground truth ('y') Args: df_preds (pd.DataFrame): DataFrame containing 'y_hat' (predictions) and 'y' (ground truth) columns utilities (dict, optional): Dictionary containing utility values for each class threshold (float, optional): Cutoff threshold for confusion matrix. If not specified, uses the threshold with highest utility ax (plt.Axes, optional): Matplotlib axes to plot on. If None, creates new figure. Defaults to None. Returns: plt.figure: Matplotlib figure object containing the plot """ fig, ax = plt.subplots() if ax is None else (None, ax) # Calculate thresholds = np.sort(df_preds['y_hat'].unique())[::-1] df_utility = get_df_utility_from_df_preds(df_preds, utilities, thresholds, is_add_0_and_1=True) max_utility_idx = df_utility['unit_utility'].argmax() threshold = df_utility['threshold'][max_utility_idx] # Plot cm = confusion_matrix(df_preds['y'], df_preds['y_hat'] >= threshold) disp = ConfusionMatrixDisplay(confusion_matrix=cm) if ax: disp.plot(ax = ax) else: fig = disp.figure_ ax = disp.ax_ ax.set_title(f"Confusion Matrix\nUsing Cutoff Threshold $t$ with Max Unit Utility\n($t$ = {round(threshold, 3)})", fontdict={'fontsize' : 12}) ax.set_ylabel("True Label", fontdict={'fontsize' : 10}) ax.set_xlabel("Predicted Label", fontdict={'fontsize' : 10}) ax.grid(False) return fig
[docs] def plot_roc_curve(df_preds: pd.DataFrame, utilities: dict = None, ax: plt.Axes = None) -> plt.figure: """Generate ROC curve with indifference curves where: - x-axis = false positive rate (FPR) - y-axis = true positive rate (TPR) Explanation of indifference curves: http://www0.cs.ucl.ac.uk/staff/ucacbbl/roc/ Args: df_preds (pd.DataFrame): DataFrame containing 'y_hat' (predictions) and 'y' (ground truth) columns utilities (dict, optional): Dictionary containing utility values for each class ax (plt.Axes, optional): Matplotlib axes to plot on. If None, creates new figure. Defaults to None. Returns: plt.figure: Matplotlib figure object containing the plot """ fig, ax = plt.subplots() if ax is None else (None, ax) x = np.linspace(0,1,1000) # ROC Curve # Calculate fpr, tpr, thresholds = sklearn.metrics.roc_curve(df_preds['y'], df_preds['y_hat']) auc = round(sklearn.metrics.auc(fpr, tpr), 3) # Plot ax.plot(fpr, tpr, 'b', label='ROC Curve') ax.plot(x, x, 'k--', label='Random') # Utility if utilities: # Calculate df_utility = get_df_utility_from_df_preds(df_preds, utilities, thresholds) max_utility_idx = df_utility['unit_utility'].argmax() prevalence = df_preds['y'].mean() # Proportion of positive cases indifference_curve_slope = (1-prevalence) / prevalence * (utilities['tn'] - utilities['fp']) / (utilities['tp'] - utilities['fn']) # Plot # Max utility max_utility_x = df_utility.iloc[max_utility_idx]['fpr'] max_utility_y = df_utility.iloc[max_utility_idx]['tpr'] ax.plot(max_utility_x, max_utility_y, 'bo') ax.plot(x, indifference_curve_slope * x + (max_utility_y - indifference_curve_slope * max_utility_x), 'b--', label='Max Utility') # Treat All all_x = df_utility.iloc[0]['fpr'] all_y = df_utility.iloc[0]['tpr'] ax.plot(all_x, all_y, 'ro') ax.plot(x, indifference_curve_slope * x + (all_y - indifference_curve_slope * all_x), 'r--', label='Treat All Utility') # Treat None none_x = df_utility.iloc[-1]['fpr'] none_y = df_utility.iloc[-1]['tpr'] ax.plot(none_x, none_y, 'go') ax.plot(x, indifference_curve_slope * x + (none_y - indifference_curve_slope * none_x), 'g--', label='Treat None Utility') ax.legend() ax.set_title(f"ROC curve{' with indifference curves' if utilities else ''}\nAUC={auc}", fontdict={'fontsize' : 12}) ax.set_ylabel("TPR", fontdict={'fontsize' : 10}) ax.set_xlabel("FPR", fontdict={'fontsize' : 10}) ax.set_xlim(0 - PADDING, 1 + PADDING) ax.set_ylim(0 - PADDING, 1 + PADDING) return fig
[docs] def plot_precision_recall_curve(df_preds: pd.DataFrame, utilities: dict = None, ax: plt.Axes = None) -> plt.figure: """Generate Precision-Recall curve with utility contours where: - x-axis = recall (TPR) - y-axis = precision (PPV) Explanation of Expected Utility (EU) calculations (Appendix A): https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2804257/pdf/nihms-108324.pdf Args: df_preds (pd.DataFrame): DataFrame containing 'y_hat' (predictions) and 'y' (ground truth) columns utilities (dict, optional): Dictionary containing utility values for each class ax (plt.Axes, optional): Matplotlib axes to plot on. If None, creates new figure. Defaults to None. Returns: plt.figure: Matplotlib figure object containing the plot """ fig, ax = plt.subplots() if ax is None else (None, ax) x = np.linspace(0,1,1000) # ROC Curve # Calculate precision, recall, thresholds = sklearn.metrics.precision_recall_curve(df_preds['y'], df_preds['y_hat']) auc = round(sklearn.metrics.auc(recall, precision), 3) prevalence = df_preds['y'].mean() # Proportion of positive cases # Plot ax.plot(recall, precision, 'b', label='PR Curve') ax.plot(x, [prevalence] * 1000, 'k--', label='Random') # Utility if utilities: # Calculate df_utility = get_df_utility_from_df_preds(df_preds, utilities, thresholds) max_utility_idx = df_utility['unit_utility'].argmax() # Generate EU contours for plot utility_contour_x, utility_contour_y = np.meshgrid(np.arange(0, 1, 0.01), np.arange(0, 1, 0.01)) with np.errstate(divide='ignore',invalid='ignore'): # Ignore divide by 0 expected_utility = (prevalence * utility_contour_x * (utilities['fp'] - utilities['tn'])) / utility_contour_y + utilities['tn'] + prevalence * (utilities['fn'] - utilities['tn'] + utility_contour_x * (utilities['tn'] + utilities['tp'] - utilities['fp'] - utilities['fn'])) # Max utility max_utility_x = df_utility.iloc[max_utility_idx]['recall'] # TPR max_utility_y = df_utility.iloc[max_utility_idx]['precision'] # PPV ax.plot(max_utility_x, max_utility_y, 'bo') # Utility contour cs = ax.contour(utility_contour_x, utility_contour_y, expected_utility) ax.clabel(cs, inline=1, fontsize=8) # for i in range(len(cs.levels)): # cs.collections[i].set_label(cs.levels[i]) ax.legend() ax.set_title(f"Precision-Recall curve{' with indifference curves ' if utilities else ''}\nAUC={auc}", fontdict={'fontsize' : 12}) ax.set_ylabel("Precision (PPV)", fontdict={'fontsize' : 10}) ax.set_xlabel("Recall (TPR)", fontdict={'fontsize' : 10}) ax.set_xlim(0 - PADDING, 1 + PADDING) ax.set_ylim(0 - PADDING, 1 + PADDING) return fig
[docs] def plot_ppv_v_utility_curve(df_preds: pd.DataFrame, utilities: dict, ax: plt.Axes = None) -> plt.figure: """Generate PPV v. Unit Utility curve where: - x-axis = precision (PPV) - y-axis = unit utility Args: df_preds (pd.DataFrame): DataFrame containing 'y_hat' (predictions) and 'y' (ground truth) columns utilities (dict, optional): Dictionary containing utility values for each class ax (plt.Axes, optional): Matplotlib axes to plot on. If None, creates new figure. Defaults to None. Returns: plt.figure: Matplotlib figure object containing the plot """ fig, ax = plt.subplots() if ax is None else (None, ax) # Calculate thresholds = np.sort(df_preds['y_hat'].unique())[::-1] df_utility = get_df_utility_from_df_preds(df_preds, utilities, thresholds, is_add_0_and_1=False) max_utility_idx = df_utility['unit_utility'].argmax() # Plot # Curve ax.plot(df_utility['ppv'], df_utility['unit_utility'], 'k', label='Utility Curve') # Max utility max_utility_x = df_utility.iloc[max_utility_idx]['ppv'] max_utility_y = df_utility.iloc[max_utility_idx]['unit_utility'] ax.plot(max_utility_x, max_utility_y, 'bo') ax.legend() ax.set_title(f"PPV v. Unit Utility", fontdict={'fontsize' : 12}) ax.set_ylabel("Unit Utility", fontdict={'fontsize' : 10}) ax.set_xlabel("Precision (PPV)", fontdict={'fontsize' : 10}) ax.set_xlim(0 - PADDING, 1 + PADDING) return fig
[docs] def plot_threshold_v_utility_curve(df_preds: pd.DataFrame, utilities: dict, ax: plt.Axes = None) -> plt.figure: """Generate Cutoff Threshold v. Unit Utility curve where: - x-axis = cutoff threshold - y-axis = unit utility Args: df_preds (pd.DataFrame): DataFrame containing 'y_hat' (predictions) and 'y' (ground truth) columns utilities (dict, optional): Dictionary containing utility values for each class ax (plt.Axes, optional): Matplotlib axes to plot on. If None, creates new figure. Defaults to None. Returns: plt.figure: Matplotlib figure object containing the plot """ fig, ax = plt.subplots() if ax is None else (None, ax) # Calculate thresholds = np.sort(df_preds['y_hat'].unique())[::-1] df_utility = get_df_utility_from_df_preds(df_preds, utilities, thresholds, is_add_0_and_1=True) max_utility_idx = df_utility['unit_utility'].argmax() # Plot # Curve ax.plot(df_utility['threshold'], df_utility['unit_utility'], 'k', label='Utility Curve') # Max utility max_utility_x = df_utility.iloc[max_utility_idx]['threshold'] max_utility_y = df_utility.iloc[max_utility_idx]['unit_utility'] ax.plot(max_utility_x, max_utility_y, 'bo') ax.legend() ax.set_title(f"Cutoff Threshold v. Unit Utility", fontdict={'fontsize' : 12}) ax.set_ylabel("Unit Utility", fontdict={'fontsize' : 10}) ax.set_xlabel("Cutoff Threshold", fontdict={'fontsize' : 10}) ax.set_xlim(0 - PADDING, 1 + PADDING) return fig
[docs] def plot_work_v_utility(df_preds: pd.DataFrame, utilities: dict, ax: plt.Axes = None) -> plt.figure: """Generate Work v. Unit Utility curve where: - x-axis = work (%) - y-axis = unit utility Args: df_preds (pd.DataFrame): DataFrame containing 'y_hat' (predictions) and 'y' (ground truth) columns utilities (dict, optional): Dictionary containing utility values for each class ax (plt.Axes, optional): Matplotlib axes to plot on. If None, creates new figure. Defaults to None. Returns: plt.figure: Matplotlib figure object containing the plot """ fig, ax = plt.subplots() if ax is None else (None, ax) # Calculate thresholds = np.sort(df_preds['y_hat'].unique())[::-1] df_utility = get_df_utility_from_df_preds(df_preds, utilities, thresholds, is_add_0_and_1=True) max_utility_idx = df_utility['unit_utility'].argmax() # Plot # Curve ax.plot(df_utility['work'] / df_utility['total'], df_utility['unit_utility'], 'k', label='Utility Curve') # Max utility max_utility_x = df_utility.iloc[max_utility_idx]['work'] / df_utility.iloc[max_utility_idx]['total'] max_utility_y = df_utility.iloc[max_utility_idx]['unit_utility'] ax.plot(max_utility_x, max_utility_y, 'bo') ax.legend() ax.set_title(f"Work v. Unit Utility", fontdict={'fontsize' : 12}) ax.set_ylabel("Unit Utility", fontdict={'fontsize' : 10}) ax.set_xlabel("% Positive Predictions", fontdict={'fontsize' : 10}) ax.set_xlim(0 - PADDING, 1 + PADDING) return fig
[docs] def plot_work_v_ppv_tpr_fpr(df_preds: pd.DataFrame, utilities: dict, ax: plt.Axes = None) -> plt.figure: """Generate Work v. PPV/TPR/FPR curve where: - x-axis = work (%) - y-axis = PPV/TPR/FPR Args: df_preds (pd.DataFrame): DataFrame containing 'y_hat' (predictions) and 'y' (ground truth) columns utilities (dict, optional): Dictionary containing utility values for each class ax (plt.Axes, optional): Matplotlib axes to plot on. If None, creates new figure. Defaults to None. Returns: plt.figure: Matplotlib figure object containing the plot """ fig, ax = plt.subplots() if ax is None else (None, ax) # Calculate thresholds = np.sort(df_preds['y_hat'].unique())[::-1] df_utility = get_df_utility_from_df_preds(df_preds, utilities, thresholds, is_add_0_and_1=False) # Plot # PPV ax.plot(df_utility['work'] / df_utility['total'], df_utility['ppv'], 'r', label='PPV') # TPR ax.plot(df_utility['work'] / df_utility['total'], df_utility['tpr'], 'g', label='TPR') # FPR ax.plot(df_utility['work'] / df_utility['total'], df_utility['fpr'], 'b', label='FPR') ax.legend() ax.set_title(f"Work v. PPV/TPR/FPR", fontdict={'fontsize' : 12}) ax.set_ylabel("Value", fontdict={'fontsize' : 10}) ax.set_xlabel("% Positive Predictions", fontdict={'fontsize' : 10}) ax.set_ylim(0 - PADDING, 1 + PADDING) return fig
[docs] def plot_threshold_v_ppv_tpr_work(df_preds: pd.DataFrame, utilities: dict, ax: plt.Axes = None) -> plt.figure: """Generate Cutoff Threshold v. PPV/TPR/Work curve where: - x-axis = cutoff threshold - y-axis = PPV/TPR/Work Args: df_preds (pd.DataFrame): DataFrame containing 'y_hat' (predictions) and 'y' (ground truth) columns utilities (dict, optional): Dictionary containing utility values for each class ax (plt.Axes, optional): Matplotlib axes to plot on. If None, creates new figure. Defaults to None. Returns: plt.figure: Matplotlib figure object containing the plot """ fig, ax = plt.subplots() if ax is None else (None, ax) # Calculate thresholds = np.sort(df_preds['y_hat'].unique())[::-1] df_utility = get_df_utility_from_df_preds(df_preds, utilities, thresholds, is_add_0_and_1=False) # Plot # PPV ax.plot(df_utility['threshold'], df_utility['ppv'], 'r', label='PPV') # TPR ax.plot(df_utility['threshold'], df_utility['tpr'], 'g', label='TPR') # Fraction Positive (Work / Total) ax.plot(df_utility['threshold'], df_utility['work'] / df_utility['total'], 'b', label='% Positive Predictions') ax.legend() ax.set_title(f"Cutoff Threshold v. PPV/TPR/Work", fontdict={'fontsize' : 12}) ax.set_ylabel("Value", fontdict={'fontsize' : 10}) ax.set_xlabel("Cutoff Threshold", fontdict={'fontsize' : 10}) ax.set_ylim(0 - PADDING, 1 + PADDING) return fig
[docs] def plot_decision_curve(df_preds: pd.DataFrame, utilities: dict, ax: plt.Axes = None) -> plt.figure: """Generate Decision Curve for model where: - y-axis = X - U_none (i.e. everything is relative to Treat None) - x-axis = R Risk Threshold (R) is defined as "the cutpoint for calling a result positive that maximizes expected utility" Thus, "Treat None" on this graph = U_none - U_none = 0. Thus, at R = r, the "Treat All" graph shows what the net benefit would be if you treated everyone AND the optimal risk threshold = r. Guarantee: "Treat All" and "Treat None" lines should intersect at R = Prevalence. Args: df_preds (pd.DataFrame): DataFrame containing 'y_hat' (predictions) and 'y' (ground truth) columns utilities (dict, optional): Dictionary containing utility values for each class ax (plt.Axes, optional): Matplotlib axes to plot on. If None, creates new figure. Defaults to None. Returns: plt.figure: Matplotlib figure object containing the plot """ fig, ax = plt.subplots() if ax is None else (None, ax) # Calculate # NOTE: 'thresholds' may be a diff size than df_utility['threshold'] b/c we add 0,1 to it in 'get_df_utility_from_df_preds' thresholds = np.sort(df_preds['y_hat'].unique())[::-1] df_utility = get_df_utility_from_df_preds(df_preds, utilities, thresholds, is_add_0_and_1=False) prevalence = df_preds['y'].mean() # Proportion of positive cases # Plot # Treat All # Explanation of how to calculate net benefits (Page 8): # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2804257/pdf/nihms-108324.pdf # Explanation of risk thresholds / curves (Page 5): # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2577036/pdf/nihms74373.pdf net_benefit_all = (prevalence - (1 - prevalence) * df_utility['threshold'] / (1 - df_utility['threshold']) ) ax.plot(df_utility['threshold'], net_benefit_all, 'r--', label='Treat All') # Treat None net_benefit_none = [0] * df_utility['threshold'].shape[0] ax.plot(df_utility['threshold'], net_benefit_none, 'g--', label='Treat None') # Model net_benefit_model = (prevalence * df_utility['tpr'] - (1 - prevalence) * df_utility['fpr'] * df_utility['threshold'] / (1 - df_utility['threshold']) ) ax.plot(df_utility['threshold'], net_benefit_model, 'k', label='Model') ax.legend() ax.set_title(f"Decision Curve", fontdict={'fontsize' : 12}) ax.set_ylabel("Net Benefit", fontdict={'fontsize' : 10}) ax.set_xlabel("Risk Threshold", fontdict={'fontsize' : 10}) ax.set_xlim(0, 1) ax.set_ylim(-np.maximum(net_benefit_all.max(), net_benefit_model.max()), np.maximum(net_benefit_all.max(), net_benefit_model.max())) return fig
[docs] def plot_relative_utility_curve(lines: list[dict[pd.DataFrame]], utilities: dict, xlim: Tuple = (0, 1), ylim: Tuple = (-0.1, None), ax: plt.Axes = None) -> plt.figure: """Generate Relative Utility Curve where: - y-axis = relative utility - x-axis = risk threshold "The relative utility curve plots the fraction of the expected utility of perfect prediction obtained by the risk prediction model at the optimal cut point associated with the risk threshold R" Args: lines (list[dict[pd.DataFrame]]): List of dictionaries containing 'df' (dataframe) and 'label' (string) utilities (dict, optional): Dictionary containing utility values for each class xlim (Tuple, optional): Tuple containing the x-axis limits. Defaults to (0, 1). ylim (Tuple, optional): Tuple containing the y-axis limits. Defaults to (-0.1, None). ax (plt.Axes, optional): Matplotlib axes to plot on. If None, creates new figure. Defaults to None. Returns: plt.figure: Matplotlib figure object containing the plot """ fig, ax = plt.subplots() if ax is None else (None, ax) # NOTE: 'thresholds' may be a diff size than df_utility['threshold'] b/c we add 0,1 to it in 'get_df_utility_from_df_preds' thresholds = np.sort(lines[0]['df']['y_hat'].unique())[::-1] df_utility = get_df_utility_from_df_preds(lines[0]['df'], utilities, thresholds, is_add_0_and_1=True) prevalence = lines[0]['df']['y'].mean() # Proportion of positive cases x = df_utility['threshold'] # Treat All # Explanation of how to calculate net benefits (Page 8): # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2804257/pdf/nihms-108324.pdf # Explanation of risk thresholds / curves (Page 5): # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2577036/pdf/nihms74373.pdf utility_perfect = prevalence * utilities['tp'] + (1 - prevalence) * utilities['tn'] utility_all = prevalence * utilities['tp'] + (1 - prevalence) * utilities['fp'] utility_none = prevalence * utilities['fn'] + (1 - prevalence) * utilities['tn'] # Model y_max = np.max(df_utility['unit_utility']) for line in lines: df_preds = line['df'] df_utility = get_df_utility_from_df_preds(df_preds, utilities, x) ru_none = (df_utility['unit_utility'] - utility_none) / (utility_perfect - utility_none) ru_all = (df_utility['unit_utility'] - utility_all) / (utility_perfect - utility_all) # TODO # y = ru_all until prevalence then ru_all ax.plot(x, y, label=line['label']) y_max = y.max() if y.max() > y_max else y_max ax.legend() ax.set_title(f"Relative Utility Curve", fontdict={'fontsize' : 12}) ax.set_ylabel("Relative Utility", fontdict={'fontsize' : 10}) ax.set_xlabel("Risk Threshold", fontdict={'fontsize' : 10}) ax.set_xlim(xlim[0], xlim[1]) ax.set_ylim(ylim[0], y_max) return fig
[docs] def calc_model_performance_metrics(df_preds: pd.DataFrame, thresholds: list[float] = [0.5]) -> dict: """Returns a dictionary containing performance metrics for this model Args: df_preds (pd.DataFrame): Dataframe with two columns, 'y' and 'y_hat' thresholds (list[float]): Cutoffs for thresholding predictions to binary 0/1 (used for accuracy, Matthews correlation) Returns: dict: Dictionary containing performance metrics """ pearson_corr = calc_pearsonr(df_preds['y_hat'], df_preds['y']) spearman_corr = calc_spearmanr(df_preds['y_hat'], df_preds['y']) fpr, tpr, t = sklearn.metrics.roc_curve(df_preds['y'], df_preds['y_hat']) auroc = sklearn.metrics.auc(fpr, tpr) precision, recall, t = sklearn.metrics.precision_recall_curve(df_preds['y'], df_preds['y_hat']) auprc = sklearn.metrics.auc(recall, precision) # Threshold dependent calculations matthews_corr = [ sklearn.metrics.matthews_corrcoef(df_preds['y'], df_preds['y_hat'] >= x) for x in thresholds ] accuracy = [ np.mean(df_preds['y'] == (df_preds['y_hat'] >= x)) for x in thresholds ] results = { 'auroc' : auroc, 'auprc' : auprc, 'pearson_corr' : pearson_corr, 'spearman_corr' : spearman_corr, 'spearman_corr' : spearman_corr, 'accuracies' : accuracy, 'matthews_corrs' : matthews_corr, 'thresholds' : thresholds, } return results
[docs] def make_model_utility_plots(df_preds: pd.DataFrame, utilities: dict, is_show: bool = False, path_to_plots_prefix: str = None): """Run all plotting functions Args: df_preds (pd.DataFrame): - dataframe with two columns, 'y' and 'y_hat' is_show (bool, optional): If TRUE, show plots. Defaults to False. path_to_plots_prefix (str, optional): Prefix for path to saved plot files. Defaults to None. """ if path_to_plots_prefix: is_show = False if not is_show and path_to_plots_prefix is None: print("ERROR - Either `is_show = True` or `path_to_plots_prefix` must be specified") return plots = [ plot_roc_curve(df_preds, utilities), plot_precision_recall_curve(df_preds, utilities), plot_ppv_v_utility_curve(df_preds, utilities), plot_threshold_v_utility_curve(df_preds, utilities), plot_work_v_utility(df_preds, utilities), plot_work_v_ppv_tpr_fpr(df_preds, utilities), plot_threshold_v_ppv_tpr_work(df_preds, utilities), plot_calibration_curve(df_preds), plot_hist_pred(df_preds), plot_pred_v_true(df_preds), plot_confusion_matrix(df_preds, utilities), plot_decision_curve(df_preds, utilities), ] suffixes = [ 'roc.png', 'pr.png', 'ppv_v_utility.png', 'threshold_v_utility.png', 'work_v_utility.png', 'work_v_ppv_tpr_fpr.png', 'threshold_v_ppv_tpr_work.png', 'decision_curve.png', 'calibration.png', 'pred_histogram.png', 'pred_v_true.png', 'confusion_matrix.png', ] if is_show: plt.show() else: for fig, suffix in zip(plots, suffixes): fig.savefig(path_to_plots_prefix + suffix) plt.close('all')
[docs] def calc_pearsonr(a: np.ndarray, b: np.ndarray) -> float: """Pearson correlation coefficient between two 1d vectors Args: a (np.ndarray): First vector b (np.ndarray): Second vector Returns: float: Pearson correlation coefficient """ return np.corrcoef(a, b)[0,1]
[docs] def calc_spearmanr(a: np.ndarray, b: np.ndarray) -> float: """Spearman correlation coefficient between two 1d vectors Args: a (np.ndarray): First vector b (np.ndarray): Second vector Returns: float: Spearman correlation coefficient """ return np.corrcoef(a, b)[0,1]
if __name__ == '__main__': pass