
# -*- coding: utf-8 -*-
"""
Module for confusion matrix, contingency table, and error matrix metrics.

References:
    http://en.wikipedia.org/wiki/Confusion_matrix
"""
from __future__ import absolute_import, division, print_function
import utool as ut
import ubelt as ub
import numpy as np
import scipy.interpolate


def testdata_scores_labels():
    scores = [2, 3, 4, 6, 9, 9, 13, 17, 19, 22, 22, 23, 26, 26, 34,
              59, 63, 75, 80, 81, 89]
    labels = [0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1]
    return scores, labels

def nan_to_num(arr, num):
    arr[np.isnan(arr)] = num
    return arr

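# Illustrative sketch (added for exposition, not part of the original API):
# ``nan_to_num`` mutates its input in-place and also returns it, replacing
# NaN entries with the given fill value. The ratio-style properties below
# rely on this when a denominator is zero.
def _demo_nan_to_num():
    arr = np.array([1.0, np.nan, 3.0])
    out = nan_to_num(arr, 0.0)
    # the NaN entry is replaced by the fill value, in-place
    assert out is arr
    assert np.allclose(out, [1.0, 0.0, 3.0])
    return out
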
class ConfusionMetrics(ub.NiceRepr):
    r"""
    Can compute average precision using the PASCAL definition

    References:
        http://www.flinders.edu.au/science_engineering/fms/School-CSEM/publications/tech_reps-research_artfcts/TRRA_2007.pdf
        http://www.alta.asn.au/events/altss_w2003_proc/altss/courses/powers/Bookmaker-all/200302-ICCS-Bookmaker.pdfcs
        http://www.cs.bris.ac.uk/Publications/Papers/1000704.pdf
        http://en.wikipedia.org/wiki/Information_retrieval
        http://en.wikipedia.org/wiki/Precision_and_recall
        https://en.wikipedia.org/wiki/Confusion_matrix
        http://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_curve.html#sklearn.metrics.roc_curve

    SeeAlso:
        sklearn.metrics.ranking._binary_clf_curve

    Notes:
        From oxford:
        Precision is defined as the ratio of retrieved positive images to the
        total number retrieved.
        Recall is defined as the ratio of the number of retrieved positive
        images to the total number of positive images in the corpus.

    Ignore:
        varname_list = 'tp, fp, fn, tn, fpr, tpr, tpa'.split(', ')
        lines = ['self.{varname} = {varname}'.format(varname=varname)
                 for varname in varname_list]
        print(ut.indent('\n'.join(lines)))

    CommandLine:
        python -m vtool.confusion ConfusionMetrics --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.confusion import *  # NOQA
        >>> scores, labels = testdata_scores_labels()
        >>> c = self = confusions = ConfusionMetrics().fit(scores, labels)
        >>> assert np.all(c.n_pos == c.n_tp + c.n_fn)
        >>> assert np.all(c.n_neg == c.n_tn + c.n_fp)
        >>> assert np.all(np.isclose(c.rp + c.rn, 1.0))
        >>> assert np.all(np.isclose(c.pp + c.pn, 1.0))
        >>> assert np.all(np.isclose(c.fpr, 1 - c.tnr))
        >>> assert np.all(np.isclose(c.fnr, 1 - c.tpr))
        >>> assert np.all(np.isclose(c.tpr, c.tp / c.rp))
        >>> assert np.all(np.isclose(c.tpa, c.tp / c.pp))
        >>> assert np.all(np.isclose(c.jacc, c.tp / (c.tp + c.fn + c.fp)))
        >>> assert np.all(np.isclose(c.mcc, np.sqrt(c.mk * c.bm)))
        >>> assert np.all(np.isclose(
        >>>     c.acc, (c.tpr + c.c * (1 - c.fpr)) / (1 + c.c)))
        >>> assert np.all(np.isclose(c.ppv, c.recall * c.prev / c.bias))
        >>> assert np.all(np.isclose(
        >>>     c.wracc, 4 * c.c * (c.tpr - c.fpr) / (1 + c.c) ** 2))
        >>> # xdoctest: +REQUIRES(--show)
        >>> confusions.draw_roc_curve()
        >>> ut.show_if_requested()
    """

    aliases = {
        'n_tp': {'n_hit', 'n_true_pos'},
        'n_tn': {'n_reject', 'n_true_neg'},
        'n_fp': {'n_false_alarm', 'n_false_pos'},
        'n_fn': {'n_miss', 'n_false_neg'},
        # -----
        'rp': {'real_pos', 'prev', 'prevalence'},
        'rn': {'real_neg'},
        'pp': {'pred_pos', 'bias'},
        'pn': {'pred_neg'},
        # -----
        'cs': {'class_odds', 'skew'},
        'cv': {'cost_ratio'},
        'cp': {'cost_pos'},
        'cn': {'cost_neg'},
        # -----
        'tp': {'true_pos', 'hit'},
        'tn': {'true_neg', 'reject'},
        'fp': {'false_pos', 'type1_error', 'false_alarm'},
        'fn': {'false_neg', 'type2_error', 'miss'},
        # -----
        'fpr': {'false_pos_rate', 'fallout'},
        'fnr': {'false_neg_rate', 'miss_rate'},
        'tpr': {'true_pos_rate', 'recall', 'sensitivity', 'hit_rate'},
        'tnr': {'true_neg_rate', 'inv_recall', 'specificity'},
        # -----
        'tpa': {'true_pos_acc', 'pos_predict_value', 'precision', 'ppv'},
        'tna': {'true_neg_acc', 'neg_predict_value', 'inv_precision', 'npv'},
        # -----
        'mk': {'markedness', 'deltaP', 'r_P'},
        'bm': {'informedness', 'bookmaker_informedness', "deltaP'", 'r_R'},
        # -----
        'mcc': {'matthews_correlation_coefficient'},
        'jacc': {'jaccard_coefficient'},
        'acc': {'accuracy', 'rand_accuracy', 'tea', 'ter'},
        'wracc': {'weighted_relative_accuracy'},
    }

    # The same quantities are called by lots of different names in the literature
    paper_alias = [
        ['dtp', 'determinant'],
        ['lr', 'likelihood-ratio'],
        ['nlr', 'negative-likelihood-ratio'],
        ['bmg', 'bookmarkG', 'bookmark_geometric_mean', 'mcc?'],
        ['evenness_R', 'PrevG2'],
        ['evenness_P', 'BiasG2'],
        ['rh', 'real_harmonic_mean'],
        ['ph', 'pred_harmonic_mean'],
    ]

    # And they relate to each other in interesting ways
    paper_relations = {
        'N': ['A + B + C + D'],
        'dtp': ['A * D - B * C'],
        'mk': ['dtp / (bias * (1 - bias))', 'dtp / biasG ** 2'],
        'bm': ['dtp / (prev * (1 - prev))'],
        'BiasG2': ['bias * 1 - bias'],
        'lr': ['tpr / (1 - tnr)'],
        'nlr': ['tnr / (1 - tpr)'],
        'BMG': ['dtp / evenness_G'],
        'IBias': ['1 - Bias'],
        'etp': ['rp * pp', 'expected_true_positives'],
        'etn': ['rn * pn', 'expected_true_negatives'],
        'rh': ['2 * rp * rn / (rp + rn)', 'real_harmonic_mean'],
        'ph': ['2 * pp * pn / (pp + pn)', 'pred_harmonic_mean'],
        'dp': ['tp - etp', 'dtp', '-dtn', '-(tn - etn)'],
        'deltap': ['dtp - dtn', '2 * dp'],
        'kappa': ['deltap / (deltap + (fp + fn) / 2)'],
    }

    # ROC Plot: tpr vs fpr
    # PN Plot: TP vs FP

    minimizing_metrics = {'fpr', 'fnr', 'fp', 'fn'}

    inv_aliases = {
        alias_key: std_key
        for std_key, alias_vals in aliases.items()
        for alias_key in set.union(alias_vals, {std_key})
    }

    def __init__(self):
        # Scalars
        self.n_pos = None
        self.n_neg = None
        self.n_samples = None
        # Threshold based
        self.thresholds = None
        self.n_tp = None
        self.n_fp = None
        self.n_fn = None
        self.n_tn = None
        # Can be set to weight the cost of errors
        self.cp = 1.0
        self.cn = 1.0

    # def __nice__(self):
    #     return '{}'.format(cfms.n_samples)

    @property
    def thresh(self):
        return self.thresholds

    # ----

    @property
    def cs(self):
        """class ratio"""
        return self.rn / self.rp

    @property
    def cv(self):
        """ratio of the cost of making a mistake"""
        return self.cn / self.cp

    @property
    def c(self):
        return self.cs * self.cv

    # -----

    @property
    def tp(self):
        """true positive probability"""
        return self.n_tp / self.n_samples

    @property
    def tn(self):
        """true negative probability"""
        return self.n_tn / self.n_samples

    @property
    def fp(self):
        """false positive probability"""
        return self.n_fp / self.n_samples

    @property
    def fn(self):
        """false negative probability"""
        return self.n_fn / self.n_samples

    # ----

    @property
    def rp(self):
        """real positive probability"""
        # return (self.tp + self.fn)
        return self.n_pos / self.n_samples

    @property
    def rn(self):
        """real negative probability"""
        # return (self.fp + self.tn)
        return self.n_neg / self.n_samples

    @property
    def pp(self):
        """predicted positive probability"""
        return self.tp + self.fp

    @property
    def pn(self):
        """predicted negative probability"""
        return self.fn + self.tn

    # ----

    @property
    def fpr(self):
        """fallout, false positive rate"""
        return self.n_fp / self.n_neg

    @property
    def fnr(self):
        """miss rate, false negative rate"""
        return self.n_fn / self.n_pos

    @property
    def tpr(self):
        """sensitivity, recall, hit rate, true positive rate"""
        return self.n_tp / self.n_pos

    @property
    def tnr(self):
        """specificity, inverse recall, true negative rate"""
        return self.n_tn / self.n_neg

    # ----

    @property
    def tpa(self):
        """positive predictive value, precision"""
        with np.errstate(invalid='ignore'):
            return nan_to_num(self.n_tp / (self.n_tp + self.n_fp), 1.0)

    @property
    def tna(self):
        """negative predictive value, inverse precision"""
        with np.errstate(invalid='ignore'):
            return nan_to_num(self.n_tn / (self.n_tn + self.n_fn), 1.0)

    # ----

    @property
    def bm(self):
        """bookmaker informedness"""
        return self.tpr + self.tnr - 1

    @property
    def mk(self):
        """markedness"""
        return self.tpa + self.tna - 1

    # ---- other measures

    @property
    def auc_trap(self):
        # per-threshold trapezoidal auc metric
        return (self.tpr + self.tnr) / 2

    @property
    def acc(self):
        """accuracy"""
        return self.tp + self.tn

    @property
    def sqrd_error(self):
        """euclidean distance from the perfect operating point (fpr=0, fnr=0)"""
        return np.sqrt(self.fpr ** 2 + self.fnr ** 2)

    @property
    def mcc(self):
        """matthews correlation coefficient

        Also true that: mcc == np.sqrt(self.bm * self.mk)
        """
        mcc_numer = self.tp * self.tn - self.fp * self.fn
        mcc_denom = np.sqrt(
            (self.tp + self.fp) * (self.tp + self.fn) *
            (self.tn + self.fp) * (self.tn + self.fn)
        )
        with np.errstate(invalid='ignore'):
            mcc = nan_to_num(mcc_numer / mcc_denom, 0.0)
        return mcc

    @property
    def jacc(self):
        """jaccard coefficient"""
        return self.n_tp / (self.n_samples - self.n_tn)
        # return self.tp / (self.tp + self.fn + self.fp)

    @property
    def wracc(self):
        """weighted relative accuracy"""
        return 4 * (self.recall - self.bias) * self.prev

    # --- alias names currently needed for compatibility

    def __dir__(self):
        attrs = dir(object)
        attrs += list(self.__class__.__dict__.keys())
        attrs += list(self.__dict__.keys())
        attrs += self.inv_aliases.keys()
        attrs = sorted(set(attrs))
        return attrs

    def __getattr__(self, attr):
        try:
            std_attr = self.inv_aliases[attr]
        except KeyError:
            raise AttributeError(attr)
        return getattr(self, std_attr)

    # @property
    # def recall(self):
    #     return self.tpr

    # @property
    # def precision(self):
    #     return self.tpa

    # @property
    # def fallout(self):
    #     return self.fpr

    # --------------
    # Constructors
    # --------------

    def fit(self, scores, labels, verbose=False):
        scores = np.asarray(scores)
        labels = np.asarray(labels)
        # must be binary
        labels = labels.astype(np.bool_)

        if verbose:
            print('[confusion] building confusion metrics.')
            print(
                '[confusion] * scores.shape=%r, scores.dtype=%r'
                % (scores.shape, scores.dtype)
            )
            print(
                '[confusion] * labels.shape=%r, labels.dtype=%r'
                % (labels.shape, labels.dtype)
            )

        # sklearn has a much faster implementation
        # n_fp - count the number of false positives with score >= threshold[i]
        # n_tp - count the number of true positives with score >= threshold[i]
        try:
            from sklearn.metrics._ranking import _binary_clf_curve
        except ImportError:
            from sklearn.metrics.ranking import _binary_clf_curve
        n_fp, n_tp, thresholds = _binary_clf_curve(labels, scores, pos_label=1)

        n_samples = len(labels)
        n_pos = labels.sum()
        n_neg = n_samples - n_pos

        # Scalars
        self.n_samples = n_samples
        self.n_pos = n_pos
        self.n_neg = n_neg
        # Threshold based
        self.thresholds = thresholds
        self.n_tp = n_tp
        self.n_fp = n_fp
        self.n_fn = n_pos - n_tp
        self.n_tn = n_neg - n_fp
        return self

    @classmethod
    def from_tp_and_tn_scores(cls, tp_scores, tn_scores, verbose=False):
        scores = np.hstack([tp_scores, tn_scores])
        labels = np.array([True] * len(tp_scores) + [False] * len(tn_scores))
        self = cls().fit(scores, labels, verbose=verbose)
        return self

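    # Usage sketch (added for exposition, not a doctest from the original
    # module): build the metrics directly from two separate score arrays.
    #
    #     >>> tp_scores = [0.9, 0.8, 0.7]
    #     >>> tn_scores = [0.3, 0.2, 0.1]
    #     >>> cfms = ConfusionMetrics.from_tp_and_tn_scores(tp_scores, tn_scores)
    #     >>> assert cfms.n_pos == 3 and cfms.n_neg == 3
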
    # -------------------------------
    # Threshold-less Summary Measures
    # -------------------------------

    def get_ave_precision(self):
        precision = self.precision
        recall = self.recall
        recall_domain, p_interp = interpolate_precision_recall(precision, recall)
        return p_interp.sum() / p_interp.size

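    # Usage sketch (added for exposition): average precision is the mean of
    # the interpolated precision values over a uniform recall grid, so it
    # always lands in [0, 1].
    #
    #     >>> scores, labels = testdata_scores_labels()
    #     >>> cfms = ConfusionMetrics().fit(scores, labels)
    #     >>> ave_p = cfms.get_ave_precision()
    #     >>> assert 0.0 <= ave_p <= 1.0
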
    @property
    def auc(self):
        """
        The AUC is a standard measure used to evaluate a binary classifier
        and represents the probability that a random correct case will
        receive a higher score than a random incorrect case.

        References:
            https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve
        """
        # TODO: change name to represent it is a total measure
        import sklearn.metrics

        return sklearn.metrics.auc(self.fpr, self.tpr)

    # ---------------------
    # Threshold Choosers / Info
    # ---------------------

    def get_fpr_at_recall(self, target_recall):
        indices = np.where(self.recall >= target_recall)[0]
        assert len(indices) > 0, 'no recall at target level'
        func = scipy.interpolate.interp1d(self.recall, self.fpr)
        interp_fpr = func(target_recall)
        # # interpolate to target recall
        # right_index = indices[0]
        # right_recall = self.recall[right_index]
        # left_index = right_index - 1
        # left_recall = self.recall[left_index]
        # stepsize = right_recall - left_recall
        # alpha = (target_recall - left_recall) / stepsize
        # left_fpr = self.fpr[left_index]
        # right_fpr = self.fpr[right_index]
        # interp_fpp = (left_fpr * (1 - alpha)) + (right_fpr * (alpha))
        return interp_fpr

    def get_recall_at_fpr(self, target_fpr):
        indices = np.where(self.fpr >= target_fpr)[0]
        assert len(indices) > 0, 'no false positives at target level'
        func = scipy.interpolate.interp1d(self.fpr, self.tpr)
        interp_tpr = func(target_fpr)
        return interp_tpr

    def get_thresh_at_metric_max(self, metric):
        """
        metric = 'mcc'
        metric = 'fnr'
        """
        metric_values = getattr(self, metric)
        if False:
            idx = metric_values.argmax()
            thresh = self.thresholds[idx]
        else:
            # interpolated version
            import vtool as vt

            thresh, max_value = vt.argsubmax(metric_values, self.thresholds)
        return thresh

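    # Usage sketch (added for exposition): pick the threshold at which MCC
    # peaks on the toy data defined at the top of this module.
    #
    #     >>> scores, labels = testdata_scores_labels()
    #     >>> cfms = ConfusionMetrics().fit(scores, labels)
    #     >>> best_thresh = cfms.get_thresh_at_metric_max('mcc')
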
    def get_thresh_at_metric(self, metric, value, maximize=None):
        r"""
        Gets a threshold for a binary classifier using a target metric and value

        Args:
            metric (str): name of metric like tpr or fpr
            value (float): corresponding numeric value

        Returns:
            float: thresh

        CommandLine:
            python -m vtool.confusion get_thresh_at_metric
            python -m vtool.confusion --exec-interact_roc_factory --show

        Example:
            >>> # DISABLE_DOCTEST
            >>> from vtool.confusion import *  # NOQA
            >>> scores, labels = testdata_scores_labels()
            >>> self = ConfusionMetrics().fit(scores, labels)
            >>> metric = 'tpr'
            >>> value = .85
            >>> thresh = self.get_thresh_at_metric(metric, value)
            >>> print('%s = %r' % (metric, value,))
            >>> result = ('thresh = %s' % (str(thresh),))
            >>> print(result)
            thresh = 22.5

        Ignore:
            metric = 'fpr'
            value = 1e-4
            self = cfms
            maximize = False
            interpolate_replbounds(metric_values, self.thresholds, 0, maximize=maximize)
            interpolate_replbounds(metric_values, self.thresholds, 1e-4, maximize=maximize)
            interpolate_replbounds(metric_values, self.thresholds, 1e-3, maximize=maximize)
            interpolate_replbounds(metric_values, self.thresholds, 1e-2, maximize=maximize)
            interpolate_replbounds(metric_values, self.thresholds, 1e-2, maximize=maximize)
        """
        if value == 'max':
            return self.get_thresh_at_metric_max(metric)
        # if value == 'min':
        #     return self.get_thresh_at_metric_min(metric)
        # TODO: Use interpolation here and make tpr vs fpr a smooth function
        metric = self.inv_aliases[metric]
        metric_values = getattr(self, metric)
        if metric == 'fpr':
            # hack
            if len(metric_values) <= 1:
                return 1.0
        # maximize = metric not in self.minimizing_metrics
        if maximize is None:
            maximize = metric not in {'fpr'}
        thresh = interpolate_replbounds(
            metric_values, self.thresholds, value, maximize=maximize
        )
        return thresh

    def get_metric_at_metric(
        self, get_metric, at_metric, at_value, subindex=False, tiebreaker='maxthresh'
    ):
        """
        Finds the corresponding value of `get_metric` at a specific value of
        `at_metric`.

        get_metric = 'fpr'
        at_metric = 'tpr'
        at_value = .25

        self.rrr()
        self.get_metric_at_metric('fpr', 'tpr', .25)
        self.get_metric_at_metric('n_false_pos', 'tpr', .25)
        self.get_metric_at_metric('n_true_pos', 'tpr', .25)

        get_metric = 'n_true_pos'
        at_metric = 'n_false_pos'
        at_value = 0
        subindex = False
        """
        index = self.get_index_at_metric(
            at_metric, at_value, subindex=subindex, tiebreaker=tiebreaker
        )
        get_value = self.get_metric_at_index(get_metric, index)
        return get_value

    def get_index_at_metric(
        self, at_metric, at_value, subindex=False, tiebreaker='maxthresh'
    ):
        """
        Finds the index that is closest to the metric at a given value

        Args:
            tiebreaker (str): either 'minthresh' or 'maxthresh'. If
                'maxthresh', then a larger threshold is considered better
                when resolving ambiguities. Otherwise a smaller thresh is
                better.

        Doctest:
            >>> from vtool.confusion import *
            >>> pat1 = [0, 0, 0, 0]
            >>> pat2 = [0, 0, 1, 1]
            >>> pat3 = [0, 1, 1, 1]
            >>> pat4 = [1, 1, 1, 1]
            >>> pats = [pat1, pat2, pat3, pat4]
            >>> n = 4
            >>> import itertools as it
            >>> s = it.count(0)
            >>> # Create places of ambiguity and unambiguity
            >>> x = list(ub.flatten([[next(s)] * len(pat) for pat in pats for _ in range(n)]))
            >>> y = list(ub.flatten([pat for pat in pats for _ in range(n)]))
            >>> self = ConfusionMetrics().fit(x, y)
            >>> at_metric = 'n_false_pos'
            >>> at_value = 0
            >>> subindex = False
            >>> idx1 = self.get_index_at_metric(at_metric, at_value, subindex=False, tiebreaker='minthresh')
            >>> idx2 = self.get_index_at_metric(at_metric, at_value, subindex=False, tiebreaker='maxthresh')
            >>> assert idx1 == 3
            >>> assert idx2 == 0
        """
        import vtool as vt

        at_arr = getattr(self, at_metric)
        if at_value in {'max', 'maximize'}:
            at_value = at_arr.max()
        elif at_value in {'min', 'minimize'}:
            at_value = at_arr.min()
        # Find the point closest to the value
        distance = np.abs(at_arr - at_value)
        if subindex:
            # TODO: need to be able to figure out how to correctly break ties
            raise NotImplementedError('fixme use argsubminima2 and then other stuff')
            submin_x, submin_y = vt.argsubmin2(distance)
            return submin_x
        else:
            # idx = distance.argmin()
            idxs = np.where(distance == distance.min())[0]
            # If len(idxs) is bigger than 1 the choice is ambiguous
            if tiebreaker is None:
                return idxs
            else:
                if tiebreaker == 'maxthresh':
                    # If we want to maximize the thresh then take the leftmost
                    idx = idxs[0]
                elif tiebreaker == 'minthresh':
                    # If we want to minimize the thresh then take the rightmost
                    idx = idxs[-1]
                else:
                    raise KeyError('tiebreaker = {!r}'.format(tiebreaker))
                return idx

    def get_metric_at_index(self, metric, subindex):
        import vtool as vt

        arr = getattr(self, metric)
        if isinstance(subindex, int):
            value = arr[subindex]
        else:
            value = vt.linear_interpolation(arr, subindex)
        return value

    def get_metric_at_thresh(self, metric, thresh):
        r"""
        Args:
            metric (str): name of a metric
            thresh (float): desired threshold

        Returns:
            float: value - metric value

        CommandLine:
            python -m vtool.confusion --exec-get_metric_at_threshold

        Ignore:
            >>> self = cfms
            >>> metric = 'fpr'
            >>> thresh = 0

        Example:
            >>> # ENABLE_DOCTEST
            >>> from vtool.confusion import *  # NOQA
            >>> scores, labels = testdata_scores_labels()
            >>> self = ConfusionMetrics().fit(scores, labels)
            >>> metric = 'tpr'
            >>> thresh = .8
            >>> thresh = [0, .1, .9, 1.0]
            >>> value = self.get_metric_at_thresh(metric, thresh)
            >>> result = ('(None, None) = %s' % (str((None, None)),))
            >>> print(result)
        """
        was_scalar = ut.isscalar(thresh)
        if was_scalar:
            thresh = [thresh]
        else:
            thresh = np.asarray(thresh)
        # Assert decreasing
        assert len(self.thresholds) == 1 or self.thresholds[0] > self.thresholds[-1]
        sortx = np.argsort(self.thresholds)
        thresh_ = np.clip(thresh, self.thresholds[-1], self.thresholds[0])
        r = np.searchsorted(self.thresholds, thresh_, side='left', sorter=sortx)
        index_list = sortx[r]
        # index_list = [np.where(self.thresholds <= t)[0][0] for t in thresh]  # sortx[r]
        # index_list = []
        # for t in thresh:
        #     try:
        #         index = np.nonzero(self.thresholds <= t)[0][0]
        #     except IndexError:
        #         print('warning: index error in get_metric_at_thresh t=%r' % (t,))
        #         index = len(self.thresholds) - 1
        #     index_list.append(index)
        # # value = self.__dict__[metric][index]
        value = [getattr(self, metric)[index] for index in index_list]
        if was_scalar:
            value = value[0]
        return value

    # --------------
    # Visualizations
    # --------------

    def draw_roc_curve(self, **kwargs):
        return draw_roc_curve(self.fpr, self.tpr, **kwargs)

    def draw_precision_recall_curve(self, nSamples=11, **kwargs):
        precision = self.precision
        recall = self.recall
        recall_domain, p_interp = interpolate_precision_recall(
            precision, recall, nSamples
        )
        return draw_precision_recall_curve(recall_domain, p_interp, **kwargs)

    def plot_vs(self, x_metric, y_metric):
        """
        x_metric = 'thresholds'
        y_metric = 'fpr'
        """
        import wbia.plottool as pt

        # pt.qtensure()
        # xdata = self.thresholds
        xdata = getattr(self, x_metric)
        ydata_list = [getattr(self, y_metric)]
        pt.multi_plot(
            xdata,
            ydata_list,
            # label_list=[y_metric],
            xlabel=x_metric,
            marker='',
            ylabel=y_metric,
            use_legend=True,
        )

    def plot_metrics(self):
        import wbia.plottool as pt

        metrics = [
            'mcc', 'acc', 'auc_trap'
            # 'tpa', 'tpr',
            # 'acc', 'sqrd_error',
            # 'auc_trap',
            # 'mk', 'bm'
        ]
        metrics = [
            'fnr',
            'fpr',
            'tpr',
            'tnr',
        ]
        xdata = self.thresholds
        ydata_list = [getattr(self, m) for m in metrics]
        pt.multi_plot(
            xdata,
            ydata_list,
            label_list=metrics,
            xlabel='threshold',
            marker='',
            ylabel='metric',
            use_legend=True,
        )

    def show_mcc(self):
        import wbia.plottool as pt

        pt.multi_plot(
            self.thresholds, [self.mcc], xlabel='threshold', marker='', ylabel='MCC'
        )

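
# Illustrative end-to-end sketch (added for exposition, not part of the
# original API): fit the metrics on the toy data defined above and check a
# couple of identities that the class doctest also exercises.
def _demo_confusion_metrics():
    scores, labels = testdata_scores_labels()
    cfms = ConfusionMetrics().fit(scores, labels)
    # the four confusion-cell probabilities sum to one at every threshold
    assert np.allclose(cfms.tp + cfms.fp + cfms.fn + cfms.tn, 1.0)
    # alias names resolve to the same arrays as the canonical attributes
    assert np.all(cfms.recall == cfms.tpr)
    return cfms.auc
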
def interpolate_replbounds(xdata, ydata, pt, maximize=True):
    """
    xdata = np.array([.1, .2, .3, .4, .5])
    ydata = np.array([.1, .2, .3, .4, .5])
    pt = .35

    FIXME:
        if duplicate xdata is given bad things happen.

    BUG:
        in scipy.interpolate.interp1d
        If there is a duplicate xdata, then assume_sorted=False will sort
        ydata by xdata, but xdata should retain its initial ordering in
        places of ambiguity. Currently it does not.

    Args:
        xdata (ndarray):
        ydata (ndarray):
        pt (ndarray):

    Returns:
        float: interp_vals

    CommandLine:
        python -m vtool.confusion --exec-interpolate_replbounds

    Example:
        >>> from vtool.confusion import *  # NOQA
        >>> scores, labels = testdata_scores_labels()
        >>> self = ConfusionMetrics().fit(scores, labels)
        >>> xdata = self.tpr
        >>> ydata = self.thresholds
        >>> pt = 1.0
        >>> #xdata = self.fpr
        >>> #ydata = self.thresholds
        >>> #pt = 0.0
        >>> thresh = interpolate_replbounds(xdata, ydata, pt, maximize=True)
        >>> print('thresh = %r' % (thresh,))
        >>> thresh = interpolate_replbounds(xdata, ydata, pt, maximize=False)
        >>> print('thresh = %r' % (thresh,))

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.confusion import *  # NOQA
        >>> xdata = np.array([0.7, 0.8, 0.8, 0.9, 0.9, 0.9])
        >>> ydata = np.array([34, 26, 23, 22, 19, 17])
        >>> pt = np.array([.85, 1.0, -1.0])
        >>> interp_vals = interpolate_replbounds(xdata, ydata, pt)
        >>> result = ('interp_vals = %s' % (str(interp_vals),))
        >>> print(result)
        interp_vals = [ 22.5 17. 34. ]
    """
    if not ut.issorted(xdata):
        if ut.issorted(xdata[::-1]):
            xdata = xdata[::-1]
            ydata = ydata[::-1]
        else:
            raise AssertionError('need to sort xdata and ydata in function')
            sortx = np.lexsort(np.vstack([np.arange(len(xdata)), xdata]))
            xdata = xdata.take(sortx, axis=0)
            ydata = ydata.take(sortx, axis=0)

    is_scalar = not ub.iterable(pt)
    # print('----')
    # print('xdata = %r' % (xdata,))
    # print('ydata = %r' % (ydata,))
    if is_scalar:
        pt = np.array([pt])
    minval = xdata.min()
    maxval = xdata.max()
    argx_min_list = np.argwhere(xdata == minval)
    argx_max_list = np.argwhere(xdata == maxval)
    argx_min = argx_min_list.min()
    argx_max = argx_max_list.max()
    lower_mask = pt < xdata[argx_min]
    upper_mask = pt > xdata[argx_max]
    interp_mask = ~np.logical_or(lower_mask, upper_mask)
    # if isinstance(pt, np.ndarray):
    dtype = np.result_type(np.float32, ydata.dtype)
    interp_vals = np.empty(pt.shape, dtype=dtype)
    interp_vals[lower_mask] = ydata[argx_min]
    interp_vals[upper_mask] = ydata[argx_max]

    # TODO: fix duplicate values depending on if higher or lower numbers are
    # desirable
    if True:
        # Grouping should be ok because xdata should be sorted
        # therefore groupxs are consecutive
        import vtool as vt

        unique_vals, groupxs = vt.group_indices(xdata)
        grouped_ydata = vt.apply_grouping(ydata, groupxs)
        if maximize:
            sub_idxs = [idxs[np.argmax(ys)] for idxs, ys in zip(groupxs, grouped_ydata)]
        else:
            sub_idxs = [idxs[np.argmin(ys)] for idxs, ys in zip(groupxs, grouped_ydata)]
        sub_idxs = np.array(sub_idxs)
        xdata = xdata[sub_idxs]
        ydata = ydata[sub_idxs]

    if np.any(interp_mask):
        # FIXME: allow assume_sorted = False
        func = scipy.interpolate.interp1d(
            xdata, ydata, kind='linear', assume_sorted=True
        )
        interp_vals[interp_mask] = func(pt[interp_mask])
    if is_scalar:
        interp_vals = interp_vals[0]
    return interp_vals

def interpolate_precision_recall(precision, recall, nSamples=11):
    """
    Interpolates precision as a function of recall p_{interp}(r)

    Reduce wiggles in average precision curve by taking interpolated values
    along a uniform sample.

    References:
        http://en.wikipedia.org/wiki/Information_retrieval#Average_precision
        http://en.wikipedia.org/wiki/Information_retrieval#Mean_Average_precision

    CommandLine:
        python -m vtool.confusion --test-interpolate_precision_recall --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.confusion import *  # NOQA
        >>> scores, labels = testdata_scores_labels()
        >>> nSamples = 11
        >>> confusions = ConfusionMetrics().fit(scores, labels)
        >>> precision = confusions.precision
        >>> recall = confusions.recall
        >>> recall_domain, p_interp = interpolate_precision_recall(confusions.precision, recall, nSamples=11)
        >>> result = ub.repr2(p_interp, precision=1, with_dtype=True)
        >>> print(result)
        >>> # xdoctest: +REQUIRES(--show)
        >>> draw_precision_recall_curve(recall_domain, p_interp)
        >>> ut.show_if_requested()
        np.array([ 1. , 1. , 1. , 1. , 1. , 1. , 1. , 0.9, 0.9, 0.8, 0.6], dtype=np.float64)
    """
    if precision is None:
        return None, None

    recall_domain = np.linspace(0, 1, nSamples)

    if False:
        # normal interpolation
        func = scipy.interpolate.interp1d(
            recall, precision, bounds_error=False, fill_value=precision.max()
        )
        p_interp = func(recall_domain)
    else:
        # Pascal interpolation
        # candidate_masks = recall >= recall_domain[:, None]
        # candidates_idxs_ = [np.where(mask)[0] for mask in candidate_masks]
        # chosen_idx = [-1 if len(idxs) == 0 else idxs.min() for idxs in candidates_idxs_]
        # p_interp = precision[chosen_idx]
        def p_interp(r):
            precision_candidates = precision[recall >= r]
            if len(precision_candidates) == 0:
                return 0
            return precision_candidates.max()

        p_interp = np.array([p_interp(r) for r in recall_domain])
    return recall_domain, p_interp

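# Note (added for exposition): the Pascal-style interpolation above evaluates
#     p_interp(r) = max_{r' >= r} precision(r')
# on a uniform recall grid, which removes the sawtooth dips of the raw
# precision/recall curve before the values are averaged.
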
def interact_roc_factory(confusions, target_tpr=None, show_operating_point=False):
    r"""
    Args:
        confusions (Confusions):

    CommandLine:
        python -m vtool.confusion --exec-interact_roc_factory --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.confusion import *  # NOQA
        >>> scores, labels = testdata_scores_labels()
        >>> print('scores = %r' % (scores,))
        >>> confusions = ConfusionMetrics().fit(scores, labels)
        >>> print(ut.make_csv_table(
        >>>     [confusions.fpr, confusions.tpr, confusions.thresholds],
        >>>     ['fpr', 'tpr', 'thresh']))
        >>> # xdoctest: +REQUIRES(--show)
        >>> ROCInteraction = interact_roc_factory(confusions, target_tpr=.4, show_operating_point=True)
        >>> inter = ROCInteraction()
        >>> inter.show_page()
        >>> # xdoctest: +REQUIRES(--show)
        >>> import wbia.plottool as pt
        >>> ut.show_if_requested()
    """
    from wbia.plottool.abstract_interaction import AbstractInteraction

    class ROCInteraction(AbstractInteraction):
        """
        References:
            http://scipy-central.org/item/38/1/roc-curve-demo

        Notes:
            Sensitivity = true positive rate
            Specificity = true negative rate
        """

        def __init__(self, **kwargs):
            print('ROC Interact')
            super(ROCInteraction, self).__init__(**kwargs)
            self.confusions = confusions
            self.target_fpr = None
            self.show_operating_point = show_operating_point

        @staticmethod
        def static_plot(fnum, pnum, **kwargs):
            # print('ROC Interact2')
            kwargs['thresholds'] = kwargs.get('thresholds', confusions.thresholds)
            kwargs['show_operating_point'] = kwargs.get(
                'show_operating_point', show_operating_point
            )
            confusions.draw_roc_curve(
                fnum=fnum, pnum=pnum, target_tpr=target_tpr, **kwargs
            )

        def plot(self, fnum, pnum):
            # print('ROC Interact3')
            self.static_plot(
                fnum,
                pnum,
                target_fpr=self.target_fpr,
                show_operating_point=self.show_operating_point,
            )

        def on_click_inside(self, event, ex):
            self.target_fpr = event.xdata
            self.show_page()
            self.draw()

        def on_drag(self, event):
            # FIXME: blit
            if False:
                # print('Dragging ' + str(event.x) + ' ' + str(event.y))
                self.target_fpr = event.xdata
                self.show_page()
                # self.draw()
                if event.inaxes is not None:
                    self.fig.canvas.blit(event.inaxes.bbox)
                    # [blit(ax) event.canvas.figure.axes]

    return ROCInteraction

def draw_roc_curve(
    fpr,
    tpr,
    fnum=None,
    pnum=None,
    marker='',
    target_tpr=None,
    target_fpr=None,
    thresholds=None,
    color=None,
    name=None,
    label=None,
    show_operating_point=False,
):
    r"""
    Args:
        fpr (?):
        tpr (?):
        fnum (int): figure number (default = None)
        pnum (tuple): plot number (default = None)
        marker (str): (default = '-x')
        target_tpr (None): (default = None)
        target_fpr (None): (default = None)
        thresholds (None): (default = None)
        color (None): (default = None)
        show_operating_point (bool): (default = False)

    CommandLine:
        python -m vtool.confusion --exec-draw_roc_curve --show --lightbg

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.confusion import *  # NOQA
        >>> scores, labels = testdata_scores_labels()
        >>> confusions = ConfusionMetrics().fit(scores, labels)
        >>> fpr = confusions.fpr
        >>> tpr = confusions.tpr
        >>> thresholds = confusions.thresholds
        >>> fnum = None
        >>> pnum = None
        >>> marker = 'x'
        >>> target_tpr = .85
        >>> target_fpr = None
        >>> color = None
        >>> show_operating_point = True
        >>> draw_roc_curve(fpr, tpr, fnum, pnum, marker, target_tpr, target_fpr,
        >>>                thresholds, color, show_operating_point)
        >>> ut.show_if_requested()
    """
    import wbia.plottool as pt
    import sklearn.metrics

    if fnum is None:
        fnum = pt.next_fnum()

    # if color is None:
    #     color = (0.4, 1.0, 0.4) if pt.is_default_dark_bg() else (0.1, 0.4, 0.4)

    roc_auc = sklearn.metrics.auc(fpr, tpr)

    title_suffix = ''

    if target_fpr is not None:
        # func = scipy.interpolate.interp1d(fpr, tpr, kind='linear', assume_sorted=False)
        # func = scipy.interpolate.interp1d(xdata, ydata, kind='nearest', assume_sorted=False)
        # interp_vals[interp_mask] = func(pt[interp_mask])
        target_fpr = np.clip(target_fpr, 0, 1)
        interp_tpr = interpolate_replbounds(fpr, tpr, target_fpr)
        choice_tpr = interp_tpr
        choice_fpr = target_fpr
    elif target_tpr is not None:
        target_tpr = np.clip(target_tpr, 0, 1)
        interp_fpr = interpolate_replbounds(tpr, fpr, target_tpr)
        choice_tpr = target_tpr
        choice_fpr = interp_fpr
    else:
        choice_tpr = None
        choice_fpr = None

    if choice_fpr is not None:
        choice_thresh = 0
        if thresholds is not None:
            try:
                index = np.nonzero(tpr >= choice_tpr)[0][0]
            except IndexError:
                index = len(thresholds) - 1
            choice_thresh = thresholds[index]
        # percent = ut.scalar_str(choice_tpr * 100).split('.')[0]
        # title_suffix = ', FPR%s=%05.2f%%' % (percent, choice_fpr)
        title_suffix = ''
        if show_operating_point:
            title_suffix = ', fpr=%.2f, tpr=%.2f, thresh=%.2f' % (
                choice_fpr,
                choice_tpr,
                choice_thresh,
            )
    else:
        title_suffix = ''

    # if recall_domain is None:
    #     ave_p = np.nan
    # else:
    #     ave_p = p_interp.sum() / p_interp.size

    title = 'Receiver operating characteristic'
    if name and not label:
        title += ' (%s)' % (name,)
    if not label:
        title += '\n' + 'AUC=%.3f' % (roc_auc,)
    else:
        label += ' AUC=%.3f' % (roc_auc,)

    title += title_suffix

    label_list = None
    if label:
        label_list = [label]
    pt.multi_plot(
        fpr,
        [tpr],
        label_list=label_list,
        marker=marker,
        color=color,
        fnum=fnum,
        pnum=pnum,
        title=title,
        xlabel='False Positive Rate',
        ylabel='True Positive Rate',
    )
    # pt.plot2(fpr, tpr, marker=marker,
    #          x_label='False Positive Rate',
    #          y_label='True Positive Rate',
    #          unitbox=True, flipx=False, color=color, fnum=fnum, pnum=pnum,
    #          title=title)

    if False:
        # Interp does not work right because of duplicate values
        # in xdomain
        line_ = np.linspace(0.11, 0.9, 20)
        # np.append([np.inf], np.diff(fpr)) > 0
        # np.append([np.inf], np.diff(tpr)) > 0
        unique_tpr_idxs = np.nonzero(np.append([np.inf], np.diff(tpr)) > 0)[0]
        unique_fpr_idxs = np.nonzero(np.append([np.inf], np.diff(fpr)) > 0)[0]
        pt.plt.plot(
            line_,
            interpolate_replbounds(fpr[unique_fpr_idxs], tpr[unique_fpr_idxs], line_),
            'b-x',
        )
        pt.plt.plot(
            interpolate_replbounds(tpr[unique_tpr_idxs], fpr[unique_tpr_idxs], line_),
            line_,
            'r-x',
        )

    if choice_fpr is not None:
        pt.plot(choice_fpr, choice_tpr, 'o', color=pt.PINK)

def draw_precision_recall_curve(
    recall_domain, p_interp, title_pref=None, fnum=1, pnum=None, color=None
):
    import wbia.plottool as pt

    if color is None:
        color = (0.4, 1.0, 0.4) if pt.is_default_dark_bg() else (0.1, 0.4, 0.4)

    if recall_domain is None:
        recall_domain = np.array([])
        p_interp = np.array([])
        ave_p = -1.0  # np.nan
    else:
        ave_p = p_interp.sum() / p_interp.size

    pt.plot2(
        recall_domain,
        p_interp,
        marker='o--',
        x_label='recall',
        y_label='precision',
        unitbox=True,
        flipx=False,
        color=color,
        fnum=fnum,
        pnum=pnum,
        title='Interpolated Precision Vs Recall\n' + 'avep = %.3f' % ave_p,
    )
    # print('Interpolated Precision')
    # print(ub.repr2(list(zip(recall_domain, p_interp))))
    # fig.show()


if __name__ == '__main__':
    """
    CommandLine:
        xdoctest -m vtool.confusion
    """
    import xdoctest

    xdoctest.doctest_module(__file__)