# metrics.py
import keras.backend as K
from keras.metrics import binary_accuracy


def precision(y_true, y_pred):
    """Precision metric.

    Only computes a batch-wise average of precision.

    Computes the precision, a metric for multi-label classification of
    how many selected items are relevant.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision
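
# A minimal sketch of how `precision` behaves on one batch, assuming the
# Keras backend can be evaluated eagerly with K.eval (the tensor values
# below are illustrative and not part of the original module):
#
#   y_true = K.constant([[0., 1., 1., 0.]])
#   y_pred = K.constant([[0., 1., 0., 1.]])
#   # true positives = 1, predicted positives = 2
#   print(K.eval(precision(y_true, y_pred)))  # ~0.5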


def recall(y_true, y_pred):
    """Recall metric.

    Only computes a batch-wise average of recall.

    Computes the recall, a metric for multi-label classification of
    how many relevant items are selected.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall
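
# With the same illustrative tensors as above (assumed, not from the original
# module), recall also evaluates to ~0.5:
#
#   # true positives = 1, possible positives = 2
#   print(K.eval(recall(y_true, y_pred)))  # ~0.5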


def fbeta_score(y_true, y_pred, beta=1):
    """Computes the F score.

    The F score is the weighted harmonic mean of precision and recall.
    Here it is only computed as a batch-wise average, not globally.

    This is useful for multi-label classification, where input samples can be
    assigned sets of labels. Using recall alone, a model could achieve a
    perfect score by simply assigning every class to every input. To avoid
    this, a metric should also penalize incorrect class assignments
    (precision). The F-beta score (which ranges from 0.0 to 1.0) combines the
    two as a weighted harmonic mean of precision and recall.

    With beta = 1, this is equivalent to the F1-measure. With beta < 1, the
    metric is weighted towards precision, and with beta > 1 it is weighted
    towards recall.

    F1 score: https://en.wikipedia.org/wiki/F1_score
    """
    if beta < 0:
        raise ValueError('The lowest allowed beta is zero (precision only).')

    # If there are no true positives, fix the F score at 0 like sklearn.
    if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
        return 0

    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    bb = beta ** 2
    fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
    return fbeta_score
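
# A worked example under the same assumptions as above: with p = r = 0.5 and
# beta = 1, bb = 1 and the score is (1 + 1) * (0.5 * 0.5) / (0.5 + 0.5) = 0.5.
# K.epsilon() only guards against division by zero and is negligible here.
#
#   print(K.eval(fbeta_score(y_true, y_pred, beta=1)))  # ~0.5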


def fmeasure(y_true, y_pred):
    """Computes the f-measure, the harmonic mean of precision and recall.

    Here it is only computed as a batch-wise average, not globally.
    """
    return fbeta_score(y_true, y_pred, beta=1)


# In the metrics below, the last two class columns of the (presumably one-hot
# encoded) targets and predictions are summed into a single binary label, so
# the standard binary metrics can be applied to that combined positive class.
def invasion_acc(y_true, y_pred):
    """Binary accuracy over the combined last two class columns."""
    binary_truth = y_true[:, -2] + y_true[:, -1]
    binary_pred = y_pred[:, -2] + y_pred[:, -1]
    return binary_accuracy(binary_truth, binary_pred)


def invasion_precision(y_true, y_pred):
    """Batch-wise precision over the combined last two class columns."""
    binary_truth = y_true[:, -2] + y_true[:, -1]
    binary_pred = y_pred[:, -2] + y_pred[:, -1]
    return precision(binary_truth, binary_pred)


def invasion_recall(y_true, y_pred):
    """Batch-wise recall over the combined last two class columns."""
    binary_truth = y_true[:, -2] + y_true[:, -1]
    binary_pred = y_pred[:, -2] + y_pred[:, -1]
    return recall(binary_truth, binary_pred)


def invasion_fmeasure(y_true, y_pred):
    """Batch-wise F1 score over the combined last two class columns."""
    binary_truth = y_true[:, -2] + y_true[:, -1]
    binary_pred = y_pred[:, -2] + y_pred[:, -1]
    return fmeasure(binary_truth, binary_pred)
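
# Illustrative only (tensor values assumed, not from the original module):
# with 4-class one-hot rows, the last two columns collapse into a single
# binary positive label before the binary metric is applied.
#
#   y_true = K.constant([[1., 0., 0., 0.],
#                        [0., 0., 1., 0.],
#                        [0., 0., 0., 1.]])
#   y_pred = K.constant([[0.9, 0.1, 0.0, 0.0],
#                        [0.2, 0.1, 0.6, 0.1],
#                        [0.1, 0.1, 0.1, 0.7]])
#   print(K.eval(invasion_acc(y_true, y_pred)))  # 1.0 for this batch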


def ia_acc(y_true, y_pred):
    """Binary accuracy over the last class column only."""
    binary_truth = y_true[:, -1]
    binary_pred = y_pred[:, -1]
    return binary_accuracy(binary_truth, binary_pred)


def ia_precision(y_true, y_pred):
    """Batch-wise precision over the last class column only."""
    binary_truth = y_true[:, -1]
    binary_pred = y_pred[:, -1]
    return precision(binary_truth, binary_pred)


def ia_recall(y_true, y_pred):
    """Batch-wise recall over the last class column only."""
    binary_truth = y_true[:, -1]
    binary_pred = y_pred[:, -1]
    return recall(binary_truth, binary_pred)


def ia_fmeasure(y_true, y_pred):
    """Batch-wise F1 score over the last class column only."""
    binary_truth = y_true[:, -1]
    binary_pred = y_pred[:, -1]
    return fmeasure(binary_truth, binary_pred)
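
# A minimal usage sketch, assuming a hypothetical Keras model (the model
# below is not part of this project and only illustrates how these functions
# plug into `model.compile` as custom metrics):
#
#   from keras.models import Sequential
#   from keras.layers import Dense
#
#   model = Sequential([Dense(4, activation='softmax', input_shape=(10,))])
#   model.compile(optimizer='adam',
#                 loss='categorical_crossentropy',
#                 metrics=[precision, recall, fmeasure,
#                          invasion_acc, invasion_fmeasure, ia_acc])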