-
Notifications
You must be signed in to change notification settings - Fork 2
/
StackingFW.py
414 lines (335 loc) · 12.8 KB
/
StackingFW.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
# -*- coding: utf-8 -*-
## Alp and Elvan modified this from
## https://github.com/anfibil/cse40647.sp14/blob/master/32%20-%20Stacking%20%26%20Blending.ipynb
## by Reid Johnson, 2014
#
# Original from:
# Kemal Eren (https://github.com/kemaleren/scikit-learn/blob/stacking/sklearn/ensemble/stacking.py)
#
# Generates a stacking/blending of base models. Cross-validation is used to
# generate predictions from base (level-0) models that are used as input to a
# combiner (level-1) model.
import numpy as np
from itertools import izip
from sklearn.grid_search import ParameterGrid as IterGrid
from sklearn.base import ClassifierMixin, RegressorMixin
from sklearn.ensemble.base import BaseEnsemble
from sklearn.utils.validation import assert_all_finite
# TODO: Built-in nested cross validation, re-using base classifiers, to pick
# best stacking method.
# TODO: Access to best, vote, etc. after training.
# Explicit public API: the names exported by ``from <module> import *``.
__all__ = [
    "Stacking",
    "StackingFWL",
    'estimator_grid'
]
### Alp Added This ######################
import scipy as sp
from time import time
def logloss(act, pred):
    """Return the binary logarithmic loss of predictions against labels.

    Parameters
    ----------
    act : array-like of shape (n_samples,)
        Actual binary labels (0/1).
    pred : array-like of shape (n_samples,)
        Predicted probabilities of the positive class.

    Returns
    -------
    float
        Mean negative log-likelihood over the samples.
    """
    act = np.asarray(act, dtype=float)
    # Clip probabilities away from 0 and 1 to avoid log(0). np.clip replaces
    # the sp.maximum/sp.minimum aliases, which were removed from modern SciPy.
    pred = np.clip(np.asarray(pred, dtype=float), 1e-15, 1 - 1e-15)
    ll = np.sum(act * np.log(pred) + (1 - act) * np.log(1 - pred))
    return -ll / len(act)
#########################################
def estimator_grid(*args):
    """Instantiate candidate estimators from alternating pairs of
    (estimator class, parameter grid).

    Parameters
    ----------
    args : sequence
        Alternating estimator classes and parameter-grid dicts, e.g.
        ``estimator_grid(SVC, {'C': [1, 10]}, LogisticRegression, {})``.
        An empty dict yields a single estimator built with its defaults.

    Returns
    -------
    result : list
        One instantiated estimator per parameter combination.
    """
    result = []
    # Builtin zip replaces itertools.izip (Python-2-only) and behaves
    # identically for this pairwise traversal.
    pairs = zip(args[::2], args[1::2])
    for estimator, params in pairs:
        if len(params) == 0:
            # No grid given: instantiate with default parameters.
            result.append(estimator())
        else:
            # One estimator per point of the parameter grid.
            for p in IterGrid(params):
                result.append(estimator(**p))
    return result
class MRLR(ClassifierMixin):
    """Converts a multi-class classification task into a set of indicator
    regression tasks.

    One instance of ``regressor`` is fitted per class against a 0/1
    indicator target; predicted probabilities are obtained by normalizing
    the per-class regressor outputs.

    Parameters
    ----------
    regressor : callable
        The regressor class (not an instance); one instance per class is
        created in ``fit``.
    stackingc : bool
        If True (StackingC, Seewald 2002), each per-class regressor is
        trained only on the feature columns belonging to its class.
    kwargs :
        Keyword arguments used to instantiate each regressor.

    References
    ----------
    .. [1] K. M. Ting, I. H. Witten, "Issues in Stacked Generalization", 1999.
    """

    def __init__(self, regressor, stackingc, **kwargs):
        self.estimator_ = regressor
        self.estimator_args_ = kwargs
        self.stackingc_ = stackingc

    def _get_subdata(self, X):
        """Returns subsets of the data, one for each class. Assumes the
        columns of X are striped in order.

        e.g. if n_classes_ == 3, then returns (X[:, 0::3], X[:, 1::3],
        X[:, 2::3])

        Parameters
        ----------
        X : np.ndarray, shape=(n, m)
            The feature data.

        Returns
        -------
        list of length n_classes_
            The subsets of the data (the full X repeated when StackingC is
            disabled).
        """
        if not self.stackingc_:
            # Without StackingC every per-class regressor sees all columns.
            return [X, ] * self.n_classes_

        result = []
        for i in range(self.n_classes_):
            # Every n_classes_-th column, starting at i, belongs to class i.
            slc = (slice(None), slice(i, None, self.n_classes_))
            result.append(X[slc])
        return result

    def fit(self, X, y):
        """Fit the estimator given predictor(s) X and target y. Assumes the
        columns of X are predictions generated by each predictor on each
        class. Fits one estimator for each class.

        Parameters
        ----------
        X : np.ndarray, shape=(n, m)
            The feature data for which to compute the predicted output.
        y : array of shape = [n_samples]
            The actual outputs (class data).

        Returns
        -------
        self : MRLR
            The fitted estimator (returned per the scikit-learn convention
            so calls can be chained).
        """
        self.n_classes_ = len(set(y))
        self.estimators_ = []

        # Generate feature data subsets corresponding to each class.
        X_subs = self._get_subdata(X)

        # Fit an instance of the estimator to each data subset.
        for i in range(self.n_classes_):
            e = self.estimator_(**self.estimator_args_)
            # 0/1 indicator target for class i.
            # NOTE(review): this assumes labels are exactly 0..n_classes_-1;
            # confirm against callers before using other label encodings.
            y_i = np.array(list(j == i for j in y))
            X_i = X_subs[i]
            e.fit(X_i, y_i)
            self.estimators_.append(e)
        # Fix: return self per the scikit-learn estimator API (the original
        # returned None, which breaks call chaining).
        return self

    def predict(self, X):
        """Predict label values with the fitted estimator on predictor(s) X.

        Returns
        -------
        array of shape = [n_samples]
            The predicted label values of the input samples (argmax over
            the normalized per-class outputs).
        """
        proba = self.predict_proba(X)
        return np.argmax(proba, axis=1)

    def predict_proba(self, X):
        """Predict label probabilities with the fitted estimator on
        predictor(s) X.

        Returns
        -------
        proba : array of shape = [n_samples, n_classes_]
            The predicted label probabilities of the input samples.
        """
        proba = []
        X_subs = self._get_subdata(X)
        for i in range(self.n_classes_):
            e = self.estimators_[i]
            X_i = X_subs[i]
            # Each regressor contributes one column of raw scores.
            pred = e.predict(X_i).reshape(-1, 1)
            proba.append(pred)
        proba = np.hstack(proba)

        # Normalize each row to sum to 1, guarding against all-zero rows.
        # NOTE(review): assumes the regressors emit nonnegative scores;
        # negative outputs would yield invalid "probabilities".
        normalizer = proba.sum(axis=1)[:, np.newaxis]
        normalizer[normalizer == 0.0] = 1.0
        proba /= normalizer
        assert_all_finite(proba)
        return proba
class Stacking(BaseEnsemble):
"""Implements stacking/blending.
Parameters
----------
meta_estimator : string or callable
May be one of "best", "vote", "average", or any classifier or
regressor constructor
estimators : iterator
An iterable of estimators; each must support predict_proba()
cv : iterator
A cross validation object. Base (level-0) estimators are trained on
the training folds, then the meta (level-1) estimator is trained on
the testing folds.
stackingc : bool
Whether to use StackingC or not. For more information, refer to the
following paper:
Reference:
A. K. Seewald, "How to Make Stacking Better and Faster While Also
Taking Care of an Unknown Weakness," 2002.
kwargs :
Arguments passed to instantiate meta_estimator.
References
----------
.. [1] D. H. Wolpert, "Stacked Generalization", 1992.
"""
# TODO: Support different features for each estimator.
# TODO: Support "best", "vote", and "average" for already trained model.
# TODO: Allow saving of estimators, so they need not be retrained when
# trying new stacking methods.
def __init__(self, meta_estimator, estimators,
cv, stackingc=True, proba=True,
**kwargs):
self.estimators_ = estimators
self.n_estimators_ = len(estimators)
self.cv_ = cv
self.stackingc_ = stackingc
self.proba_ = proba
if stackingc:
if isinstance(meta_estimator, str) or not issubclass(meta_estimator, RegressorMixin):
raise Exception('StackingC only works with a regressor.')
if isinstance(meta_estimator, str):
if meta_estimator not in ('best',
'average',
'vote'):
raise Exception('Invalid meta estimator: {0}'.format(meta_estimator))
raise Exception('"{0}" meta estimator not implemented'.format(meta_estimator))
elif issubclass(meta_estimator, ClassifierMixin):
self.meta_estimator_ = meta_estimator(**kwargs)
elif issubclass(meta_estimator, RegressorMixin):
self.meta_estimator_ = MRLR(meta_estimator, stackingc, **kwargs)
else:
raise Exception('Invalid meta estimator: {0}'.format(meta_estimator))
def _base_estimator_predict(self, e, X):
"""Predict label values with the specified estimator on predictor(s) X.
Parameters
----------
e : int
The estimator object.
X : np.ndarray, shape=(n, m)
The feature data for which to compute the predicted outputs.
Returns
-------
pred : np.ndarray, shape=(len(X), 1)
The mean of the label probabilities predicted by the specified
estimator for each fold for each instance X.
"""
# Generate array for the base-level testing set, which is n x n_folds.
pred = e.predict(X)
assert_all_finite(pred)
return pred
def _base_estimator_predict_proba(self, e, X):
"""Predict label probabilities with the specified estimator on
predictor(s) X.
Parameters
----------
e : int
The estimator object.
X : np.ndarray, shape=(n, m)
The feature data for which to compute the predicted outputs.
Returns
-------
pred : np.ndarray, shape=(len(X), 1)
The mean of the label probabilities predicted by the specified
estimator for each fold for each instance X.
"""
# Generate array for the base-level testing set, which is n x n_folds.
pred = e.predict_proba(X)
assert_all_finite(pred)
return pred
def _make_meta(self, X):
"""Make the feature set for the meta (level-1) estimator.
Parameters
----------
X : np.ndarray, shape=(n, m)
The feature data.
Returns
-------
An n x len(self.estimators_) array of meta-level features.
"""
rows = []
for e in self.estimators_:
if self.proba_:
# Predict label probabilities
pred = self._base_estimator_predict_proba(e, X)
else:
# Predict label values
pred = self._base_estimator_predict(e, X)
rows.append(pred)
return np.hstack(rows)
def fit(self, X, y):
"""Fit the estimator given predictor(s) X and target y.
Parameters
----------
X : np.ndarray, shape=(n, m)
The feature data on which to fit.
y : array of shape = [n_samples]
The actual outputs (class data).
"""
# Build meta data.
X_meta = [] # meta-level features
y_meta = [] # meta-level labels
print 'Training and validating the base (level-0) estimator(s)...'
print
for i, (a, b) in enumerate(self.cv_):
print 'Fold [%s]' % (i)
# You need to use iloc location referencing to access pandas data frame data
X_a, X_b = X.iloc[a], X.iloc[b] # training and validation features
y_a, y_b = y.iloc[a], y.iloc[b] # training and validation labels
# Fit each base estimator using the training set for the fold.
for j, e in enumerate(self.estimators_):
## ALP ADDED THESE ##
t_Start = time()
##
print ' Training base (level-0) estimator %d...' % (j),
e.fit(X_a, y_a)
## ALP ADDED THESE ##
pred = e.predict_proba(X_b)
print 'Logloss %f...' % (logloss(y_b, pred[:,1])),
print 'Time %f...' % (time() - t_Start),
##
print 'done.'
proba = self._make_meta(X_b)
X_meta.append(proba)
y_meta.append(y_b)
#print 'Estimater got logloss %f...' % logloss(y_b,proba)
print
X_meta = np.vstack(X_meta)
if y_meta[0].ndim == 1:
y_meta = np.hstack(y_meta)
else:
y_meta = np.vstack(y_meta)
# Train meta estimator.
print 'Training meta (level-1) estimator...',
self.meta_estimator_.fit(X_meta, y_meta)
print 'done.'
# Re-train base estimators on full data.
for j, e in enumerate(self.estimators_):
print 'Re-training base (level-0) estimator %d on full data...' % (j),
e.fit(X, y)
print 'done.'
def predict(self, X):
"""Predict label values with the fitted estimator on predictor(s) X.
Parameters
----------
X : np.ndarray, shape=(n, m)
The feature data for which to compute the predicted output.
Returns
-------
array of shape = [n_samples]
The predicted label values of the input samples.
"""
X_meta = self._make_meta(X)
return self.meta_estimator_.predict(X_meta)
def predict_proba(self, X):
"""Predict label probabilities with the fitted estimator on
predictor(s) X.
Parameters
----------
X : np.ndarray, shape=(n, m)
The feature data for which to compute the predicted output.
Returns
-------
array of shape = [n_samples]
The predicted label probabilities of the input samples.
"""
X_meta = self._make_meta(X)
return self.meta_estimator_.predict_proba(X_meta)
class StackingFWL(Stacking):
    """Placeholder for Feature-Weighted Linear Stacking.

    NOTE(review): not implemented yet -- this subclass currently behaves
    identically to ``Stacking``; the feature-weighted combination scheme
    of [1] still needs to be added.

    References
    ----------
    .. [1] J. Sill, G. Takács, L. Mackey, D. Lin, "Feature-Weighted Linear
       Stacking", 2009.
    """
    pass