Source code for giotto.ml.classifier.random_forest

'''Random Forest Classifier Module'''

from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import cross_val_score
from sklearn import preprocessing
from sklearn import feature_selection

import pickle
import numpy as np

from giotto.ml.database.classifier import MLClassifier
from giotto.ml.database.sensor import MLSensor  


class MLRandomForest(MLClassifier):
    '''Random Forest classifier class

    This class trains a Random Forest classifier using a dataset passed to
    the train function. Then, it makes a prediction using timeseries data
    given to the "predict" function.

    If you want to implement a classifier class using other models,
    replicate this class. The class has to implement at least two
    functions, train and predict.
    '''

    def __init__(self, dictionary=None, serialized=False):
        '''Initialises the base classifier and installs a default model

        Both arguments are forwarded unchanged to MLClassifier.__init__.
        If self.model is still None after the base initialiser has run,
        a new RandomForestClassifier with default parameters is used.
        '''
        MLClassifier.__init__(self, dictionary, serialized)
        if self.model is None:
            self.model = RandomForestClassifier()
        self.model_name = 'random forest'

    def extract_features(self, dataset):
        '''Extracts features from a given dataset

        Extracts features with the preprocess function. The function is
        implemented in the MLClassifier class.

        dataset is a dictionary with the keys 'data' (an iterable of
        samples, each having 'timeseries' and 'label' entries), 'labels'
        (the list of known labels) and 'sampling_period'.

        Returns a dictionary with:
          'features'        -- the per-sample feature rows stacked
                               vertically with numpy.vstack (an empty list
                               when the dataset holds no samples)
          'labels'          -- for each sample, the index of its label
                               within dataset['labels']
          'sampling_period' -- copied from the dataset

        Raises ValueError when a sample's label is not in
        dataset['labels'].
        '''
        labels = dataset['labels']
        feature_rows = []
        all_labels = []

        for sample in dataset['data']:
            feature_rows.append(self.preprocess(sample['timeseries']))
            all_labels.append(labels.index(sample['label']))

        # Stack once after the loop. Comparing the accumulated ndarray with
        # [] on every iteration is an elementwise comparison, not an
        # emptiness test, and re-stacking per sample copies the whole
        # matrix each time.
        all_features = np.vstack(feature_rows) if feature_rows else []

        return {
            'features': all_features,
            'labels': all_labels,
            'sampling_period': dataset['sampling_period'],
        }

    def train(self, dataset):
        '''Trains a random forest classifier

        Fits a StandardScaler on the features extracted from dataset, fits
        self.model on the scaled features and stores the results in
        self.scaler, self.classifier, self.sampling_period and self.labels
        for later use by predict.
        '''
        # Generate a training set
        data = self.extract_features(dataset)

        # Prescale
        self.scaler = preprocessing.StandardScaler().fit(data['features'])
        scaled_features = self.scaler.transform(data['features'])

        # Random Forest does not require feature selection. A classifier
        # that does would fit a selector (e.g. feature_selection.SelectKBest)
        # on scaled_features and data['labels'] here, and apply the same
        # selector in predict.

        # Train a classifier
        self.classifier = self.model.fit(scaled_features, data['labels'])
        self.sampling_period = data['sampling_period']
        self.labels = dataset['labels']

    def predict(self, timeseries):
        '''Makes a prediction using a pre-trained random forest classifier

        Preprocesses timeseries into a single feature row, scales it with
        the scaler fitted in train and returns the entry of self.labels
        selected by the predicted class index.
        '''
        features = self.preprocess(timeseries)
        # The scaler and the classifier expect a 2-D (1, n_features) input
        features = features.reshape(1, -1)

        # Prescaling
        scaled_features = self.scaler.transform(features)

        # Prediction: the classifier was trained on label indices, so map
        # the predicted index back to the original label
        predictions = self.classifier.predict(scaled_features)
        return self.labels[predictions[0].astype(int)]
# Script entry point: builds a classifier and calls train on it.
# NOTE(review): the two positional arguments bind to the constructor's
# `dictionary` and `serialized` parameters; a string id and 'default' look
# like leftovers from an older signature -- confirm against MLClassifier.
# NOTE(review): train() is defined above with a required `dataset`
# parameter, so calling it without arguments raises TypeError as written.
if __name__=="__main__":
    clf = MLRandomForest('56b3c0f023cf8c29e049e89e','default')
    clf.train()