Python Open Source » Math » Modular toolkit for Data Processing » MDP 2.6 » mdp » classifier_node.py

import mdp
from mdp import Node, numx
import operator


class ClassifierNode(Node):
    """A ClassifierNode can be used for classification tasks that should not
    interfere with the normal execution flow. A Reason for that may be that the
    labels used for classification are not in the normal feature space but in
    label space.
    """
    
    ### Methods to be implemented by the subclasses
    
    def _label(self, x, *args, **kwargs):
        raise NotImplementedError
    
    def _prob(self, x, *args, **kwargs):
        raise NotImplementedError
    
    ### User interface to the overwritten methods
    
    def label(self, x, *args, **kwargs):
        """Returns an array with best class labels.
        
        By default, subclasses should overwrite _label to implement
        their label. The docstring of the '_label' method
        overwrites this docstring.
        """
        self._pre_execution_checks(x)
        return self._label(self._refcast(x), *args, **kwargs)
  
    def prob(self, x, *args, **kwargs):
        """Returns the probability for each datapoint and label
        (e.g., [{1:0.1, 2:0.0, 3:0.9}, {1:1.0, 2:0.0, 3:0.0}, ...])

        By default, subclasses should overwrite _prob to implement
        their prob. The docstring of the '_prob' method
        overwrites this docstring.        
        """
        self._pre_execution_checks(x)
        return self._prob(self._refcast(x), *args, **kwargs)
    
    def rank(self, x, threshold=None):
        """Returns ordered list with all labels ordered according to prob(x)
        (e.g., [[3 1 2], [2 1 3], ...]).
        
        The optional threshold parameter is used to exclude labels having equal
        or less probability. E.g. threshold=0 excludes all labels with zero
        probability.
        """
        all_ranking = []
        prob = self.prob(x)
        for p in prob:
            items = p.items()
            if threshold is not None:
                # keep only the labels with probability above the threshold
                items = [(k, v) for k, v in items if v > threshold]
            ranking = sorted(items, key=operator.itemgetter(1), reverse=True)
            all_ranking.append([label for label, _ in ranking])
        return all_ranking
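

# ---------------------------------------------------------------------------
# Editor's usage sketch, not part of the original MDP 2.6 module: a minimal,
# hypothetical ClassifierNode subclass illustrating the _label/_prob contract.
# The class name and its decision rule are invented for illustration only.
class _ExampleSignClassifier(ClassifierNode):
    """Toy classifier: label each sample by the sign of its first feature."""

    def is_trainable(self):
        # this fixed rule needs no training phase
        return False

    def _label(self, x):
        # +1 for a non-negative first feature, -1 otherwise
        return numx.where(x[:, 0] >= 0, 1, -1)

    def _prob(self, x):
        # degenerate distributions: all probability mass on the chosen label
        return [{1: 1.0, -1: 0.0} if row[0] >= 0 else {1: 0.0, -1: 1.0}
                for row in x]

# Example calls (kept as comments so that importing the module has no
# side effects):
#   node = _ExampleSignClassifier()
#   node.label(numx.array([[0.5], [-2.0]]))              # -> array([ 1, -1])
#   node.rank(numx.array([[0.5], [-2.0]]), threshold=0)  # -> [[1], [-1]]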


class ClassifierCumulator(ClassifierNode):
    """A ClassifierCumulator is a Node whose training phase simply collects
    all input data and labels. In this way it is possible to easily implement
    batch-mode learning.

    The data is accessible in the attribute 'self.data' after
    the beginning of the '_stop_training' phase. 'self.tlen' contains
    the number of data points collected.
    'self.labels' contains the assigned label to each data point.
    """

    def __init__(self, input_dim=None, output_dim=None, dtype=None):
        super(ClassifierCumulator, self).__init__(input_dim, output_dim, dtype)
        self.data = []
        self.labels = []
        self.tlen = 0

    def _check_train_args(self, x, labels):
        super(ClassifierCumulator, self)._check_train_args(x, labels)
        if (isinstance(labels, (list, tuple, numx.ndarray)) and
            len(labels) != x.shape[0]):
            msg = ("The number of labels must be equal to the number of "
                   "datapoints (%d != %d)" % (len(labels), x.shape[0]))
            raise mdp.TrainingException(msg)

    def _train(self, x, labels):
        """Cumulate all input data in a one dimensional list."""
        self.tlen += x.shape[0]
        self.data.extend(x.ravel().tolist())

        # if labels is a single number, all samples in x share that label
        if not isinstance(labels, (list, tuple, numx.ndarray)):
            labels = [labels] * x.shape[0]

        # numx.ravel also accepts plain lists and tuples, unlike ndarray.ravel
        self.labels.extend(numx.ravel(labels).tolist())

    def _stop_training(self, *args, **kwargs):
        """Transform the data and labels lists to array objects and reshape them."""
        self.data = numx.array(self.data, dtype=self.dtype)
        self.data.shape = (self.tlen, self.input_dim)
        self.labels = numx.array(self.labels, dtype=self.dtype)
        self.labels.shape = (self.tlen,)
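

# ---------------------------------------------------------------------------
# Editor's usage sketch, not part of the original MDP 2.6 module: a minimal,
# hypothetical batch-mode classifier built on ClassifierCumulator. During
# '_stop_training' it computes one mean vector per label from the cumulated
# data, then labels new samples by the nearest mean. The class name and the
# algorithm are invented for illustration only.
class _ExampleNearestMeanClassifier(ClassifierCumulator):
    """Toy nearest-mean classifier using the cumulated data and labels."""

    def _stop_training(self, *args, **kwargs):
        # let the cumulator reshape self.data and self.labels first
        super(_ExampleNearestMeanClassifier, self)._stop_training(*args,
                                                                  **kwargs)
        self.means = {}
        for label in set(self.labels.tolist()):
            # mean of all cumulated samples carrying this label
            self.means[label] = self.data[self.labels == label].mean(axis=0)

    def _label(self, x):
        labels = []
        for row in x:
            # pick the label whose mean is closest in Euclidean distance
            dists = [(((row - mean) ** 2).sum(), label)
                     for label, mean in self.means.items()]
            labels.append(min(dists)[1])
        return labels

# Example calls (kept as comments so that importing the module has no
# side effects):
#   node = _ExampleNearestMeanClassifier()
#   node.train(numx.array([[0.], [1.], [9.], [10.]]), [0, 0, 1, 1])
#   node.stop_training()
#   node.label(numx.array([[0.5], [8.0]]))   # -> [0.0, 1.0]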
        
