shogun_svm_classifier.py :  » Math » Modular-toolkit-for-Data-Processing » MDP-2.6 » mdp » contrib » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Math » Modular toolkit for Data Processing 
Modular toolkit for Data Processing » MDP 2.6 » mdp » contrib » shogun_svm_classifier.py
import mdp
from mdp import numx

from svm_classifiers import _SVMClassifier,_LabelNormalizer

import shogun.Kernel as sgKernel
import shogun.Features as sgFeatures
import shogun.Classifier as sgClassifier

# SHOGUN 0.9 or newer is required: the wrapper below relies on SHOGUN's
# CClassifier::classify() method, which keeps this code much simpler.
#
# If the version query is not available on this build, fall back to an
# empty string so that the check below rejects it.
try:
    version = sgKernel._Kernel.Version_get_version_release()
except AttributeError:
    version = ""

# Only release strings beginning with 'v0.9' or 'v1.' are accepted.
if not version.startswith(('v0.9', 'v1.')):
    raise ImportError("We need at least SHOGUN version 0.9.")

# Silence the spurious "Perceptron algorithm did not converge" RuntimeWarning
# coming from shogun; all other warnings are left untouched.
import warnings
warnings.filterwarnings(
    action='ignore',
    message='.*Perceptron algorithm did not converge after.*',
    category=RuntimeWarning
)

# maybe integrate to the class
def is_shogun_classifier(test_classifier):
    """Tell whether 'test_classifier' is a class derived from
    shogun.Classifier.Classifier.

    Returns the result of the issubclass() test, or False when the test
    itself cannot be carried out (e.g. the argument is not a class).
    """
    try:
        is_subclass = issubclass(test_classifier, sgClassifier.Classifier)
    except (TypeError, NameError):
        # TypeError: the argument is not a class at all.
        # NameError: has to be caught as well for some swig reasons.
        is_subclass = False
    return is_subclass
    
# Every attribute of shogun.Classifier that passes is_shogun_classifier(),
# in the order reported by dir().
default_shogun_classifiers = [
    candidate
    for candidate in (getattr(sgClassifier, attr_name)
                      for attr_name in dir(sgClassifier))
    if is_shogun_classifier(candidate)
]

# Maps the value of each 'CT_*' constant of shogun.Classifier back to the
# constant's name, so a classifier type can be reported as a string.
shogun_classifier_types = dict(
    (getattr(sgClassifier, attr_name), attr_name)
    for attr_name in dir(sgClassifier)
    if attr_name.startswith("CT_")
)


class _OrderedDict(object):
    """Very simple version of an ordered dict."""
    def __init__(self, items):
        self._keys = []
        self._vals = []
        self.update(items)
    
    def update(self, other):
        """Update an ordered dict with new values."""
        for entry in other:
            if isinstance(other, dict):
                new_key = entry
                new_val = other[entry]
            else:
                new_key = entry[0]
                if len(entry) > 1: 
                    new_val = entry[1]
                else:
                    None
            if new_key in self._keys:
                i = self._keys.index(new_key)
                self._vals[i] = new_val
            else:
                self._keys.append(new_key)
                self._vals.append(new_val)
    
    @property
    def values(self):
        return self._vals


class Classifier(object):
    """This Classifier class hides the logic of setting a classifier and kernel
    from the main classifier node.
    It is currently not intended to be used on its own and might, in fact, get
    completely integrated into the ShogunSVMClassifier class.

    The wrapped SHOGUN class is kept in '_class' and its instance in
    '_instance'; both are None until set_classifier() succeeds.
    """

    def __init__(self):
        self._class = None
        self._instance = None

    def set_classifier(self, classifier, args=None):
        """Sets and initialises the classifier. If a classifier is reset
        by the user, the parameters will have to be set again.
        'classifier' can be a string, a subclass of shogun.Classifier
        or an instance of such a class.
        Note that some classifiers require initialisation arguments
        while others don't. Also note that neither labels nor feature
        vectors should be given with 'args'.

        classifier -- the SHOGUN classifier to use
        args       -- the list of arguments needed for the SHOGUN
                      constructor method of the classifier

        Raises mdp.NodeException if the classifier is unknown, is not a
        SHOGUN classifier, cannot be instantiated, or reports the type
        "CT_NONE".
        """
        if args is None:
            args = []
        self._class = None
        self._instance = None

        # If name is a classifier instance: Take it
        if isinstance(classifier, sgClassifier.Classifier):
            self._class = classifier.__class__
            self._instance = classifier

        # If name is a class: Make an instance
        elif isinstance(classifier, type):
            if not is_shogun_classifier(classifier):
                # Use the name of the rejected class itself; self._class is
                # still None at this point.
                msg = ("The classifier '%s' is no subclass of CClassifier."
                       % classifier.__name__)
                raise mdp.NodeException(msg)
            try:
                self._instance = classifier(*args)
            except AttributeError:
                msg = ("Library '%s' could not be instantiated. Abstract class?"
                       % classifier)
                raise mdp.NodeException(msg)
            self._class = classifier

        # If classifier is a string: Check, if it's the name of a default library
        elif isinstance(classifier, basestring):
            wanted = classifier.lower()
            candidates = [cls for cls in default_shogun_classifiers
                          if cls.__name__.lower() == wanted]

            if not candidates:
                msg = ("Library '%s' is not a known subclass of CClassifier."
                       % classifier)
                raise mdp.NodeException(msg)

            # Take the first classifier which works
            for cls in candidates:
                try:
                    self._instance = cls(*args)
                except AttributeError:
                    # we might have a virtual class here
                    continue
                self._class = cls
                break
            if self._instance is None:
                msg = ("Library '%s' could not be instantiated. Abstract class?"
                       % classifier)
                raise mdp.NodeException(msg)

        # Nothing matched: 'classifier' is neither an instance, a class nor
        # a string.
        if self._class is None or self._instance is None:
            msg = "The classifier '%s' is not supported." % classifier
            raise mdp.NodeException(msg)

        if self.classifier_type() == "CT_NONE":
            msg = "The classifier '%s' is not valid." % classifier
            raise mdp.NodeException(msg)

    def classifier_type(self):
        """Returns the SHOGUN classifier type as a string."""
        return shogun_classifier_types[self._instance.get_classifier_type()]

    def set_param(self, param, *value):
        """Sets parameters for the classifier.

        This calls set_<param>(*value) on the classifier instance.

        param -- name of the parameter, without the 'set_' prefix
        value -- the positional arguments handed to the setter
        """
        # Non-standard cases
        if param == "C" and len(value) == 1:
            # set_C needs two arguments, but if we get only one,
            # we call set_C(arg, arg)
            value = value * 2
        # get the parameter setting method and call it
        setter = getattr(self._instance, "set_" + param)
        setter(*value)

    def get_param(self, param, *args):
        """Returns the parameter with the given name.

        This calls get_<param>(*args) on the classifier instance.
        """
        getter = getattr(self._instance, "get_" + param)
        return getter(*args)

    def set_train_features(self, features, labels):
        """Hand the training features and labels to the classifier.

        Linear classifiers receive the features directly; kernel machines
        have their kernel initialised with the features instead. Any other
        kind of classifier raises mdp.NodeException. The labels are set on
        the instance afterwards.
        """
        if issubclass(self._class, sgClassifier.LinearClassifier):
            self._instance.set_features(features)
        elif issubclass(self._class, sgClassifier.CKernelMachine):
            try:
                self.kernel.init(features, features)
            except AttributeError:
                # e.g. no kernel object has been assigned yet
                msg = "You must assign a kernel before training."
                raise mdp.NodeException(msg)
        else:
            msg = "Sorry, SHOGUN classifiers of this type are not yet implemented"
            raise mdp.NodeException(msg)

        self._instance.set_labels(labels)

    def train(self):
        """Train the wrapped classifier instance."""
        self._instance.train()

    def label(self, test_features):
        """Classify 'test_features' and return the resulting labels."""
        return self._instance.classify(test_features).get_labels()

    @property
    def takes_kernel(self):
        """Returns true, if the current classifier is a kernel machine."""
        return issubclass(self._class, sgClassifier.CKernelMachine)

    def _get_kernel(self):
        """Retrieve the currently set kernel from the classifier instance."""
        try:
            return self._instance.get_kernel()
        except AttributeError:
            msg = "Error retrieving kernel. The classifier might not support kernels."
            raise mdp.NodeException(msg)

    def _set_kernel(self, kernel):
        """Set the kernel in the current classifier instance"""
        try:
            self._instance.set_kernel(kernel)
        except AttributeError:
            msg = "Error setting kernel. The classifier might not support kernels."
            raise mdp.NodeException(msg)

    kernel = property(_get_kernel, _set_kernel)

    def __str__(self):
        """Name of the classifier class, plus the kernel name if one is
        available."""
        description = self._instance.__class__.__name__
        try:
            description = description + " with %s kernel" % self.kernel.get_name()
        except (mdp.NodeException, AttributeError):
            # no kernel support or no kernel set: report the class name only
            pass
        return description


class ShogunSVMClassifier(_SVMClassifier):
    """The ShogunSVMClassifier works as a wrapper class for accessing
    the SHOGUN machine learning toolbox for support vector machines.

    Most kernel machines and linear classifier should work with this class.

    Currently, distance machines such as the K-means classifier
    are not supported yet.

    Information to parameters and additional options can be found on
    http://www.shogun-toolbox.org/

    Note that some parts in this classifier might receive some
    refinement in the future.
    """

    # Classifier options applied to every new instance unless overridden by
    # the 'classifier_options' argument of __init__.
    default_parameters = {
        'C': 1,
        'epsilon': 1e-3,
    }

    # Swig-code does not work with named parameters, so we have to define an order
    kernel_parameters = {
        # Simple float64t kernels
        'Chi2Kernel': [('size', 10), ('width', 1.4)],
        'GaussianKernel': [('size', 10), ('width', 1.9)],
        'LinearKernel': [],
        'PolyKernel': [('size', 10), ('degree', 3), ('inhomogene', True)],
        'PyramidChi2': [('size',), ('num_cells2',),
                        ('weights_foreach_cell2',), ('width_computation_type2',),
                        ('width2',)],
        'SigmoidKernel': [('size', 10), ('gamma', 1), ('coef0', 0)]
    }

    def __init__(self, classifier="libsvmmulticlass", classifier_arguments=(),
                 classifier_options=None,
                 num_threads="autodetect", input_dim=None, dtype=None):
        """
        Initialises a new ShogunSVMClassifier.

        Keyword arguments:

            classifier  -- The classifier to use
            classifier_arguments -- Arguments needed for the constructor of the classifier
            classifier_options -- Options for the classifier
            num_threads -- The number of threads, SHOGUN should use
                           can be set to "autodetect", then SHOGUN will use
                           the number of CPU cores.
                           Attention: this could crash on windows

        """
        super(ShogunSVMClassifier, self).__init__(input_dim=input_dim, dtype=dtype)

        if classifier_options is None:
            classifier_options = {}

        self.classifier = Classifier()
        self.classifier.set_classifier(classifier, classifier_arguments)

        # Work on a copy: updating the class-level default_parameters dict
        # in place would leak one instance's options into all later ones.
        self.classifier_options = dict(self.default_parameters)
        self.classifier_options.update(classifier_options)

        for param, value in self.classifier_options.items():
            try:
                self.set_classifier_param(param, value)
            except Exception:
                # Deliberately best effort: not every classifier provides a
                # setter for every option (the defaults included), so an
                # option that cannot be applied is skipped.
                pass

        self._num_threads = num_threads
        self._set_num_threads()

    def _set_num_threads(self):
        """Apply the configured number of threads to the classifier,
        resolving "autodetect" to the CPU count reported by SHOGUN."""
        # init number of threads
        if self._num_threads == "autodetect":
            try:
                self._num_threads = self.classifier._instance.parallel.get_num_cpus()
            except SystemError:
                # We're helping shogun here
                self._num_threads = 1
        self.classifier._instance.parallel.set_num_threads(self._num_threads)

    def set_classifier_param(self, param, *value):
        """Sets parameters for the classifier.

        param -- name of the parameter (without the 'set_' prefix)
        value -- the arguments passed on to the classifier's setter
        """
        self.classifier.set_param(param, *value)

    def set_kernel(self, kernel_name, kernel_options=None):
        """Sets the Kernel along with options.
        'options' must be a tuple with the arguments of the kernel constructor
        in SHOGUN.
        We try to guess it right in many cases but in general, you will have to
        consult the SHOGUN documentation.

        kernel    --    The kernel to use. Default parameters are specified for
                            "PolyKernel"
                            "GaussianKernel"
                            "LinearKernel"
                            "SigmoidKernel"
                        Further kernels are possible if they are included in
                        SHOGUN and if kernel_options provides the correct init
                        arguments.
        kernel_options -- For known kernels, a dict specifying the options is
                          possible. Options not included take a default value.
                          Unknown kernels need an ordered list of constructor
                          arguments.

        Raises mdp.NodeException if named options are given for a kernel
        without a default parameter order, or if SHOGUN rejects the
        constructor arguments with a NotImplementedError.
        """
        if kernel_options is None:
            kernel_options = {}

        known_kernels = ShogunSVMClassifier.kernel_parameters
        if kernel_name in known_kernels \
            and not isinstance(kernel_options, list):
            # Known kernel: merge the given options into the ordered defaults.
            default_opts = _OrderedDict(known_kernels[kernel_name])
            default_opts.update(kernel_options)
            options = default_opts.values
        elif isinstance(kernel_options, dict):
            # Unknown kernel: named options cannot be put into constructor
            # order, so only "no options at all" is acceptable here.
            if kernel_options:
                msg = ("Kernel '%s' has no default parameter order; pass "
                       "kernel_options as an ordered list of constructor "
                       "arguments." % kernel_name)
                raise mdp.NodeException(msg)
            options = []
        else:
            # An explicit, ordered sequence of constructor arguments.
            options = list(kernel_options)

        kernel_meth = getattr(sgKernel, kernel_name)
        try:
            kernel = kernel_meth(*options)
        except NotImplementedError as err:
            msg = ("Tried to call %s with arguments %s\n" %
                   (kernel_meth.__module__ + '.' + kernel_meth.__name__,
                    repr(tuple(options))) +
                   "Got the following error message:\n" + str(err))
            raise mdp.NodeException(msg)
        self.classifier.kernel = kernel

    def _stop_training(self):
        """Normalise the collected labels, wrap labels and data in SHOGUN
        containers and train the classifier."""
        super(ShogunSVMClassifier, self)._stop_training()
        self.normalizer = _LabelNormalizer(self.labels)
        labels = self.normalizer.normalize(self.labels)
        # shogun expects float labels
        labels = sgFeatures.Labels(labels.astype(float))

        features = sgFeatures.RealFeatures(self.data.transpose())

        self.classifier.set_train_features(features, labels)
        self.classifier.train()

    def training_set(self, ordered=False):
        """Shows the set of data that has been inserted to be trained.

        ordered -- if true, return a dict mapping each label to the list of
                   its data items; otherwise return the (label, data) pairs.
        """
        pairs = zip(self.labels, self.data)
        if not ordered:
            return pairs

        grouped = dict((label, []) for label in set(self.labels))
        for label, sample in pairs:
            grouped[label].append(sample)
        return grouped

    def _label(self, x):
        """Classify the input data 'x'
        """
        test_features = sgFeatures.RealFeatures(x.transpose())

        labels = self.classifier.label(test_features)

        # Map the labels back to the user's original ones when a normalizer
        # is set.
        if self.normalizer:
            return self.normalizer.revert(labels)
        else:
            return labels
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.