Package nltk_lite :: Package contrib :: Package classifier :: Module discretisedattribute
[hide private]
[frames | no frames]

Source Code for Module nltk_lite.contrib.classifier.discretisedattribute

 1  # Natural Language Toolkit Discretized attribute 
 2  #    Capable of mapping continuous values to discrete ones 
 3  # 
 4  # Author: Sumukh Ghodke <sumukh dot ghodke at gmail dot com> 
 5  # 
 6  # URL: <http://nltk.sf.net> 
 7  # This software is distributed under GPL, for license information see LICENSE.TXT 
 8  from nltk_lite.contrib.classifier import attribute, autoclass 
 9  from nltk_lite.contrib.classifier.exceptions import invaliddataerror as inv 
10   
class DiscretisedAttribute(attribute.Attribute):
    """Discrete attribute that maps continuous values onto generated labels.

    Each entry of ``ranges`` is paired, by position, with one label. The
    labels are the ``name`` of successive autoclass values, starting at
    ``autoclass.FIRST`` and advancing with ``next()``.
    """

    def __init__(self, name, ranges, index):
        """Create the attribute and generate one label per range.

        name   -- attribute name, stored as ``self.name``
        ranges -- sequence of range objects, stored as ``self.ranges``; it
                  must support ``len()``
        index  -- stored as ``self.index``
        """
        self.name = name
        labels = []
        current = autoclass.FIRST
        remaining = len(ranges)
        # Take the current label's name, then advance; the generator is
        # advanced once per range, including after the last label is taken.
        while remaining > 0:
            labels.append(current.name)
            current = current.next()
            remaining -= 1
        self.values = labels
        self.index = index
        self.type = attribute.DISCRETE
        self.ranges = ranges

    def mapping(self, continuous_value):
        """Return the label of the range that includes ``continuous_value``.

        Raises inv.InvalidDataError when binary_search reports (with -1)
        that no range includes the value.
        """
        position = binary_search(self.ranges, continuous_value)
        if position != -1:
            return self.values[position]
        message = ''.join([
            'Value ', str(continuous_value),
            ' of type ', str(type(continuous_value)),
            ' not found in any of the ranges ', self.__ranges_as_string(),
        ])
        raise inv.InvalidDataError(message)

    def __ranges_as_string(self):
        """Return the string form of a list holding ``str()`` of each range."""
        return str([str(each) for each in self.ranges])

    def __str__(self):
        """Return the base Attribute text followed by the ranges text."""
        base_text = attribute.Attribute.__str__(self)
        return base_text + self.__ranges_as_string()
36
def binary_search(ranges, value):
    """Return the index of the range in ``ranges`` that includes ``value``.

    ranges -- indexable sequence of objects exposing ``includes(value)`` and
              a ``lower`` attribute. NOTE(review): the halving strategy
              presumably relies on the ranges being sorted by ``lower`` and
              not overlapping -- confirm against the callers.
    value  -- the value to locate

    Returns the matching index, or -1 when no range includes the value
    (this covers an empty ``ranges`` too).
    """
    low, high = 0, len(ranges) - 1
    while low <= high:
        # Floor division keeps ``mid`` an integer index; plain ``/`` yields
        # a float under true division and breaks the subscript below.
        mid = low + (high - low) // 2
        candidate = ranges[mid]
        if candidate.includes(value):
            return mid
        if candidate.lower > value:
            # value lies before this range: search lower half
            high = mid - 1
        else:
            # value lies after this range: search upper half
            low = mid + 1
    return -1
50