class Ai4r::Classifiers::Prism

Introduction

This is an implementation of the PRISM algorithm (Cendrowska, 1987). Given a set of preclassified examples, it builds a set of rules to predict the class of other instances.

  1. Cendrowska, J. (1987). PRISM: An algorithm for inducing modular rules. International Journal of Man-Machine Studies, 27(4):349-370.

Attributes

data_set[R]
rules[R]

Public Instance Methods

build(data_set) click to toggle source

Build a new Prism classifier. You must provide a DataSet instance as parameter. The last attribute of each item is considered as the item class.

# File lib/ai4r/classifiers/prism.rb, line 34
# Trains the classifier from +data_set+ and stores the induced rules in @rules.
#
# data_set must respond to check_not_empty, build_domains and data_items.
# The last element of every data item is treated as its class value.
#
# For each class value (taken from the last domain), rules are induced one at
# a time; after each rule, the items it covers are dropped from the working
# set, until no item of that class is left.
#
# Returns self, so calls can be chained.
def build(data_set)
  data_set.check_not_empty
  @data_set = data_set
  class_values = @data_set.build_domains.last
  # Work on a copy so the data set's own item list is never touched.
  remaining = @data_set.data_items.to_a.dup
  @rules = []
  class_values.each do |class_value|
    while has_class_value(remaining, class_value)
      new_rule = build_rule(class_value, remaining)
      @rules << new_rule
      # Keep only the items the new rule does not cover.
      remaining = remaining.reject do |item|
        matches_conditions(item, new_rule[:conditions])
      end
    end
  end
  self
end
eval(instace) click to toggle source

You can evaluate new data, predicting its class. e.g.

classifier.eval(['New York',  '<30', 'F'])  # => 'Y'
# File lib/ai4r/classifiers/prism.rb, line 53
# Predicts the class of +instace+ using the induced rules.
#
# Rules are tried in the order they are stored in @rules; the class value of
# the first rule whose conditions all match is returned.
# Returns nil when no rule matches.
def eval(instace)
  winner = @rules.find do |candidate|
    matches_conditions(instace, candidate[:conditions])
  end
  winner && winner[:class_value]
end
get_rules() click to toggle source

This method returns the generated rules in ruby code. e.g.

classifier.get_rules
  # => if age_range == '<30' then marketing_target = 'Y'
 elsif age_range == '>80' then marketing_target = 'Y'
 elsif city == 'Chicago' and age_range == '[30-50)' then marketing_target = 'Y'
 else marketing_target = 'N'
 end

It is a nice way to inspect induction results, and also to execute them:

age_range = '[30-50)'
city = 'New York'
eval(classifier.get_rules) 
puts marketing_target
 'Y'
# File lib/ai4r/classifiers/prism.rb, line 76
# Renders the induced rules as a Ruby if/elsif/else statement, one clause per
# line.
#
# The first rule becomes the +if+ branch, every rule between the first and the
# last becomes an +elsif+ branch, and, when there is more than one rule, the
# last rule becomes the unconditional +else+ branch.
#
# Returns the generated source as a String.
def get_rules
  head = @rules.first
  lines = ["if #{join_terms(head)} then #{then_clause(head)}"]
  @rules[1...-1].each do |middle|
    lines << "elsif #{join_terms(middle)} then #{then_clause(middle)}"
  end
  lines << "else #{then_clause(@rules.last)}" if @rules.size > 1
  lines << "end"
  lines.join("\n")
end

Protected Instance Methods

better_pt(pt, best_pt) click to toggle source

pt = [p, t], where p = occurrences of the attribute value in instances classified as class_value, and t = total occurrences of the attribute value. A pt is better if:

1- its ratio is higher
2- its ratio is equal, and has a higher p
# File lib/ai4r/classifiers/prism.rb, line 173
# Tells whether the [p, t] pair +pt+ beats the current best pair +best_pt+.
#
# A pair with t == 0 never wins; any pair with t != 0 beats a best pair whose
# t is 0. Otherwise the pair with the higher p/t ratio wins, and on equal
# ratios the pair with the higher p wins.
#
# Returns true or false.
def better_pt(pt, best_pt)
  hits, total = pt[0], pt[1]
  best_hits, best_total = best_pt[0], best_pt[1]
  return false if total == 0
  return true if best_total == 0

  # Compare hits/total against best_hits/best_total by cross-multiplying,
  # which avoids any division.
  candidate_score = hits * best_total
  best_score = best_hits * total
  return true if candidate_score > best_score

  candidate_score == best_score && hits > best_hits
end
build_freq_table(rule_instances, attributes, class_value) click to toggle source

Returns a structure with the following format:

> {attr1_label => { :attr1_value1 => [p, t], attr1_value2 => [p, t], … },

attr2_label => { :attr2_value1 => [p, t], attr2_value2 => [p, t], ... },
...
}

where p is the number of instances classified as class_value with that attribute value, and t is the total number of instances with that attribute value

# File lib/ai4r/classifiers/prism.rb, line 135
# Counts, for every attribute in +attributes+ and every value that attribute
# takes in +rule_instances+, a pair [p, t]:
#   p - number of instances with that value whose last element equals
#       class_value
#   t - total number of instances with that value
#
# Returns a Hash of the form { attr_label => { attr_value => [p, t] } }.
# The inner hashes default to [0, 0] for values that were never seen.
def build_freq_table(rule_instances, attributes, class_value)
  rule_instances.each_with_object({}) do |row, table|
    hit = row.last == class_value
    attributes.each do |label|
      counts = (table[label] ||= Hash.new([0, 0]))
      value = get_attr_value(row, label)
      positives, total = counts[value]
      # Always store a fresh pair so the shared default array is never mutated.
      counts[value] = [hit ? positives + 1 : positives, total + 1]
    end
  end
end
build_rule(class_value, instances) click to toggle source
# File lib/ai4r/classifiers/prism.rb, line 112
# Induces a single rule predicting +class_value+ from +instances+.
#
# Starting from an empty condition set, the attribute/value pair with the best
# p/t ratio (see get_condition) is added repeatedly, narrowing the covered
# instances each time, until the rule covers only instances of class_value or
# every attribute has been used.
#
# Each attribute is used at most once per rule: once a condition on it has
# been added it is removed from the candidate list. Without this, data that
# contains items with identical attribute values but different classes can
# never yield a perfect rule and the candidate list never empties, so the loop
# would not terminate.
#
# Neither +instances+ nor the data set's label list is modified.
#
# Returns a Hash { :class_value => class_value, :conditions => { label => value } }.
def build_rule(class_value, instances)
  rule = { :class_value => class_value, :conditions => {} }
  rule_instances = instances.dup
  # All labels except the last one, which names the class.
  attributes = @data_set.data_labels[0...-1]
  # rule_instances always holds exactly the instances covered by the current
  # conditions, so checking perfection on it is enough.
  until attributes.empty? || is_perfect(rule_instances, rule)
    freq_table = build_freq_table(rule_instances, attributes, class_value)
    condition = get_condition(freq_table)
    break if condition.nil? # nothing left to specialise on

    rule[:conditions].merge!(condition)
    attributes -= condition.keys
    rule_instances = rule_instances.select do |data|
      matches_conditions(data, condition)
    end
  end
  rule
end
get_attr_value(data, attr) click to toggle source
# File lib/ai4r/classifiers/prism.rb, line 88
# Returns the value that the item +data+ holds for the attribute labelled
# +attr+, using the data set to translate the label into a position.
def get_attr_value(data, attr)
  position = @data_set.get_index(attr)
  data[position]
end
get_condition(freq_table) click to toggle source

Returns a single conditional term, {attrN_label => attrN_valueM}, selecting the attribute value with the highest p/t ratio (occurrences of the attribute value classified as class_value / total occurrences of the attribute value).
# File lib/ai4r/classifiers/prism.rb, line 153
# Picks the best single condition out of +freq_table+
# ({ attr_label => { attr_value => [p, t] } }).
#
# Candidates are visited in the table's iteration order and compared with
# better_pt; a later candidate replaces the current best only when it is
# strictly better, so the earliest candidate wins ties.
#
# Returns { attr_label => attr_value }, or nil when no candidate qualifies.
def get_condition(freq_table)
  candidates = freq_table.flat_map do |label, value_counts|
    value_counts.map { |value, counts| [label, value, counts] }
  end
  winner = candidates.inject(nil) do |best, candidate|
    # Until something has been picked, compare against the empty pair [0, 0].
    reference = best ? best[2] : [0, 0]
    better_pt(candidate[2], reference) ? candidate : best
  end
  winner && { winner[0] => winner[1] }
end
has_class_value(instances, class_value) click to toggle source
# File lib/ai4r/classifiers/prism.rb, line 92
# Returns true when at least one item in +instances+ has +class_value+ as its
# last element, false otherwise.
def has_class_value(instances, class_value)
  instances.any? { |item| item.last == class_value }
end
is_perfect(instances, rule) click to toggle source
# File lib/ai4r/classifiers/prism.rb, line 97
# Returns true when +rule+ covers no item of a different class, i.e. no item
# in +instances+ both matches the rule's conditions and has a last element
# other than the rule's class value. Returns false otherwise.
def is_perfect(instances, rule)
  target = rule[:class_value]
  instances.none? do |item|
    item.last != target && matches_conditions(item, rule[:conditions])
  end
end
join_terms(rule) click to toggle source
# File lib/ai4r/classifiers/prism.rb, line 182
# Renders the conditions of +rule+ as a Ruby boolean expression, e.g.
#   city == 'Chicago' and age_range == '[30-50)'
#
# Each condition becomes "label == 'value'" (values are wrapped in single
# quotes as-is, without escaping) and the terms are joined with " and ".
#
# A rule without conditions matches everything, so it is rendered as the
# literal "true". An empty string here would make the caller produce
# "if  then ...", which is not valid Ruby.
#
# Returns the expression as a String.
def join_terms(rule)
  conditions = rule[:conditions]
  return 'true' if conditions.empty?

  conditions.map do |attr_label, attr_value|
    "#{attr_label} == '#{attr_value}'"
  end.join(' and ')
end
matches_conditions(data, conditions) click to toggle source
# File lib/ai4r/classifiers/prism.rb, line 105
# Returns true when the item +data+ satisfies every entry of +conditions+
# ({ attr_label => attr_value }), i.e. no attribute named in the conditions
# holds a different value. An empty condition set matches any item.
def matches_conditions(data, conditions)
  conditions.none? do |label, expected|
    get_attr_value(data, label) != expected
  end
end
then_clause(rule) click to toggle source
# File lib/ai4r/classifiers/prism.rb, line 190
# Renders the consequence of +rule+ as a Ruby assignment: the data set's last
# label is assigned the rule's class value wrapped in single quotes, e.g.
#   marketing_target = 'Y'
def then_clause(rule)
  class_label = @data_set.data_labels.last
  format("%s = '%s'", class_label, rule[:class_value])
end