Class: Candle::PatternEntityRecognizer

Inherits:
Object
  • Object
show all
Defined in:
lib/candle/ner.rb

Overview

Pattern-based entity recognizer for custom entities

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(entity_type, patterns = []) ⇒ PatternEntityRecognizer

Returns a new instance of PatternEntityRecognizer.



165
166
167
168
# File 'lib/candle/ner.rb', line 165

# Build a recognizer that labels every pattern match with +entity_type+.
#
# The pattern list is copied (shallow) so that later calls to #add_pattern
# never mutate the caller's array, and so a frozen array (for example a
# frozen constant) can be passed safely.
#
# @param entity_type [Object] label attached to every recognized entity
# @param patterns [Array<String, Regexp>] initial patterns; defaults to none
def initialize(entity_type, patterns = [])
  @entity_type = entity_type
  # dup, not the original reference: #add_pattern appends in place.
  @patterns = patterns.dup
end

Instance Attribute Details

#entity_type ⇒ Object (readonly)

Returns the value of attribute entity_type.



163
164
165
# File 'lib/candle/ner.rb', line 163

# Read-only accessor for the entity type.
#
# @return [Object] the current value of @entity_type
def entity_type
  @entity_type
end

#patterns ⇒ Object (readonly)

Returns the value of attribute patterns.



163
164
165
# File 'lib/candle/ner.rb', line 163

# Read-only accessor for the pattern list.
#
# Returns the @patterns object itself, not a copy.
#
# @return [Object] the current value of @patterns
def patterns
  @patterns
end

Instance Method Details

#add_pattern(pattern) ⇒ Object

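Add a pattern (a Regexp, or a String that is compiled as a regular expression when matching) and return the recognizer so calls can be chained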



171
172
173
174
# File 'lib/candle/ner.rb', line 171

# Append +pattern+ to the pattern list and hand back the receiver,
# which allows chained calls.
#
# @param pattern [String, Regexp] pattern to register
# @return [self]
def add_pattern(pattern)
  tap { @patterns << pattern }
end

#recognize(text, tokenizer = nil) ⇒ Object

Recognize entities using patterns



177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# File 'lib/candle/ner.rb', line 177

# Scan +text+ with every registered pattern and report each non-empty match
# as an entity.
#
# String patterns are compiled with Regexp.new, so they are interpreted as
# regular-expression source rather than literal text. Results are grouped by
# pattern in registration order, and by position within each pattern.
# Zero-width matches (such as those produced by "" or /x*/) are skipped
# because they cover no text.
#
# @param text [String] text to search
# @param tokenizer [Object, nil] accepted but not used by this method
# @return [Array<Hash>] one hash per match with the keys "text", "label",
#   "start", "end", "confidence" and "source"; "start"/"end" are the
#   offsets reported by MatchData#offset for the whole match
def recognize(text, tokenizer = nil)
  entities = []

  @patterns.each do |pattern|
    regex = pattern.is_a?(Regexp) ? pattern : Regexp.new(pattern)

    text.scan(regex) do
      # The block argument of scan holds capture groups when the regex has
      # any, so the full match is always read from the MatchData instead.
      found = Regexp.last_match
      match_start, match_end = found.offset(0)
      next if match_start == match_end

      entities << {
        "text" => found[0],
        "label" => @entity_type,
        "start" => match_start,
        "end" => match_end,
        "confidence" => 1.0,
        "source" => "pattern"
      }
    end
  end

  entities
end