Class: Candle::HybridNER

Inherits:
Object
  • Object
show all
Defined in:
lib/candle/ner.rb

Overview

Hybrid NER that combines an optional ML model with rule-based (pattern and gazetteer) recognizers

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(model_id = nil, device: nil) ⇒ HybridNER

Returns a new instance of HybridNER.



280
281
282
283
284
# File 'lib/candle/ner.rb', line 280

# Set up a hybrid recognizer: an optional model-backed NER plus two
# initially empty lists of rule-based recognizers.
#
# @param model_id [Object, nil] handed to NER.from_pretrained when truthy;
#   presumably a model identifier string (confirm against NER.from_pretrained).
#   When nil or false, no model is loaded.
# @param device [Object, nil] forwarded unchanged as the `device:` keyword
#   of NER.from_pretrained
def initialize(model_id = nil, device: nil)
  # An `if` with no `else` evaluates to nil, so @model_ner is always
  # assigned, even when no model was requested.
  @model_ner =
    if model_id
      NER.from_pretrained(model_id, device: device)
    end
  @pattern_recognizers = []
  @gazetteer_recognizers = []
end

Instance Attribute Details

#gazetteer_recognizers ⇒ Object (readonly)

Returns the value of attribute gazetteer_recognizers.



278
279
280
# File 'lib/candle/ner.rb', line 278

# Reader for the gazetteer-based recognizers registered on this instance.
#
# @return [Array] the list that #add_gazetteer_recognizer appends to (it
#   starts out empty in #initialize); the stored object itself is returned,
#   not a copy
def gazetteer_recognizers
  @gazetteer_recognizers
end

#model_ner ⇒ Object (readonly)

Returns the value of attribute model_ner.



278
279
280
# File 'lib/candle/ner.rb', line 278

# Reader for the model-backed recognizer, if one was loaded.
#
# @return [Object, nil] whatever NER.from_pretrained returned when a truthy
#   model_id was given to #initialize; nil otherwise
def model_ner
  @model_ner
end

#pattern_recognizers ⇒ Object (readonly)

Returns the value of attribute pattern_recognizers.



278
279
280
# File 'lib/candle/ner.rb', line 278

# Reader for the pattern-based recognizers registered on this instance.
#
# @return [Array] the list that #add_pattern_recognizer appends to (it
#   starts out empty in #initialize); the stored object itself is returned,
#   not a copy
def pattern_recognizers
  @pattern_recognizers
end

Instance Method Details

#add_gazetteer_recognizer(entity_type, terms, **options) ⇒ Object

Add a gazetteer-based recognizer



294
295
296
297
298
# File 'lib/candle/ner.rb', line 294

# Register a gazetteer (term-list) recognizer for one entity type.
#
# All arguments are passed straight through to
# GazetteerEntityRecognizer.new; this method does not inspect them.
#
# @param entity_type [Object] label for the entities this recognizer produces
# @param terms [Object] the term collection for the gazetteer
# @param options [Hash] extra keyword options forwarded to the recognizer
# @return [self] the receiver, so registrations can be chained
def add_gazetteer_recognizer(entity_type, terms, **options)
  @gazetteer_recognizers << GazetteerEntityRecognizer.new(entity_type, terms, **options)
  self
end

#add_pattern_recognizer(entity_type, patterns) ⇒ Object

Add a pattern-based recognizer



287
288
289
290
291
# File 'lib/candle/ner.rb', line 287

# Register a pattern-based recognizer for one entity type.
#
# Both arguments are passed straight through to
# PatternEntityRecognizer.new; this method does not inspect them.
#
# @param entity_type [Object] label for the entities this recognizer produces
# @param patterns [Object] the patterns handed to the recognizer
#   (presumably regular expressions; confirm against PatternEntityRecognizer)
# @return [self] the receiver, so registrations can be chained
def add_pattern_recognizer(entity_type, patterns)
  @pattern_recognizers << PatternEntityRecognizer.new(entity_type, patterns)
  self
end

#extract_entities(text, confidence_threshold: 0.9) ⇒ Object

Extract entities using all recognizers



301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
# File 'lib/candle/ner.rb', line 301

# Run every configured recognizer over +text+ and merge the results.
#
# Candidates are gathered in a fixed order: the model (if one is loaded),
# then each pattern recognizer, then each gazetteer recognizer. The
# combined list is handed to merge_entities, whose result is returned.
#
# @param text [Object] the input passed unchanged to every recognizer
# @param confidence_threshold [Numeric] forwarded only to the model-based
#   recognizer; rule-based recognizers are called without it
# @return [Object] whatever merge_entities returns for the gathered list
def extract_entities(text, confidence_threshold: 0.9)
  candidates = []

  # The model is optional: it contributes only when one was loaded.
  candidates.concat(@model_ner.extract_entities(text, confidence_threshold: confidence_threshold)) if @model_ner

  # Rule-based recognizers: all pattern recognizers first, then gazetteers.
  [@pattern_recognizers, @gazetteer_recognizers].each do |recognizers|
    recognizers.each { |recognizer| candidates.concat(recognizer.recognize(text)) }
  end

  # Overlap resolution is delegated; per the original note, merge_entities
  # prefers the highest-confidence entity (it is defined elsewhere).
  merge_entities(candidates)
end