Class: Candle::HybridNER

Inherits:
Object
  • Object
show all
Defined in:
lib/candle/ner.rb

Overview

Hybrid NER that combines an ML model with rule-based (pattern and gazetteer) recognizers.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(model_id = nil, device: nil) ⇒ HybridNER

Returns a new instance of HybridNER.



303
304
305
306
307
# File 'lib/candle/ner.rb', line 303

# Sets up a hybrid recognizer with no rule-based recognizers registered yet.
#
# @param model_id [Object, nil] identifier passed to NER.from_pretrained;
#   when it is nil (or false) no model is loaded and @model_ner stays nil
# @param device [Object, nil] forwarded unchanged to NER.from_pretrained
def initialize(model_id = nil, device: nil)
  # The guarded expression evaluates to nil when no model_id is supplied,
  # so @model_ner is always assigned.
  @model_ner = (NER.from_pretrained(model_id, device: device) if model_id)

  # Each rule-based recognizer family gets its own, initially empty, list.
  @pattern_recognizers = []
  @gazetteer_recognizers = []
end

Instance Attribute Details

#gazetteer_recognizers ⇒ Object (readonly)

Returns the value of attribute gazetteer_recognizers.



301
302
303
# File 'lib/candle/ner.rb', line 301

# Read-only accessor for the gazetteer recognizers held by this instance.
#
# @return [Array] the value of @gazetteer_recognizers; the constructor starts
#   it as an empty array and #add_gazetteer_recognizer appends to it
def gazetteer_recognizers
  @gazetteer_recognizers
end

#model_ner ⇒ Object (readonly)

Returns the value of attribute model_ner.



301
302
303
# File 'lib/candle/ner.rb', line 301

# Read-only accessor for the model-backed recognizer.
#
# @return [Object, nil] the value of @model_ner; the constructor sets it to
#   the result of NER.from_pretrained, or to nil when no model_id was given
def model_ner
  @model_ner
end

#pattern_recognizers ⇒ Object (readonly)

Returns the value of attribute pattern_recognizers.



301
302
303
# File 'lib/candle/ner.rb', line 301

# Read-only accessor for the pattern recognizers held by this instance.
#
# @return [Array] the value of @pattern_recognizers; the constructor starts
#   it as an empty array and #add_pattern_recognizer appends to it
def pattern_recognizers
  @pattern_recognizers
end

Instance Method Details

#add_gazetteer_recognizer(entity_type, terms, **options) ⇒ Object

Add a gazetteer-based recognizer



317
318
319
320
321
# File 'lib/candle/ner.rb', line 317

# Registers a gazetteer-based recognizer for the given entity type.
#
# @param entity_type [Object] label passed to GazetteerEntityRecognizer.new
# @param terms [Object] term list passed to GazetteerEntityRecognizer.new
# @param options [Hash] extra keyword options forwarded as-is
# @return [self] the receiver, so registrations can be chained
def add_gazetteer_recognizer(entity_type, terms, **options)
  @gazetteer_recognizers << GazetteerEntityRecognizer.new(entity_type, terms, **options)
  self
end

#add_pattern_recognizer(entity_type, patterns) ⇒ Object

Add a pattern-based recognizer



310
311
312
313
314
# File 'lib/candle/ner.rb', line 310

# Registers a pattern-based recognizer for the given entity type.
#
# @param entity_type [Object] label passed to PatternEntityRecognizer.new
# @param patterns [Object] patterns passed to PatternEntityRecognizer.new
# @return [self] the receiver, so registrations can be chained
def add_pattern_recognizer(entity_type, patterns)
  @pattern_recognizers << PatternEntityRecognizer.new(entity_type, patterns)
  self
end

#extract_entities(text, confidence_threshold: 0.9) ⇒ Object

Extract entities using all recognizers



324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
# File 'lib/candle/ner.rb', line 324

# Runs every configured recognizer over +text+ and merges the findings.
#
# Candidates are gathered in a fixed order: the model (if one is loaded),
# then each pattern recognizer, then each gazetteer recognizer.
#
# @param text [Object] input handed to every recognizer
# @param confidence_threshold [Float] forwarded to the model's
#   extract_entities only; rule-based recognizers do not receive it
# @return [Object] whatever merge_entities returns for the collected candidates
def extract_entities(text, confidence_threshold: 0.9)
  candidates = []

  # The model step is optional: @model_ner is nil when no model was loaded.
  candidates.concat(@model_ner.extract_entities(text, confidence_threshold: confidence_threshold)) if @model_ner

  # Rule-based recognizers share the same #recognize interface, so both
  # families are walked by one loop, patterns before gazetteers.
  [@pattern_recognizers, @gazetteer_recognizers].each do |group|
    group.each { |recognizer| candidates.concat(recognizer.recognize(text)) }
  end

  # Overlap resolution is delegated to merge_entities, presumably keeping the
  # highest-confidence candidate; its implementation is not visible here.
  merge_entities(candidates)
end