本文整理匯總了Python中extractor.Extractor.extract_emit_features方法的典型用法代碼示例。如果您正苦於以下問題:Python Extractor.extract_emit_features方法的具體用法?Python Extractor.extract_emit_features怎麽用?Python Extractor.extract_emit_features使用的例子?那麽,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類extractor.Extractor的用法示例。
在下文中一共展示了Extractor.extract_emit_features方法的1個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: Segmentor
# 需要導入模塊: from extractor import Extractor [as 別名]
# 或者: from extractor.Extractor import extract_emit_features [as 別名]
#.........這裏部分代碼省略.........
logging.warning("Max iteration number is not set or in valid state .")
logging.info("set it to default value .")
else :
self.max_iter = max_iter
logging.info("Max iteration is %d ." %(self.max_iter))
def _build_extractor(self) :
    '''Create the emit-feature extractor and store it on self.extractor.

    Reads self.inner_lexicon (presumably built beforehand by
    _build_inner_lexicon — TODO confirm call order against the caller).
    '''
    self.extractor = Extractor(self.inner_lexicon)
def _build_constrain(self) :
    '''Create a Constrain instance and store it on self.constrain.'''
    self.constrain = Constrain()
def _build_decoder(self) :
    '''Create a Decoder instance and store it on self.decoder.'''
    self.decoder = Decoder()
def _build_inner_lexicon_and_predict_model_from_saving_path(self , model_path) :
    '''Set up a model for prediction from a saved model file.

    Creates an empty Model on self.model, then calls self._load() with
    `model_path`; per the method name, _load presumably also restores the
    inner lexicon — _load is not visible here, so confirm before relying
    on that.

    :param model_path: path to the previously saved model file
    '''
    self.model = Model()
    self._load(model_path)
def _build_training_model(self) :
    '''Initialize an empty model and build it up from the training data.

    Steps: create an empty Model, populate its emit-feature space from
    self.training_unigrams_data via self.extractor, add the B/M/E/S
    labels to its label space, then build the transition matrix and
    weight space.

    Reads  : self.extractor, self.training_unigrams_data
    Writes : self.model
    Returns: None (logs an error and returns early if the extractor or
             the training data is missing).
    '''
    #! Init empty model
    logging.info("Initialize an empty model")
    self.model = Model()
    self.model.init_empty_model()
    #! build emit feature space
    logging.info("extract all training instance and build model feature space .")
    # NOTE(review): `self.model is None` can never be True here (it was just
    # assigned above); the guard effectively checks extractor/training data.
    if self.extractor is None or self.model is None or self.training_unigrams_data is None :
        logging.error("failed to build feature space : extractor or training data is missing .")
        return
    for atom_line in self.training_unigrams_data :
        emit_feature_list = self.extractor.extract_emit_features(atom_line)
        self.model.add_emit_feature_list2feature_space(emit_feature_list)
    #! build label space
    logging.info("add labels to model label space .")
    self.model.add_labels2label_space( (TAG_B , TAG_M , TAG_E , TAG_S) )
    #! build feature trans mat and weight
    logging.info("Initialize feature transition and weight space .")
    self.model.build_up_model()
def _build_inner_lexicon(self , threshold=1.) :
logging.info("build inner lexicon from training data .")
if self.raw_training_data is None :
logging.error('failed')
return
words_counter = Counter()
for raw_instance in self.raw_training_data :
#! len > 1 to ensure it is a lexicon
unicode_instance = [ WSAtomTranslator.trans_atom_gram_list2unicode_line(atom_instance_gram_list)
for atom_instance_gram_list in raw_instance if len(atom_instance_gram_list) > 1 ]
words_counter.update(unicode_instance)
total_freq = sum(words_counter.viewvalues())
lexicon_list = []
if threshold < 1. :
##! a fast and clearly implementation is using Counter.most_common(N) to return the threshold number words .
##! but it clearly will cause some words were added to lexicon dict while some ohter words with the same freq is cut off at tail . it is bad.
##! So do following logic to keep fair .
##! strategy changed ! the threshold freq is also accepted (orginal , we reject words with the edge frequnce )!
threshold_num = int( total_freq * threshold )
pre_freq = INF
words_has_same_freq = []
freq_counter = 0
for word , freq in words_counter.most_common() :