本文整理匯總了Python中SolrClient.SolrClient.get_industry_term_field_analysis方法的典型用法代碼示例。如果您正苦於以下問題:Python SolrClient.get_industry_term_field_analysis方法的具體用法?Python SolrClient.get_industry_term_field_analysis怎麼用?Python SolrClient.get_industry_term_field_analysis使用的例子?那麼,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類SolrClient.SolrClient的用法示例。
在下文中一共展示了SolrClient.get_industry_term_field_analysis方法的1個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: TaggingProcessor
# 需要導入模塊: from SolrClient import SolrClient [as 別名]
# 或者: from SolrClient.SolrClient import get_industry_term_field_analysis [as 別名]
#.........這裏部分代碼省略.........
if not self.dict_tagging:
self._logger.info("dictionary tagging is set to false. Disable dictionary tagging.")
return
self._logger.info("Dictionary tagging is enabled.")
try:
self.dictionary_file = config['DICTIONARY_TAGGER']['dictionary_file']
except KeyError:
self._logger.exception("Oops! 'dict_tagging' is set incorrectly in config file. Default to use default csv file in config dir.")
self.dictionary_file = os.path.join(os.path.dirname(__file__), '..','config','Steel-Terminology-Tata-Steel.csv')
try:
self.dict_tagger_fuzzy_matching=config['DICTIONARY_TAGGER']['dict_tagger_fuzzy_matching']
if "true" == self.dict_tagger_fuzzy_matching.lower():
self.dict_tagger_fuzzy_matching = True
elif "false" == self.dict_tagger_fuzzy_matching.lower():
self.dict_tagger_fuzzy_matching = False
except KeyError:
self._logger.exception("Oops! 'dict_tagger_fuzzy_matching' is set incorrectly in config file. Default to False.")
self.dict_tagger_fuzzy_matching=False
try:
self.dict_tagger_sim_threshold=float(config['DICTIONARY_TAGGER']['dict_tagger_sim_threshold'])
except KeyError:
self._logger.exception("Oops! 'dict_tagger_sim_threshold' is set incorrectly in config file. Default to 0.95.")
self.dict_tagger_sim_threshold=float(0.95)
self.dict_terms = load_terms_from_csv(self.dictionary_file)
self._logger.info("normalising terms from dictionary...")
self.dict_terms = [self.solrClient.get_industry_term_field_analysis(dict_term) for dict_term in self.dict_terms]
self._logger.info("dictionary terms are normalised and loaded successfully. Total dictionary term size is [%s]", str(len(self.dict_terms)))
if self.dict_tagger_fuzzy_matching:
self._logger.info("loading into Trie nodes for fuzzy matching...")
self.dict_terms_trie = TrieNode()
[self.dict_terms_trie.insert(normed_term) for normed_term in self.dict_terms]
self._logger.info("loaded into Trie nodes successfully.")
else:
self.dict_terms_trie = TrieNode()
def load_grammars(self):
    """Load the grammar strings listed in ``self.pos_sequences_file``.

    Every entry produced by ``read_by_line`` has its newline characters
    removed and its surrounding whitespace stripped.

    Returns:
        list: the cleaned strings, in the order ``read_by_line`` yields them.
    """
    raw_lines = read_by_line(self.pos_sequences_file)
    return [raw_line.replace('\n', '').strip() for raw_line in raw_lines]
def parsing_candidates_regexp(self, text_pos_tokens,candidate_grammar):
cp = nltk.RegexpParser(candidate_grammar)
candidate_chunk=cp.parse(text_pos_tokens)
term_candidates=set()
for node_a in candidate_chunk:
if type(node_a) is nltk.Tree:
if node_a.label() == 'TermCandidate':
term_tokens=[]
for node_b in node_a:
if node_b[0] == '"':
#TODO: find a more elegant way to deal with spurious POS tagging for quotes
continue