本文整理汇总了Python中nltk.internals.find_jars_within_path方法的典型用法代码示例。如果您正苦于以下问题：Python internals.find_jars_within_path方法的具体用法？Python internals.find_jars_within_path怎么用？Python internals.find_jars_within_path使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块nltk.internals的用法示例。
在下文中一共展示了internals.find_jars_within_path方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: find_maltparser
# 需要导入模块: from nltk import internals [as 别名]
# 或者: from nltk.internals import find_jars_within_path [as 别名]
def find_maltparser(parser_dirname):
    """
    Find the MaltParser .jar file and its dependencies.

    :param parser_dirname: Either an existing path to the MaltParser
        directory, or a name that is handed to ``find_dir`` together with
        the ``MALT_PARSER`` environment variable.
    :return: A list of every jar path that ``find_jars_within_path``
        reports for the MaltParser directory.
    :raises AssertionError: If one of the required dependency jars, or a
        ``maltparser-*.jar``, is not among the jars that were found.
    """
    if os.path.exists(parser_dirname):  # If a full path is given.
        _malt_dir = parser_dirname
    else:  # Try to find path to maltparser directory in environment variables.
        _malt_dir = find_dir(parser_dirname, env_vars=('MALT_PARSER',))
    # Check that the found directory contains all the necessary .jar files.
    _malt_jars = set(find_jars_within_path(_malt_dir))
    # os.path.basename follows the platform's separator rules, so this also
    # works for paths that do not use '/' as the separator.
    _jars = set(os.path.basename(jar) for jar in _malt_jars)
    malt_dependencies = set(['log4j.jar', 'libsvm.jar', 'liblinear-1.8.jar'])
    assert malt_dependencies.issubset(_jars), (
        'Missing MaltParser dependencies in %r: %s'
        % (_malt_dir, ', '.join(sorted(malt_dependencies - _jars)))
    )
    assert any(
        name.startswith('maltparser-') and name.endswith('.jar')
        for name in _jars
    ), 'No maltparser-*.jar found in %r' % (_malt_dir,)
    return list(_malt_jars)
示例2: __init__
# 需要导入模块: from nltk import internals [as 别名]
# 或者: from nltk.internals import find_jars_within_path [as 别名]
def __init__(self, model_filename, path_to_jar=None, encoding='utf8', verbose=False, java_options='-mx1000m'):
    """
    Locate the Stanford jar and model file and remember the tagger settings.

    :param model_filename: Model file name, resolved with ``find_file``
        using the ``STANFORD_MODELS`` environment variable.
    :param path_to_jar: Optional jar location forwarded to ``find_jar``.
    :param encoding: Stored on the instance as ``_encoding``.
    :param verbose: Forwarded to ``find_jar`` and ``find_file``.
    :param java_options: Stored on the instance as ``java_options``.
    """
    if not self._JAR:
        warnings.warn(
            'The StanfordTagger class is not meant to be instantiated directly. '
            'Did you mean StanfordPOSTagger or StanfordNERTagger?'
        )

    main_jar = find_jar(self._JAR, path_to_jar, searchpath=(),
                        url=_stanford_url, verbose=verbose)
    self._stanford_jar = main_jar

    self._stanford_model = find_file(
        model_filename, env_vars=('STANFORD_MODELS',), verbose=verbose
    )

    # Put every jar that sits next to the main jar (e.g. logging jars)
    # on the classpath as well.
    jar_directory = os.path.dirname(main_jar)
    self._stanford_jar = tuple(find_jars_within_path(jar_directory))

    self.java_options = java_options
    self._encoding = encoding
示例3: find_maltparser
# 需要导入模块: from nltk import internals [as 别名]
# 或者: from nltk.internals import find_jars_within_path [as 别名]
def find_maltparser(parser_dirname):
    """
    Find the MaltParser .jar file and its dependencies.

    :param parser_dirname: An existing path to the MaltParser directory;
        otherwise it is looked up through ``find_dir`` with the
        ``MALT_PARSER`` environment variable.
    :return: A list of all jar paths returned by ``find_jars_within_path``
        for the MaltParser directory.
    :raises AssertionError: If a required dependency jar or a
        ``maltparser-*.jar`` is absent from the jars that were found.
    """
    if os.path.exists(parser_dirname):  # If a full path is given.
        _malt_dir = parser_dirname
    else:  # Try to find path to maltparser directory in environment variables.
        _malt_dir = find_dir(parser_dirname, env_vars=('MALT_PARSER',))
    # Check that the found directory contains all the necessary .jar files.
    _malt_jars = set(find_jars_within_path(_malt_dir))
    _jars = set(os.path.basename(jar) for jar in _malt_jars)
    malt_dependencies = set(['log4j.jar', 'libsvm.jar', 'liblinear-1.8.jar'])
    # The messages are only built when an assertion actually fails.
    assert malt_dependencies.issubset(_jars), (
        'MaltParser directory %r lacks: %s'
        % (_malt_dir, ', '.join(sorted(malt_dependencies - _jars)))
    )
    assert any(
        jar_name.startswith('maltparser-') and jar_name.endswith('.jar')
        for jar_name in _jars
    ), 'MaltParser directory %r has no maltparser-*.jar' % (_malt_dir,)
    return list(_malt_jars)
示例4: get_postag_with_record
# 需要导入模块: from nltk import internals [as 别名]
# 或者: from nltk.internals import find_jars_within_path [as 别名]
def get_postag_with_record(records, pairs):
    """
    Tag the first element of every pair with the Stanford POS tagger.

    :param records: Sequence of mappings; each record's ``'name'`` entry
        is printed, and ``len(records)`` is shown in the progress line.
    :param pairs: Iterable aligned with ``records``; ``pair[0]`` is what
        gets passed to ``pos_tagger.tag``.
    :return: A list holding one ``pos_tagger.tag`` result per
        (record, pair) combination, in input order.
    """
    path = os.path.dirname(__file__)
    # NOTE(review): the ``len(path)-10`` search bound looks tuned to this
    # project's directory layout -- confirm before moving this file.
    path = path[:path.rfind(os.sep, 0, len(path)-10)+1] + 'stanford-postagger/'
    print(path)

    jar = os.path.join(path, 'stanford-postagger.jar')
    model = os.path.join(path, 'models', 'english-bidirectional-distsim.tagger')
    pos_tagger = StanfordPOSTagger(model, jar)

    # Add the other jars from the Stanford directory to the classpath.
    # os.pathsep is the platform's classpath separator (':' or ';').
    stanford_dir = os.path.dirname(jar)
    stanford_jars = find_jars_within_path(stanford_dir)
    pos_tagger._stanford_jar = os.pathsep.join(stanford_jars)

    tagged_source = []
    # Predict on testing data
    for idx, (record, pair) in enumerate(zip(records, pairs)):
        print('*' * 100)
        print('File: ' + record['name'])
        print('Input: ' + str(pair[0]))
        text = pos_tagger.tag(pair[0])
        print('[%d/%d][%d] : %s' % (idx, len(records), len(pair[0]), str(text)))
        tagged_source.append(text)

    return tagged_source
示例5: get_postag_with_index
# 需要导入模块: from nltk import internals [as 别名]
# 或者: from nltk.internals import find_jars_within_path [as 别名]
def get_postag_with_index(sources, idx2word, word2idx):
    """
    POS-tag every source sequence after converting it with ``cut_zero``.

    :param sources: Sized collection of source sequences; each one is
        passed to ``keyphrase_utils.cut_zero`` together with ``idx2word``.
    :param idx2word: Forwarded to ``keyphrase_utils.cut_zero``.
    :param word2idx: Not used by this function; kept so existing callers
        keep working.
    :return: A list with one ``pos_tagger.tag`` result per source.
    """
    path = os.path.dirname(__file__)
    # NOTE(review): the ``len(path)-10`` search bound looks tuned to this
    # project's directory layout -- confirm before moving this file.
    path = path[:path.rfind(os.sep, 0, len(path)-10)+1] + 'stanford-postagger/'
    print(path)

    jar = os.path.join(path, 'stanford-postagger.jar')
    model = os.path.join(path, 'models', 'english-bidirectional-distsim.tagger')
    pos_tagger = StanfordPOSTagger(model, jar)

    # Add the other jars from the Stanford directory to the classpath.
    # os.pathsep is the platform's classpath separator (':' or ';').
    stanford_dir = os.path.dirname(jar)
    stanford_jars = find_jars_within_path(stanford_dir)
    pos_tagger._stanford_jar = os.pathsep.join(stanford_jars)

    tagged_source = []
    # Predict on testing data. enumerate() replaces the xrange(len(...))
    # index loop, which does not exist on Python 3.
    for idx, test_s_o in enumerate(sources):
        source_text = keyphrase_utils.cut_zero(test_s_o, idx2word)
        text = pos_tagger.tag(source_text)
        print('[%d/%d] : %s' % (idx, len(sources), str(text)))
        tagged_source.append(text)

    return tagged_source
示例6: load_pos_tagger
# 需要导入模块: from nltk import internals [as 别名]
# 或者: from nltk.internals import find_jars_within_path [as 别名]
def load_pos_tagger():
    """
    Build a StanfordPOSTagger that has every jar of its directory on the classpath.

    The tagger directory is ``stanford-postagger`` below the part of
    ``file_dir`` that ends with ``'pykp'``.

    :return: The configured ``StanfordPOSTagger`` instance.
    """
    # NOTE(review): ``file_dir`` is not defined inside this function;
    # presumably it is a module-level path containing 'pykp' -- confirm.
    path = os.path.join(file_dir[: file_dir.rfind('pykp') + 4], 'stanford-postagger')
    print(path)

    jar = os.path.join(path, 'stanford-postagger.jar')
    model = os.path.join(path, 'models', 'english-bidirectional-distsim.tagger')
    pos_tagger = StanfordPOSTagger(model, jar)

    # Add the other jars from the Stanford directory to the classpath.
    # os.pathsep is the platform's classpath separator (':' or ';').
    stanford_jars = find_jars_within_path(path)
    pos_tagger._stanford_jar = os.pathsep.join(stanford_jars)
    return pos_tagger
示例7: __init__
# 需要导入模块: from nltk import internals [as 别名]
# 或者: from nltk.internals import find_jars_within_path [as 别名]
def __init__(self):
    """
    Create a StanfordParser and extend its classpath.

    The parser is built with the English PCFG model; its classpath is then
    replaced by every jar found in the directory of its first classpath
    entry.
    """
    # Imported locally because the file's import block is not visible here.
    import os

    self.parser = StanfordParser(
        model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz')
    # os.path.dirname handles the platform's separator; splitting on '/'
    # yields an empty directory for backslash-separated paths.
    stanford_dir = os.path.dirname(self.parser._classpath[0])
    self.parser._classpath = tuple(find_jars_within_path(stanford_dir))
示例8: load_pos_tagger
# 需要导入模块: from nltk import internals [as 别名]
# 或者: from nltk.internals import find_jars_within_path [as 别名]
def load_pos_tagger(stanford_base_dir):
    """
    Build a StanfordPOSTagger from the given Stanford tagger directory.

    :param stanford_base_dir: Directory that contains
        ``stanford-postagger.jar`` and
        ``models/english-bidirectional-distsim.tagger``.
    :return: A ``StanfordPOSTagger`` whose ``_stanford_jar`` lists every
        jar that ``find_jars_within_path`` finds in ``stanford_base_dir``.
    """
    # Imported locally because the file's import block is not visible here.
    import os

    jar = os.path.join(stanford_base_dir, 'stanford-postagger.jar')
    model = os.path.join(
        stanford_base_dir, 'models', 'english-bidirectional-distsim.tagger')
    pos_tagger = StanfordPOSTagger(model_filename=model, path_to_jar=jar)

    # Add the other jars from the Stanford directory to the classpath.
    # os.pathsep is the platform's classpath separator (':' or ';').
    stanford_jars = find_jars_within_path(stanford_base_dir)
    pos_tagger._stanford_jar = os.pathsep.join(stanford_jars)
    return pos_tagger
示例9: check_postag
# 需要导入模块: from nltk import internals [as 别名]
# 或者: from nltk.internals import find_jars_within_path [as 别名]
def check_postag(config):
    """
    Print the Stanford POS tags for every source of the testing datasets.

    :param config: Mapping that provides ``'dataset'`` (passed to
        ``deserialize_from_file``) and ``'testing_datasets'`` (the dataset
        names to process); it is also forwarded as a whole to
        ``load_additional_testing_data``.
    """
    # Only the vocabulary mappings of the serialized dataset are used here.
    _, _, _, idx2word, word2idx = deserialize_from_file(config['dataset'])

    path = os.path.dirname(__file__)
    # NOTE(review): the ``len(path)-10`` search bound looks tuned to this
    # project's directory layout -- confirm before moving this file.
    path = path[:path.rfind(os.sep, 0, len(path)-10)+1] + 'stanford-postagger/'
    jar = os.path.join(path, 'stanford-postagger.jar')
    model = os.path.join(path, 'models', 'english-bidirectional-distsim.tagger')

    # Build the tagger once from the project-relative location instead of
    # a developer-specific absolute path, and set the classpath up front
    # rather than once per tagged sentence.
    pos_tagger = StanfordPOSTagger(model, jar)
    stanford_jars = find_jars_within_path(os.path.dirname(jar))
    # os.pathsep is the platform's classpath separator (':' or ';').
    pos_tagger._stanford_jar = os.pathsep.join(stanford_jars)

    # The call does not depend on the dataset being processed, so do it
    # once rather than once per dataset name.
    test_sets = load_additional_testing_data(
        config['testing_datasets'], idx2word, word2idx, config)

    for dataset_name in config['testing_datasets']:
        test_set = test_sets[dataset_name]
        # zip() keeps the original pairing of sources with targets
        # (truncating to the shorter one) without needing len()/indexing,
        # which a Python 3 zip object does not support.
        for test_s_o, _ in zip(test_set['source'], test_set['target']):
            source = keyphrase_utils.cut_zero(test_s_o, idx2word)
            print(source)
            text = pos_tagger.tag(source)
            print(text)