本文整理汇总了Python中nltk.data.ZipFilePathPointer类的典型用法代码示例。如果您正苦于以下问题:Python data.ZipFilePathPointer类的具体用法?Python data.ZipFilePathPointer怎么用?Python data.ZipFilePathPointer使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块nltk.data的用法示例。
在下文中一共展示了data.ZipFilePathPointer类的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: train_from_file
# 需要导入模块: from nltk import data [as 别名]
# 或者: from nltk.data import ZipFilePathPointer [as 别名]
def train_from_file(self, conll_file, verbose=False):
    """
    Train MaltParser from a file.

    :param conll_file: filename of the training input data, or a
        ZipFilePathPointer whose contents are first copied to a
        temporary file in ``self.working_dir``
    :type conll_file: str or ZipFilePathPointer
    :param verbose: forwarded unchanged to ``self._execute``
    :type verbose: bool
    :raise Exception: if the training command exits with a non-zero code
    """
    # If conll_file is a ZipFilePathPointer, copy its contents into a
    # temporary file so the recursive call receives a plain filename.
    if isinstance(conll_file, ZipFilePathPointer):
        import os  # local import: only needed to clean up the temp file

        input_file = tempfile.NamedTemporaryFile(prefix='malt_train.conll.',
                                                 dir=self.working_dir,
                                                 mode='w', delete=False)
        try:
            with input_file:
                with conll_file.open() as conll_input_file:
                    conll_str = conll_input_file.read()
                input_file.write(text_type(conll_str))
            # The temp file is closed (and therefore flushed) at this
            # point, so the training command sees the complete data.
            return self.train_from_file(input_file.name, verbose=verbose)
        finally:
            # delete=False means nobody else removes the file for us.
            os.remove(input_file.name)

    # Generate command to run maltparser.
    cmd = self.generate_malt_command(conll_file, mode="learn")
    ret = self._execute(cmd, verbose)
    if ret != 0:
        raise Exception("MaltParser training (%s) failed with exit "
                        "code %d" % (' '.join(cmd), ret))
    self._trained = True
示例2: __repr__
# 需要导入模块: from nltk import data [as 别名]
# 或者: from nltk.data import ZipFilePathPointer [as 别名]
def __repr__(self):
    """
    Return ``<ClassName in 'location'>`` for this object's root.

    For a ZipFilePathPointer root the location is the zip file's
    filename joined with the entry by ``/``; for any other root it is
    the root's ``path`` attribute.
    """
    root = self._root
    if not isinstance(root, ZipFilePathPointer):
        location = '%s' % root.path
    else:
        location = '%s/%s' % (root.zipfile.filename, root.entry)
    return '<%s in %r>' % (self.__class__.__name__, location)
示例3: find_corpus_fileids
# 需要导入模块: from nltk import data [as 别名]
# 或者: from nltk.data import ZipFilePathPointer [as 别名]
def find_corpus_fileids(root, regexp):
    """
    Return the sorted list of file identifiers under ``root`` that match
    ``regexp``.

    :param root: where to search; a ZipFilePathPointer or a
        FileSystemPathPointer
    :type root: PathPointer
    :param regexp: regular expression a fileid must match in full
        (``'$'`` is appended and ``re.match`` anchors the start)
    :type regexp: str
    :return: sorted matching fileids, with ``/`` as separator
    :rtype: list(str)
    :raise TypeError: if ``root`` is not a PathPointer
    :raise AssertionError: if ``root`` is a PathPointer of an
        unsupported kind
    """
    if not isinstance(root, PathPointer):
        raise TypeError('find_corpus_fileids: expected a PathPointer')
    regexp += '$'

    # Find fileids in a zipfile: scan the zipfile's namelist.  Filter
    # out entries that end in '/' -- they're directories.
    if isinstance(root, ZipFilePathPointer):
        entry = root.entry
        # Only strip the root prefix from names that actually carry it;
        # slicing an unrelated archive member would produce a bogus,
        # truncated fileid.
        fileids = [name[len(entry):] for name in root.zipfile.namelist()
                   if name.startswith(entry) and not name.endswith('/')]
        items = [name for name in fileids if re.match(regexp, name)]
        return sorted(items)

    # Find fileids in a directory: use os.walk to search all (proper
    # or symlinked) subdirectories, and match paths against the regexp.
    elif isinstance(root, FileSystemPathPointer):
        items = []
        # workaround for py25 which doesn't support followlinks
        kwargs = {}
        if not py25():
            kwargs = {'followlinks': True}
        for dirname, subdirs, fileids in os.walk(root.path, **kwargs):
            # presumably _path_from yields the path components of dirname
            # relative to root.path -- confirm against its definition
            prefix = ''.join('%s/' % p for p in _path_from(root.path, dirname))
            items += [prefix + fileid for fileid in fileids
                      if re.match(regexp, prefix + fileid)]
            # Don't visit svn directories: removing the name in place
            # stops os.walk from descending into it.
            if '.svn' in subdirs:
                subdirs.remove('.svn')
        return sorted(items)

    else:
        raise AssertionError("Don't know how to handle %r" % root)
示例4: _load_lang_mapping_data
# 需要导入模块: from nltk import data [as 别名]
# 或者: from nltk.data import ZipFilePathPointer [as 别名]
def _load_lang_mapping_data(self):
    """
    Load the language mapping table into ``self._lang_mapping_data``.

    The file named by ``self._LANG_MAPPER_FILE`` under ``self.root`` is
    read as UTF-8, stripped, split into lines on ``'\\n'``, and each line
    is split into fields on tab characters.

    :raise RuntimeError: if ``self.root`` is a ZipFilePathPointer, or if
        the mapper file is not among ``self.fileids()``
    """
    if isinstance(self.root, ZipFilePathPointer):
        raise RuntimeError("Please install the 'crubadan' corpus first, use nltk.download()")

    mapper_file = path.join(self.root, self._LANG_MAPPER_FILE)
    if self._LANG_MAPPER_FILE not in self.fileids():
        raise RuntimeError("Could not find language mapper file: " + mapper_file)

    # Use context managers so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    if PY3:
        with open(mapper_file, 'r', encoding='utf-8') as mapping_stream:
            raw = mapping_stream.read().strip()
    else:
        with open(mapper_file, 'rU') as mapping_stream:
            raw = mapping_stream.read().decode('utf-8').strip()

    self._lang_mapping_data = [row.split('\t') for row in raw.split('\n')]
示例5: demo
# 需要导入模块: from nltk import data [as 别名]
# 或者: from nltk.data import ZipFilePathPointer [as 别名]
def demo():
    """
    Print a short tour of a Toolbox lexicon and a Toolbox settings file.

    Parses ``corpora/toolbox/rotokas.dic`` and prints a few of its
    fields, then parses ``corpora/toolbox/MDF/MDF_AltH.typ`` and prints
    its paper size and its settings-string form encoded as UTF-8.
    """
    from itertools import islice

    # Disabled alternative that reads from the zipped corpus instead:
    #   zip_path = find('corpora/toolbox.zip')
    #   lexicon = ToolboxData(ZipFilePathPointer(zip_path, 'toolbox/rotokas.dic')).parse()
    dictionary_path = find('corpora/toolbox/rotokas.dic')
    lexicon = ToolboxData(dictionary_path).parse()

    print('first field in fourth record:')
    leading_field = lexicon[3][0]
    print(leading_field.tag)
    print(leading_field.text)

    print('\nfields in sequential order:')
    for entry in islice(lexicon.find('record'), 10):
        print(entry.tag, entry.text)

    print('\nlx fields:')
    for lx in islice(lexicon.findall('record/lx'), 10):
        print(lx.text)

    settings = ToolboxSettings()
    settings.open(find('corpora/toolbox/MDF/MDF_AltH.typ'))
    # Disabled alternative reading from the zipped corpus:
    #   settings.open(ZipFilePathPointer(zip_path, entry='toolbox/MDF/MDF_AltH.typ'))
    parsed_settings = settings.parse(unwrap=False, encoding='cp1252')
    print(parsed_settings.find('expset/expMDF/rtfPageSetup/paperSize').text)
    print(to_settings_string(ElementTree(parsed_settings)).encode('utf8'))
示例6: train_from_file
# 需要导入模块: from nltk import data [as 别名]
# 或者: from nltk.data import ZipFilePathPointer [as 别名]
def train_from_file(self, conll_file, verbose=False):
    """
    Train MaltParser from a file.

    :param conll_file: filename of the training input data, or a
        ZipFilePathPointer whose contents are first copied to a
        temporary file in ``self.working_dir``
    :type conll_file: str or ZipFilePathPointer
    :param verbose: forwarded unchanged to ``self._execute``
    :type verbose: bool
    :raise Exception: if ``self._malt_bin`` is not set, or if the
        training command exits with a non-zero code
    """
    if not self._malt_bin:
        raise Exception("MaltParser location is not configured. Call config_malt() first.")

    # If conll_file is a ZipFilePathPointer, then we need to do some extra
    # massaging: copy it to a temporary file and train from that instead.
    if isinstance(conll_file, ZipFilePathPointer):
        input_file = tempfile.NamedTemporaryFile(prefix='malt_train.conll',
                                                 dir=self.working_dir,
                                                 delete=False)
        try:
            # Close the stream that was actually opened; calling close()
            # on the path pointer itself would not release it.
            conll_stream = conll_file.open()
            try:
                conll_str = conll_stream.read()
            finally:
                conll_stream.close()
            input_file.write(conll_str)
            # Close before training so the data is flushed to disk.
            input_file.close()
            return self.train_from_file(input_file.name, verbose=verbose)
        finally:
            # close() is safe to repeat; this covers the failure paths.
            input_file.close()
            os.remove(input_file.name)

    cmd = ['java', '-jar', self._malt_bin, '-w', self.working_dir,
           '-c', self.mco, '-i', conll_file, '-m', 'learn']
    ret = self._execute(cmd, verbose)
    if ret != 0:
        raise Exception("MaltParser training (%s) "
                        "failed with exit code %d" %
                        (' '.join(cmd), ret))
    self._trained = True