本文整理汇总了 Python 中 nltk.data.PathPointer 类的典型用法代码示例。如果您正苦于以下问题:Python data.PathPointer 的具体用法?Python data.PathPointer 怎么用?Python data.PathPointer 使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 nltk.data 的用法示例。
在下文中一共展示了 data.PathPointer 类的 6 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: abspath
# 需要导入模块: from nltk import data [as 别名]
# 或者: from nltk.data import PathPointer [as 别名]
def abspath(self, fileid):
    """
    Resolve a file identifier against this reader's root.

    :type fileid: str
    :param fileid: Identifier of the file whose location is wanted.
    :rtype: PathPointer
    :return: The result of joining ``fileid`` onto ``self._root``.
    """
    root = self._root
    return root.join(fileid)
示例2: abspaths
# 需要导入模块: from nltk import data [as 别名]
# 或者: from nltk.data import PathPointer [as 别名]
def abspaths(self, fileids=None, include_encoding=False,
             include_fileid=False):
    """
    Build the list of paths for this corpus's files, optionally paired
    with each file's encoding and/or its file identifier.

    :type fileids: None or str or list
    :param fileids: Which files to resolve.  ``None`` selects
        ``self._fileids``; a single string is treated as a one-element
        list; anything else is iterated as a collection of identifiers.
        The result is always a list, even for a single identifier.
    :param include_encoding: If true, each result is a tuple that
        carries ``self.encoding(fileid)`` right after the path.
    :param include_fileid: If true, each result is a tuple that
        carries the file identifier as its last element.
    :rtype: list(PathPointer)
    :return: A plain list of paths when neither flag is set; otherwise
        a list of ``(path, encoding)``, ``(path, fileid)`` or
        ``(path, encoding, fileid)`` tuples.
    """
    if fileids is None:
        fileids = self._fileids
    elif isinstance(fileids, compat.string_types):
        fileids = [fileids]

    paths = [self._root.join(f) for f in fileids]
    if not (include_encoding or include_fileid):
        return paths

    # Assemble the requested columns in their fixed output order
    # (path, then encoding, then fileid) and zip them into tuples.
    columns = [paths]
    if include_encoding:
        columns.append([self.encoding(f) for f in fileids])
    if include_fileid:
        columns.append(fileids)
    return list(zip(*columns))
示例3: _open
# 需要导入模块: from nltk import data [as 别名]
# 或者: from nltk.data import PathPointer [as 别名]
def _open(self):
    """
    Open the file stream associated with this corpus view and store
    it in ``self._stream``.  This is meant to be called when a value
    is read from the view while its file stream is closed.
    """
    source = self._fileid
    if isinstance(source, PathPointer):
        # Path pointers know how to open themselves with an encoding.
        self._stream = source.open(self._encoding)
        return

    # Plain filename: open in binary mode, and wrap the raw stream in
    # a decoding reader only when an encoding was configured.
    codec = self._encoding
    if codec:
        self._stream = SeekableUnicodeStreamReader(open(source, 'rb'), codec)
    else:
        self._stream = open(source, 'rb')
示例4: find_corpus_fileids
# 需要导入模块: from nltk import data [as 别名]
# 或者: from nltk.data import PathPointer [as 别名]
def find_corpus_fileids(root, regexp):
    """
    Return a sorted list of the file identifiers found under ``root``
    that match ``regexp``.

    :param root: Where to search; handled kinds are
        ``ZipFilePathPointer`` and ``FileSystemPathPointer``.
    :param regexp: Regular expression source.  A ``'$'`` is appended
        and the result is applied with ``re.match`` to each candidate
        identifier (the path relative to ``root``, using ``/`` between
        components).
    :return: The matching identifiers, sorted.
    :raise TypeError: If ``root`` is not a ``PathPointer``.
    :raise AssertionError: If ``root`` is a ``PathPointer`` of a kind
        this function does not handle.
    """
    if not isinstance(root, PathPointer):
        raise TypeError('find_corpus_fileids: expected a PathPointer')
    pattern = regexp + '$'

    # Zip archives: scan the archive's name list, skipping entries
    # that end in '/' (those are directories).
    if isinstance(root, ZipFilePathPointer):
        matches = []
        for member in root.zipfile.namelist():
            if member.endswith('/'):
                continue
            # NOTE(review): the leading len(root.entry) characters are
            # dropped without checking that the member actually starts
            # with root.entry -- presumably every member lives under
            # that entry; confirm.
            relative = member[len(root.entry):]
            if re.match(pattern, relative):
                matches.append(relative)
        return sorted(matches)

    # Directories: walk every (proper or symlinked) subdirectory and
    # test each file's relative path against the pattern.
    if isinstance(root, FileSystemPathPointer):
        # workaround for py25 which doesn't support followlinks
        walk_options = {} if py25() else {'followlinks': True}
        matches = []
        for dirname, subdirs, filenames in os.walk(root.path, **walk_options):
            prefix = ''.join(
                '%s/' % part for part in _path_from(root.path, dirname))
            for filename in filenames:
                candidate = prefix + filename
                if re.match(pattern, candidate):
                    matches.append(candidate)
            # Don't visit svn directories: removing the name from
            # ``subdirs`` in place keeps os.walk from descending.
            if '.svn' in subdirs:
                subdirs.remove('.svn')
        return sorted(matches)

    raise AssertionError("Don't know how to handle %r" % root)
示例5: abspath
# 需要导入模块: from nltk import data [as 别名]
# 或者: from nltk.data import PathPointer [as 别名]
def abspath(self, fileid):
    """
    Look up where the given file lives, relative to this reader's root.

    :type fileid: str
    :param fileid: The file identifier for the file whose path
        should be returned.
    :rtype: PathPointer
    :return: ``fileid`` joined onto ``self._root``.
    """
    resolved = self._root.join(fileid)
    return resolved
示例6: __init__
# 需要导入模块: from nltk import data [as 别名]
# 或者: from nltk.data import PathPointer [as 别名]
def __init__(self, fileid, block_reader=None, startpos=0,
             encoding='utf8'):
    """
    Set up a corpus view over the file ``fileid``.  See the class
    documentation for more information.

    :param fileid: The path to the file that is read by this
        corpus view; either a string or a ``PathPointer``.
    :param block_reader: If given (and truthy), it is stored as
        ``self.read_block`` and used to read blocks from the file.
    :param startpos: The file position at which the view will
        start reading.  This can be used to skip over preface
        sections.
    :param encoding: The unicode encoding that should be used to
        read the file's contents.  If no encoding is specified,
        then the file's contents will be read as a non-unicode
        string (i.e., a str).
    :raise ValueError: If the size of the file cannot be determined
        (any exception raised while asking for it is converted).
    """
    if block_reader:
        self.read_block = block_reader

    # Where the data comes from and how it is decoded; the stream
    # itself starts out unset.
    self._fileid = fileid
    self._encoding = encoding
    self._stream = None

    # Token-number / file-position mapping, seeded with the starting
    # position.
    self._toknum = [0]
    self._filepos = [startpos]

    # The number of tokens is not known yet.
    self._len = None

    # Set to the index of the next token that will be read,
    # immediately before ``self.read_block()`` is called.  Provided
    # for the benefit of the block reader, which under rare
    # circumstances may need to know the current token number.
    self._current_toknum = None

    # Set to the index of the next block that will be read,
    # immediately before ``self.read_block()`` is called.  Provided
    # for the benefit of the block reader, which under rare
    # circumstances may need to know the current block number.
    self._current_blocknum = None

    # Find the length of the file: path pointers report their own
    # size, plain filenames are stat'ed.
    try:
        if isinstance(fileid, PathPointer):
            size = fileid.file_size()
        else:
            size = os.stat(fileid).st_size
    except Exception as exc:
        raise ValueError('Unable to open or access %r -- %s' %
                         (fileid, exc))
    self._eofpos = size

    # Cache of the most recently read block, to make random access
    # cheaper.
    self._cache = (-1, -1, None)