本文整理匯總了Python中nltk.data.PathPointer類的典型用法代碼示例。如果您正苦於以下問題:Python data.PathPointer類的具體用法?Python data.PathPointer怎麼用?Python data.PathPointer使用的例子?那麼,這裡精選的代碼示例或許可以為您提供幫助。您也可以進一步了解該類所在模塊nltk.data的用法示例。
在下文中一共展示了data.PathPointer類的6個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: abspath
# 需要導入模塊: from nltk import data [as 別名]
# 或者: from nltk.data import PathPointer [as 別名]
def abspath(self, fileid):
    """
    Return the absolute path for the given file.

    The path is produced by joining ``fileid`` onto this object's
    ``_root`` via ``self._root.join``.

    :type fileid: str
    :param fileid: The file identifier for the file whose path
        should be returned.
    :rtype: PathPointer
    """
    return self._root.join(fileid)
示例2: abspaths
# 需要導入模塊: from nltk import data [as 別名]
# 或者: from nltk.data import PathPointer [as 別名]
def abspaths(self, fileids=None, include_encoding=False,
             include_fileid=False):
    """
    Return a list of the absolute paths for all fileids in this corpus;
    or for the given list of fileids, if specified.

    :type fileids: None or str or list
    :param fileids: Specifies the set of fileids for which paths should
        be returned.  Can be None, for all fileids; a list of
        file identifiers, for a specified set of fileids; or a single
        file identifier, for a single file.  Note that the return
        value is always a list of paths, even if ``fileids`` is a
        single file identifier.
    :param include_encoding: If true, then each returned item is a
        tuple that also carries the encoding reported by
        ``self.encoding(fileid)``: ``(path_pointer, encoding)``.
    :param include_fileid: If true, then each returned item is a
        tuple whose last element is the fileid itself:
        ``(path_pointer, fileid)``, or
        ``(path_pointer, encoding, fileid)`` when combined with
        ``include_encoding``.
    :rtype: list(PathPointer) or list(tuple)
    """
    if fileids is None:
        fileids = self._fileids
    elif isinstance(fileids, compat.string_types):
        fileids = [fileids]
    # ``fileids`` is traversed up to three times below (paths, encodings,
    # and the fileid column).  Materialize it once so that a one-shot
    # iterator does not come back empty on the later passes.
    fileids = list(fileids)

    paths = [self._root.join(f) for f in fileids]
    if not (include_encoding or include_fileid):
        return paths

    # Assemble the requested columns in the fixed order
    # (path, encoding, fileid) and zip them into per-file tuples.
    columns = [paths]
    if include_encoding:
        columns.append([self.encoding(f) for f in fileids])
    if include_fileid:
        columns.append(fileids)
    return list(zip(*columns))
示例3: _open
# 需要導入模塊: from nltk import data [as 別名]
# 或者: from nltk.data import PathPointer [as 別名]
def _open(self):
    """
    Open the file stream associated with this corpus view.  This
    will be performed if any value is read from the view
    while its file stream is closed.

    The opened stream is stored in ``self._stream``; nothing is
    returned.
    """
    if isinstance(self._fileid, PathPointer):
        # A path pointer opens itself; it is handed the encoding.
        self._stream = self._fileid.open(self._encoding)
    elif self._encoding:
        # Plain path plus an encoding: open in binary mode and wrap the
        # file object in a SeekableUnicodeStreamReader built with that
        # encoding.
        self._stream = SeekableUnicodeStreamReader(
            open(self._fileid, 'rb'), self._encoding)
    else:
        # Plain path and no encoding: use the binary file object as-is.
        self._stream = open(self._fileid, 'rb')
示例4: find_corpus_fileids
# 需要導入模塊: from nltk import data [as 別名]
# 或者: from nltk.data import PathPointer [as 別名]
def find_corpus_fileids(root, regexp):
    """
    Return a sorted list of the fileids found under ``root`` whose
    root-relative name matches ``regexp``.

    :param root: The location to search.  Must be a ``PathPointer``;
        ``ZipFilePathPointer`` and ``FileSystemPathPointer`` are the
        two kinds handled here.
    :param regexp: A regular expression string.  A ``'$'`` is appended
        and the result is applied with ``re.match``, so it is anchored
        at both ends of the relative name.
    :rtype: list(str)
    :raise TypeError: If ``root`` is not a ``PathPointer``.
    :raise AssertionError: If ``root`` is a ``PathPointer`` of a kind
        this function does not handle.
    """
    if not isinstance(root, PathPointer):
        raise TypeError('find_corpus_fileids: expected a PathPointer')
    regexp += '$'

    # Find fileids in a zipfile: scan the zipfile's namelist.  Filter
    # out entries that end in '/' -- they're directories.
    if isinstance(root, ZipFilePathPointer):
        entry = root.entry
        # Only members that actually start with ``entry`` belong to this
        # root; stripping ``len(entry)`` characters from any other member
        # would produce a meaningless fileid.
        fileids = [name[len(entry):] for name in root.zipfile.namelist()
                   if name.startswith(entry) and not name.endswith('/')]
        items = [name for name in fileids if re.match(regexp, name)]
        return sorted(items)

    # Find fileids in a directory: use os.walk to search all (proper
    # or symlinked) subdirectories, and match paths against the regexp.
    elif isinstance(root, FileSystemPathPointer):
        items = []
        # workaround for py25 which doesn't support followlinks
        kwargs = {}
        if not py25():
            kwargs = {'followlinks': True}
        for dirname, subdirs, fileids in os.walk(root.path, **kwargs):
            # Fileids always use '/' separators, one per directory
            # component returned by _path_from.
            prefix = ''.join('%s/' % p for p in _path_from(root.path, dirname))
            items += [prefix + fileid for fileid in fileids
                      if re.match(regexp, prefix + fileid)]
            # Don't visit svn directories: removing the name from
            # ``subdirs`` in place stops os.walk from descending into it.
            if '.svn' in subdirs:
                subdirs.remove('.svn')
        return sorted(items)

    else:
        raise AssertionError("Don't know how to handle %r" % root)
示例5: abspath
# 需要導入模塊: from nltk import data [as 別名]
# 或者: from nltk.data import PathPointer [as 別名]
def abspath(self, fileid):
    """
    Return the absolute path for the given file.

    The path is produced by joining ``fileid`` onto this object's
    ``_root`` via ``self._root.join``.

    :type fileid: str
    :param fileid: The file identifier for the file whose path
        should be returned.
    :rtype: PathPointer
    """
    return self._root.join(fileid)
示例6: __init__
# 需要導入模塊: from nltk import data [as 別名]
# 或者: from nltk.data import PathPointer [as 別名]
def __init__(self, fileid, block_reader=None, startpos=0,
             encoding='utf8'):
    """
    Create a new corpus view, based on the file ``fileid``, and
    read with ``block_reader``.  See the class documentation
    for more information.

    :param fileid: The path to the file that is read by this
        corpus view.  ``fileid`` can either be a string or a
        ``PathPointer``.
    :param block_reader: If given (and true), it is installed as
        this view's ``read_block``; otherwise ``read_block`` is
        left untouched.
    :param startpos: The file position at which the view will
        start reading.  This can be used to skip over preface
        sections.
    :param encoding: The unicode encoding that should be used to
        read the file's contents.  The default is ``'utf8'``.  If
        no encoding is specified (presumably by passing
        ``encoding=None``), then the file's contents will be read
        as a non-unicode string (i.e., a str).
    :raise ValueError: If the size of ``fileid`` cannot be
        determined (any exception raised while querying it is
        reported this way).
    """
    if block_reader:
        self.read_block = block_reader
    # Initialize our toknum/filepos mapping.
    self._toknum = [0]
    self._filepos = [startpos]
    self._encoding = encoding
    # We don't know our length (number of tokens) yet.
    self._len = None

    self._fileid = fileid
    self._stream = None

    self._current_toknum = None
    """This variable is set to the index of the next token that
       will be read, immediately before ``self.read_block()`` is
       called.  This is provided for the benefit of the block
       reader, which under rare circumstances may need to know
       the current token number."""

    self._current_blocknum = None
    """This variable is set to the index of the next block that
       will be read, immediately before ``self.read_block()`` is
       called.  This is provided for the benefit of the block
       reader, which under rare circumstances may need to know
       the current block number."""

    # Find the length of the file.  A PathPointer reports its own
    # size; a plain path is stat'ed on the file system.
    try:
        if isinstance(self._fileid, PathPointer):
            self._eofpos = self._fileid.file_size()
        else:
            self._eofpos = os.stat(self._fileid).st_size
    except Exception as exc:
        raise ValueError('Unable to open or access %r -- %s' %
                         (fileid, exc))

    # Maintain a cache of the most recently read block, to
    # increase efficiency of random access.
    self._cache = (-1, -1, None)