本文整理匯總了Python中typing.IO.close方法的典型用法代碼示例。如果您正苦於以下問題:Python IO.close方法的具體用法?Python IO.close怎麼用?Python IO.close使用的例子?那麼,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類typing.IO的用法示例。
在下文中一共展示了IO.close方法的3個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: _get_single_df
# 需要導入模塊: from typing import IO [as 別名]
# 或者: from typing.IO import close [as 別名]
def _get_single_df(
    stream: IO, filetype: Optional[TypeEnum], **kwargs
) -> Union[pd.DataFrame, Iterable[pd.DataFrame]]:
    """
    Read a stream and retrieve the data frame or data frame generator (chunks).

    The data is read through `stream.name`, which is the path to a local file
    (often temporary), instead of through the stream object itself, so the
    stream is not closed while the file is being inspected and parsed.
    The stream is always closed before this function returns or raises,
    including when type/encoding/separator detection fails.

    Args:
        stream: open file object; only its `name` attribute and `close()`
            method are used here.
        filetype: format of the file. When `None`, it is built from the
            result of `detect_type` on the file path.
        **kwargs: forwarded to the `pd.read_<filetype>` reader. `encoding`
            is always (re)set: the given value is kept when
            `validate_encoding` accepts it, otherwise it is replaced by the
            result of `detect_encoding`. For CSV files with no `sep` given,
            `sep` is set from `detect_sep` when `validate_sep` rejects the
            file.

    Returns:
        Whatever the pandas reader returns. When `sheet_name=None` was passed
        explicitly, the reader result is treated as a dictionary of sheets:
        each sheet gets a `__sheet__` column holding its name and all sheets
        are concatenated into a single data frame.
    """
    try:
        path = stream.name

        if filetype is None:
            filetype = TypeEnum(detect_type(path))

        # Check encoding
        encoding = kwargs.get('encoding')
        if not validate_encoding(path, encoding):
            encoding = detect_encoding(path)
        kwargs['encoding'] = encoding

        # Check separator for CSV files if it's not set
        if filetype is TypeEnum.CSV and 'sep' not in kwargs:
            if not validate_sep(path, encoding=encoding):
                kwargs['sep'] = detect_sep(path, encoding)

        pd_read = getattr(pd, f'read_{filetype}')
        df = pd_read(path, **kwargs)
    finally:
        # Single cleanup point: the detection helpers above may raise too,
        # and the caller relies on this function to release the stream.
        stream.close()

    # In case of sheets, the df can be a dictionary.
    # NOTSET distinguishes "sheet_name not passed" from an explicit None.
    if kwargs.get('sheet_name', NOTSET) is None:
        for sheet_name, _df in df.items():
            _df['__sheet__'] = sheet_name
        df = pd.concat(df.values(), sort=False)

    return df
示例2: embed_file
# 需要導入模塊: from typing import IO [as 別名]
# 或者: from typing.IO import close [as 別名]
def embed_file(self,
               input_file: IO,
               output_file_path: str,
               output_format: str = "all",
               batch_size: int = DEFAULT_BATCH_SIZE) -> None:
    """
    Computes ELMo embeddings from an input_file where each line contains a sentence tokenized by whitespace.
    The ELMo embeddings are written out in HDF5 format, where each sentence is saved in a dataset
    keyed by the sentence text itself. Blank lines are ignored, and a sentence whose key is
    already present in the output file is skipped with a warning.

    ``input_file`` is fully read and then closed by this method, whether or not the
    embedding / writing steps succeed.

    Parameters
    ----------
    input_file : ``IO``, required
        A file with one tokenized sentence per line.
    output_file_path : ``str``, required
        A path to the output hdf5 file. It is opened in ``'w'`` mode.
    output_format : ``str``, optional, (default = "all")
        The embeddings to output. Must be one of "all", "top", or "average".
    batch_size : ``int``, optional, (default = DEFAULT_BATCH_SIZE)
        The number of sentences to process in ELMo at one time.
    """
    assert output_format in ["all", "top", "average"]

    # Tokenizes the sentences.
    # All lines are materialised here, so the input handle can be released
    # immediately; doing it in ``finally`` guarantees it is closed even when
    # reading, embedding or HDF5 writing fails later on.
    try:
        stripped_lines = (line.strip() for line in input_file)
        sentences = [sentence for sentence in stripped_lines if sentence]
    finally:
        input_file.close()

    split_sentences = [sentence.split() for sentence in sentences]
    # Uses the sentence as the key.
    embedded_sentences = zip(sentences, self.embed_sentences(split_sentences, batch_size))

    logger.info("Processing sentences.")
    with h5py.File(output_file_path, 'w') as fout:
        for key, embeddings in Tqdm.tqdm(embedded_sentences):
            if key in fout:
                logger.warning(f"Key already exists in {output_file_path}, skipping: {key}")
                continue

            if output_format == "all":
                output = embeddings
            elif output_format == "top":
                # Last entry along the first axis (presumably the top ELMo
                # layer); -1 avoids hard-coding the number of layers.
                output = embeddings[-1]
            elif output_format == "average":
                # Average over the first axis (presumably the layer axis).
                output = numpy.average(embeddings, axis=0)

            fout.create_dataset(
                key,
                output.shape, dtype='float32',
                data=output
            )
示例3: embed_file
# 需要導入模塊: from typing import IO [as 別名]
# 或者: from typing.IO import close [as 別名]
def embed_file(self,
               input_file: IO,
               output_file_path: str,
               output_format: str = "all",
               batch_size: int = DEFAULT_BATCH_SIZE,
               forget_sentences: bool = False,
               use_sentence_keys: bool = False) -> None:
    """
    Computes ELMo embeddings from an input_file where each line contains a sentence tokenized by whitespace.
    The ELMo embeddings are written out in HDF5 format, where each sentence embedding
    is saved in a dataset with the line number in the original file as the key.

    ``input_file`` is fully read and then closed by this method, including when
    a ``ConfigurationError`` is raised or the embedding / writing steps fail.

    Parameters
    ----------
    input_file : ``IO``, required
        A file with one tokenized sentence per line.
    output_file_path : ``str``, required
        A path to the output hdf5 file. It is opened in ``'w'`` mode.
    output_format : ``str``, optional, (default = "all")
        The embeddings to output. Must be one of "all", "top", or "average".
    batch_size : ``int``, optional, (default = DEFAULT_BATCH_SIZE)
        The number of sentences to process in ELMo at one time.
    forget_sentences : ``bool``, optional, (default = False).
        Only relevant when ``use_sentence_keys`` is False. When False, a string
        serialized JSON dictionary that maps each sentence to its line number
        (its HDF5 key) is stored under the "sentence_to_index" HDF5 key.
        This is useful if you want to use the embeddings without keeping
        the original file of sentences around. Pass True to omit it.
    use_sentence_keys : ``bool``, optional, (default = False).
        Whether or not to use full sentences as keys. By default,
        the line numbers of the input file are used as ids, which is more robust.

    Raises
    ------
    ConfigurationError
        If the input contains empty lines, or if ``use_sentence_keys`` is set
        and the same sentence key is encountered twice.
    """
    assert output_format in ["all", "top", "average"]

    # Tokenizes the sentences.
    # All lines are materialised here, so the input handle can be released
    # immediately; doing it in ``finally`` guarantees it is closed on every
    # exit path, including the ConfigurationError raises below.
    try:
        sentences = [line.strip() for line in input_file]
    finally:
        input_file.close()

    blank_lines = [i for (i, line) in enumerate(sentences) if line == ""]
    if blank_lines:
        raise ConfigurationError(f"Your input file contains empty lines at indexes "
                                 f"{blank_lines}. Please remove them.")
    split_sentences = [sentence.split() for sentence in sentences]

    # Uses the sentence index as the key.
    if use_sentence_keys:
        logger.warning("Using sentences as keys can fail if sentences "
                       "contain forward slashes or colons. Use with caution.")
        embedded_sentences = zip(sentences, self.embed_sentences(split_sentences, batch_size))
    else:
        embedded_sentences = ((str(i), x) for i, x in
                              enumerate(self.embed_sentences(split_sentences, batch_size)))

    # The sentence -> key mapping is only recorded for index keys, and only
    # when the caller did not ask to forget the sentences.
    store_sentence_index = not forget_sentences and not use_sentence_keys
    sentence_to_index = {}

    logger.info("Processing sentences.")
    with h5py.File(output_file_path, 'w') as fout:
        for key, embeddings in Tqdm.tqdm(embedded_sentences):
            if use_sentence_keys and key in fout:
                raise ConfigurationError(f"Key already exists in {output_file_path}. "
                                         f"To encode duplicate sentences, do not pass "
                                         f"the --use-sentence-keys flag.")

            if store_sentence_index:
                # Here ``key`` is the stringified line index; a repeated
                # sentence keeps the key of its last occurrence.
                sentence = sentences[int(key)]
                sentence_to_index[sentence] = key

            if output_format == "all":
                output = embeddings
            elif output_format == "top":
                # Last entry along the first axis (presumably the top ELMo layer).
                output = embeddings[-1]
            elif output_format == "average":
                # Average over the first axis (presumably the layer axis).
                output = numpy.average(embeddings, axis=0)

            fout.create_dataset(
                str(key),
                output.shape, dtype='float32',
                data=output
            )

        if store_sentence_index:
            # Stored as a single variable-length string holding the JSON dump.
            sentence_index_dataset = fout.create_dataset(
                "sentence_to_index",
                (1,),
                dtype=h5py.special_dtype(vlen=str))
            sentence_index_dataset[0] = json.dumps(sentence_to_index)