

Python IO.close Method Code Examples

This article collects typical usage examples of the Python typing.IO.close method. If you are wondering how IO.close works, how to call it, or what real uses of it look like, the curated code examples below may help. You can also explore further usage examples of the containing class, typing.IO.


Three code examples of the IO.close method are shown below, ordered by popularity.
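Before the examples, a minimal self-contained sketch (not taken from the projects below) shows what the typing.IO annotation expresses: any file-like object, whose close() method releases the underlying resource.

# Minimal sketch: typing.IO annotates any file-like object; close() releases it.
import io
from typing import IO

def consume(stream: IO) -> str:
    """Read everything from a file-like object, closing it afterwards."""
    try:
        return stream.read()
    finally:
        stream.close()  # runs even if read() raises

print(consume(io.StringIO("hello")))  # works for files and in-memory streams alike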

Example 1: _get_single_df

# Required import: from typing import IO  [as alias]
# Or: from typing.IO import close  [as alias]
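# Note: this excerpt also depends on pandas (imported as pd) and on peakina
# helpers such as TypeEnum, detect_type, detect_encoding, detect_sep and NOTSET.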
    def _get_single_df(
        stream: IO, filetype: Optional[TypeEnum], **kwargs
    ) -> Union[pd.DataFrame, Iterable[pd.DataFrame]]:
        """
        Read a stream and retrieve the data frame or data frame generator (chunks)
        It uses `stream.name`, which is the path to a local file (often temporary)
        to avoid closing it. It will be closed at the end of the method.
        """
        if filetype is None:
            filetype = TypeEnum(detect_type(stream.name))

        # Check encoding
        encoding = kwargs.get('encoding')
        if not validate_encoding(stream.name, encoding):
            encoding = detect_encoding(stream.name)
        kwargs['encoding'] = encoding

        # Check separator for CSV files if it's not set
        if filetype is TypeEnum.CSV and 'sep' not in kwargs:
            if not validate_sep(stream.name, encoding=encoding):
                kwargs['sep'] = detect_sep(stream.name, encoding)

        pd_read = getattr(pd, f'read_{filetype}')
        try:
            df = pd_read(stream.name, **kwargs)
        finally:
            stream.close()

        # With sheet_name=None, pandas returns a dict of DataFrames, one per sheet
        if kwargs.get('sheet_name', NOTSET) is None:
            for sheet_name, _df in df.items():
                _df['__sheet__'] = sheet_name
            df = pd.concat(df.values(), sort=False)

        return df
Author: ToucanToco | Project: peakina | Lines: 37 | Source: datasource.py
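The core pattern of this example, reading through `stream.name` while guaranteeing the stream is closed in a `finally` block, can be reduced to a short self-contained sketch (pandas is assumed; the peakina helpers are not needed here):

# Sketch of the read-by-path, close-in-finally pattern shown above (assumes pandas).
import tempfile
import pandas as pd

tmp = tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False)
tmp.write('a,b\n1,2\n')
tmp.flush()
try:
    df = pd.read_csv(tmp.name)  # read via the path, not the open handle
finally:
    tmp.close()  # the handle is closed whether or not the read succeeded
print(df)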

Example 2: embed_file

# Required import: from typing import IO  [as alias]
# Or: from typing.IO import close  [as alias]
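# Note: this excerpt also depends on h5py, numpy, Tqdm, logger and
# DEFAULT_BATCH_SIZE from the surrounding allennlp module.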
    def embed_file(self,
                   input_file: IO,
                   output_file_path: str,
                   output_format: str = "all",
                   batch_size: int = DEFAULT_BATCH_SIZE) -> None:
        """
        Computes ELMo embeddings from an input_file where each line contains a sentence tokenized by whitespace.
        The ELMo embeddings are written out in HDF5 format, where each sentence is saved in its own dataset.

        Parameters
        ----------
        input_file : ``IO``, required
            A file with one tokenized sentence per line.
        output_file_path : ``str``, required
            A path to the output hdf5 file.
        output_format : ``str``, optional, (default = "all")
            The embeddings to output.  Must be one of "all", "top", or "average".
        batch_size : ``int``, optional, (default = 64)
            The number of sentences to process in ELMo at one time.
        """

        assert output_format in ["all", "top", "average"]

        # Tokenizes the sentences.
        sentences = [line.strip() for line in input_file if line.strip()]
        split_sentences = [sentence.split() for sentence in sentences]
        # Uses the sentence as the key.
        embedded_sentences = zip(sentences, self.embed_sentences(split_sentences, batch_size))

        logger.info("Processing sentences.")
        with h5py.File(output_file_path, 'w') as fout:
            for key, embeddings in Tqdm.tqdm(embedded_sentences):
                if key in fout.keys():
                    logger.warning(f"Key already exists in {output_file_path}, skipping: {key}")
                else:
                    if output_format == "all":
                        output = embeddings
                    elif output_format == "top":
                        output = embeddings[2]
                    elif output_format == "average":
                        output = numpy.average(embeddings, axis=0)

                    fout.create_dataset(
                            key,
                            output.shape, dtype='float32',
                            data=output
                    )
        input_file.close()
Author: Jordan-Sauchuk | Project: allennlp | Lines: 50 | Source: elmo.py
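A hypothetical invocation of this method could look as follows; it assumes allennlp's ElmoEmbedder class (which defines embed_file), and the file paths are placeholders:

# Hypothetical usage sketch; 'sentences.txt' and 'elmo.hdf5' are placeholder paths.
from allennlp.commands.elmo import ElmoEmbedder

embedder = ElmoEmbedder()  # loads the default pretrained ELMo weights
with open('sentences.txt', 'r') as input_file:
    # embed_file closes input_file itself; the extra close from the
    # context manager is a harmless no-op.
    embedder.embed_file(input_file, 'elmo.hdf5', output_format='average')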

Example 3: embed_file

# Required import: from typing import IO  [as alias]
# Or: from typing.IO import close  [as alias]
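# Note: this excerpt also depends on h5py, numpy, json, Tqdm, logger,
# ConfigurationError and DEFAULT_BATCH_SIZE from the surrounding allennlp module.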
    def embed_file(self,
                   input_file: IO,
                   output_file_path: str,
                   output_format: str = "all",
                   batch_size: int = DEFAULT_BATCH_SIZE,
                   forget_sentences: bool = False,
                   use_sentence_keys: bool = False) -> None:
        """
        Computes ELMo embeddings from an input_file where each line contains a sentence tokenized by whitespace.
        The ELMo embeddings are written out in HDF5 format, where each sentence embedding
        is saved in a dataset with the line number in the original file as the key.

        Parameters
        ----------
        input_file : ``IO``, required
            A file with one tokenized sentence per line.
        output_file_path : ``str``, required
            A path to the output hdf5 file.
        output_format : ``str``, optional, (default = "all")
            The embeddings to output.  Must be one of "all", "top", or "average".
        batch_size : ``int``, optional, (default = 64)
            The number of sentences to process in ELMo at one time.
        forget_sentences : ``bool``, optional, (default = False).
            If use_sentence_keys is False, whether to include a
            string-serialized JSON dictionary that maps each sentence to its
            line number (its HDF5 key). The mapping is stored under the
            "sentence_to_index" HDF5 key. This is useful if you want to use
            the embeddings without keeping the original file of sentences
            around.
        use_sentence_keys : ``bool``, optional, (default = False).
            Whether or not to use full sentences as keys. By default,
            the line numbers of the input file are used as ids, which is more robust.
        """

        assert output_format in ["all", "top", "average"]

        # Tokenizes the sentences.
        sentences = [line.strip() for line in input_file]

        blank_lines = [i for (i, line) in enumerate(sentences) if line == ""]
        if blank_lines:
            raise ConfigurationError(f"Your input file contains empty lines at indexes "
                                     f"{blank_lines}. Please remove them.")
        split_sentences = [sentence.split() for sentence in sentences]
        # Uses the sentence index as the key.

        if use_sentence_keys:
            logger.warning("Using sentences as keys can fail if sentences "
                           "contain forward slashes or colons. Use with caution.")
            embedded_sentences = zip(sentences, self.embed_sentences(split_sentences, batch_size))
        else:
            embedded_sentences = ((str(i), x) for i, x in
                                  enumerate(self.embed_sentences(split_sentences, batch_size)))

        sentence_to_index = {}
        logger.info("Processing sentences.")
        with h5py.File(output_file_path, 'w') as fout:
            for key, embeddings in Tqdm.tqdm(embedded_sentences):
                if use_sentence_keys and key in fout.keys():
                    raise ConfigurationError(f"Key already exists in {output_file_path}. "
                                             f"To encode duplicate sentences, do not pass "
                                             f"the --use-sentence-keys flag.")

                if not forget_sentences and not use_sentence_keys:
                    sentence = sentences[int(key)]
                    sentence_to_index[sentence] = key

                if output_format == "all":
                    output = embeddings
                elif output_format == "top":
                    output = embeddings[-1]
                elif output_format == "average":
                    output = numpy.average(embeddings, axis=0)

                fout.create_dataset(
                        str(key),
                        output.shape, dtype='float32',
                        data=output
                )
            if not forget_sentences and not use_sentence_keys:
                sentence_index_dataset = fout.create_dataset(
                        "sentence_to_index",
                        (1,),
                        dtype=h5py.special_dtype(vlen=str))
                sentence_index_dataset[0] = json.dumps(sentence_to_index)

        input_file.close()
Author: apmoore1 | Project: allennlp | Lines: 89 | Source: elmo.py
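Since this method stores a JSON mapping under the "sentence_to_index" key whenever forget_sentences and use_sentence_keys are both False, the output file can later be read back without the original sentence file. A sketch, assuming the file was written that way:

# Sketch: reading the HDF5 output back (assumes forget_sentences=False was used).
import json
import h5py

with h5py.File('elmo.hdf5', 'r') as fin:
    sentence_to_index = json.loads(fin['sentence_to_index'][0])
    for sentence, key in sentence_to_index.items():
        embedding = fin[key][...]  # shape depends on the chosen output_format
        print(sentence, embedding.shape)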

