Python smart_open.open方法代码示例

本文整理汇总了Python中smart_open.open方法的典型用法代码示例。如果您正苦于以下问题：Python smart_open.open方法的具体用法？Python smart_open.open怎么用？Python smart_open.open使用的例子？那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类smart_open的用法示例。

在下文中一共展示了smart_open.open方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: getitem

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import open [as 别名]
def __getitem__(self, i):
        """ Returns the line indexed by i. Primarily used for 
        :meth:`~fse.models.sentencevectors.SentenceVectors.most_similar`
        
        Parameters
        ----------
        i : int
            The line index used to index the file

        Returns
        -------
        str
            line at the current index

        """
        if not self.get_able:
            raise RuntimeError("To index the lines, you must contruct with get_able=True")

        with open(self.path, "rb") as f:
            f.seek(self.line_offset[i])
            output = f.readline()
            f.seek(0)
            return any2unicode(output).rstrip()

开发者ID:oborchers，项目名称:Fast_Sentence_Embeddings，代码行数:25，代码来源:inputs.py

示例2: bigrammer

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import open [as 别名]
def bigrammer(source_file, outfile, mincount=100, threshold=0.99, scoring='npmi',
              commonfile='common_tagged.txt'):
    """
    :param source_file:
    :param outfile:
    :param mincount:
    :param threshold:
    :param scoring:
    :param commonfile:
    :return:
    """
    common = set([word.strip() for word in open(commonfile, 'r').readlines()])
    data = LineSentence(source_file)
    bigram_transformer = Phrases(sentences=data, min_count=mincount, threshold=threshold,
                                 scoring=scoring, max_vocab_size=400000000, delimiter=b':::',
                                 progress_per=100000, common_terms=common)
    bigrams = Phraser(bigram_transformer)
    tempfile = open(outfile, 'a')
    print('Writing bigrammed text to %s' % outfile, file=sys.stderr)
    for i in bigrams[data]:
        tempfile.write(' '.join(i) + '\n')
    tempfile.close()
    return len(bigrams.phrasegrams)

开发者ID:akutuzov，项目名称:webvectors，代码行数:25，代码来源:helpers.py

示例3: load_glove_from_file

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import open [as 别名]
def load_glove_from_file(glove_filepath: Path) -> Tuple[Dict[str, int], np.array]:
    w2i = {}
    embeddings = []

    with open(glove_filepath, "r") as fp:
        iterator = tqdm(enumerate(fp), "Embeddings") if TQDM else enumerate(fp)
        for index, line in iterator:
            line = line.split(" ")  # each line: word num1 num2 ...
            w2i[line[0]] = index  # word = line[0]
            embedding_i = np.array([float(val) for val in line[1:]])
            embeddings.append(embedding_i)

    return w2i, np.stack(embeddings)

开发者ID:feedly，项目名称:transfer-nlp，代码行数:15，代码来源:embeddings.py

示例4: report

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import open [as 别名]
def report(self, name: str, experiment: ExperimentConfig, report_dir: Path):

        with open(report_dir / 'metrics_report.txt', 'w') as reporting:
            reporting.write(f"Metrics reporting for experiment {name}\n")
            reporting.write("#"*50 + '\n')

            for mode, metrics in experiment['trainer'].metrics_history.items():
                reporting.write(f"Reporting metrics in {mode} mode\n")
                for metric, values in metrics.items():
                    reporting.write(f"{metric}: [{', '.join([str(value) for value in values])}]\n")

开发者ID:feedly，项目名称:transfer-nlp，代码行数:12，代码来源:mlp_parameter_tuning.py

示例5: open_file

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import open [as 别名]
def open_file(path, mode, num_tries=20, encoding='utf-8', auto_decompression=True):
    import warnings

    if is_s3_path(path) and 'r' in mode:
        client = _get_fsclient_bypath(path)
        client.wait_for_path(path)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        import smart_open

        nTry = 0
        while nTry <= num_tries:
            try:
                # TODO: support append mode for s3
                return smart_open.open(
                    path,
                    mode,
                    encoding=encoding,
                    ignore_ext=not auto_decompression,
                    transport_params=get_smart_open_transport_params(path)
                )
            except Exception as e:
                if nTry >= num_tries:
                    raise

            if 'w' in mode:
                remove_file(path)

            nTry = nTry + 1
            time.sleep(1)

开发者ID:augerai，项目名称:a2ml，代码行数:33，代码来源:fsclient.py

示例6: get_file_size

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import open [as 别名]
def get_file_size(path):
    client = _get_fsclient_bypath(path)
    return client.get_file_size(path)

# @classmethod
# def openFile(cls, path, mode):
#     client = cls._get_fsclient_bypath(path)
#     return client.open(path, mode)

开发者ID:augerai，项目名称:a2ml，代码行数:10，代码来源:fsclient.py

示例7: post_image_to_actress

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import open [as 别名]
def post_image_to_actress(actress_id, image_f, emby_url, api_key):
        with open(image_f, 'rb') as f:
            b6_pic = base64.b64encode(f.read())  # 读取文件内容，转换为base64编码

        url = f'{emby_url}emby/Items/{actress_id}/Images/Primary?api_key={api_key}'
        if image_f.endswith('png'):
            header = {"Content-Type": 'image/png', }
        else:
            header = {"Content-Type": 'image/jpeg', }

        requests.post(url=url, data=b6_pic, headers=header)
        print(f'successfully post actress ID: {actress_id} image')
        return 1

开发者ID:ddd354，项目名称:JAVOneStop，代码行数:15，代码来源:emby_actors.py

示例8: load_model

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import open [as 别名]
def load_model(self, model: str, model_path: str, max_seq_length: int):
        try:
            encoding = 'utf-8'
            unicode_errors = 'strict'

            model_file = [f for f in os.listdir(model_path) if os.path.isfile(os.path.join(model_path, f))]
            f = open(os.path.join(model_path, model_file[0]), 'rb')

            header = to_unicode(f.readline(), encoding=encoding)
            vocab_size, vector_size = (int(x) for x in header.split())  # throws for invalid file format

            binary_len = dtype(real).itemsize * vector_size
            for _ in tqdm(range(vocab_size)):
                word = []
                while True:
                    ch = f.read(1)
                    if ch == b' ':
                        break
                    if ch == b'':
                        raise EOFError("unexpected end of input; is count incorrect or file otherwise damaged?")
                    if ch != b'\n':  # ignore newlines in front of words (some binary files have)
                        word.append(ch)
                word = to_unicode(b''.join(word), encoding=encoding, errors=unicode_errors)

                weights = fromstring(f.read(binary_len), dtype=real).astype(real)

                self.word_vectors[word] = weights
            self.model_name = model
            self.max_seq_length = max_seq_length
            print("Model loaded Successfully !")
            return self
        except Exception as e:
            print('Error loading Model, ', str(e))

开发者ID:amansrivastava17，项目名称:embedding-as-service，代码行数:35，代码来源:__init__.py

示例9: read_index

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import open [as 别名]
def read_index(pack_index_path: str) -> Dict[str, str]:
    page_idx: Dict[str, str] = {}

    logging.info("Reading pack index from %s", pack_index_path)

    with open(pack_index_path) as idx:
        for page_name, page_path in csv.reader(idx, delimiter='\t'):
            page_idx[page_name] = page_path
    return page_idx

开发者ID:asyml，项目名称:forte，代码行数:11，代码来源:dbpedia_infobox_reader.py

示例10: _parse_pack

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import open [as 别名]
def _parse_pack(
            self, collection: Tuple[str, Dict[str, List[state_type]]]
    ) -> Iterator[DataPack]:
        resource_name, info_box_data = collection

        if resource_name in self.redirects:
            resource_name = self.redirects[resource_name]

        if resource_name in self.pack_index:
            print_progress(f'Add infobox to resource: [{resource_name}]')

            pack_path = os.path.join(
                self.pack_dir,
                self.pack_index[resource_name]
            )

            if os.path.exists(pack_path):
                with open(pack_path) as pack_file:
                    pack = data_utils.deserialize(
                        self._pack_manager, pack_file.read())

                    add_info_boxes(pack, info_box_data['literals'])
                    add_info_boxes(pack, info_box_data['objects'])
                    add_property(pack, info_box_data['properties'])
                    yield pack
        else:
            print_notice(f"Resource {resource_name} is not in the raw packs.")
            self.logger.warning("Resource %s is not in the raw packs.",
                                resource_name)

开发者ID:asyml，项目名称:forte，代码行数:31，代码来源:dbpedia_infobox_reader.py

示例11: _build_offsets

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import open [as 别名]
def _build_offsets(self):
        """ Builds an offset table to index the file """
        with open(self.path, "rb") as f:
            offset = f.tell()
            for line in f:
                self.line_offset.append(offset)
                offset += len(line)

开发者ID:oborchers，项目名称:Fast_Sentence_Embeddings，代码行数:9，代码来源:inputs.py

示例12: iter

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import open [as 别名]
def __iter__(self):
        """Iterate through the lines in the source.

        Yields
        ------
        tuple : (list[str], int)
            Tuple of list of string and index

        """
        with open(self.path, "rb") as f:
            for i, line in enumerate(f):
                yield (any2unicode(line).split(), i)

开发者ID:oborchers，项目名称:Fast_Sentence_Embeddings，代码行数:14，代码来源:inputs.py

示例13: load_object_from_file

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import open [as 别名]
def load_object_from_file(path, use_local_cache=False):
    import joblib

    path_to_load = None
    if is_s3_path(path):
        if use_local_cache:
            local_path = path.replace(
                "s3://"+os.environ.get('S3_DATA_PATH'), os.environ.get("AUGER_LOCAL_TMP_DIR", ''))
            #logging.info("Local cache path: %s"%local_path)
            if not is_file_exists(local_path):
                local_lock_path = local_path + '.lock'
                create_parent_folder(local_lock_path)
                f_lock = None
                try:
                    f_lock = open(local_lock_path, 'x')
                except Exception as e:
                    #logging.exception("Open lock file failed.")
                    pass

                if f_lock:
                    try:
                        if not is_file_exists(local_path):
                            with save_atomic(local_path) as local_tmp_path:
                                logging.info("Download file from s3 to: %s, temp folder: %s" % (
                                    local_path, local_tmp_path))
                                download_file(path, local_tmp_path)
                    finally:
                        f_lock.close()
                        remove_file(local_lock_path)
                else:
                    wait_for_file(local_path, True,
                                  num_tries=300, interval_sec=10)

            path_to_load = local_path
        else:
            with save_atomic(path, move_file=False) as local_tmp_path:
                download_file(path, local_tmp_path)
                return joblib.load(local_tmp_path)
    else:
        path_to_load = path

    return joblib.load(path_to_load)

开发者ID:augerai，项目名称:a2ml，代码行数:44，代码来源:fsclient.py

示例14: load_df_from_s3

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import open [as 别名]
def load_df_from_s3(
    aws_key: str,
    aws_secret: str,
    bucket_name: str,
    file_path: str,
    skiprows: Optional[int] = 0,
    skipfooter: Optional[int] = 0,
) -> pd.DataFrame:
    """Load data from a S3 bucket.

    Given a file object, try to read the content and transform it into a data
    frame.

    It also tries to convert as many columns as possible to date/time format
    (testing the conversion on every string column).

    :param aws_key: Key to access the S3 bucket
    :param aws_secret: Secret to access the S3 bucket
    :param bucket_name: Bucket name
    :param file_path: Path to access the file within the bucket
    :param skiprows: Number of lines to skip at the top of the document
    :param skipfooter: Number of lines to skip at the bottom of the document
    :return: Resulting data frame, or an Exception.
    """
    path_prefix = ''
    if aws_key and aws_secret:
        # If key/secret are given, create prefix
        path_prefix = '{0}:{1}@'.format(aws_key, aws_secret)

    if settings.ONTASK_TESTING:
        uri = 'file:///{0}/{1}'.format(
            bucket_name,
            file_path)
    else:
        uri = 's3://{0}{1}/{2}'.format(
            path_prefix,
            bucket_name,
            file_path)
    data_frame = pd.read_csv(
        smart_open.open(uri),
        index_col=False,
        infer_datetime_format=True,
        quotechar='"',
        skiprows=skiprows,
        skipfooter=skipfooter,
        encoding='utf-8',
    )

    # Strip white space from all string columns and try to convert to
    # datetime just in case
    return pandas.detect_datetime_columns(data_frame)

开发者ID:abelardopardo，项目名称:ontask_b，代码行数:53，代码来源:dataframeupload.py

示例15: _parse

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import open [as 别名]
def _parse(self, path: Union[Path, str]) -> None:
        section_name: str = ''
        sample_header: Optional[List[str]] = None

        with open(path, encoding=self._encoding) as handle:
            lines = list(csv.reader(handle, skipinitialspace=True))

        for i, line in enumerate(lines):
            # Skip to next line if this line is empty to support formats of
            # sample sheets with multiple newlines as section seperators.
            #
            #   https://github.com/clintval/sample-sheet/issues/46
            #
            if not ''.join(line).strip():
                continue

            # Raise exception if we encounter invalid characters.
            if any(
                character not in VALID_ASCII
                for character in set(''.join(line))
            ):
                raise ValueError(
                    f'Sample sheet contains invalid characters on line '
                    f'{i + 1}: {"".join(line)}'
                )

            header_match = self._section_header_re.match(line[0])

            # If we enter a section save it's name and continue to next line.
            if header_match:
                section_name, *_ = header_match.groups()
                if (
                    section_name not in self._sections
                    and section_name not in REQUIRED_SECTIONS
                ):
                    self.add_section(section_name)
                continue

            # [Reads] - vertical list of integers.
            if section_name == 'Reads':
                self.Reads.append(int(line[0]))
                continue

            # [Data] - delimited data with the first line a header.
            elif section_name == 'Data':
                if sample_header is not None:
                    self.add_sample(Sample(dict(zip(sample_header, line))))
                elif any(key == '' for key in line):
                    raise ValueError(
                        f'Header for [Data] section is not allowed to '
                        f'have empty fields: {line}'
                    )
                else:
                    sample_header = line
                continue

            # [<Other>] - keys in first column and values in second column.
            elif len(line) >= 2:
                key, value = (line[0], line[1])
                section: Section = getattr(self, section_name)
                section[key] = value

开发者ID:clintval，项目名称:sample-sheet，代码行数:63，代码来源:__init__.py

注：本文中的smart_open.open方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。

示例1: __getitem__