当前位置: 首页>>代码示例>>Python>>正文


Python smart_open.smart_open方法代码示例

本文整理汇总了Python中smart_open.smart_open方法的典型用法代码示例。如果您正苦于以下问题：Python smart_open.smart_open方法的具体用法？Python smart_open.smart_open怎么用？Python smart_open.smart_open使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块smart_open的用法示例。


在下文中一共展示了smart_open.smart_open方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __iter__

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import smart_open [as 别名]
def __iter__(self):
        """Stream two-field relations from ``self.file_path`` as unicode pairs.

        The file is opened with ``smart_open`` and parsed with ``csv.reader``
        using ``self.delimiter``. Every row must hold exactly two fields;
        unpacking a row of any other length raises ``ValueError``. Rows whose
        two fields are equal trip an ``assert``.

        Yields
        ------
        2-tuple (unicode, unicode)
            ``(v, u)`` when ``self.reverse`` is truthy, ``(u, v)`` otherwise.
        """
        running_py2 = sys.version_info[0] < 3
        with smart_open(self.file_path) as stream:
            # csv.reader needs byte strings on Python 2 and text on Python 3,
            # so decoding happens after parsing on 2 and before parsing on 3.
            if running_py2:
                source = stream
            else:
                source = (raw.decode(self.encoding) for raw in stream)
            for fields in csv.reader(source, delimiter=self.delimiter):
                if running_py2:
                    fields = [field.decode(self.encoding) for field in fields]
                u, v = fields
                assert u != v
                yield (v, u) if self.reverse else (u, v)
开发者ID:dalab,项目名称:hyperbolic_cones,代码行数:26,代码来源:relations.py

示例2: _get_file

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import smart_open [as 别名]
def _get_file(self):
        """Return the output file to write to, rotating to a new one if needed.

        A new file is started when no file is open yet or when
        ``self.bytes_written`` has reached ``self.max_file_size``. The new file
        name embeds the current timestamp, the worker index and a running file
        index. It is opened with ``smart_open`` when ``self.path_is_uri`` is
        truthy and with the builtin ``open`` otherwise.

        Raises:
            ValueError: if a URI target is requested but ``smart_open`` is None.
        """
        must_rotate = (
            not self.cur_file or self.bytes_written >= self.max_file_size)
        if not must_rotate:
            return self.cur_file

        if self.cur_file:
            self.cur_file.close()
        stamp = datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
        filename = "output-{}_worker-{}_{}.json".format(
            stamp, self.ioctx.worker_index, self.file_index)
        path = os.path.join(self.path, filename)

        if not self.path_is_uri:
            self.cur_file = open(path, "w")
        elif smart_open is None:
            raise ValueError(
                "You must install the `smart_open` module to write "
                "to URIs like {}".format(path))
        else:
            self.cur_file = smart_open(path, "w")

        # Start the counters afresh for the file that was just opened.
        self.file_index += 1
        self.bytes_written = 0
        logger.info("Writing to new output file {}".format(self.cur_file))
        return self.cur_file
开发者ID:ray-project,项目名称:ray,代码行数:22,代码来源:json_writer.py

示例3: _next_line

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import smart_open [as 别名]
def _next_line(self):
        """Return the next non-empty line, switching files when one runs out.

        Reads from ``self.cur_file`` (obtained from ``self._next_file()`` when
        none is set). While the read comes back empty, the current file is
        closed and replaced by another one, for at most 100 replacements.

        Raises:
            ValueError: if no line could be read after all attempts.
        """
        if not self.cur_file:
            self.cur_file = self._next_file()
        line = self.cur_file.readline()

        for _ in range(100):
            if line:
                break
            if hasattr(self.cur_file, "close"):  # legacy smart_open impls
                self.cur_file.close()
            self.cur_file = self._next_file()
            line = self.cur_file.readline()
            if not line:
                logger.debug("Ignoring empty file {}".format(self.cur_file))

        if line:
            return line
        raise ValueError("Failed to read next line from files: {}".format(
            self.files))
开发者ID:ray-project,项目名称:ray,代码行数:19,代码来源:json_reader.py

示例4: fit

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import smart_open [as 别名]
def fit(self, data):
        """Attach the shared GloVe lookup tables to this encoder.

        The tables are cached on the ``Glove`` class: ``Glove.map`` maps a token
        to its float32 vector and ``Glove.inverse`` maps the vector (as a tuple)
        back to the token. They are loaded from the downloaded
        ``glove.6B.<dimensions>d.txt.gz`` file only when ``Glove.map`` is still
        empty.

        Args:
            data: unused by this method.
        """
        require(lore.dependencies.SMART_OPEN)
        from smart_open import smart_open

        with timer('fit %s' % self.name, logging.DEBUG):
            self.missing_value = numpy.asarray([0.0] * self.dimensions, dtype=numpy.float32)

            if not Glove.map:
                path = os.path.join('encoders', 'glove.6B.%dd.txt.gz' % self.dimensions)
                local = lore.io.download(path)

                # Build into locals and publish only after a complete parse, so
                # a failure midway cannot leave a truncated table in the
                # class-level cache that later calls would treat as complete.
                word_to_vector = {}
                vector_to_word = {}
                # The context manager closes the handle even if parsing fails.
                with smart_open(local) as handle:
                    for line in handle:
                        values = line.split()
                        word = values[0]
                        parameters = numpy.asarray(values[1:], dtype=numpy.float32)
                        word_to_vector[word] = parameters
                        vector_to_word[tuple(parameters.tolist())] = word

                Glove.map = word_to_vector
                Glove.inverse = vector_to_word

            self.map = Glove.map
            self.inverse = Glove.inverse
开发者ID:instacart,项目名称:lore,代码行数:24,代码来源:encoders.py

示例5: __init__

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import smart_open [as 别名]
def __init__(self, args):
        """Load an existing vocabulary or prepare an empty one.

        When ``args.create_vocab`` is falsy, ``tok2ind`` and ``ind2tok`` are
        read from ``tok2ind.json`` / ``ind2tok.json`` under ``args.vocab_dir``
        (joined by plain string concatenation, so ``vocab_dir`` has to carry its
        own trailing separator). ``oov_words`` and ``valid_words`` are not set
        in that case.

        Otherwise the mappings start with only the NULL and UNK tokens, and, if
        both ``args.pretrained_words`` and ``args.embedding_file`` are set,
        ``valid_words`` collects the normalized first token of every line of
        the embedding file; else ``valid_words`` is ``None``.
        """
        self.args = args
        if not args.create_vocab:
            logger.info('[ Reading vocab files from {}]'.format(args.vocab_dir))
            # Context managers close the vocab files promptly instead of
            # leaving the handles for the garbage collector.
            with open(args.vocab_dir+'tok2ind.json') as tok2ind_file:
                self.tok2ind = json.load(tok2ind_file)
            # NOTE(review): JSON object keys are always strings, so ind2tok is
            # keyed by str here but by int in the create_vocab branch - confirm
            # callers handle both.
            with open(args.vocab_dir+'ind2tok.json') as ind2tok_file:
                self.ind2tok = json.load(ind2tok_file)

        else:
            self.tok2ind = {self.NULL: 0, self.UNK: 1}
            self.ind2tok = {0: self.NULL, 1: self.UNK}
            self.oov_words = {}

            # Index words in embedding file
            if args.pretrained_words and args.embedding_file:
                logger.info('[ Indexing words in embedding file... ]')
                self.valid_words = set()
                with smart_open(args.embedding_file) as f:
                    for line in f:
                        w = self.normalize(line.decode('utf-8').rstrip().split(' ')[0])
                        self.valid_words.add(w)
                logger.info('[ Num words in set = %d ]' % len(self.valid_words))
            else:
                self.valid_words = None
开发者ID:rajarshd,项目名称:Multi-Step-Reasoning,代码行数:25,代码来源:data.py

示例6: smart_open

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import smart_open [as 别名]
def smart_open(fname, mode='rb'):
        """Open ``fname``, choosing the opener from its file extension.

        ``.gz`` files go through ``GzipFile`` and ``.bz2`` files through
        ``BZ2File``, each wrapped by ``make_closing``; anything else is handed
        to the builtin ``open``. The extension comparison is case-sensitive.
        """
        extension = os.path.splitext(fname)[1]
        if extension == '.gz':
            import gzip
            return make_closing(gzip.GzipFile)(fname, mode)
        if extension == '.bz2':
            import bz2
            return make_closing(bz2.BZ2File)(fname, mode)
        return open(fname, mode)


# noinspection PyUnresolvedReferences 
开发者ID:hankcs,项目名称:pyhanlp,代码行数:14,代码来源:util.py

示例7: glove2word2vec

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import smart_open [as 别名]
def glove2word2vec(glove_vector_file, output_model_file):
    """Convert GloVe vectors into word2vec C format.

    The word2vec text format is the GloVe file preceded by a header line
    ``"<number of vectors> <dimensions>"``. After writing the converted file
    this function loads it with gensim and logs a couple of sample queries.

    Args:
        glove_vector_file: path of the input file in GloVe format.
        output_model_file: path the converted file is written to.

    Returns:
        The path of the converted file (``output_model_file``).
    """

    def get_info(glove_file_name):
        """Return the number of vectors and dimensions in a file in GloVe format."""
        # One pass: the first line gives the dimensionality, the remaining
        # lines are only counted. An empty file yields (0, -1).
        with smart_open.smart_open(glove_file_name) as f:
            first_line = f.readline()
            num_dims = len(first_line.split()) - 1
            num_lines = (1 if first_line else 0) + sum(1 for _ in f)
        return num_lines, num_dims

    def prepend_line(infile, outfile, line):
        """Copy ``infile`` to ``outfile`` with ``line`` written first."""
        header = line.strip()
        # Both files are opened in binary mode, so the header has to be bytes;
        # writing a text string to a 'wb' handle raises TypeError on Python 3.
        if not isinstance(header, bytes):
            header = header.encode('utf-8')
        with smart_open.smart_open(infile, 'rb') as old:
            with smart_open.smart_open(outfile, 'wb') as new:
                new.write(header + b"\n")
                for row in old:
                    new.write(row)
        return outfile

    num_lines, dims = get_info(glove_vector_file)

    logger.info('%d lines with %s dimensions' % (num_lines, dims))

    gensim_first_line = "{} {}".format(num_lines, dims)
    model_file = prepend_line(glove_vector_file, output_model_file, gensim_first_line)

    logger.info('Model %s successfully created !!'%output_model_file)

    # Demo: Loads the newly created glove_model.txt into gensim API.
    model = gensim.models.Word2Vec.load_word2vec_format(model_file, binary=False) #GloVe Model

    logger.info('Most similar to king are: %s' % model.most_similar(positive=['king'], topn=10))
    logger.info('Similarity score between woman and man is %s ' % model.similarity('woman', 'man'))

    # NOTE(review): `program` is not defined in this function; presumably a
    # module-level name set by the script entry point - confirm.
    logger.info("Finished running %s", program)

    return model_file
开发者ID:manasRK,项目名称:glove-gensim,代码行数:42,代码来源:glove2word2vec.py

示例8: smart_open

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import smart_open [as 别名]
def smart_open(fname, mode='rb'):
        """Return an open file object for ``fname``, decompressing by extension.

        Files ending in ``.bz2`` or ``.gz`` are opened with ``BZ2File`` or
        ``GzipFile`` respectively (wrapped by ``make_closing``); every other
        name is opened with the builtin ``open``. ``mode`` is passed through
        unchanged in all cases.
        """
        suffix = os.path.splitext(fname)[1]
        if suffix not in ('.bz2', '.gz'):
            return open(fname, mode)
        # Import lazily so only the codec that is actually needed gets loaded.
        if suffix == '.bz2':
            from bz2 import BZ2File as opener
        else:
            from gzip import GzipFile as opener
        return make_closing(opener)(fname, mode)
开发者ID:huyingxi,项目名称:Synonyms,代码行数:11,代码来源:utils.py

示例9: file_or_filename

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import smart_open [as 别名]
def file_or_filename(input):
    """
    Yield a file-like object positioned at its beginning.

    `input` may be a filename, which is opened with `smart_open`, or an
    already-open file-like object supporting `seek`, which is rewound to
    offset 0 and yielded as is.

    """
    if not isinstance(input, string_types):
        # Already a file-like object: rewind it and hand it back.
        input.seek(0)
        yield input
        return
    # A filename: open it as a file.
    yield smart_open(input)
开发者ID:huyingxi,项目名称:Synonyms,代码行数:15,代码来源:utils.py

示例10: write

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import smart_open [as 别名]
def write(self, sample_batch):
        """Serialize ``sample_batch`` to JSON and append it as one line.

        The target file comes from ``self._get_file()``. ``self.bytes_written``
        grows by the length of the JSON payload; the trailing newline is not
        counted.
        """
        start = time.time()
        payload = _to_json(sample_batch, self.compress_columns)
        out = self._get_file()
        for piece in (payload, "\n"):
            out.write(piece)
        if hasattr(out, "flush"):  # legacy smart_open impls
            out.flush()
        size = len(payload)
        self.bytes_written += size
        logger.debug("Wrote {} bytes to {} in {}s".format(
            size, out,
            time.time() - start))
开发者ID:ray-project,项目名称:ray,代码行数:14,代码来源:json_writer.py

示例11: _next_file

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import smart_open [as 别名]
def _next_file(self):
        """Open a randomly chosen entry of ``self.files`` for reading.

        Local paths are opened with the builtin ``open``; anything that parses
        as a URI with a real scheme is opened with ``smart_open``.

        Raises:
            ValueError: if the path is a URI but ``smart_open`` is None.
        """
        path = random.choice(self.files)
        scheme = urlparse(path).scheme
        # urlparse reports a Windows drive letter ("D:\\data\\x.json") as a
        # one-character scheme. Treat every such path as local, not only the
        # "c" drive, so files on other drives are not sent to smart_open.
        if len(scheme) <= 1:
            return open(path, "r")
        if smart_open is None:
            raise ValueError(
                "You must install the `smart_open` module to read "
                "from URIs like {}".format(path))
        return smart_open(path, "r")
开发者ID:ray-project,项目名称:ray,代码行数:12,代码来源:json_reader.py

示例12: _from_json

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import smart_open [as 别名]
def _from_json(batch):
    """Rebuild a batch object from one JSON-encoded record.

    ``batch`` may be ``str`` or UTF-8 ``bytes``. The record's ``"type"`` field
    selects the result: ``"SampleBatch"`` or ``"MultiAgentBatch"``. Every
    column value is passed through ``unpack_if_needed`` first.

    Raises:
        ValueError: if the ``"type"`` field is missing or has another value.
    """
    if isinstance(batch, bytes):  # smart_open S3 doesn't respect "r"
        batch = batch.decode("utf-8")
    data = json.loads(batch)

    if "type" not in data:
        raise ValueError("JSON record missing 'type' field")
    data_type = data.pop("type")

    if data_type == "SampleBatch":
        return SampleBatch(
            {k: unpack_if_needed(v) for k, v in data.items()})

    if data_type == "MultiAgentBatch":
        policy_batches = {
            policy_id: SampleBatch(
                {k: unpack_if_needed(v) for k, v in policy_batch.items()})
            for policy_id, policy_batch in data["policy_batches"].items()
        }
        return MultiAgentBatch(policy_batches, data["count"])

    raise ValueError(
        "Type field must be one of ['SampleBatch', 'MultiAgentBatch']",
        data_type)
开发者ID:ray-project,项目名称:ray,代码行数:28,代码来源:json_reader.py

示例13: main

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import smart_open [as 别名]
def main():
    """Run the randomness analysis on the diversity file and save the SVG.

    Reads ``diversity_file`` and ``svg_file`` from the parsed command-line
    arguments, runs a ``RandomnessGenerator`` over the opened input, saves its
    visualization and prints the number of over-represented pixels.
    """
    args = parse_arguments()

    # Keep the input open for the whole analysis and export, and let the
    # context manager close it afterwards (also when a step raises).
    with smart_open(args.diversity_file) as file_handler:
        # create analysis object
        default_class = RandomnessGenerator(file_handler)
        default_class.run()

        default_class.viz.save(filename=args.svg_file)

        print(f"{default_class.viz.over_called_pixels} pixels overrepresented.")
开发者ID:r-bioinformatics,项目名称:collaboration,代码行数:15,代码来源:randomness.py

示例14: load_embeddings

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import smart_open [as 别名]
def load_embeddings(args, word_dict):
    """Return the embedding table, building and caching it on first use.

    If ``args.embedding_table`` already exists as a file, it is loaded with
    ``torch.load`` and returned. Otherwise a ``len(word_dict)`` by
    ``args.embedding_dim_orig`` tensor is filled from a normal(0, 1)
    distribution with row 0 zeroed, rows of words found in
    ``args.embedding_file`` are overwritten with their vectors, and the result
    is saved to ``args.embedding_table``.
    """
    embeddings = torch.Tensor(len(word_dict), args.embedding_dim_orig)

    if os.path.isfile(args.embedding_table):
        logger.info('Loading embeddings from saved embeddings table')
        return torch.load(args.embedding_table)

    logger.info("Initializing embedding table randomly...")
    embeddings.normal_(0, 1)
    embeddings[0].fill_(0)

    # Overwrite the random rows of every word that has a pretrained vector.
    with smart_open(args.embedding_file) as f:
        for raw in f:
            fields = raw.decode('utf-8').rstrip().split(' ')
            assert (len(fields) == args.embedding_dim_orig + 1)
            w = word_dict.normalize(fields[0])
            if w not in word_dict:
                continue
            vec = torch.Tensor([float(x) for x in fields[1:]])
            embeddings[word_dict[w]].copy_(vec)

    # Persist the table so later runs take the early-return path above.
    logger.info('Saving the embedding table')
    torch.save(embeddings, args.embedding_table)
    return embeddings

#
# ------------------------------------------------------------------------------
# Utility classes
# ------------------------------------------------------------------------------ 
开发者ID:rajarshd,项目名称:Multi-Step-Reasoning,代码行数:32,代码来源:utils.py

示例15: _parse

# 需要导入模块: import smart_open [as 别名]
# 或者: from smart_open import smart_open [as 别名]
def _parse(self, path: Union[Path, str]) -> None:
        """Read the CSV sample sheet at ``path`` and populate this object.

        The file is read in full with ``self._encoding`` and then processed
        row by row. A row whose first field matches
        ``self._section_header_re`` switches the current section; other rows
        are interpreted according to that section:

        * ``Reads`` - the first field is appended to ``self.Reads`` as an int.
        * ``Data`` - the first row is the header, each later row becomes a
          ``Sample`` built from header/value pairs.
        * any other section - rows with at least two fields are stored as
          ``key -> value`` on the attribute named after the section.

        Raises:
            ValueError: if a row contains a character outside ``VALID_ASCII``
                or the ``[Data]`` header has an empty field.
        """
        section_name: str = ''
        sample_header: Optional[List[str]] = None

        with open(path, encoding=self._encoding) as handle:
            rows = list(csv.reader(handle, skipinitialspace=True))

        for line_number, fields in enumerate(rows, start=1):
            text = ''.join(fields)

            # Blank rows are skipped so that sheets using several newlines as
            # section separators are accepted.
            #
            #   https://github.com/clintval/sample-sheet/issues/46
            #
            if not text.strip():
                continue

            # Reject the row as soon as one character is not allowed.
            if not all(character in VALID_ASCII for character in set(text)):
                raise ValueError(
                    f'Sample sheet contains invalid characters on line '
                    f'{line_number}: {text}'
                )

            header_match = self._section_header_re.match(fields[0])

            # A section header: remember its name and move to the next row.
            if header_match:
                section_name, *_ = header_match.groups()
                already_known = (
                    section_name in self._sections
                    or section_name in REQUIRED_SECTIONS
                )
                if not already_known:
                    self.add_section(section_name)
                continue

            # [Reads] - vertical list of integers.
            if section_name == 'Reads':
                self.Reads.append(int(fields[0]))
                continue

            # [Data] - delimited data with the first row a header.
            if section_name == 'Data':
                if sample_header is None:
                    if '' in fields:
                        raise ValueError(
                            f'Header for [Data] section is not allowed to '
                            f'have empty fields: {fields}'
                        )
                    sample_header = fields
                else:
                    self.add_sample(Sample(dict(zip(sample_header, fields))))
                continue

            # [<Other>] - keys in first column and values in second column.
            if len(fields) >= 2:
                key, value = fields[0], fields[1]
                section = getattr(self, section_name)
                section[key] = value
开发者ID:clintval,项目名称:sample-sheet,代码行数:63,代码来源:__init__.py


注:本文中的smart_open.smart_open方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。