当前位置: 首页>>代码示例>>Python>>正文


Python smart_open.smart_open函数代码示例

本文整理汇总了Python中smart_open.smart_open函数的典型用法代码示例。如果您正苦于以下问题:Python smart_open函数的具体用法?Python smart_open怎么用?Python smart_open使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了smart_open函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_lines

def get_lines(glove_file_name):
    """Return the number of vectors and dimensions in a file in GloVe format.

    The file is streamed exactly once: every line is counted, and the first
    line's token count minus one (the leading word) gives the dimensionality.

    Parameters
    ----------
    glove_file_name : str
        Path or URI handed to ``smart_open.smart_open``.

    Returns
    -------
    tuple of (int, int)
        ``(num_lines, num_dims)``. For a file with no lines this is
        ``(0, -1)``.

    """
    num_lines = 0
    # Equals ``len(''.split()) - 1``, i.e. the result for a file with no lines.
    num_dims = -1
    with smart_open.smart_open(glove_file_name, 'r') as f:
        for num_lines, line in enumerate(f, 1):
            if num_lines == 1:
                # The first token is the word; the remaining tokens are the vector.
                num_dims = len(line.split()) - 1
    return num_lines, num_dims
开发者ID:jroakes,项目名称:glove-to-word2vec,代码行数:7,代码来源:convert.py

示例2: testConversion

    def testConversion(self):
        """Check that word2vec2tensor output agrees with the source word2vec file.

        Runs the conversion on ``self.datapath``, compares the row counts of
        the metadata and tensor files with the count on the first line of the
        word2vec file, then compares every written vector with the vector
        loaded through ``KeyedVectors``.
        """
        word2vec2tensor(word2vec_model_path=self.datapath, tensor_filename=self.output_folder)

        with smart_open(self.metadata_file, 'rb') as fin:
            metadata = fin.readlines()

        with smart_open(self.tensor_file, 'rb') as fin:
            vectors = fin.readlines()

        # the first line of the word2vec file is parsed as "<number of words> <vector size>"
        with smart_open(self.datapath, 'rb') as fin:
            header = fin.readline().strip()

        number_words, vector_size = map(int, header.split(b' '))
        row_counts_agree = len(metadata) == len(vectors) == number_words
        failure_message = (
            'Metadata file %s and tensor file %s imply different number of rows.'
            % (self.metadata_file, self.tensor_file)
        )
        self.assertTrue(row_counts_agree, failure_message)

        # load the original vectors as a KV model
        orig_model = KeyedVectors.load_word2vec_format(self.datapath, binary=False)

        # every (word, vector) row written to disk must match the KV model
        for raw_word, raw_vector in zip(metadata, vectors):
            word_string = raw_word.strip().decode("utf8")
            vector_string = raw_vector.replace(b'\t', b' ').decode("utf8")
            vector_array = np.array([float(token) for token in vector_string.split()])
            np.testing.assert_almost_equal(orig_model[word_string], vector_array, decimal=5)
开发者ID:RaRe-Technologies,项目名称:gensim,代码行数:31,代码来源:test_scripts.py

示例3: word2vec2tensor

def word2vec2tensor(word2vec_model_path, tensor_filename, binary=False):
    """Write a Word2Vec model out as two TSV files: vectors and words.

    ``tensor_filename + '_tensor.tsv'`` receives one tab-separated vector per
    line; ``tensor_filename + '_metadata.tsv'`` receives the matching word per
    line, in the same order.

    Parameters
    ----------
    word2vec_model_path : str
        Path to file in Word2Vec format.
    tensor_filename : str
        Prefix for output files.
    binary : bool, optional
        True if input file in binary format.

    """
    kv_model = gensim.models.KeyedVectors.load_word2vec_format(word2vec_model_path, binary=binary)
    outfiletsv = tensor_filename + '_tensor.tsv'
    outfiletsvmeta = tensor_filename + '_metadata.tsv'

    to_utf8 = gensim.utils.to_utf8
    newline = to_utf8('\n')

    with smart_open(outfiletsv, 'wb') as file_vector:
        with smart_open(outfiletsvmeta, 'wb') as file_metadata:
            for word in kv_model.index2word:
                # one word per metadata line, one tab-separated vector per tensor line
                file_metadata.write(to_utf8(word) + newline)
                vector_row = '\t'.join(map(str, kv_model[word]))
                file_vector.write(to_utf8(vector_row) + newline)

    logger.info("2D tensor file saved to %s", outfiletsv)
    logger.info("Tensor metadata file saved to %s", outfiletsvmeta)
开发者ID:RaRe-Technologies,项目名称:gensim,代码行数:27,代码来源:word2vec2tensor.py

示例4: test_s3_iter_moto

    def test_s3_iter_moto(self):
        """Are S3 files iterated over correctly?

        Writes one part-sized line, many short lines and a final line without
        a trailing newline, then reads the key back line by line, both with
        and without a context manager.
        """
        part_size = 5 * 1024**2
        # the lines we expect to read back, in order
        expected = [b"*" * part_size] + [b'0123456789'] * 1024 + [b"test"]

        # create fake bucket and fake key
        boto3.resource('s3').create_bucket(Bucket='mybucket')

        uri = "s3://mybucket/mykey"
        with smart_open.smart_open(uri, "wb", s3_min_part_size=part_size) as fout:
            # the first line alone fills a whole multipart chunk; the rest are small
            for line in expected[:-1]:
                fout.write(line + b'\n')

            # the last line is written without a newline at the end
            fout.write(expected[-1])

        # read from the fake key we filled above, iterating the object directly
        reader = smart_open.smart_open(uri)
        self.assertEqual([line.rstrip(b'\n') for line in reader], expected)

        # same thing but using a context manager
        with smart_open.smart_open(uri) as reader:
            stripped = [line.rstrip(b'\n') for line in reader]
            self.assertEqual(stripped, expected)
开发者ID:mpenkov,项目名称:smart_open,代码行数:29,代码来源:test_smart_open_old.py

示例5: test_s3_boto

    def test_s3_boto(self, mock_s3_open_read, mock_boto):
        """Is S3 line iterator called correctly?

        Each scenario opens an S3 URI, triggers iteration and checks the
        arguments of the most recent ``mock_boto.connect_s3`` call.
        ``mock_s3_open_read`` and ``mock_boto`` are presumably injected by
        patch decorators that are not visible in this snippet.
        """
        # Configure the mock boto.config.get to return default host
        smart_open.smart_open_lib.boto.config.get.return_value = 's3.amazonaws.com'

        # URI carrying credentials in the "access_id:access_secret@bucket" form;
        # the assertions below expect exactly these key/secret/bucket values.
        credential_uri = "s3://access_id:access_secret@mybucket/mykey"

        # no credentials
        smart_open_object = smart_open.smart_open("s3://mybucket/mykey")
        smart_open_object.__iter__()
        mock_boto.connect_s3.assert_called_with(aws_access_key_id=None, aws_secret_access_key=None, profile_name=None, host='s3.amazonaws.com')

        # with credential
        smart_open_object = smart_open.smart_open(credential_uri)
        smart_open_object.__iter__()
        mock_boto.connect_s3.assert_called_with(aws_access_key_id="access_id", aws_secret_access_key="access_secret", profile_name=None, host='s3.amazonaws.com')

        # with credential profile
        smart_open_object = smart_open.smart_open("s3://mybucket/mykey", profile_name="my_credentials")
        smart_open_object.__iter__()
        mock_boto.connect_s3.assert_called_with(aws_access_key_id=None, aws_secret_access_key=None, profile_name="my_credentials", host='s3.amazonaws.com')

        # lookup bucket, key; call s3_iter_lines
        smart_open_object = smart_open.smart_open(credential_uri)
        smart_open_object.__iter__()
        mock_boto.connect_s3().get_bucket.assert_called_with("mybucket")
        mock_boto.connect_s3().get_bucket().get_key.assert_called_with("mykey")
        #
        # TODO: this is kind of a useless assertion...
        #
        self.assertTrue(smart_open_object.__iter__.called)

        # with user-specified host
        smart_open_object = smart_open.smart_open(credential_uri, host='aa.domain.com')
        smart_open_object.__iter__()
        mock_boto.connect_s3.assert_called_with(aws_access_key_id="access_id", aws_secret_access_key="access_secret", profile_name=None, host='aa.domain.com')
开发者ID:move-fast,项目名称:smart_open,代码行数:34,代码来源:test_smart_open.py

示例6: test_http_bz2

    def test_http_bz2(self):
        """Can open bz2 via http?

        Produces a ``.bz2`` payload by writing through smart_open to a
        temporary file, serves those bytes from a stubbed HTTP endpoint and
        checks that reading the URL yields the original bytes.
        """
        test_string = b'Hello World Compressed.'
        #
        # TODO: why are these tests writing to temporary files?  We can do the
        # bz2 compression in memory.
        #
        with tempfile.NamedTemporaryFile('wb', suffix='.bz2', delete=False) as infile:
            test_file = infile.name

        try:
            with smart_open.smart_open(test_file, 'wb') as outfile:
                outfile.write(test_string)

            with open(test_file, 'rb') as infile:
                compressed_data = infile.read()
        finally:
            # delete=False above means we own the cleanup, also on failure
            if os.path.isfile(test_file):
                os.unlink(test_file)

        responses.add(responses.GET, "http://127.0.0.1/data.bz2",
                      body=compressed_data, stream=True)
        smart_open_object = smart_open.smart_open("http://127.0.0.1/data.bz2")

        # reading the .bz2 URL is expected to give back the original, uncompressed bytes
        self.assertEqual(smart_open_object.read(), test_string)
开发者ID:mpenkov,项目名称:smart_open,代码行数:25,代码来源:test_smart_open_old.py

示例7: test_s3_iter_moto

    def test_s3_iter_moto(self):
        """Are S3 files iterated over correctly?

        Writes one part-sized line, many short lines and a final line without
        a trailing newline, then reads the key back line by line, both with
        and without a context manager.
        """
        part_size = 5 * 1024**2
        # the lines we expect to read back, in order
        expected = [b"*" * part_size] + [b'0123456789'] * 1024 + [b"test"]

        # create fake bucket and fake key
        boto.connect_s3().create_bucket("mybucket")
        # lower the multipart upload size, to speed up these tests
        # NOTE(review): this module-level value is not restored within this test.
        smart_open_lib.S3_MIN_PART_SIZE = part_size

        uri = "s3://mybucket/mykey"
        with smart_open.smart_open(uri, "wb") as fout:
            # the first line alone fills a whole multipart chunk; the rest are small
            for line in expected[:-1]:
                fout.write(line + b'\n')

            # the last line is written without a newline at the end
            fout.write(expected[-1])

        # read from the fake key we filled above, iterating the object directly
        reader = smart_open.smart_open(uri)
        self.assertEqual([line.rstrip(b'\n') for line in reader], expected)

        # same thing but using a context manager
        with smart_open.smart_open(uri) as reader:
            stripped = [line.rstrip(b'\n') for line in reader]
            self.assertEqual(stripped, expected)
开发者ID:move-fast,项目名称:smart_open,代码行数:30,代码来源:test_smart_open.py

示例8: test_file

    def test_file(self, mock_smart_open):
        """Is file:// line iterator called correctly?

        Each case opens a path through smart_open, triggers iteration, and
        checks that ``mock_smart_open`` was last called with the bare path,
        the requested mode and ``buffering=-1``.
        """
        # case 1: plain absolute path behind a file:// prefix
        prefix = "file://"
        full_path = '/tmp/test.txt'
        read_mode = "rb"
        smart_open_object = smart_open.smart_open(prefix+full_path, read_mode)
        smart_open_object.__iter__()
        # called with the correct path?
        mock_smart_open.assert_called_with(full_path, read_mode, buffering=-1)

        # case 2: '#' characters in the path must be passed through unchanged
        full_path = '/tmp/test#hash##more.txt'
        read_mode = "rb"
        smart_open_object = smart_open.smart_open(prefix+full_path, read_mode)
        smart_open_object.__iter__()
        # called with the correct path?
        mock_smart_open.assert_called_with(full_path, read_mode, buffering=-1)

        # case 3: relative path containing '#', opened without the file:// prefix
        full_path = 'aa#aa'
        read_mode = "rb"
        smart_open_object = smart_open.smart_open(full_path, read_mode)
        smart_open_object.__iter__()
        # called with the correct path?
        mock_smart_open.assert_called_with(full_path, read_mode, buffering=-1)

        # NOTE(review): the two values below are computed but never used in the
        # visible part of this test; the snippet appears to be cut off here.
        short_path = "~/tmp/test.txt"
        full_path = os.path.expanduser(short_path)
开发者ID:mpenkov,项目名称:smart_open,代码行数:26,代码来源:test_smart_open_old.py

示例9: test_s3_metadata_write

    def test_s3_metadata_write(self):
        """Check that ``s3_upload`` options show up as metadata on the written S3 object."""
        # Read local file fixture
        fixture_path = os.path.join(CURR_DIR, 'test_data/crime-and-punishment.txt.gz')
        with smart_open.smart_open(fixture_path, 'rb') as fd:
            payload = fd.read()

        # Create a test bucket
        s3 = boto3.resource('s3')
        s3.create_bucket(Bucket='mybucket')

        # Write data, with multipart_upload options
        upload_options = {
            'ContentType': 'text/plain',
            'ContentEncoding': 'gzip'
        }
        with smart_open.smart_open(
            's3://mybucket/crime-and-punishment.txt.gz', 'wb',
            s3_upload=upload_options
        ) as fout:
            fout.write(payload)

        # the stored object should carry the content type and encoding given above
        stored = s3.Object('mybucket', 'crime-and-punishment.txt.gz')
        self.assertIn('text/plain', stored.content_type)
        self.assertEqual(stored.content_encoding, 'gzip')
开发者ID:mpenkov,项目名称:smart_open,代码行数:25,代码来源:test_smart_open_old.py

示例10: test_s3_mode_mock

    def test_s3_mode_mock(self, mock_session):
        """Are s3:// open modes passed correctly?

        Opens an S3 URI for writing with an explicit ``host`` and checks the
        arguments of the ``resource`` call on the mocked session.
        """
        # correct write mode, correct s3 URI
        uri = "s3://mybucket/mykey"
        smart_open.smart_open(uri, "w", host='s3.amazonaws.com')

        resource_factory = mock_session.return_value.resource
        resource_factory.assert_called_with('s3', endpoint_url='http://s3.amazonaws.com')
开发者ID:mpenkov,项目名称:smart_open,代码行数:8,代码来源:test_smart_open_old.py

示例11: test_s3_mode_mock

 def test_s3_mode_mock(self, mock_write, mock_boto):
     """Are s3:// open modes passed correctly?

     Opens an S3 URI in write mode and checks that the mocked boto connection
     was created without credentials, that the bucket was looked up by name,
     and that ``mock_write`` was called.
     """
     # correct write mode, correct s3 URI
     smart_open.smart_open("s3://mybucket/mykey", "w")
     mock_boto.connect_s3.assert_called_with(aws_access_key_id=None, aws_secret_access_key=None)
     # NOTE(review): this return value is configured after the smart_open call
     # above, so it cannot influence that call; confirm whether it was meant
     # to be set up beforehand.
     mock_boto.connect_s3().lookup.return_value = True
     mock_boto.connect_s3().get_bucket.assert_called_with("mybucket")
     self.assertTrue(mock_write.called)
开发者ID:salilb,项目名称:smart_open,代码行数:8,代码来源:test_smart_open.py

示例12: test_session_write_mode

    def test_session_write_mode(self):
        """
        Opening an S3 URI for writing with ``s3_session`` must create the S3
        resource through that session.
        """
        custom_session = boto3.Session()
        # replace the factory with a mock so the call can be inspected
        resource_mock = mock.MagicMock()
        custom_session.resource = resource_mock

        smart_open.smart_open('s3://bucket/key', 'wb', s3_session=custom_session)
        resource_mock.assert_called_with('s3')
开发者ID:mpenkov,项目名称:smart_open,代码行数:9,代码来源:test_smart_open_old.py

示例13: write_read_assertion

    def write_read_assertion(self, test_file):
        """Round-trip ``self.TEXT`` through ``test_file`` and assert it reads back unchanged.

        Parameters
        ----------
        test_file : str
            Path that is written with smart_open, read back, and then removed.
            The file is removed even when the write, the read or the
            assertion fails.
        """
        try:
            with smart_open.smart_open(test_file, 'wb') as fout:  # 'b' for binary, needed on Windows
                fout.write(self.TEXT.encode('utf8'))

            with smart_open.smart_open(test_file, 'rb') as fin:
                self.assertEqual(fin.read().decode('utf8'), self.TEXT)
        finally:
            # always clean up, so a failing assertion does not leave the file behind
            if os.path.isfile(test_file):
                os.unlink(test_file)
开发者ID:mpenkov,项目名称:smart_open,代码行数:9,代码来源:test_smart_open_old.py

示例14: test_gzip_write_mode

    def test_gzip_write_mode(self):
        """Should always open in binary mode when writing through a codec.

        Patches ``smart_open.s3.open`` and checks that opening a ``.gz`` key
        for writing calls it with the bucket, the key and mode ``'wb'``.
        """
        boto3.resource('s3').create_bucket(Bucket='bucket')

        target = "s3://bucket/key.gz"
        # the parsed result is not used by the assertion below
        parsed_uri = smart_open_lib._parse_uri(target)

        with mock.patch('smart_open.s3.open') as patched_open:
            smart_open.smart_open(target, "wb")
            patched_open.assert_called_with('bucket', 'key.gz', 'wb')
开发者ID:mpenkov,项目名称:smart_open,代码行数:9,代码来源:test_smart_open_old.py

示例15: test_readline

    def test_readline(self):
        """Does readline() return the correct file content?

        Writes two lines of UTF-8 encoded text to an S3 key and checks that
        the first ``readline()`` returns exactly the first line, newline
        included.
        """
        s3 = boto3.resource('s3')
        s3.create_bucket(Bucket='mybucket')
        test_string = u"hello žluťoučký world!\nhow are you?".encode('utf8')
        with smart_open.smart_open("s3://mybucket/mykey", "wb") as fout:
            fout.write(test_string)

        # use a context manager so the reader is closed even if the assertion fails
        with smart_open.smart_open("s3://mybucket/mykey", "rb") as reader:
            self.assertEqual(reader.readline(), u"hello žluťoučký world!\n".encode("utf-8"))
开发者ID:mpenkov,项目名称:smart_open,代码行数:10,代码来源:test_smart_open_old.py


注:本文中的smart_open.smart_open函数示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。