This article collects typical usage examples of Python's more_itertools.chunked. If you have been wondering how to use more_itertools.chunked, what it is good for, or what real code that calls it looks like, the curated examples below should help. You can also explore further examples from the more_itertools module that provides it.
The sections below show 15 code examples of more_itertools.chunked, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better Python code examples.
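Before the curated examples, here is a minimal self-contained sketch of what chunked does: it breaks any iterable into lists of at most n items, with the final chunk possibly shorter.

from more_itertools import chunked

for chunk in chunked("ABCDEFG", 3):
    print(chunk)
# ['A', 'B', 'C']
# ['D', 'E', 'F']
# ['G']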
Example 1: filter

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def filter(self, text, noise_only):
    """Filter lines that contain IP addresses from a given text.

    :param text: Text input
    :type text: file-like | str
    :param noise_only:
        If set, return only lines that contain IP addresses classified as noise;
        otherwise, return lines that contain IP addresses not classified as noise.
    :type noise_only: bool
    :return: Iterator that yields filtered lines in chunks
    :rtype: iterable
    """
    if isinstance(text, str):
        text = text.splitlines(True)
    chunks = more_itertools.chunked(text, self.FILTER_TEXT_CHUNK_SIZE)
    for chunk in chunks:
        yield self._filter_chunk(chunk, noise_only)
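A hedged usage sketch of the method above. The surrounding class is not shown in the excerpt, so `client` below is a stand-in for an instance of it, and the sample log text is invented for illustration.

# `client` is assumed to be an instance of the class that defines filter()
# (it supplies FILTER_TEXT_CHUNK_SIZE and _filter_chunk).
log_text = "8.8.8.8 - GET /index.html\n198.51.100.7 - GET /admin\n"
for filtered_chunk in client.filter(log_text, noise_only=False):
    print(filtered_chunk)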
Example 2: get_minibatch_iterator

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def get_minibatch_iterator(
    token_seqs: np.ndarray,
    batch_size: int,
    is_training: bool,
    drop_remainder: bool = True,
) -> Iterator[np.ndarray]:
    """Yield minibatches of token sequences, shuffling them during training."""
    indices = np.arange(token_seqs.shape[0])
    if is_training:
        np.random.shuffle(indices)

    for minibatch_indices in chunked(indices, batch_size):
        if len(minibatch_indices) < batch_size and drop_remainder:
            break  # Drop last, smaller batch
        minibatch_seqs = token_seqs[minibatch_indices]
        yield minibatch_seqs
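A hedged usage sketch of the iterator above; the array contents and batch size are invented for illustration.

import numpy as np

token_seqs = np.arange(20).reshape(5, 4)  # 5 toy sequences of 4 token ids each

# With the default drop_remainder=True, the final 1-row batch is discarded.
for batch in get_minibatch_iterator(token_seqs, batch_size=2, is_training=False):
    print(batch.shape)  # (2, 4), printed twice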
Example 3: calculate_words_displacement

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def calculate_words_displacement(self, column_names, n_jobs=1):
    """Calculate word displacements for each word in the Pandas data frame."""
    words = self.get_word_list()

    # Create chunks of the words to be processed.
    chunk_sz = int(np.ceil(len(words) / float(n_jobs)))  # chunked() expects an integer chunk size
    chunks = list(more_itertools.chunked(words, chunk_sz))

    # Calculate the displacements.
    chunksL = Parallel(n_jobs=n_jobs, verbose=20)(
        delayed(process_chunk)(chunk, process_word_source, self) for chunk in chunks
    )
    chunksH = Parallel(n_jobs=n_jobs, verbose=20)(
        delayed(process_chunk)(chunk, process_word_dest, self) for chunk in chunks
    )
    L = more_itertools.flatten(chunksL)
    H = more_itertools.flatten(chunksH)
    flattendL = [x for sublist in L for x in sublist]
    flattendH = [x for sublist in H for x in sublist]

    # Store the results in a Pandas data frame.
    dfo, dfn = self.create_data_frames(flattendL, flattendH, column_names)
    return flattendL, flattendH, dfo, dfn
Example 4: get_features

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def get_features(model, imgs, test_num_tracks):
    """Extract features in chunks to avoid OOM errors on videos with long sequence lengths.

    Used specifically during testing.

    Arguments:
        model -- model under test
        imgs -- images to get features for
    Returns:
        features
    """
    # Handle the data in chunks.
    all_features = []
    for test_imgs in mit.chunked(imgs, test_num_tracks):
        current_test_imgs = torch.stack(test_imgs)
        num_current_test_imgs = current_test_imgs.shape[0]
        # print(current_test_imgs.shape)
        features = model(current_test_imgs)
        features = features.view(num_current_test_imgs, -1)
        all_features.append(features)
    return torch.cat(all_features)
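A hedged usage sketch of get_features. `net` and the random frames are invented stand-ins; this assumes PyTorch and a model that accepts a batch of image tensors and returns per-frame features.

import torch

# `net` stands in for the model under test; 100 frames are processed in
# sub-batches of 32 so that no single forward pass exhausts GPU memory.
frames = [torch.randn(3, 224, 224) for _ in range(100)]
features = get_features(net, frames, test_num_tracks=32)
print(features.shape)  # (100, feature_dim)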
Example 5: get_spatial_features

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def get_spatial_features(model, imgs, test_num_tracks):
    """Extract features in chunks to avoid OOM errors on videos with long sequence lengths.

    Used specifically during testing.

    Arguments:
        model -- model under test
        imgs -- images to get features for
    Returns:
        features, spatial features
    """
    # Handle the data in chunks.
    all_features, all_spatial_features = [], []
    for test_imgs in mit.chunked(imgs, test_num_tracks):
        current_test_imgs = torch.stack(test_imgs)
        num_current_test_imgs = current_test_imgs.shape[0]
        features, spatial_feats = model(current_test_imgs)
        features = features.view(num_current_test_imgs, -1)
        all_spatial_features.append(spatial_feats)
        all_features.append(features)
    return torch.cat(all_features), torch.cat(all_spatial_features)
Example 6: test_even

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def test_even(self):
    """Test when ``n`` divides evenly into the length of the iterable."""
    self.assertEqual(
        list(mi.chunked('ABCDEF', 3)), [['A', 'B', 'C'], ['D', 'E', 'F']]
    )
Example 7: test_odd

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def test_odd(self):
    """Test when ``n`` does not divide evenly into the length of the
    iterable.
    """
    self.assertEqual(
        list(mi.chunked('ABCDE', 3)), [['A', 'B', 'C'], ['D', 'E']]
    )
Example 8: __init__

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def __init__(self, args, batch_size=32, test=False):
    """Load review data from Neo4j and pre-batch it in the format Keras expects."""
    self.batch_size = batch_size
    self.query = """
        MATCH p=
            (person:PERSON)
            -[:WROTE]->
            (review:REVIEW {dataset_name:{dataset_name}, test:{test}})
            -[:OF]->
            (product:PRODUCT)
        RETURN person.style_preference + product.style as x, review.score as y
    """
    self.query_params = {
        "dataset_name": "article_0",
        "test": test
    }

    with open('./settings.json') as f:
        self.settings = json.load(f)[args.database]

    driver = GraphDatabase.driver(
        self.settings["neo4j_url"],
        auth=(self.settings["neo4j_user"], self.settings["neo4j_password"]))

    with driver.session() as session:
        data = session.run(self.query, **self.query_params).data()

    data = [(np.array(i["x"]), i["y"]) for i in data]

    # Split the data up into "batches".
    data = more_itertools.chunked(data, self.batch_size)

    # Format the batches in the way Keras expects them:
    # an array of tuples (x_batch, y_batch).
    # An x_batch is a numpy array of shape (batch_size, 12),
    # containing the concatenated style and style_preference vectors.
    # A y_batch is a numpy array of shape (batch_size, 1) containing the review scores.
    self.data = [(np.array([j[0] for j in i]), np.array([j[1] for j in i])) for i in data]
Example 9: post_graph_chunked

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def post_graph_chunked(
    self,
    graph: BELGraph,
    chunksize: int,
    *,
    use_tqdm: bool = True,
    collections: Optional[Iterable[str]] = None,
    overwrite: bool = False,
    validate: bool = True,
    email: Union[bool, str] = False,
) -> requests.Response:
    """Post the graph to BioDati in chunks, for when the graph is too big for a normal upload.

    :param graph: A BEL graph
    :param chunksize: The size of the chunks of nanopubs to upload
    :param use_tqdm: Should tqdm be used when iterating?
    :param collections: Tags to add to the nanopubs for lookup on BioDati
    :param overwrite: Set the BioDati upload "overwrite" setting
    :param validate: Set the BioDati upload "validate" setting
    :param email: Who should get emailed with results about the upload? If True, emails the
        user used for login. If a string, emails that user. If False, sends no email.
    :return: Last response from the upload
    """
    metadata_extras = dict()
    if collections is not None:
        metadata_extras.update(collections=list(collections))
    iterable = _iter_graphdati(graph, use_tqdm=use_tqdm, metadata_extras=metadata_extras)
    res = None
    for chunk in chunked(iterable, chunksize):
        res = self.post_graph_json(
            chunk,
            overwrite=overwrite,
            validate=validate,
            email=email,
        )
    return res
Example 10: publish_iteratively

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def publish_iteratively(self):
    """Publish the selected data in chunks of 20,000 lines, yielding control after each chunk."""
    for chunk in more_itertools.chunked(self._selected_data, 20000):
        rk, body, prop_kwargs = self.get_output_components(selected_data="\n".join(chunk))
        self.publish_output(rk, body, prop_kwargs)
        yield
    yield self.FLUSH_OUT
    self.save_state(self._state)
Example 11: _write_data

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def _write_data(out_dir: RichPath, window_idx: int, chunk_size: int, data_window: List[Any]):
    """Shuffle a window of data and write it out as compressed JSONL chunks."""
    np.random.shuffle(data_window)
    for chunk_idx, data_chunk in enumerate(chunked(data_window, chunk_size)):
        out_file = out_dir.join('chunk_%i-%i.jsonl.gz' % (window_idx, chunk_idx))
        out_file.save_as_compressed_file(data_chunk)
Example 12: score

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def score(self, rev_ids, caches=None, cache=None):
    """Score the given revision IDs in batches, yielding scores as they are produced."""
    if isinstance(rev_ids, int):
        rev_ids = [rev_ids]

    batches = batch_rev_caches(chunked(rev_ids, self.batch_size), caches, cache)

    for batch_scores in self.scores_ex.map(self._score_batch, batches):
        for score in batch_scores:
            yield score
Example 13: batch_execute

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def batch_execute(
    self,
    query: Union[Query, Insert, Update, Delete, Select, str],
    parameter_sets: Optional[List[Dict[str, Any]]],
    transaction_id: Optional[str] = None,
    database: Optional[str] = None,
) -> UpdateResults:
    """Run the query once per chunk of parameter sets (at most MAX_RECORDS per call) inside a transaction."""
    if self.transaction_id:
        start_transaction: bool = False
    else:
        self.begin(database=database)
        start_transaction = True

    try:
        results_sets = list(
            flatten(
                self.client.batch_execute_statement(
                    **Options(
                        resourceArn=self.resource_arn,
                        secretArn=self.secret_arn,
                        database=database or self.database,
                        transactionId=transaction_id or self.transaction_id,
                        parameterSets=chunked_parameter_sets,  # type: ignore
                        sql=query,
                    ).build()
                )["updateResults"]
                for chunked_parameter_sets in chunked(
                    parameter_sets or [], MAX_RECORDS
                )
            )
        )
    except:
        if start_transaction:
            self.rollback()
        raise

    if start_transaction:
        self.commit()

    return UpdateResults(results_sets)
Example 14: dump_histories

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def dump_histories(all_histories: List[Tuple[str, List[DbVisit]]]) -> None:
    """Dump all visit histories into a temporary SQLite database, then move it into place."""
    logger = get_logger()
    output_dir = Path(config.get().output_dir)
    db_path = output_dir / 'promnesia.sqlite'

    def iter_visits():
        for e, h in all_histories:
            # TODO sort them somehow for determinism?
            # TODO what do we do with errors?
            # TODO maybe conform them to schema and dump too?
            # TODO or, dump to a separate table?
            yield from h

    tpath = Path(get_tmpdir().name) / 'promnesia.tmp.sqlite'
    engine = create_engine(f'sqlite:///{tpath}')
    binder = NTBinder.make(DbVisit)
    meta = MetaData(engine)
    table = Table('visits', meta, *binder.columns)
    meta.create_all()

    with engine.begin() as conn:
        for chunk in chunked(iter_visits(), n=_CHUNK_BY):
            bound = [binder.to_row(x) for x in chunk]
            # pylint: disable=no-value-for-parameter
            conn.execute(table.insert().values(bound))

    shutil.move(str(tpath), str(db_path))
    logger.info('saved database to %s', db_path)
    # TODO log error count
Example 15: analyze

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def analyze(self, text):
    """Aggregate stats related to IP addresses from a given text.

    :param text: Text input
    :type text: file-like | str
    :return: Aggregated stats for all the IP addresses found.
    :rtype: dict
    """
    if isinstance(text, str):
        text = text.splitlines(True)
    chunks = more_itertools.chunked(text, self.ANALYZE_TEXT_CHUNK_SIZE)
    text_stats = {
        "query": [],
        "count": 0,
        "stats": {},
    }
    text_ip_addresses = set()
    chunks_stats = [
        self._analyze_chunk(chunk, text_ip_addresses) for chunk in chunks
    ]
    functools.reduce(self._aggregate_stats, chunks_stats, text_stats)

    # This maps section dictionaries to lists of dictionaries
    # (undoing the mapping done previously to keep track of count values).
    for section_key, section_value in text_stats["stats"].items():
        section_element_key = self.SECTION_KEY_TO_ELEMENT_KEY[section_key]
        text_stats["stats"][section_key] = sorted(
            [
                {section_element_key: element_key, "count": element_count}
                for element_key, element_count in section_value.items()
            ],
            key=lambda element: (-element["count"], element[section_element_key]),
        )

    if text_ip_addresses:
        noise_ip_addresses = {
            result["ip"]
            for result in self.api.quick(text_ip_addresses)
            if result["noise"]
        }
    else:
        noise_ip_addresses = set()

    ip_count = len(text_ip_addresses)
    noise_ip_count = len(noise_ip_addresses)
    not_noise_ip_count = ip_count - noise_ip_count
    if ip_count > 0:
        noise_ip_ratio = float(noise_ip_count) / ip_count
    else:
        noise_ip_ratio = 0

    text_stats["summary"] = {
        "ip_count": ip_count,
        "noise_ip_count": noise_ip_count,
        "not_noise_ip_count": not_noise_ip_count,
        "noise_ip_ratio": noise_ip_ratio,
    }
    return text_stats
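A hedged usage sketch of analyze(). As in Example 1, `client` is a stand-in for an instance of the class that defines the method, and the input text is invented; the summary keys printed are the ones the method itself builds.

# `client` is assumed to be an instance of the class that defines analyze()
# (it supplies ANALYZE_TEXT_CHUNK_SIZE, _analyze_chunk, and the api attribute).
report = client.analyze("8.8.8.8 - GET /\n203.0.113.5 - GET /login\n")
print(report["summary"]["ip_count"], report["summary"]["noise_ip_ratio"])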