This article collects typical usage examples of Python's more_itertools.chunked. If you have been wondering how to use more_itertools.chunked, what it is good for, or what real code that calls it looks like, the curated examples below should help. You can also explore further examples from the more_itertools module that provides it.
The sections below show 15 code examples of more_itertools.chunked, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better Python code examples.
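Before the curated examples, here is a minimal self-contained sketch of what chunked does: it breaks any iterable into lists of at most n items, with the final chunk possibly shorter.

from more_itertools import chunked

for chunk in chunked("ABCDEFG", 3):
    print(chunk)
# ['A', 'B', 'C']
# ['D', 'E', 'F']
# ['G']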
Example 1: filter

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def filter(self, text, noise_only):
    """Filter lines that contain IP addresses from a given text.

    :param text: Text input
    :type text: file-like | str
    :param noise_only:
        If set, return only lines that contain IP addresses classified as noise;
        otherwise, return lines that contain IP addresses not classified as noise.
    :type noise_only: bool
    :return: Iterator that yields filtered lines in chunks
    :rtype: iterable
    """
    if isinstance(text, str):
        text = text.splitlines(True)
    chunks = more_itertools.chunked(text, self.FILTER_TEXT_CHUNK_SIZE)
    for chunk in chunks:
        yield self._filter_chunk(chunk, noise_only)
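A hedged usage sketch of the method above. The surrounding class is not shown in the excerpt, so `client` below is a stand-in for an instance of it, and the sample log text is invented for illustration.

# `client` is assumed to be an instance of the class that defines filter()
# (it supplies FILTER_TEXT_CHUNK_SIZE and _filter_chunk).
log_text = "8.8.8.8 - GET /index.html\n198.51.100.7 - GET /admin\n"
for filtered_chunk in client.filter(log_text, noise_only=False):
    print(filtered_chunk)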
Example 2: get_minibatch_iterator

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def get_minibatch_iterator(
    token_seqs: np.ndarray,
    batch_size: int,
    is_training: bool,
    drop_remainder: bool = True,
) -> Iterator[np.ndarray]:
    """Yield minibatches of token sequences, shuffling them during training."""
    indices = np.arange(token_seqs.shape[0])
    if is_training:
        np.random.shuffle(indices)

    for minibatch_indices in chunked(indices, batch_size):
        if len(minibatch_indices) < batch_size and drop_remainder:
            break  # Drop last, smaller batch
        minibatch_seqs = token_seqs[minibatch_indices]
        yield minibatch_seqs
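A hedged usage sketch of the iterator above; the array contents and batch size are invented for illustration.

import numpy as np

token_seqs = np.arange(20).reshape(5, 4)  # 5 toy sequences of 4 token ids each

# With the default drop_remainder=True, the final 1-row batch is discarded.
for batch in get_minibatch_iterator(token_seqs, batch_size=2, is_training=False):
    print(batch.shape)  # (2, 4), printed twice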
Example 3: calculate_words_displacement

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def calculate_words_displacement(self, column_names, n_jobs=1):
    """Calculate word displacements for each word in the Pandas data frame."""
    words = self.get_word_list()

    # Create chunks of the words to be processed.
    chunk_sz = int(np.ceil(len(words) / float(n_jobs)))  # chunked() expects an integer chunk size
    chunks = list(more_itertools.chunked(words, chunk_sz))

    # Calculate the displacements.
    chunksL = Parallel(n_jobs=n_jobs, verbose=20)(
        delayed(process_chunk)(chunk, process_word_source, self) for chunk in chunks
    )
    chunksH = Parallel(n_jobs=n_jobs, verbose=20)(
        delayed(process_chunk)(chunk, process_word_dest, self) for chunk in chunks
    )
    L = more_itertools.flatten(chunksL)
    H = more_itertools.flatten(chunksH)
    flattendL = [x for sublist in L for x in sublist]
    flattendH = [x for sublist in H for x in sublist]

    # Store the results in a Pandas data frame.
    dfo, dfn = self.create_data_frames(flattendL, flattendH, column_names)
    return flattendL, flattendH, dfo, dfn
Example 4: get_features

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def get_features(model, imgs, test_num_tracks):
    """Extract features in chunks to avoid OOM errors on videos with long sequence lengths.

    Used specifically during testing.

    Arguments:
        model -- model under test
        imgs -- images to get features for
    Returns:
        features
    """
    # Handle the data in chunks.
    all_features = []
    for test_imgs in mit.chunked(imgs, test_num_tracks):
        current_test_imgs = torch.stack(test_imgs)
        num_current_test_imgs = current_test_imgs.shape[0]
        # print(current_test_imgs.shape)
        features = model(current_test_imgs)
        features = features.view(num_current_test_imgs, -1)
        all_features.append(features)
    return torch.cat(all_features)
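A hedged usage sketch of get_features. `net` and the random frames are invented stand-ins; this assumes PyTorch and a model that accepts a batch of image tensors and returns per-frame features.

import torch

# `net` stands in for the model under test; 100 frames are processed in
# sub-batches of 32 so that no single forward pass exhausts GPU memory.
frames = [torch.randn(3, 224, 224) for _ in range(100)]
features = get_features(net, frames, test_num_tracks=32)
print(features.shape)  # (100, feature_dim)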
Example 5: get_spatial_features

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def get_spatial_features(model, imgs, test_num_tracks):
    """Extract features in chunks to avoid OOM errors on videos with long sequence lengths.

    Used specifically during testing.

    Arguments:
        model -- model under test
        imgs -- images to get features for
    Returns:
        features, spatial features
    """
    # Handle the data in chunks.
    all_features, all_spatial_features = [], []
    for test_imgs in mit.chunked(imgs, test_num_tracks):
        current_test_imgs = torch.stack(test_imgs)
        num_current_test_imgs = current_test_imgs.shape[0]
        features, spatial_feats = model(current_test_imgs)
        features = features.view(num_current_test_imgs, -1)
        all_spatial_features.append(spatial_feats)
        all_features.append(features)
    return torch.cat(all_features), torch.cat(all_spatial_features)
Example 6: test_even

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def test_even(self):
    """Test when ``n`` divides evenly into the length of the iterable."""
    self.assertEqual(
        list(mi.chunked('ABCDEF', 3)), [['A', 'B', 'C'], ['D', 'E', 'F']]
    )
Example 7: test_odd

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def test_odd(self):
    """Test when ``n`` does not divide evenly into the length of the
    iterable.
    """
    self.assertEqual(
        list(mi.chunked('ABCDE', 3)), [['A', 'B', 'C'], ['D', 'E']]
    )
Example 8: __init__

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def __init__(self, args, batch_size=32, test=False):
    """Load review data from Neo4j and pre-batch it in the format Keras expects."""
    self.batch_size = batch_size
    self.query = """
        MATCH p=
            (person:PERSON)
            -[:WROTE]->
            (review:REVIEW {dataset_name:{dataset_name}, test:{test}})
            -[:OF]->
            (product:PRODUCT)
        RETURN person.style_preference + product.style as x, review.score as y
    """
    self.query_params = {
        "dataset_name": "article_0",
        "test": test
    }

    with open('./settings.json') as f:
        self.settings = json.load(f)[args.database]

    driver = GraphDatabase.driver(
        self.settings["neo4j_url"],
        auth=(self.settings["neo4j_user"], self.settings["neo4j_password"]))

    with driver.session() as session:
        data = session.run(self.query, **self.query_params).data()

    data = [(np.array(i["x"]), i["y"]) for i in data]

    # Split the data up into "batches".
    data = more_itertools.chunked(data, self.batch_size)

    # Format the batches in the way Keras expects them:
    # an array of tuples (x_batch, y_batch).
    # An x_batch is a numpy array of shape (batch_size, 12),
    # containing the concatenated style and style_preference vectors.
    # A y_batch is a numpy array of shape (batch_size, 1) containing the review scores.
    self.data = [(np.array([j[0] for j in i]), np.array([j[1] for j in i])) for i in data]
Example 9: post_graph_chunked

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def post_graph_chunked(
    self,
    graph: BELGraph,
    chunksize: int,
    *,
    use_tqdm: bool = True,
    collections: Optional[Iterable[str]] = None,
    overwrite: bool = False,
    validate: bool = True,
    email: Union[bool, str] = False,
) -> requests.Response:
    """Post the graph to BioDati in chunks, for when the graph is too big for a normal upload.

    :param graph: A BEL graph
    :param chunksize: The size of the chunks of nanopubs to upload
    :param use_tqdm: Should tqdm be used when iterating?
    :param collections: Tags to add to the nanopubs for lookup on BioDati
    :param overwrite: Set the BioDati upload "overwrite" setting
    :param validate: Set the BioDati upload "validate" setting
    :param email: Who should get emailed with results about the upload? If True, emails the
        user used for login. If a string, emails that user. If False, sends no email.
    :return: Last response from the upload
    """
    metadata_extras = dict()
    if collections is not None:
        metadata_extras.update(collections=list(collections))
    iterable = _iter_graphdati(graph, use_tqdm=use_tqdm, metadata_extras=metadata_extras)
    res = None
    for chunk in chunked(iterable, chunksize):
        res = self.post_graph_json(
            chunk,
            overwrite=overwrite,
            validate=validate,
            email=email,
        )
    return res
Example 10: publish_iteratively

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def publish_iteratively(self):
    """Publish the selected data in chunks of 20,000 lines, yielding control after each chunk."""
    for chunk in more_itertools.chunked(self._selected_data, 20000):
        rk, body, prop_kwargs = self.get_output_components(selected_data="\n".join(chunk))
        self.publish_output(rk, body, prop_kwargs)
        yield
    yield self.FLUSH_OUT
    self.save_state(self._state)
Example 11: _write_data

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def _write_data(out_dir: RichPath, window_idx: int, chunk_size: int, data_window: List[Any]):
    """Shuffle a window of data and write it out as compressed JSONL chunks."""
    np.random.shuffle(data_window)
    for chunk_idx, data_chunk in enumerate(chunked(data_window, chunk_size)):
        out_file = out_dir.join('chunk_%i-%i.jsonl.gz' % (window_idx, chunk_idx))
        out_file.save_as_compressed_file(data_chunk)
Example 12: score

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def score(self, rev_ids, caches=None, cache=None):
    """Score the given revision IDs in batches, yielding scores as they are produced."""
    if isinstance(rev_ids, int):
        rev_ids = [rev_ids]

    batches = batch_rev_caches(chunked(rev_ids, self.batch_size), caches, cache)

    for batch_scores in self.scores_ex.map(self._score_batch, batches):
        for score in batch_scores:
            yield score
Example 13: batch_execute

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def batch_execute(
    self,
    query: Union[Query, Insert, Update, Delete, Select, str],
    parameter_sets: Optional[List[Dict[str, Any]]],
    transaction_id: Optional[str] = None,
    database: Optional[str] = None,
) -> UpdateResults:
    """Run the query once per chunk of parameter sets (at most MAX_RECORDS per call) inside a transaction."""
    if self.transaction_id:
        start_transaction: bool = False
    else:
        self.begin(database=database)
        start_transaction = True

    try:
        results_sets = list(
            flatten(
                self.client.batch_execute_statement(
                    **Options(
                        resourceArn=self.resource_arn,
                        secretArn=self.secret_arn,
                        database=database or self.database,
                        transactionId=transaction_id or self.transaction_id,
                        parameterSets=chunked_parameter_sets,  # type: ignore
                        sql=query,
                    ).build()
                )["updateResults"]
                for chunked_parameter_sets in chunked(
                    parameter_sets or [], MAX_RECORDS
                )
            )
        )
    except:
        if start_transaction:
            self.rollback()
        raise

    if start_transaction:
        self.commit()

    return UpdateResults(results_sets)
Example 14: dump_histories

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def dump_histories(all_histories: List[Tuple[str, List[DbVisit]]]) -> None:
    """Dump all visit histories into a temporary SQLite database, then move it into place."""
    logger = get_logger()
    output_dir = Path(config.get().output_dir)
    db_path = output_dir / 'promnesia.sqlite'

    def iter_visits():
        for e, h in all_histories:
            # TODO sort them somehow for determinism?
            # TODO what do we do with errors?
            # TODO maybe conform them to schema and dump too?
            # TODO or, dump to a separate table?
            yield from h

    tpath = Path(get_tmpdir().name) / 'promnesia.tmp.sqlite'
    engine = create_engine(f'sqlite:///{tpath}')
    binder = NTBinder.make(DbVisit)
    meta = MetaData(engine)
    table = Table('visits', meta, *binder.columns)
    meta.create_all()

    with engine.begin() as conn:
        for chunk in chunked(iter_visits(), n=_CHUNK_BY):
            bound = [binder.to_row(x) for x in chunk]
            # pylint: disable=no-value-for-parameter
            conn.execute(table.insert().values(bound))

    shutil.move(str(tpath), str(db_path))
    logger.info('saved database to %s', db_path)
    # TODO log error count
Example 15: analyze

# Required module: import more_itertools [as alias]
# Or: from more_itertools import chunked [as alias]
def analyze(self, text):
    """Aggregate stats related to IP addresses from a given text.

    :param text: Text input
    :type text: file-like | str
    :return: Aggregated stats for all the IP addresses found.
    :rtype: dict
    """
    if isinstance(text, str):
        text = text.splitlines(True)
    chunks = more_itertools.chunked(text, self.ANALYZE_TEXT_CHUNK_SIZE)
    text_stats = {
        "query": [],
        "count": 0,
        "stats": {},
    }
    text_ip_addresses = set()
    chunks_stats = [
        self._analyze_chunk(chunk, text_ip_addresses) for chunk in chunks
    ]
    functools.reduce(self._aggregate_stats, chunks_stats, text_stats)

    # This maps section dictionaries to lists of dictionaries
    # (undoing the mapping done previously to keep track of count values).
    for section_key, section_value in text_stats["stats"].items():
        section_element_key = self.SECTION_KEY_TO_ELEMENT_KEY[section_key]
        text_stats["stats"][section_key] = sorted(
            [
                {section_element_key: element_key, "count": element_count}
                for element_key, element_count in section_value.items()
            ],
            key=lambda element: (-element["count"], element[section_element_key]),
        )

    if text_ip_addresses:
        noise_ip_addresses = {
            result["ip"]
            for result in self.api.quick(text_ip_addresses)
            if result["noise"]
        }
    else:
        noise_ip_addresses = set()

    ip_count = len(text_ip_addresses)
    noise_ip_count = len(noise_ip_addresses)
    not_noise_ip_count = ip_count - noise_ip_count
    if ip_count > 0:
        noise_ip_ratio = float(noise_ip_count) / ip_count
    else:
        noise_ip_ratio = 0

    text_stats["summary"] = {
        "ip_count": ip_count,
        "noise_ip_count": noise_ip_count,
        "not_noise_ip_count": not_noise_ip_count,
        "noise_ip_ratio": noise_ip_ratio,
    }
    return text_stats
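A hedged usage sketch of analyze(). As in Example 1, `client` is a stand-in for an instance of the class that defines the method, and the input text is invented; the summary keys printed are the ones the method itself builds.

# `client` is assumed to be an instance of the class that defines analyze()
# (it supplies ANALYZE_TEXT_CHUNK_SIZE, _analyze_chunk, and the api attribute).
report = client.analyze("8.8.8.8 - GET /\n203.0.113.5 - GET /login\n")
print(report["summary"]["ip_count"], report["summary"]["noise_ip_ratio"])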