本文整理汇总了 Python 中 tqdm.tqdm.pandas 方法的典型用法代码示例。如果您正苦于以下问题:Python tqdm.pandas 方法的具体用法?Python tqdm.pandas 怎么用?Python tqdm.pandas 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在的类 tqdm.tqdm。
示例1: process_csv
# 需要导入模块: from tqdm import tqdm [as 别名]
# 或者: from tqdm.tqdm import pandas [as 别名]
def process_csv(args, split_name):
    """Dump one dataset split to a raw TSV file, clean it, and save the result.

    Two files are written into ``args.output_dir``: ``<split_name>_raw.csv``
    (the sentences and labels exactly as loaded) and ``<split_name>.csv``
    (the output of ``ingest_data`` followed by ``post_process``).

    Parameters
    ----------
    args
        Object providing the ``input_dir``, ``output_dir`` and
        ``remove_punctuation`` attributes.
    split_name : str
        Name of the split; used as the input sub-path and as the stem of
        both output files.
    """
    raw_csv = os.path.join(args.output_dir, split_name + "_raw.csv")
    clean_csv = os.path.join(args.output_dir, split_name + ".csv")
    data, labels = load_dataset(os.path.join(args.input_dir, split_name))

    # The output directory must exist before the raw file can be opened.
    os.makedirs(args.output_dir, exist_ok=True)

    # Save as csv file, separated by tab. newline='' is the mode the csv
    # module asks for so that it controls line endings itself.
    with open(raw_csv, 'w', newline='') as f:
        writer = csv.writer(f, delimiter='\t')
        writer.writerows(zip(data, labels))

    data = ingest_data(raw_csv)
    data = post_process(data, args.remove_punctuation)
    data.to_csv(clean_csv, sep='\t', header=False, index=False)
示例2: _process
# 需要导入模块: from tqdm import tqdm [as 别名]
# 或者: from tqdm.tqdm import pandas [as 别名]
def _process(self, X: pd.DataFrame, func: Callable[[str, np.ndarray], Any]):
    """Embed every text column of ``X`` and post-process each embedding.

    Parameters
    ----------
    X
        Input data; it is passed through ``convert_input`` before use.
    func
        Called as ``func(column_name, embedding_matrix)`` for each text
        column; its return value must expose ``shape[1]``.

    Returns
    -------
    A DataFrame when ``self.return_same_type`` is set and ``X`` was a
    DataFrame, otherwise the underlying ``.values`` array. Non-text columns
    come first, followed by the generated embedding columns.

    Notes
    -----
    ``progress_apply`` is only available after ``tqdm.pandas()`` has been
    called; this block assumes that registration happened elsewhere.
    """
    is_pandas = isinstance(X, pd.DataFrame)
    X = convert_input(X)

    # `np.object` was removed from NumPy; the builtin `object` compares
    # equal to the same dtype.
    columns = self.text_columns or [c for c in X.columns if X[c].dtype == object]
    non_text_columns = [c for c in X.columns if c not in columns]

    column_names = []
    processed = []
    for c in columns:
        emb = np.vstack(X[c].progress_apply(lambda x: self._process_text(x)))
        emb = func(c, emb)
        # Collect the block so that it can be stacked below.
        processed.append(emb)
        column_names += [self.column_format.format(col=c, idx=i) for i in range(emb.shape[1])]

    processed_df = pd.DataFrame(np.hstack(processed), columns=column_names)

    if non_text_columns:
        X_ = X[non_text_columns].copy()
        X_ = pd.concat([X_, processed_df], axis=1)
    else:
        X_ = processed_df

    return X_ if self.return_same_type and is_pandas else X_.values
示例3: test_pandas_leave
# 需要导入模块: from tqdm import tqdm [as 别名]
# 或者: from tqdm.tqdm import pandas [as 别名]
def test_pandas_leave():
    """Test pandas with `leave=True`"""
    try:
        from numpy.random import randint
        import pandas as pd
    except ImportError:
        raise SkipTest

    with closing(StringIO()) as our_file:
        df = pd.DataFrame(randint(0, 100, (1000, 6)))
        tqdm.pandas(file=our_file, leave=True, ascii=True)
        df.groupby(0).progress_apply(lambda x: None)

        # getvalue() returns the whole buffer regardless of the stream
        # position; read() right after the writes would start at the end
        # and yield an empty string.
        output = our_file.getvalue()

        exres = '100%|##########| 100/100'
        if exres not in output:
            raise AssertionError(
                "\nExpected:\n{0}\nIn:{1}\n".format(exres, output))
示例4: test_pandas_apply_args_deprecation
# 需要导入模块: from tqdm import tqdm [as 别名]
# 或者: from tqdm.tqdm import pandas [as 别名]
def test_pandas_apply_args_deprecation():
    """Test warning info in
    `pandas.Dataframe(Series).progress_apply(func, *args)`"""
    try:
        from numpy.random import randint
        from tqdm import tqdm_pandas
        import pandas as pd
    except ImportError:
        raise SkipTest

    with closing(StringIO()) as our_file:
        tqdm_pandas(tqdm(file=our_file, leave=False, ascii=True, ncols=20))
        df = pd.DataFrame(randint(0, 50, (500, 3)))
        df.progress_apply(lambda x: None, 1)  # 1 shall cause a warning

        # Check deprecation message
        res = our_file.getvalue()
        assert all(i in res for i in (
            "TqdmDeprecationWarning", "not supported",
            "keyword arguments instead"))
示例5: frame
# 需要导入模块: from tqdm import tqdm [as 别名]
# 或者: from tqdm.tqdm import pandas [as 别名]
def frame(self) -> 'DataPack.FrameView':
    """
    View the data pack as a :class:`pandas.DataFrame`.

    Returned data frame is created by merging the left data frame,
    the right dataframe and the relation data frame. Use `[]` to access
    an item or a slice of items.

    :return: A :class:`matchzoo.DataPack.FrameView` instance.

    Example:
        >>> import matchzoo as mz
        >>> data_pack = mz.datasets.toy.load_data()
        >>> type(data_pack.frame)
        <class 'matchzoo.data_pack.data_pack.DataPack.FrameView'>
        >>> frame_slice = data_pack.frame[0:5]
        >>> type(frame_slice)
        <class 'pandas.core.frame.DataFrame'>
        >>> list(frame_slice.columns)
        ['id_left', 'text_left', 'id_right', 'text_right', 'label']
        >>> full_frame = data_pack.frame()
        >>> len(full_frame) == len(data_pack)
        True

    """
    # NOTE(review): the doctest reads `data_pack.frame` without calling it,
    # so a `@property` decorator presumably precedes this def - confirm
    # against the full file.
    return DataPack.FrameView(self)
示例6: _apply_on_text_right
# 需要导入模块: from tqdm import tqdm [as 别名]
# 或者: from tqdm.tqdm import pandas [as 别名]
def _apply_on_text_right(self, func, rename, verbose=1):
name = rename or 'text_right'
if verbose:
tqdm.pandas(desc="Processing " + name + " with " + func.__name__)
self._right[name] = self._right['text_right'].progress_apply(func)
self._right[name] = self._right['text_right'].apply(func)
示例7: _apply_on_text_left
# 需要导入模块: from tqdm import tqdm [as 别名]
# 或者: from tqdm.tqdm import pandas [as 别名]
def _apply_on_text_left(self, func, rename, verbose=1):
name = rename or 'text_left'
if verbose:
tqdm.pandas(desc="Processing " + name + " with " + func.__name__)
self._left[name] = self._left['text_left'].progress_apply(func)
self._left[name] = self._left['text_left'].apply(func)
示例8: apply
# 需要导入模块: from tqdm import tqdm [as 别名]
# 或者: from tqdm.tqdm import pandas [as 别名]
def apply(
    self,
    df: pd.DataFrame,
    progress_bar: bool = True,
    fault_tolerant: bool = False,
    return_meta: bool = False,
) -> Union[np.ndarray, Tuple[np.ndarray, ApplierMetadata]]:
    """Label Pandas DataFrame of data points with LFs.

    Parameters
    ----------
    df
        Pandas DataFrame containing data points to be labeled by LFs
    progress_bar
        Display a progress bar?
    fault_tolerant
        Output ``-1`` if LF execution fails?
    return_meta
        Return metadata from apply call?

    Returns
    -------
    np.ndarray
        Matrix of labels emitted by LFs
    ApplierMetadata
        Metadata, such as fault counts, for the apply call
    """
    f_caller = _FunctionCaller(fault_tolerant)
    apply_fn = partial(apply_lfs_to_data_point, lfs=self._lfs, f_caller=f_caller)
    # NOTE(review): `progress_apply` only exists once `tqdm.pandas()` has
    # been called; that registration is not visible in this block - confirm
    # it happens elsewhere.
    call_fn = df.progress_apply if progress_bar else df.apply
    # axis=1 hands each row to the labeling functions.
    labels = call_fn(apply_fn, axis=1)
    labels_with_index = rows_to_triplets(labels)
    L = self._numpy_from_row_data(labels_with_index)
    if return_meta:
        return L, ApplierMetadata(f_caller.fault_counts)
    return L
示例9: predict
# 需要导入模块: from tqdm import tqdm [as 别名]
# 或者: from tqdm.tqdm import pandas [as 别名]
def predict(self, train_file: None, test_file: str, lower_case: bool) -> pd.DataFrame:
    """Score every row of the test file while showing a tqdm progress bar.

    :param train_file: Not used by this method.
    :param test_file: Forwarded to ``self.read_data``.
    :param lower_case: Forwarded to ``self.read_data``.
    :return: The frame returned by ``self.read_data`` with an added ``pred``
        column holding ``self.score`` applied to each ``text`` value.
    """
    # pip install tqdm
    from tqdm import tqdm

    # Register `progress_apply` on pandas objects; unless this was already
    # done elsewhere, the attribute used below does not exist.
    tqdm.pandas()
    df = self.read_data(test_file, lower_case)
    df['pred'] = df['text'].progress_apply(self.score)
    return df
示例10: create_dataloader
# 需要导入模块: from tqdm import tqdm [as 别名]
# 或者: from tqdm.tqdm import pandas [as 别名]
def create_dataloader(self,
                      df: pd.DataFrame,
                      batch_size: int = 32,
                      shuffle: bool = False,
                      valid_pct: float = None):
    """Process rows in pd.DataFrame using n_cpus and return a DataLoader.

    Each ``(index, row)`` pair from ``df.iterrows()`` is sent to
    ``self.process_row`` in a process pool; element 0 of every result is
    used as the features and element 1 as the label.

    :param df: Frame whose rows are converted into examples.
    :param batch_size: Batch size for every loader that is returned.
    :param shuffle: Shuffle flag for the single loader returned when
        ``valid_pct`` is ``None``.
    :param valid_pct: Optional fraction of ``len(df)`` held out for
        validation.
    :return: ``(train_loader, valid_loader)`` when ``valid_pct`` is given,
        otherwise one ``DataLoader`` over the full dataset.
    """
    # NOTE(review): `n_cpu` is not defined in this block; presumably a
    # module-level constant - confirm against the full file.
    with ProcessPoolExecutor(max_workers=n_cpu) as executor:
        result = list(
            tqdm(executor.map(self.process_row, df.iterrows(), chunksize=8192),
                 desc=f"Processing {len(df)} examples on {n_cpu} cores",
                 # executor.map returns an iterator without a length, so
                 # the bar needs the total spelled out.
                 total=len(df)))

    features = [r[0] for r in result]
    labels = [r[1] for r in result]
    dataset = TensorDataset(torch.tensor(features, dtype=torch.long),
                            torch.tensor(labels, dtype=torch.long))

    if valid_pct is not None:
        valid_size = int(valid_pct * len(df))
        train_size = len(df) - valid_size
        valid_dataset, train_dataset = random_split(dataset, [valid_size, train_size])
        valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        return train_loader, valid_loader

    data_loader = DataLoader(dataset,
                             batch_size=batch_size,
                             shuffle=shuffle)
    return data_loader
示例11: test_pandas_series
# 需要导入模块: from tqdm import tqdm [as 别名]
# 或者: from tqdm.tqdm import pandas [as 别名]
def test_pandas_series():
    """Test pandas.Series.progress_apply and .progress_map"""
    try:
        from numpy.random import randint
        import pandas as pd
    except ImportError:
        raise SkipTest

    with closing(StringIO()) as our_file:
        tqdm.pandas(file=our_file, leave=True, ascii=True)

        series = pd.Series(randint(0, 50, (123,)))
        res1 = series.progress_apply(lambda x: x + 10)
        res2 = series.apply(lambda x: x + 10)
        assert res1.equals(res2)

        res3 = series.progress_map(lambda x: x + 10)
        res4 = series.map(lambda x: x + 10)
        assert res3.equals(res4)

        # Read the buffer once through getvalue(), which ignores the
        # current stream position.
        output = our_file.getvalue()
        expects = ['100%', '123/123']
        for exres in expects:
            if output.count(exres) < 2:
                raise AssertionError(
                    "\nExpected:\n{0}\nIn:\n{1}\n".format(
                        exres + " at least twice.", output))
示例12: test_pandas_data_frame
# 需要导入模块: from tqdm import tqdm [as 别名]
# 或者: from tqdm.tqdm import pandas [as 别名]
def test_pandas_data_frame():
    """Test pandas.DataFrame.progress_apply and .progress_applymap"""
    try:
        from numpy.random import randint
        import pandas as pd
    except ImportError:
        raise SkipTest

    with closing(StringIO()) as our_file:
        tqdm.pandas(file=our_file, leave=True, ascii=True)
        df = pd.DataFrame(randint(0, 50, (100, 200)))

        def task_func(x):
            return x + 1

        # applymap
        res1 = df.progress_applymap(task_func)
        res2 = df.applymap(task_func)
        assert res1.equals(res2)

        # apply
        for axis in [0, 1]:
            res3 = df.progress_apply(task_func, axis=axis)
            res4 = df.apply(task_func, axis=axis)
            assert res3.equals(res4)

        # getvalue() returns everything written so far; read() at this
        # point would start at the end of the stream and return ''.
        output = our_file.getvalue()

        if output.count('100%') < 3:
            raise AssertionError("\nExpected:\n{0}\nIn:\n{1}\n".format(
                '100% at least three times', output))

        # apply_map, apply axis=0, apply axis=1
        expects = ['20000/20000', '200/200', '100/100']
        for exres in expects:
            if output.count(exres) < 1:
                raise AssertionError(
                    "\nExpected:\n{0}\nIn:\n {1}\n".format(
                        exres + " at least once.", output))
示例13: aggregate_profiles
# 需要导入模块: from tqdm import tqdm [as 别名]
# 或者: from tqdm.tqdm import pandas [as 别名]
def aggregate_profiles(all_profiles, type, groups, intercept, span):
    """Aggregate individual profiles into one profile per variable and label.

    :param all_profiles: pandas.DataFrame with at least the ``_vname_``,
        ``_label_``, ``_x_`` and ``_yhat_`` columns, plus every column named
        in ``groups``; the non-partial types also read ``_ids_`` and
        ``_original_``.
    :param type: ``'partial'`` averages ``_yhat_`` per group directly;
        ``'conditional'`` and any other value (``'accumulated'`` is the one
        tested for below) delegate to ``split_over_variables_and_labels``.
    :param groups: list of names of grouping columns (may be empty).
    :param intercept: bool, controls whether the overall mean of ``_yhat_``
        is removed from (partial / conditional) or added to (other types)
        the aggregated values.
    :param span: forwarded to ``split_over_variables_and_labels``.
    :return: pandas.DataFrame with the aggregated profiles.
    """
    if type == 'partial':
        aggregated_profiles = \
            all_profiles.groupby(['_vname_', '_label_', '_x_'] + groups)['_yhat_'].mean().reset_index()
    else:
        # split all_profiles into groups
        if type == 'accumulated':
            desc = 'Calculating accumulated dependency'
        else:
            desc = "Calculating conditional dependency"
        tqdm.pandas(desc=desc)

        columns = ["_vname_", "_label_", "_x_", "_yhat_", "_ids_", "_original_"] + groups
        aggregated_profiles = (
            all_profiles.loc[:, columns]
            .groupby(['_vname_', '_label_'])
            .progress_apply(lambda split_profile: split_over_variables_and_labels(split_profile, type, groups, span))
        )

    aggregated_profiles.loc[:, '_ids_'] = 0

    # NOTE(review): the three centring expressions were truncated after
    # `all_profiles[` in the text this block was recovered from. They are
    # reconstructed here as the overall mean of `_yhat_` - confirm against
    # the upstream implementation.
    if type == 'partial':
        if not intercept:
            aggregated_profiles.loc[:, '_yhat_'] = \
                aggregated_profiles.loc[:, '_yhat_'] - all_profiles['_yhat_'].mean()
    elif type == 'conditional':
        if not intercept:
            aggregated_profiles.loc[:, '_yhat_'] = \
                aggregated_profiles.loc[:, '_yhat_'] - all_profiles['_yhat_'].mean()
        aggregated_profiles = aggregated_profiles.reset_index().rename(columns={'level_2': '_grid_'})
    else:
        if intercept:
            aggregated_profiles.loc[:, '_yhat_'] = \
                aggregated_profiles.loc[:, '_yhat_'] + all_profiles['_yhat_'].mean()
        aggregated_profiles = aggregated_profiles.reset_index().rename(columns={'level_2': '_grid_'})

    # postprocessing: fold the group values into the label
    if len(groups) != 0:
        aggregated_profiles['_groups_'] = aggregated_profiles.loc[:, groups].apply(lambda row: '_'.join(row), axis=1)
        aggregated_profiles.loc[:, '_label_'] = \
            aggregated_profiles.loc[:, ['_label_', '_groups_']].apply(lambda row: '_'.join(row), axis=1)

    return aggregated_profiles
示例14: split_over_variables_and_labels
# 需要导入模块: from tqdm import tqdm [as 别名]
# 或者: from tqdm.tqdm import pandas [as 别名]
def split_over_variables_and_labels(split_profile, type, groups, span):
    """
    Inner function that calculates actual conditional profiles for one variable only. Iterated over each variable and group.

    :param split_profile: pandas.DataFrame, one group of the dataset (with only one variable)
    :param type: str, ``'accumulated'`` switches on the per-observation differencing and the final cumulative sum
    :param groups: list of names of grouping columns (may be empty); it is concatenated with ``['_x_']``
    :param span: third argument handed to ``norm`` when weighting numeric variables
    :return: pd.DataFrame, dataframe with calculated conditional profile for only one variable, or None for an empty input
    """
    if split_profile.shape[0] == 0:
        return None

    # Work on a copy so the helper columns written below never leak into
    # the frame (or groupby chunk) owned by the caller.
    split_profile = split_profile.copy()

    if pd.api.types.is_numeric_dtype(split_profile['_x_']):
        # for continuous variables we will calculate weighted average
        # where weights come from gaussian kernel and distance between points
        # scaling factor: the range of _x_, or 1 when that range is 0
        split_profile['_original_'] = split_profile['_original_'].astype('float')
        range_x = split_profile['_x_'].max() - split_profile['_x_'].min()
        if range_x == 0:
            range_x = 1

        # scaled differences
        diffs = (split_profile['_original_'] - split_profile['_x_']) / range_x
        # NOTE(review): `norm` is defined outside this block; presumably a
        # gaussian density helper taking (x, mean, sd) - confirm.
        split_profile['_w_'] = norm(diffs, 0, span)
    else:
        # for categorical variables we will calculate weighted average
        # but weights are 0-1, 1 if it's the same level and 0 otherwise
        split_profile['_w_'] = split_profile['_original_'] == split_profile['_x_']

    if type == 'accumulated':
        # diffs
        split_profile['_yhat_'] = split_profile. \
            groupby('_ids_')['_yhat_']. \
            transform(lambda column: column.diff())

        # diff causes NaNs at the beginning of each group
        split_profile.loc[np.isnan(split_profile['_yhat_']), '_yhat_'] = 0

    def weighted_mean(point):
        # Weighted average of _yhat_; 0 when all weights in the cell vanish.
        total_weight = point['_w_'].sum()
        if total_weight == 0:
            return 0
        return (point['_yhat_'] * point['_w_']).sum() / total_weight

    par_profile = split_profile.groupby(['_x_'] + groups).apply(weighted_mean)
    par_profile.name = '_yhat_'
    par_profile = par_profile.reset_index()

    if type == 'accumulated':
        if len(groups) == 0:
            par_profile['_yhat_'] = par_profile['_yhat_'].cumsum()
        else:
            # accumulate separately inside every group
            par_profile['_yhat_'] = par_profile.groupby(groups)['_yhat_'].transform(
                lambda column: column.cumsum())

    return par_profile
示例15: test_pandas_groupby_apply
# 需要导入模块: from tqdm import tqdm [as 别名]
# 或者: from tqdm.tqdm import pandas [as 别名]
def test_pandas_groupby_apply():
    """Test pandas.DataFrame.groupby(...).progress_apply"""
    try:
        from numpy.random import randint
        import pandas as pd
    except ImportError:
        raise SkipTest

    with closing(StringIO()) as our_file:
        tqdm.pandas(file=our_file, leave=False, ascii=True)

        df = pd.DataFrame(randint(0, 50, (500, 3)))
        df.groupby(0).progress_apply(lambda x: None)

        dfs = pd.DataFrame(randint(0, 50, (500, 3)), columns=list('abc'))
        dfs.groupby(['a']).progress_apply(lambda x: None)

        # getvalue() sees the whole buffer; read() here would start at the
        # end of the stream and make the check below vacuous.
        output = our_file.getvalue()

        # don't expect final output since no `leave` and
        # high dynamic `miniters`
        nexres = '100%|##########|'
        if nexres in output:
            raise AssertionError("\nDid not expect:\n{0}\nIn:{1}\n".format(
                nexres, output))

    with closing(StringIO()) as our_file:
        tqdm.pandas(file=our_file, leave=True, ascii=True)

        dfs = pd.DataFrame(randint(0, 50, (500, 3)), columns=list('abc'))
        dfs.loc[0] = [2, 1, 1]
        dfs['d'] = 100

        expects = ['500/500', '1/1', '4/4', '2/2']
        dfs.groupby(dfs.index).progress_apply(lambda x: None)
        dfs.groupby('d').progress_apply(lambda x: None)
        dfs.groupby(dfs.columns, axis=1).progress_apply(lambda x: None)
        dfs.groupby([2, 2, 1, 1], axis=1).progress_apply(lambda x: None)

        output = our_file.getvalue()

        if output.count('100%') < 4:
            raise AssertionError("\nExpected:\n{0}\nIn:\n{1}\n".format(
                '100% at least four times', output))

        for exres in expects:
            if output.count(exres) < 1:
                raise AssertionError(
                    "\nExpected:\n{0}\nIn:\n {1}\n".format(
                        exres + " at least once.", output))