本文整理汇总了Python中qiita_db.metadata_template.prep_template.PrepTemplate.to_dataframe方法的典型用法代码示例。如果您正苦于以下问题:Python PrepTemplate.to_dataframe方法的具体用法?Python PrepTemplate.to_dataframe怎么用?Python PrepTemplate.to_dataframe使用的例子?那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类qiita_db.metadata_template.prep_template.PrepTemplate的用法示例。
在下文中一共展示了PrepTemplate.to_dataframe方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_dataframe_from_template
# 需要导入模块: from qiita_db.metadata_template.prep_template import PrepTemplate [as 别名]
# 或者: from qiita_db.metadata_template.prep_template.PrepTemplate import to_dataframe [as 别名]
def test_dataframe_from_template(self):
    """Check the data frame built from prep template 1.

    Verifies the number of rows, the row labels and the column labels of
    the object returned by ``PrepTemplate(1).to_dataframe()``.
    """
    template = PrepTemplate(1)
    obs = template.to_dataframe()

    # 27 samples
    self.assertEqual(len(obs), 27)

    # NOTE(review): these two checks used to be written as
    # ``assertTrue(actual, expected)``. ``assertTrue`` takes the second
    # argument as the failure *message*, so the expected sets were never
    # compared and the assertions passed for any non-empty result.
    # ``assertEqual`` performs the comparison. If it fails, the expected
    # values below were never validated -- confirm them against the test
    # fixture before relaxing the assertion again.
    exp_index = {
        u'SKB1.640202', u'SKB2.640194', u'SKB3.640195', u'SKB4.640189',
        u'SKB5.640181', u'SKB6.640176', u'SKB7.640196', u'SKB8.640193',
        u'SKB9.640200', u'SKD1.640179', u'SKD2.640178', u'SKD3.640198',
        u'SKD4.640185', u'SKD5.640186', u'SKD6.640190', u'SKD7.640191',
        u'SKD8.640184', u'SKD9.640182', u'SKM1.640183', u'SKM2.640199',
        u'SKM3.640197', u'SKM4.640180', u'SKM5.640177', u'SKM6.640187',
        u'SKM7.640188', u'SKM8.640201', u'SKM9.640192'}
    self.assertEqual(set(obs.index), exp_index)

    exp_columns = {
        u'tot_org_carb', u'common_name', u'has_extracted_data',
        u'required_sample_info_status', u'water_content_soil',
        u'env_feature', u'assigned_from_geo', u'altitude', u'env_biome',
        u'texture', u'has_physical_specimen', u'description_duplicate',
        u'physical_location', u'latitude', u'ph', u'host_taxid',
        u'elevation', u'description', u'collection_timestamp',
        u'taxon_id', u'samp_salinity', u'host_subject_id', u'sample_type',
        u'season_environment', u'temp', u'country', u'longitude',
        u'tot_nitro', u'depth', u'anonymized_name', u'target_subfragment',
        u'sample_center', u'samp_size', u'run_date', u'experiment_center',
        u'pcr_primers', u'center_name', u'barcodesequence', u'run_center',
        u'run_prefix', u'library_construction_protocol', u'emp_status',
        u'linkerprimersequence', u'experiment_design_description',
        u'target_gene', u'center_project_name', u'illumina_technology',
        u'sequencing_meth', u'platform', u'experiment_title',
        u'study_center'}
    self.assertEqual(set(obs.columns), exp_columns)
示例2: prep_template_get_req
# 需要导入模块: from qiita_db.metadata_template.prep_template import PrepTemplate [as 别名]
# 或者: from qiita_db.metadata_template.prep_template.PrepTemplate import to_dataframe [as 别名]
def prep_template_get_req(prep_id, user_id):
    """Return the full contents of a prep template as a dictionary

    Parameters
    ----------
    prep_id : int
        Id of the PrepTemplate to read
    user_id : str
        User requesting the prep template info

    Returns
    -------
    dict of objects
        On success:
        {'status': 'success',
         'message': '',
         'template': {sample: {column: value, ...}, ...}}
        If the existence check does not report 'success', or the access
        check returns something truthy, that object is returned instead.
    """
    template_id = int(prep_id)

    # Bail out with the checker's own response when the lookup fails
    lookup = _check_prep_template_exists(template_id)
    if lookup['status'] != 'success':
        return lookup

    template = PrepTemplate(template_id)

    # check_access returns a falsy value when the user may see the study
    denied = check_access(template.study_id, user_id)
    if denied:
        return denied

    # orient='index' keys the outer dict by the data frame's row labels
    contents = template.to_dataframe().to_dict(orient='index')
    return {'status': 'success',
            'message': '',
            'template': contents}
示例3: prep_template_summary_get_req
# 需要导入模块: from qiita_db.metadata_template.prep_template import PrepTemplate [as 别名]
# 或者: from qiita_db.metadata_template.prep_template.PrepTemplate import to_dataframe [as 别名]
def prep_template_summary_get_req(prep_id, user_id):
    """Summarize the values observed in every column of a prep template

    Parameters
    ----------
    prep_id : int
        Id of the PrepTemplate to summarize
    user_id : str
        User requesting the prep template info

    Returns
    -------
    dict of objects
        On success:
        {'num_samples': number of rows in the template,
         'summary': [(column, [(val1, count1), (val2, count2), ...]), ...],
         'status': 'success',
         'message': '',
         'editable': result of Study.can_edit for the user}
        Columns are listed in sorted order and the values of each column
        in natural sort order. If the existence check does not report
        'success', or the access check returns something truthy, that
        object is returned instead.
    """
    template_id = int(prep_id)

    lookup = _check_prep_template_exists(template_id)
    if lookup['status'] != 'success':
        return lookup

    template = PrepTemplate(template_id)
    denied = check_access(template.study_id, user_id)
    if denied:
        return denied

    can_edit = Study(template.study_id).can_edit(User(user_id))
    frame = template.to_dataframe()

    summary = []
    for name in sorted(frame.columns):
        # value_counts gives the number of occurrences of each value
        tally = frame[name].value_counts()
        pairs = [(str(value), tally[value])
                 for value in natsorted(tally.index)]
        summary.append((str(name), pairs))

    return {'num_samples': frame.shape[0],
            'summary': summary,
            'status': 'success',
            'message': '',
            'editable': can_edit}
示例4: study_files_get_req
# 需要导入模块: from qiita_db.metadata_template.prep_template import PrepTemplate [as 别名]
# 或者: from qiita_db.metadata_template.prep_template.PrepTemplate import to_dataframe [as 别名]
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
    """Returns the uploaded files for the study id categorized by artifact_type

    It retrieves the files uploaded for the given study and tries to
    guess on how those files should be added to the artifact of the given
    type. Uses information on the prep template to try to do a better guess.

    Parameters
    ----------
    user_id : str
        The id of the user making the request
    study_id : int
        The study id
    prep_template_id : int
        The prep template id
    artifact_type : str
        The artifact type

    Returns
    -------
    dict of {str: object}
        A dict of the form {'status': str,
                            'message': str,
                            'remaining': list of str,
                            'file_types': list of (str, bool, list of str),
                            'num_prefixes': int}
        where 'status' is a string specifying whether the query is successful,
        'message' is a human-readable description of the error (optional),
        'remaining' is the list of files that could not be categorized,
        'file_types' is a list of the available filetypes, if it is required
        or not and the list of categorized files for the given artifact type
        and 'num_prefixes' is the number of different run prefix values in
        the given prep template.

    Raises
    ------
    IncompetentQiitaDeveloperError
        If the prep template does not belong to the given study
    """
    supp_file_types = supported_filepath_types(artifact_type)
    selected = []
    remaining = []
    message = []

    pt = PrepTemplate(prep_template_id)
    if pt.study_id != study_id:
        # Report the requested prep id; the message used to be formatted
        # with the prep's study id, contradicting its own wording
        raise IncompetentQiitaDeveloperError(
            "The requested prep id (%d) doesn't belong to the study "
            "(%d)" % (prep_template_id, study_id))

    uploaded = get_files_from_uploads_folders(study_id)
    # From here on ``pt`` is the data frame, not the PrepTemplate object
    pt = pt.to_dataframe()
    ftypes_if = (ft.startswith('raw_') for ft, _ in supp_file_types
                 if ft != 'raw_sff')
    if any(ftypes_if) and 'run_prefix' in pt.columns:
        prep_prefixes = tuple(set(pt['run_prefix']))
        num_prefixes = len(prep_prefixes)
        # sorting prefixes by length to avoid collisions like: 100 1002
        # 10003
        prep_prefixes = sorted(prep_prefixes, key=len, reverse=True)
        # group files by prefix
        sfiles = defaultdict(list)
        for p in prep_prefixes:
            to_remove = []
            for fid, f in uploaded:
                if f.startswith(p):
                    sfiles[p].append(f)
                    to_remove.append((fid, f))
            # drop the files just claimed so a shorter prefix can't
            # claim them again
            uploaded = [x for x in uploaded if x not in to_remove]
        inuse = [y for x in sfiles.values() for y in x]
        remaining.extend([f for _, f in uploaded if f not in inuse])
        supp_file_types_len = len(supp_file_types)

        for k, v in viewitems(sfiles):
            len_files = len(v)
            # if the number of files in the k group is larger than the
            # available columns add to the remaining group, if not put them in
            # the selected group
            if len_files > supp_file_types_len:
                remaining.extend(v)
                message.append("'%s' has %d matches." % (k, len_files))
            else:
                v.sort()
                selected.append(v)
    else:
        num_prefixes = 0
        remaining = [f for _, f in uploaded]

    # get file_types, format: filetype, required, list of files
    # the i-th supported file type takes the i-th file of every selected
    # group that has at least i+1 files
    file_types = [(t, req, [x[i] for x in selected if i+1 <= len(x)])
                  for i, (t, req) in enumerate(supp_file_types)]

    # Create a list of artifacts that the user has access to, in case that
    # he wants to import the files from another artifact
    user = User(user_id)
    artifact_options = []
    user_artifacts = user.user_artifacts(artifact_type=artifact_type)
    study = Study(study_id)
    if study not in user_artifacts:
        user_artifacts[study] = study.artifacts(artifact_type=artifact_type)
    for study, artifacts in viewitems(user_artifacts):
        study_label = "%s (%d)" % (study.title, study.id)
        for a in artifacts:
            artifact_options.append(
                (a.id, "%s - %s (%d)" % (study_label, a.name, a.id)))
    # ......... the remainder of this function is omitted in this excerpt .........
示例5: array_agg
# 需要导入模块: from qiita_db.metadata_template.prep_template import PrepTemplate [as 别名]
# 或者: from qiita_db.metadata_template.prep_template.PrepTemplate import to_dataframe [as 别名]
st_df = st.to_dataframe()[columns]
# converting to datetime
for col in columns:
st_df[col] = st_df[col].apply(transform_date)
st.update(st_df)
if cols_prep:
    with TRN:
        # Collect, per table, the names of the columns listed in cols_prep.
        # Only tables named like 'prep_%' are searched, and the tables
        # 'prep_template' and 'prep_template_sample' are left out.
        sql = """SELECT table_name, array_agg(column_name::text)
FROM information_schema.columns
WHERE column_name IN %s
AND table_name LIKE 'prep_%%'
AND table_name NOT IN (
'prep_template', 'prep_template_sample')
GROUP BY table_name"""
        # set() collapses names that appear more than once in the headers
        TRN.add(sql, [tuple(set(cols_prep))])
        columns_by_table = dict(TRN.execute_fetchindex())
        for table_name, dup_cols in viewitems(columns_by_table):
            # the table name has the format table_#; item [1] is the #
            template_id = int(table_name.split('_')[1])
            template = PrepTemplate(template_id)
            # keep only the columns of interest
            frame = template.to_dataframe()[dup_cols]
            # convert every selected column with transform_date
            for name in dup_cols:
                frame[name] = frame[name].apply(transform_date)
            template.update(frame)