This page collects typical usage examples of the Python class pycbc.workflow.core.FileList, drawn from real projects. If you are wondering what FileList is for, or how and where to use it, the curated examples below may help.
The following 15 code examples of the FileList class are shown, ordered by popularity.
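Before diving in, here is a minimal sketch of the class in isolation. This is an illustration under stated assumptions, not taken from any project below: the ifo name, description, GPS segment, and directory are placeholders, and the imports and method name follow the pycbc source of the same era as these examples (glue for segments, FileList.find_output_with_ifo for lookups).

from glue import segments  # newer pycbc uses ligo.segments instead
from pycbc.workflow.core import File, FileList

# A FileList is a list of workflow File objects plus metadata-aware helpers.
flist = FileList([])
out_file = File('H1', 'EXAMPLE_DESC', segments.segment(1126051217, 1126053265),
                extension='xml', directory='output')
flist.append(out_file)

# Helpers select entries by metadata rather than by index, e.g. by ifo:
h1_files = flist.find_output_with_ifo('H1')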
Example 1: create_node
def create_node(self, parent, inj_trigs, inj_string, max_inc, segment):
    node = Node(self)
    trig_name = self.cp.get("workflow", "trigger-name")
    node.add_opt("--inj-string", inj_string)
    node.add_opt("--max-inclination", max_inc)
    node.add_opt("--inj-cache", "%s" % parent.storage_path)
    out_files = FileList([])
    for inj_trig in inj_trigs:
        out_string = inj_string.split(max_inc)[0]
        out_file_tag = [out_string, "FILTERED", max_inc,
                        inj_trig.tag_str.rsplit("_", 1)[-1]]
        out_file = File(
            self.ifos,
            inj_trig.description,
            inj_trig.segment,
            extension="xml",
            directory=self.out_dir,
            tags=out_file_tag,
        )
        out_file.PFN(out_file.cache_entry.path, site="local")
        out_files.append(out_file)
    node.add_opt("--output-dir", self.out_dir)
    return node, out_files
Example 2: create_node
def create_node(self, parent, inj_trigs, inj_string, max_inc, segment):
    node = Node(self)
    trig_name = self.cp.get('workflow', 'trigger-name')
    node.add_opt('--inj-string', inj_string)
    node.add_opt('--max-inclination', max_inc)
    node.add_opt('--inj-cache', '%s' % parent.storage_path)
    out_files = FileList([])
    for inj_trig in inj_trigs:
        out_file_tag = [
            inj_string, "FILTERED", max_inc,
            inj_trig.tag_str.rsplit('_', 1)[-1]
        ]
        out_file = File(
            self.ifos,
            inj_trig.description,
            inj_trig.segment,
            extension="xml",
            directory=self.out_dir,
            tags=out_file_tag)
        out_file.PFN(out_file.cache_entry.path, site="local")
        out_files.append(out_file)
    node.add_opt('--output-dir', self.out_dir)
    return node, out_files
Example 3: setup_background_bins_inj
def setup_background_bins_inj(workflow, coinc_files, background_file, bank_file, out_dir, tags=None):
    tags = [] if tags is None else tags
    bins_exe = PyCBCDistributeBackgroundBins(workflow.cp, 'distribute_background_bins',
                                             ifos=workflow.ifos, tags=tags, out_dir=out_dir)
    statmap_exe = PyCBCStatMapInjExecutable(workflow.cp, 'statmap_inj',
                                            ifos=workflow.ifos,
                                            tags=tags, out_dir=out_dir)
    cstat_exe = PyCBCCombineStatmap(workflow.cp, 'combine_statmap', ifos=workflow.ifos,
                                    tags=tags, out_dir=out_dir)
    background_bins = workflow.cp.get_opt_tags('workflow-coincidence', 'background-bins', tags).split(' ')
    background_bins = [x for x in background_bins if x != '']
    for inj_type in ['injinj', 'injfull', 'fullinj']:
        bins_node = bins_exe.create_node(FileList(coinc_files[inj_type]), bank_file,
                                         background_bins, tags=tags + [inj_type])
        workflow += bins_node
        coinc_files[inj_type] = bins_node.output_files
    stat_files = FileList([])
    for i in range(len(background_bins)):
        statnode = statmap_exe.create_node(FileList([coinc_files['injinj'][i]]),
                                           FileList([background_file[i]]),
                                           FileList([coinc_files['injfull'][i]]),
                                           FileList([coinc_files['fullinj'][i]]),
                                           tags=tags + ['BIN_%s' % i])
        workflow += statnode
        stat_files.append(statnode.output_files[0])
    cstat_node = cstat_exe.create_node(stat_files, tags=tags)
    workflow += cstat_node
    return cstat_node.output_files[0]
Example 4: convert_cachelist_to_filelist
def convert_cachelist_to_filelist(datafindcache_list):
    """
    Take as input a list of glue.lal.Cache objects and return a pycbc FileList
    containing all frames within those caches.

    Parameters
    ----------
    datafindcache_list : list of glue.lal.Cache objects
        The list of cache files to convert.

    Returns
    -------
    datafind_filelist : FileList of frame File objects
        The list of frame files.
    """
    datafind_filelist = FileList([])
    prev_file = None
    for cache in datafindcache_list:
        curr_ifo = cache.ifo
        for frame in cache:
            # Don't add a new workflow file entry for this frame if it is a
            # duplicate. These are assumed to be returned in time order.
            if prev_file and prev_file.cache_entry.url == frame.url:
                continue
            currFile = File(curr_ifo, frame.description, frame.segment,
                            file_url=frame.url, use_tmp_subdirs=True)
            currFile.PFN(frame.path, site='local')
            datafind_filelist.append(currFile)
            prev_file = currFile
    return datafind_filelist
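A hedged usage sketch for this helper: the .lcf cache paths below are hypothetical, each file is read with glue.lal.Cache.fromfile, and the ifo attribute that pycbc's datafind stage normally attaches is set by hand; the conversion itself is then a single call.

from glue.lal import Cache

caches = []
for ifo, path in [('H1', 'H1-DATAFIND.lcf'), ('L1', 'L1-DATAFIND.lcf')]:
    with open(path) as lcf:
        cache = Cache.fromfile(lcf)
    # pycbc's datafind setup attaches an ifo attribute to each cache;
    # when building caches by hand it must be set explicitly.
    cache.ifo = ifo
    caches.append(cache)

frame_files = convert_cachelist_to_filelist(caches)
print('%d unique frame files' % len(frame_files))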
Example 5: setup_background_bins
def setup_background_bins(workflow, coinc_files, bank_file, out_dir, tags=None):
    tags = [] if tags is None else tags
    bins_exe = PyCBCDistributeBackgroundBins(workflow.cp, 'distribute_background_bins',
                                             ifos=workflow.ifos, tags=tags, out_dir=out_dir)
    statmap_exe = PyCBCStatMapExecutable(workflow.cp, 'statmap',
                                         ifos=workflow.ifos,
                                         tags=tags, out_dir=out_dir)
    cstat_exe = PyCBCCombineStatmap(workflow.cp, 'combine_statmap', ifos=workflow.ifos,
                                    tags=tags, out_dir=out_dir)
    background_bins = workflow.cp.get_opt_tags('workflow-coincidence', 'background-bins', tags).split(' ')
    background_bins = [x for x in background_bins if x != '']
    bins_node = bins_exe.create_node(coinc_files, bank_file, background_bins)
    workflow += bins_node
    stat_files = FileList([])
    for i, coinc_file in enumerate(bins_node.output_files):
        statnode = statmap_exe.create_node(FileList([coinc_file]), tags=tags + ['BIN_%s' % i])
        workflow += statnode
        stat_files.append(statnode.output_files[0])
        stat_files[i].bin_name = bins_node.names[i]
    cstat_node = cstat_exe.create_node(stat_files, tags=tags)
    workflow += cstat_node
    return cstat_node.output_files[0], stat_files
Example 6: setup_psd_pregenerated
def setup_psd_pregenerated(workflow, tags=None):
    '''
    Setup CBC workflow to use pregenerated psd files.
    The file given in cp.get('workflow','pregenerated-psd-file-(ifo)') will
    be used as the --psd-file argument to geom_nonspinbank, geom_aligned_bank
    and pycbc_plot_psd_file.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.

    Returns
    -------
    psd_files : pycbc.workflow.core.FileList
        The FileList holding the PSD files.
    '''
    if tags is None:
        tags = []
    psd_files = FileList([])
    cp = workflow.cp
    global_seg = workflow.analysis_time
    user_tag = "PREGEN_PSD"
    # Check for one psd for all ifos
    try:
        pre_gen_file = cp.get_opt_tags('workflow-psd',
                                       'psd-pregenerated-file', tags)
        pre_gen_file = resolve_url(pre_gen_file)
        file_url = urlparse.urljoin('file:',
                                    urllib.pathname2url(pre_gen_file))
        curr_file = File(workflow.ifos, user_tag, global_seg, file_url,
                         tags=tags)
        curr_file.PFN(file_url, site='local')
        psd_files.append(curr_file)
    except ConfigParser.Error:
        # Check for one psd per ifo
        for ifo in workflow.ifos:
            try:
                pre_gen_file = cp.get_opt_tags('workflow-psd',
                                               'psd-pregenerated-file-%s' % ifo.lower(),
                                               tags)
                pre_gen_file = resolve_url(pre_gen_file)
                file_url = urlparse.urljoin('file:',
                                            urllib.pathname2url(pre_gen_file))
                curr_file = File(ifo, user_tag, global_seg, file_url,
                                 tags=tags)
                curr_file.PFN(file_url, site='local')
                psd_files.append(curr_file)
            except ConfigParser.Error:
                # It's unlikely, but not impossible, that only some ifos
                # will have pregenerated PSDs
                logging.warning("No psd file specified for IFO %s.", ifo)
    return psd_files
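For reference, a sketch of the configuration this function reads. The section and option names are taken from the code above, the paths are placeholders, and the snippet uses the Python 3 configparser spelling for brevity (the example itself is from the Python 2 era). Note that pycbc's cp.get_opt_tags additionally merges tag-specific sections; the plain get shown here ignores tags.

import configparser

ini_text = """
[workflow-psd]
psd-pregenerated-file-h1 = /path/to/H1-PSD.txt
psd-pregenerated-file-l1 = /path/to/L1-PSD.txt
"""
cp = configparser.ConfigParser()
cp.read_string(ini_text)
# A single shared 'psd-pregenerated-file' option would be tried first by the
# function above; the per-ifo options are the fallback probed in its loop.
for ifo in ('H1', 'L1'):
    print(cp.get('workflow-psd', 'psd-pregenerated-file-%s' % ifo.lower()))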
Example 7: convert_cachelist_to_filelist
def convert_cachelist_to_filelist(datafindcache_list):
    """
    Take as input a list of glue.lal.Cache objects and return a pycbc FileList
    containing all frames within those caches.

    Parameters
    ----------
    datafindcache_list : list of glue.lal.Cache objects
        The list of cache files to convert.

    Returns
    -------
    datafind_filelist : FileList of frame File objects
        The list of frame files.
    """
    datafind_filelist = FileList([])
    prev_file = None
    for cache in datafindcache_list:
        curr_ifo = cache.ifo
        for frame in cache:
            # Don't add a new workflow file entry for this frame if it is a
            # duplicate. These are assumed to be returned in time order.
            if prev_file:
                prev_name = prev_file.cache_entry.url.split('/')[-1]
                this_name = frame.url.split('/')[-1]
                if prev_name == this_name:
                    continue
            # Pegasus doesn't like "localhost" in URLs.
            frame.url = frame.url.replace('file://localhost', 'file://')
            currFile = File(curr_ifo, frame.description, frame.segment,
                            file_url=frame.url, use_tmp_subdirs=True)
            if frame.url.startswith('file://'):
                currFile.PFN(frame.url, site='local')
                if frame.url.startswith(
                        'file:///cvmfs/oasis.opensciencegrid.org/'):
                    # Datafind returned a URL valid on the osg as well,
                    # so add the additional PFNs to allow OSG access.
                    currFile.PFN(frame.url, site='osg')
                    currFile.PFN(frame.url.replace(
                        'file:///cvmfs/oasis.opensciencegrid.org/',
                        'root://xrootd-local.unl.edu/user/'), site='osg')
                    currFile.PFN(frame.url.replace(
                        'file:///cvmfs/oasis.opensciencegrid.org/',
                        'gsiftp://red-gridftp.unl.edu/user/'), site='osg')
                    currFile.PFN(frame.url.replace(
                        'file:///cvmfs/oasis.opensciencegrid.org/',
                        'gsiftp://ldas-grid.ligo.caltech.edu/hdfs/'), site='osg')
            else:
                currFile.PFN(frame.url, site='notlocal')
            datafind_filelist.append(currFile)
            prev_file = currFile
    return datafind_filelist
Example 8: create_node
def create_node(self, trig_files=None, segment_dir=None, out_tags=None,
                tags=None):
    out_tags = [] if out_tags is None else out_tags
    node = Node(self)
    if not trig_files:
        raise ValueError("%s must be supplied with trigger files"
                         % self.name)
    # Data options
    pad_data = self.cp.get('inspiral', 'pad-data')
    if pad_data is None:
        raise ValueError("The option pad-data is a required option of "
                         "%s. Please check the ini file." % self.name)
    num_trials = int(self.cp.get("trig_combiner", "num-trials"))
    trig_name = self.cp.get('workflow', 'trigger-name')
    if all("COHERENT_NO_INJECTIONS" in t.name for t in trig_files) and \
            self.cp.has_option_tag('inspiral', 'do-short-slides',
                                   'coherent_no_injections'):
        node.add_opt('--short-slides')
    node.add_opt('--grb-name', trig_name)
    node.add_opt('--pad-data', pad_data)
    node.add_opt('--segment-length', self.cp.get('inspiral',
                                                 'segment-duration'))
    node.add_opt('--ifo-tag', self.ifos)
    node.add_opt('--user-tag', 'INSPIRAL')
    # Set input / output options
    node.add_input_list_opt('--input-files', trig_files)
    node.add_opt('--segment-dir', segment_dir)
    node.add_opt('--output-dir', self.out_dir)
    out_files = FileList([])
    for out_tag in out_tags:
        out_file = File(self.ifos, 'INSPIRAL', trig_files[0].segment,
                        directory=self.out_dir, extension='xml.gz',
                        tags=["GRB%s" % trig_name, out_tag],
                        store_file=self.retain_files)
        out_files.append(out_file)
    for trial in range(1, num_trials + 1):
        out_file = File(self.ifos, 'INSPIRAL', trig_files[0].segment,
                        directory=self.out_dir, extension='xml.gz',
                        tags=["GRB%s" % trig_name, "OFFTRIAL_%d" % trial],
                        store_file=self.retain_files)
        out_files.append(out_file)
    node.add_profile('condor', 'request_cpus', self.num_threads)
    return node, out_files
Example 9: setup_gate_pregenerated
def setup_gate_pregenerated(workflow, output_dir=None, tags=None):
    '''
    Setup CBC workflow to use pregenerated gating files.
    The file given in cp.get('workflow','gating-file-(ifo)') will
    be used as the --gating-file for all jobs for that ifo.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    output_dir : path string
        The directory where data products will be placed.
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.

    Returns
    -------
    gate_files : pycbc.workflow.core.FileList
        The FileList holding the gating files
    '''
    if tags is None:
        tags = []
    gate_files = FileList([])
    cp = workflow.cp
    global_seg = workflow.analysis_time
    user_tag = "PREGEN_GATE"
    for ifo in workflow.ifos:
        try:
            pre_gen_file = cp.get_opt_tags('workflow-gating',
                                           'gating-file-%s' % ifo.lower(),
                                           tags)
            pre_gen_file = resolve_url(pre_gen_file,
                                       os.path.join(os.getcwd(), output_dir))
            file_url = urlparse.urljoin('file:',
                                        urllib.pathname2url(pre_gen_file))
            curr_file = File(ifo, user_tag, global_seg, file_url,
                             tags=tags)
            curr_file.PFN(file_url, site='local')
            gate_files.append(curr_file)
            logging.info("Using gating file %s for %s", file_url, ifo)
        except ConfigParser.Error:
            logging.info("No gating file specified for %s", ifo)
    return gate_files
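Both gating examples (9 and 10) follow one pattern: build a file: URL, wrap it in a File, then register a physical copy with PFN. A minimal sketch of that pattern in isolation, with a placeholder path and GPS times (the constructor arguments mirror the calls above):

from glue import segments
from pycbc.workflow.core import File

seg = segments.segment(1126051217, 1126137617)  # placeholder analysis time
url = 'file:///home/albert/gates/H1-PREGEN_GATE.txt'  # placeholder path
gate_file = File('H1', 'PREGEN_GATE', seg, file_url=url, tags=['PREGEN'])
# PFN records where a physical replica of the file lives; the site label
# ('local' here) is what the Pegasus planner consults when deciding whether
# a transfer job is needed.
gate_file.PFN(url, site='local')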
Example 10: setup_gate_pregenerated
def setup_gate_pregenerated(workflow, tags=None):
    '''
    Setup CBC workflow to use pregenerated gating files.
    The file given in cp.get('workflow','pregenerated-gating-file-(ifo)') will
    be used as the --gating-file for all matched-filtering jobs for that ifo.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.

    Returns
    -------
    gate_files : pycbc.workflow.core.FileList
        The FileList holding the gating files
    '''
    if tags is None:
        tags = []
    gate_files = FileList([])
    cp = workflow.cp
    global_seg = workflow.analysis_time
    user_tag = "PREGEN_GATE"
    for ifo in workflow.ifos:
        try:
            pre_gen_file = cp.get_opt_tags('workflow-gating',
                                           'gating-pregenerated-file-%s' % ifo.lower(),
                                           tags)
            pre_gen_file = resolve_url(pre_gen_file)
            file_url = urlparse.urljoin('file:',
                                        urllib.pathname2url(pre_gen_file))
            curr_file = File(ifo, user_tag, global_seg, file_url,
                             tags=tags)
            curr_file.PFN(file_url, site='local')
            gate_files.append(curr_file)
        except ConfigParser.Error:
            # It's unlikely, but not impossible, that only some ifos
            # will be gated
            logging.warning("No gating file specified for IFO %s.", ifo)
    return gate_files
Example 11: make_gating_node
def make_gating_node(workflow, datafind_files, outdir=None, tags=None):
    '''
    Generate jobs for autogating the data for PyGRB runs.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    datafind_files : pycbc.workflow.core.FileList
        A FileList containing the frame files to be gated.
    outdir : string
        Path of the output directory
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.

    Returns
    -------
    condition_strain_nodes : list
        List containing the pycbc.workflow.core.Node objects representing the
        autogating jobs.
    condition_strain_outs : pycbc.workflow.core.FileList
        FileList containing the pycbc.workflow.core.File objects representing
        the gated frame files.
    '''
    cp = workflow.cp
    if tags is None:
        tags = []
    condition_strain_class = select_generic_executable(workflow,
                                                       "condition_strain")
    condition_strain_nodes = []
    condition_strain_outs = FileList([])
    for ifo in workflow.ifos:
        input_files = FileList([datafind_file for datafind_file in
                                datafind_files if datafind_file.ifo == ifo])
        condition_strain_jobs = condition_strain_class(cp, "condition_strain",
                                                       ifo=ifo, out_dir=outdir,
                                                       tags=tags)
        condition_strain_node, condition_strain_out = \
            condition_strain_jobs.create_node(input_files, tags=tags)
        condition_strain_nodes.append(condition_strain_node)
        condition_strain_outs.extend(FileList([condition_strain_out]))
    return condition_strain_nodes, condition_strain_outs
Example 12: setup_minifollowups
def setup_minifollowups(workflow, out_dir, frame_files,
                        coinc_file, tmpltbank_file, data_type, tags=None):
    '''This performs a series of followup jobs on the num_events loudest
    events.
    '''
    logging.info('Entering minifollowups module')
    if tags is None:
        tags = []
    # create a FileList that will contain all output files
    output_filelist = FileList([])
    # If there is no minifollowups section, do not add minifollowup jobs
    # to the workflow.
    if not workflow.cp.has_section('workflow-minifollowups'):
        logging.info('There is no [workflow-minifollowups] section in '
                     'the configuration file')
        logging.info('Leaving minifollowups')
        return output_filelist
    # loop over the number of loudest events to be followed up
    num_events = int(workflow.cp.get_opt_tags('workflow-minifollowups',
                                              'num-events', ''))
    for num_event in range(num_events):
        # increment by 1 for human readability
        num_event += 1
        # get the output directory for this event
        tag_str = '_'.join(tags)
        output_dir = out_dir['result/loudest_event_%d_of_%d_%s' % (
            num_event, num_events, tag_str)]
        # make a pycbc_mf_table node for this event
        table_exe = MinifollowupsTableExecutable(workflow.cp, 'mf_table',
                                                 workflow.ifo_string,
                                                 output_dir, tags=tags)
        table_node = table_exe.create_node(workflow.analysis_time, coinc_file,
                                           tmpltbank_file, data_type, num_event)
        workflow.add_node(table_node)
        output_filelist.extend(table_node.output_files)
    logging.info('Leaving minifollowups module')
    return output_filelist
Example 13: convert_cachelist_to_filelist
def convert_cachelist_to_filelist(datafindcache_list):
    """
    Take as input a list of glue.lal.Cache objects and return a pycbc FileList
    containing all frames within those caches.

    Parameters
    ----------
    datafindcache_list : list of glue.lal.Cache objects
        The list of cache files to convert.

    Returns
    -------
    datafind_filelist : FileList of frame File objects
        The list of frame files.
    """
    datafind_filelist = FileList([])
    prev_file = None
    for cache in datafindcache_list:
        curr_ifo = cache.ifo
        for frame in cache:
            # Don't add a new workflow file entry for this frame if it is a
            # duplicate. These are assumed to be returned in time order.
            if prev_file:
                prev_name = prev_file.cache_entry.url.split('/')[-1]
                this_name = frame.url.split('/')[-1]
                if prev_name == this_name:
                    continue
            # Pegasus doesn't like "localhost" in URLs.
            frame.url = frame.url.replace('file://localhost', 'file://')
            currFile = File(curr_ifo, frame.description, frame.segment,
                            file_url=frame.url, use_tmp_subdirs=True)
            if frame.url.startswith('file://'):
                currFile.PFN(frame.url, site='local')
            else:
                currFile.PFN(frame.url, site='notlocal')
            datafind_filelist.append(currFile)
            prev_file = currFile
    return datafind_filelist
Example 14: datafind_keep_unique_backups
def datafind_keep_unique_backups(backup_outs, orig_outs):
    """This function will take a list of backup datafind files, presumably
    obtained by querying a remote datafind server, e.g. CIT, and compare
    these against a list of original datafind files, presumably obtained by
    querying the local datafind server. Only the datafind files in the backup
    list that do not appear in the original list are returned. This allows us
    to use only files that are missing from the local cluster.

    Parameters
    ----------
    backup_outs : FileList
        List of datafind files from the remote datafind server.
    orig_outs : FileList
        List of datafind files from the local datafind server.

    Returns
    -------
    FileList
        List of datafind files in backup_outs and not in orig_outs.
    """
    # NOTE: This function is not optimized and could be made considerably
    # quicker if speed becomes an issue. With 4s frame files this might
    # be slow, but for >1000s files I don't foresee any issue, so I keep
    # this simple.
    return_list = FileList([])
    # We compare the LFNs to determine uniqueness
    # Is there a way to associate two paths with one LFN??
    orig_names = [f.name for f in orig_outs]
    for backup_file in backup_outs:
        if backup_file.name not in orig_names:
            return_list.append(backup_file)
        else:
            index_num = orig_names.index(backup_file.name)
            orig_out = orig_outs[index_num]
            pfns = list(backup_file.pfns)
            # This shouldn't happen, but catch it if it does
            assert len(pfns) == 1
            orig_out.PFN(pfns[0].url, site="notlocal")
    return return_list
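A usage sketch under stated assumptions: the File objects, URLs, and frame helper below are illustrative, and the sketch assumes that File.name resolves to the URL basename, i.e. the LFN the comparison above relies on. Frame a.gwf exists both locally and remotely, frame b.gwf only remotely:

from glue import segments
from pycbc.workflow.core import File, FileList

seg = segments.segment(1126051217, 1126055313)  # illustrative GPS span

def frame(url, site):
    f = File('H1', 'H1_FAKE_FRAME', seg, file_url=url)
    f.PFN(url, site=site)
    return f

local_frames = FileList([frame('file:///data/a.gwf', 'local')])
remote_frames = FileList([frame('gsiftp://cit/data/a.gwf', 'notlocal'),
                          frame('gsiftp://cit/data/b.gwf', 'notlocal')])

# Only b.gwf is kept; a.gwf's remote PFN is instead attached to the existing
# local entry under site="notlocal".
missing = datafind_keep_unique_backups(remote_frames, local_frames)
print([f.name for f in missing])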
Example 15: setup_segment_gen_mixed
def setup_segment_gen_mixed(workflow, veto_categories, out_dir,
                            maxVetoAtRunTime, tag=None,
                            generate_coincident_segs=True):
    """
    This function will generate veto files for each ifo and for each veto
    category.
    It can generate these vetoes at run-time or in the workflow (or do some at
    run-time and some in the workflow). However, the CAT_1 vetoes and science
    time must be generated at run time as they are needed to plan the workflow.
    CATs 2 and higher *may* be needed for other workflow construction.
    It can also combine these files to create a set of cumulative,
    multi-detector veto files, which can be used in ligolw_thinca and in
    pipedown. Again these can be created at run time or within the workflow.

    Parameters
    ----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
        This instance also contains the ifos for which to attempt to obtain
        segments for this analysis and the start and end times to search for
        segments over.
    veto_categories : list of ints
        List of veto categories to generate segments for. If this stops being
        integers, this can be changed here.
    out_dir : path
        The directory in which output will be stored.
    maxVetoAtRunTime : int
        Generate veto files at run time up to this category. Veto categories
        beyond this in veto_categories will be generated in the workflow.
        If we move to a model where veto categories are not explicitly
        cumulative, this will be rethought.
    tag : string, optional (default=None)
        Use this to specify a tag. This can be used if this module is being
        called more than once to give call specific configuration (by setting
        options in [workflow-datafind-${TAG}] rather than [workflow-datafind]).
        This is also used to tag the Files returned by the class to uniqueify
        the Files and uniqueify the actual filename.
        FIXME: Filenames may not be unique with current codes!
    generate_coincident_segs : boolean, optional (default = True)
        If given this module will generate a set of coincident, cumulative veto
        files that can be used with ligolw_thinca and pipedown.

    Returns
    -------
    segFilesList : dictionary of pycbc.workflow.core.SegFile instances
        These are representations of the various segment files that were
        constructed at this stage of the workflow and may be needed at later
        stages of the analysis (e.g. for performing DQ vetoes). If the file
        was generated at run-time the segment lists contained within these
        files will be an attribute of the instance. (If it will be generated
        in the workflow it will not be, because I am not psychic.)
    """
    cp = workflow.cp
    segFilesList = FileList([])
    start_time = workflow.analysis_time[0]
    end_time = workflow.analysis_time[1]
    segValidSeg = workflow.analysis_time
    # Will I need to add some jobs to the workflow?
    vetoGenJob = create_segs_from_cats_job(cp, out_dir, workflow.ifo_string)
    for ifo in workflow.ifos:
        logging.info("Generating science segments for ifo %s" % (ifo))
        currSciSegs, currSciXmlFile = get_science_segments(ifo, cp, start_time,
                                                           end_time, out_dir,
                                                           tag=tag)
        segFilesList.append(currSciXmlFile)
        for category in veto_categories:
            if category > maxVetoAtRunTime:
                msg = "Adding creation of CAT_%d segments " % (category)
                msg += "for ifo %s to workflow." % (ifo)
                logging.info(msg)
                execute_status = False
            if category <= maxVetoAtRunTime:
                logging.info("Generating CAT_%d segments for ifo %s."
                             % (category, ifo))
                execute_status = True
            currVetoXmlFile = get_veto_segs(workflow, ifo, category,
                                            start_time, end_time, out_dir,
                                            vetoGenJob,
                                            execute_now=execute_status)
            segFilesList.append(currVetoXmlFile)
            # Store the CAT_1 veto segs for use below
            if category == 1:
                # Yes, it's yucky to generate a file and then read it back in.
                # This will be fixed when the new API for segment generation
                # is ready.
                vetoXmlFP = open(currVetoXmlFile.storage_path, 'r')
                cat1Segs = fromsegmentxml(vetoXmlFP)
                vetoXmlFP.close()
        analysedSegs = currSciSegs - cat1Segs
        analysedSegs.coalesce()
        analysedXmlFile = os.path.join(out_dir,
                                       "%s-SCIENCE_OK_SEGMENTS.xml" % (ifo.upper()))
        currUrl = urlparse.urlunparse(['file', 'localhost', analysedXmlFile,
# ... (the remainder of this example is omitted) ...