This article collects typical usage examples of the Python method galaxy.datatypes.registry.Registry.load_datatypes. If you are unsure what Registry.load_datatypes does or how to use it, the curated code examples below should help; you can also explore the containing class, galaxy.datatypes.registry.Registry, for further usage.
The following presents 13 code examples of Registry.load_datatypes, ordered by popularity.
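All of the examples share one pattern: construct a Registry, populate it with load_datatypes, then use the registry for datatype lookups or sniffing. Here is a minimal sketch of that pattern; the two paths are illustrative placeholders rather than values from any example. Note that several examples below (4, 7, 8, and 13) simply call load_datatypes() with no arguments and rely on its defaults.

# Minimal usage sketch; the paths are assumptions for illustration.
from galaxy.datatypes.registry import Registry

registry = Registry()
registry.load_datatypes(root_dir='/opt/galaxy',  # assumed Galaxy install root
                        config='/opt/galaxy/config/datatypes_conf.xml')  # assumed datatypes config
datatype = registry.get_datatype_by_extension('fasta')  # look up the datatype registered for 'fasta'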
Example 1: __main__
# Required import: from galaxy.datatypes.registry import Registry [as alias]
# Or: from galaxy.datatypes.registry.Registry import load_datatypes [as alias]
def __main__():
    filename = sys.argv[1]
    try:
        max_file_size = int( sys.argv[2] )
    except:
        max_file_size = 0
    job_params, params = load_input_parameters( filename )
    if job_params is None: #using an older tabular file
        enhanced_handling = False
        job_params = dict( param_dict = params )
        job_params[ 'output_data' ] = [ dict( out_data_name = 'output',
                                              ext = 'data',
                                              file_name = filename,
                                              files_path = None ) ]
        job_params[ 'job_config' ] = dict( GALAXY_ROOT_DIR=GALAXY_ROOT_DIR, GALAXY_DATATYPES_CONF_FILE=GALAXY_DATATYPES_CONF_FILE, TOOL_PROVIDED_JOB_METADATA_FILE = TOOL_PROVIDED_JOB_METADATA_FILE )
    else:
        enhanced_handling = True
        json_file = open( job_params[ 'job_config' ][ 'TOOL_PROVIDED_JOB_METADATA_FILE' ], 'w' ) #specially named file for output junk to pass onto set metadata
    datatypes_registry = Registry()
    datatypes_registry.load_datatypes( root_dir = job_params[ 'job_config' ][ 'GALAXY_ROOT_DIR' ], config = job_params[ 'job_config' ][ 'GALAXY_DATATYPES_CONF_FILE' ] )
    URL = params.get( 'URL', None ) #using exactly URL indicates that only one dataset is being downloaded
    URL_method = params.get( 'URL_method', None )
    simpleD = params.get('galaxyData')
    # The Python support for fetching resources from the web is layered. urllib uses the httplib
    # library, which in turn uses the socket library. As of Python 2.3 you can specify how long
    # a socket should wait for a response before timing out. By default the socket module has no
    # timeout and can hang. Currently, the socket timeout is not exposed at the httplib or urllib2
    # levels. However, you can set the default timeout ( in seconds ) globally for all sockets by
    # doing the following.
    socket.setdefaulttimeout( 600 )
    cur_filename = params.get('output')
    outputfile = open( cur_filename, 'w' ).write( simpleD )
Example 2: __main__
# Required import: from galaxy.datatypes.registry import Registry [as alias]
# Or: from galaxy.datatypes.registry.Registry import load_datatypes [as alias]
def __main__():
    if len(sys.argv) < 4:
        print('usage: upload.py <root> <datatypes_conf> <json paramfile> <output spec> ...', file=sys.stderr)
        sys.exit(1)
    output_paths = parse_outputs(sys.argv[4:])
    json_file = open('galaxy.json', 'w')
    registry = Registry()
    registry.load_datatypes(root_dir=sys.argv[1], config=sys.argv[2])
    for line in open(sys.argv[3], 'r'):
        dataset = loads(line)
        dataset = util.bunch.Bunch(**safe_dict(dataset))
        try:
            output_path = output_paths[int(dataset.dataset_id)][0]
        except Exception:
            print('Output path for dataset %s not found on command line' % dataset.dataset_id, file=sys.stderr)
            sys.exit(1)
        if dataset.type == 'composite':
            files_path = output_paths[int(dataset.dataset_id)][1]
            add_composite_file(dataset, json_file, output_path, files_path)
        else:
            add_file(dataset, registry, json_file, output_path)
    # clean up paramfile
    # TODO: this will not work when running as the actual user unless the
    # parent directory is writable by the user.
    try:
        os.remove(sys.argv[3])
    except Exception:
        pass
Example 3: __main__
# Required import: from galaxy.datatypes.registry import Registry [as alias]
# Or: from galaxy.datatypes.registry.Registry import load_datatypes [as alias]
def __main__():
    if len(sys.argv) < 4:
        print('usage: upload.py <root> <datatypes_conf> <json paramfile> <output spec> ...', file=sys.stderr)
        sys.exit(1)
    output_paths = parse_outputs(sys.argv[4:])
    registry = Registry()
    registry.load_datatypes(root_dir=sys.argv[1], config=sys.argv[2])
    try:
        datasets = __read_paramfile(sys.argv[3])
    except (ValueError, AssertionError):
        datasets = __read_old_paramfile(sys.argv[3])
    metadata = []
    for dataset in datasets:
        dataset = bunch.Bunch(**safe_dict(dataset))
        try:
            output_path = output_paths[int(dataset.dataset_id)][0]
        except Exception:
            print('Output path for dataset %s not found on command line' % dataset.dataset_id, file=sys.stderr)
            sys.exit(1)
        try:
            if dataset.type == 'composite':
                files_path = output_paths[int(dataset.dataset_id)][1]
                metadata.append(add_composite_file(dataset, output_path, files_path))
            else:
                metadata.append(add_file(dataset, registry, output_path))
        except UploadProblemException as e:
            metadata.append(file_err(e.message, dataset))
    __write_job_metadata(metadata)
Example 4: __init__
# Required import: from galaxy.datatypes.registry import Registry [as alias]
# Or: from galaxy.datatypes.registry.Registry import load_datatypes [as alias]
def __init__(self, config):
    self.object_store = build_object_store_from_config(config)
    # Setup the database engine and ORM
    self.model = galaxy.config.init_models_from_config(config, object_store=self.object_store)
    registry = Registry()
    registry.load_datatypes()
    galaxy.model.set_datatypes_registry(registry)
Example 5: __main__
# Required import: from galaxy.datatypes.registry import Registry [as alias]
# Or: from galaxy.datatypes.registry.Registry import load_datatypes [as alias]
def __main__():
    filename = sys.argv[1]
    try:
        max_file_size = int( sys.argv[2] )
    except:
        max_file_size = 0
    job_params, params = load_input_parameters( filename )
    if job_params is None: #using an older tabular file
        enhanced_handling = False
        job_params = dict( param_dict = params )
        job_params[ 'output_data' ] = [ dict( out_data_name = 'output',
                                              ext = 'data',
                                              file_name = filename,
                                              extra_files_path = None ) ]
        job_params[ 'job_config' ] = dict( GALAXY_ROOT_DIR=GALAXY_ROOT_DIR, GALAXY_DATATYPES_CONF_FILE=GALAXY_DATATYPES_CONF_FILE, TOOL_PROVIDED_JOB_METADATA_FILE = TOOL_PROVIDED_JOB_METADATA_FILE )
    else:
        enhanced_handling = True
        json_file = open( job_params[ 'job_config' ][ 'TOOL_PROVIDED_JOB_METADATA_FILE' ], 'w' ) #specially named file for output junk to pass onto set metadata
    datatypes_registry = Registry()
    datatypes_registry.load_datatypes( root_dir = job_params[ 'job_config' ][ 'GALAXY_ROOT_DIR' ], config = job_params[ 'job_config' ][ 'GALAXY_DATATYPES_CONF_FILE' ] )
    URL = params.get( 'URL', None ) #using exactly URL indicates that only one dataset is being downloaded
    URL_method = params.get( 'URL_method', None )
    # The Python support for fetching resources from the web is layered. urllib uses the httplib
    # library, which in turn uses the socket library. As of Python 2.3 you can specify how long
    # a socket should wait for a response before timing out. By default the socket module has no
    # timeout and can hang. Currently, the socket timeout is not exposed at the httplib or urllib2
    # levels. However, you can set the default timeout ( in seconds ) globally for all sockets by
    # doing the following.
    socket.setdefaulttimeout( 600 )
    for data_dict in job_params[ 'output_data' ]:
        cur_filename = data_dict.get( 'file_name', filename )
        cur_URL = params.get( '%s|%s|URL' % ( GALAXY_PARAM_PREFIX, data_dict[ 'out_data_name' ] ), URL )
        if not cur_URL:
            open( cur_filename, 'w' ).write( "" )
            stop_err( 'The remote data source application has not sent back a URL parameter in the request.' )
        # The following calls to urllib.urlopen() will use the above default timeout
        try:
            if not URL_method or URL_method == 'get':
                page = urllib.urlopen( cur_URL )
            elif URL_method == 'post':
                page = urllib.urlopen( cur_URL, urllib.urlencode( params ) )
        except Exception, e:
            stop_err( 'The remote data source application may be off line, please try again later. Error: %s' % str( e ) )
        if max_file_size:
            file_size = int( page.info().get( 'Content-Length', 0 ) )
            if file_size > max_file_size:
                stop_err( 'The size of the data (%d bytes) you have requested exceeds the maximum allowed (%d bytes) on this server.' % ( file_size, max_file_size ) )
        #do sniff stream for multi_byte
        try:
            cur_filename, is_multi_byte = sniff.stream_to_open_named_file( page, os.open( cur_filename, os.O_WRONLY | os.O_CREAT ), cur_filename, source_encoding=get_charset_from_http_headers( page.headers ) )
        except Exception, e:
            stop_err( 'Unable to fetch %s:\n%s' % ( cur_URL, e ) )
Example 6: collect_test_data
# Required import: from galaxy.datatypes.registry import Registry [as alias]
# Or: from galaxy.datatypes.registry.Registry import load_datatypes [as alias]
def collect_test_data():
    registry = Registry()
    registry.load_datatypes(root_dir=GALAXY_ROOT, config=DATATYPES_CONFIG)
    test_files = os.listdir(TEST_FILE_DIR)
    files = [os.path.join(TEST_FILE_DIR, f) for f in test_files]
    datatypes = [find_datatype(registry, f) for f in test_files]
    uploadable = [datatype.file_ext in registry.upload_file_formats for datatype in datatypes]
    test_data_description = [TEST_DATA(*items) for items in zip(files, datatypes, uploadable)]
    return {os.path.basename(data.path): data for data in test_data_description}
Example 7: __init__
# Required import: from galaxy.datatypes.registry import Registry [as alias]
# Or: from galaxy.datatypes.registry.Registry import load_datatypes [as alias]
def __init__(self, config):
    if config.database_connection is False:
        config.database_connection = "sqlite:///%s?isolation_level=IMMEDIATE" % config.database
    self.object_store = build_object_store_from_config(config)
    # Setup the database engine and ORM
    self.model = galaxy.model.mapping.init(config.file_path, config.database_connection, engine_options={}, create_tables=False, object_store=self.object_store)
    registry = Registry()
    registry.load_datatypes()
    galaxy.model.set_datatypes_registry(registry)
Example 8: __init__
# Required import: from galaxy.datatypes.registry import Registry [as alias]
# Or: from galaxy.datatypes.registry.Registry import load_datatypes [as alias]
def __init__( self, datatypes_registry=None, ext='data', dbkey='?' ):
    self.ext = self.extension = ext
    self.dbkey = dbkey
    if datatypes_registry is None:
        # Default Value Required for unit tests
        datatypes_registry = Registry()
        datatypes_registry.load_datatypes()
    self.datatype = datatypes_registry.get_datatype_by_extension( ext )
    self._metadata = None
    self.metadata = MetadataCollection( self )
Example 9: sniff_and_handle_data_type
# Required import: from galaxy.datatypes.registry import Registry [as alias]
# Or: from galaxy.datatypes.registry.Registry import load_datatypes [as alias]
def sniff_and_handle_data_type(json_params, output_file):
    """
    The sniff.handle_uploaded_dataset_file() method in Galaxy performs dual
    functions: it sniffs the filetype, and if the file is a compressed archive
    for a non-compressed datatype such as fasta, it will be unpacked.
    """
    try:
        datatypes_registry = Registry()
        datatypes_registry.load_datatypes(
            root_dir=json_params['job_config']['GALAXY_ROOT_DIR'],
            config=json_params['job_config']['GALAXY_DATATYPES_CONF_FILE'])
        file_type = sniff.handle_uploaded_dataset_file(
            output_file,
            datatypes_registry)
        return file_type
    except Exception:
        return None
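Example 9 returns None whenever sniffing fails, so callers should be ready to fall back to a default extension. A hypothetical invocation might look like the sketch below; the json_params contents and the file path are assumptions for illustration, shaped like the 'job_config' dictionaries used throughout these examples.

# Hypothetical call to the helper from Example 9; all values are illustrative only.
json_params = {
    'job_config': {
        'GALAXY_ROOT_DIR': '/opt/galaxy',  # assumed install root
        'GALAXY_DATATYPES_CONF_FILE': '/opt/galaxy/config/datatypes_conf.xml',  # assumed config
    }
}
file_type = sniff_and_handle_data_type(json_params, '/tmp/uploaded_dataset.dat')
if file_type is None:
    file_type = 'data'  # sniffing failed; fall back to Galaxy's generic datatype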
Example 10: main
# Required import: from galaxy.datatypes.registry import Registry [as alias]
# Or: from galaxy.datatypes.registry.Registry import load_datatypes [as alias]
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]
    args = _arg_parser().parse_args(argv)
    registry = Registry()
    registry.load_datatypes(root_dir=args.galaxy_root, config=args.datatypes_registry)
    request_path = args.request
    assert os.path.exists(request_path)
    with open(request_path) as f:
        request = json.load(f)
    upload_config = UploadConfig(request, registry)
    galaxy_json = _request_to_galaxy_json(upload_config, request)
    with open("galaxy.json", "w") as f:
        json.dump(galaxy_json, f)
Example 11: download_from_genomespace_file_browser
# Required import: from galaxy.datatypes.registry import Registry [as alias]
# Or: from galaxy.datatypes.registry.Registry import load_datatypes [as alias]
def download_from_genomespace_file_browser( json_parameter_file, genomespace_site ):
    json_params = json.loads( open( json_parameter_file, 'r' ).read() )
    datasource_params = json_params.get( 'param_dict' )
    username = datasource_params.get( "gs-username", None )
    token = datasource_params.get( "gs-token", None )
    assert None not in [ username, token ], "Missing GenomeSpace username or token."
    output_filename = datasource_params.get( "output", None )
    dataset_id = json_params['output_data'][0]['dataset_id']
    hda_id = json_params['output_data'][0]['hda_id']
    url_opener = get_cookie_opener( username, token )
    # load and set genomespace format ids to galaxy exts
    genomespace_site_dict = get_genomespace_site_urls()[ genomespace_site ]
    set_genomespace_format_identifiers( url_opener, genomespace_site_dict['dmServer'] )
    file_url_prefix = "fileUrl"
    file_type_prefix = "fileFormat"
    metadata_parameter_file = open( json_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'wb' )
    # setup datatypes registry for sniffing
    datatypes_registry = Registry()
    datatypes_registry.load_datatypes( root_dir = json_params[ 'job_config' ][ 'GALAXY_ROOT_DIR' ], config = json_params[ 'job_config' ][ 'GALAXY_DATATYPES_CONF_FILE' ] )
    file_numbers = []
    for name in datasource_params.keys():
        if name.startswith( file_url_prefix ):
            name = name[len( file_url_prefix ):]
            file_numbers.append( int( name ) )
    if not file_numbers:
        if output_filename:
            open( output_filename, 'wb' )  # erase contents of file
        raise Exception( "You must select at least one file to import into Galaxy." )
    file_numbers.sort()
    used_filenames = []
    for file_num in file_numbers:
        url_key = "%s%i" % ( file_url_prefix, file_num )
        download_url = datasource_params.get( url_key, None )
        if download_url is None:
            break
        filetype_key = "%s%i" % ( file_type_prefix, file_num )
        filetype_url = datasource_params.get( filetype_key, None )
        galaxy_ext = get_galaxy_ext_from_genomespace_format_url( url_opener, filetype_url )
        formatted_download_url = "%s?%s" % ( download_url, urllib.urlencode( [ ( 'dataformat', filetype_url ) ] ) )
        new_file_request = urllib2.Request( formatted_download_url )
        new_file_request.get_method = lambda: 'GET'
        target_download_url = url_opener.open( new_file_request )
        filename = None
        if 'Content-Disposition' in target_download_url.info():
            # If the response has Content-Disposition, try to get filename from it
            content_disposition = dict( map( lambda x: x.strip().split('=') if '=' in x else ( x.strip(), '' ), target_download_url.info()['Content-Disposition'].split( ';' ) ) )
            if 'filename' in content_disposition:
                filename = content_disposition[ 'filename' ].strip( "\"'" )
        if not filename:
            parsed_url = urlparse.urlparse( download_url )
            query_params = urlparse.parse_qs( parsed_url[4] )
            filename = urllib.unquote_plus( parsed_url[2].split( '/' )[-1] )
        if not filename:
            filename = download_url
        metadata_dict = None
        original_filename = filename
        if output_filename is None:
            filename = ''.join( c in VALID_CHARS and c or '-' for c in filename )
            while filename in used_filenames:
                filename = "-%s" % filename
            used_filenames.append( filename )
            output_filename = os.path.join( os.getcwd(), 'primary_%i_%s_visible_%s' % ( hda_id, filename, galaxy_ext ) )
            metadata_dict = dict( type = 'new_primary_dataset',
                                  base_dataset_id = dataset_id,
                                  ext = galaxy_ext,
                                  filename = output_filename,
                                  name = "GenomeSpace import on %s" % ( original_filename ) )
        else:
            if dataset_id is not None:
                metadata_dict = dict( type = 'dataset',
                                      dataset_id = dataset_id,
                                      ext = galaxy_ext,
                                      name = "GenomeSpace import on %s" % ( filename ) )
        output_file = open( output_filename, 'wb' )
        chunk_write( target_download_url, output_file )
        output_file.close()
        if ( galaxy_ext == AUTO_GALAXY_EXT or filetype_url == GENOMESPACE_FORMAT_IDENTIFIER_UNKNOWN ) and metadata_dict:
            # try to sniff datatype
            try:
                galaxy_ext = sniff.handle_uploaded_dataset_file( output_filename, datatypes_registry )
            except:
                # sniff failed
                galaxy_ext = original_filename.rsplit( '.', 1 )[-1]
            if galaxy_ext not in datatypes_registry.datatypes_by_extension:
                galaxy_ext = DEFAULT_GALAXY_EXT
            metadata_dict[ 'ext' ] = galaxy_ext
        output_filename = None  # only have one filename available
        # write out metadata info
        if metadata_dict:
            metadata_parameter_file.write( "%s\n" % json.dumps( metadata_dict ) )
    metadata_parameter_file.close()
    return True
Example 12: download_from_genomespace_importer
# Required import: from galaxy.datatypes.registry import Registry [as alias]
# Or: from galaxy.datatypes.registry.Registry import load_datatypes [as alias]
def download_from_genomespace_importer( username, token, json_parameter_file, genomespace_site ):
    json_params = simplejson.loads( open( json_parameter_file, 'r' ).read() )
    datasource_params = json_params.get( 'param_dict' )
    assert None not in [ username, token ], "Missing GenomeSpace username or token."
    output_filename = datasource_params.get( "output_file1", None )
    dataset_id = json_params['output_data'][0]['dataset_id']
    hda_id = json_params['output_data'][0]['hda_id']
    url_opener = get_cookie_opener( username, token )
    # load and set genomespace format ids to galaxy exts
    genomespace_site_dict = get_genomespace_site_urls()[ genomespace_site ]
    set_genomespace_format_identifiers( url_opener, genomespace_site_dict['dmServer'] )
    file_url_name = "URL"
    metadata_parameter_file = open( json_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'wb' )
    # setup datatypes registry for sniffing
    datatypes_registry = Registry()
    datatypes_registry.load_datatypes( root_dir = json_params[ 'job_config' ][ 'GALAXY_ROOT_DIR' ], config = json_params[ 'job_config' ][ 'GALAXY_DATATYPES_CONF_FILE' ] )
    url_param = datasource_params.get( file_url_name, None )
    for download_url in url_param.split( ',' ):
        using_temp_file = False
        parsed_url = urlparse.urlparse( download_url )
        query_params = urlparse.parse_qs( parsed_url[4] )
        # write file to disk
        new_file_request = urllib2.Request( download_url )
        new_file_request.get_method = lambda: 'GET'
        target_download_url = url_opener.open( new_file_request )
        filename = None
        if 'Content-Disposition' in target_download_url.info():
            content_disposition = dict( map( lambda x: x.strip().split('=') if '=' in x else ( x.strip(), '' ), target_download_url.info()['Content-Disposition'].split( ';' ) ) )
            if 'filename' in content_disposition:
                filename = content_disposition[ 'filename' ].strip( "\"'" )
        if not filename:
            parsed_url = urlparse.urlparse( download_url )
            query_params = urlparse.parse_qs( parsed_url[4] )
            filename = urllib.unquote_plus( parsed_url[2].split( '/' )[-1] )
        if output_filename is None:
            # need to use a temp file here, because we do not know the ext yet
            using_temp_file = True
            output_filename = tempfile.NamedTemporaryFile( prefix='tmp-genomespace-importer-' ).name
        output_file = open( output_filename, 'wb' )
        chunk_write( target_download_url, output_file )
        output_file.close()
        # determine file format
        file_type = None
        if 'dataformat' in query_params:  # this is a converted dataset
            file_type = query_params[ 'dataformat' ][0]
            file_type = get_galaxy_ext_from_genomespace_format_url( url_opener, file_type )
        else:
            try:
                # get and use GSMetadata object
                download_file_path = download_url.split( "%s/file/" % ( genomespace_site_dict['dmServer'] ), 1 )[-1]  # FIXME: This is a very bad way to get the path for determining metadata. There needs to be a way to query the API using the download URL to get to the metadata object.
                metadata_request = urllib2.Request( "%s/%s/filemetadata/%s" % ( genomespace_site_dict['dmServer'], GENOMESPACE_API_VERSION_STRING, download_file_path ) )
                metadata_request.get_method = lambda: 'GET'
                metadata_url = url_opener.open( metadata_request )
                file_metadata_dict = simplejson.loads( metadata_url.read() )
                metadata_url.close()
                file_type = file_metadata_dict.get( 'dataFormat', None )
                if file_type and file_type.get( 'url' ):
                    file_type = file_type.get( 'url' )
                    file_type = get_galaxy_ext_from_genomespace_format_url( url_opener, file_type, default = None )
            except:
                pass
        if file_type is None:
            # try to sniff datatype
            try:
                file_type = sniff.handle_uploaded_dataset_file( output_filename, datatypes_registry )
            except:
                pass  # sniff failed
        if file_type is None and '.' in parsed_url[2]:
            # still no known datatype, fall back to using extension
            file_type = parsed_url[2].rsplit( '.', 1 )[-1]
            file_type = GENOMESPACE_EXT_TO_GALAXY_EXT.get( file_type, file_type )
        if file_type is None:
            # use default extension (e.g. 'data')
            file_type = DEFAULT_GALAXY_EXT
        # save json info for single primary dataset
        if dataset_id is not None:
            metadata_parameter_file.write( "%s\n" % simplejson.dumps( dict( type = 'dataset',
                                                                            dataset_id = dataset_id,
                                                                            ext = file_type,
                                                                            name = "GenomeSpace importer on %s" % ( filename ) ) ) )
        # if using tmp file, move the file to the new file path dir to get scooped up later
        if using_temp_file:
            shutil.move( output_filename, os.path.join( datasource_params['__new_file_path__'], 'primary_%i_output%s_visible_%s' % ( hda_id, ''.join( c in VALID_CHARS and c or '-' for c in filename ), file_type ) ) )
            dataset_id = None  # only one primary dataset available
        output_filename = None  # only have one filename available
    metadata_parameter_file.close()
    return True
Example 13: Registry()
# Required import: from galaxy.datatypes.registry import Registry [as alias]
# Or: from galaxy.datatypes.registry.Registry import load_datatypes [as alias]
from sqlalchemy.exc import * # noqa
from sqlalchemy.sql import label # noqa
sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, 'lib')))
from galaxy.datatypes.registry import Registry
from galaxy.model import * # noqa
from galaxy.model import set_datatypes_registry # More explicit than `*` import
from galaxy.model.mapping import init
from galaxy.model.orm.scripts import get_config
if sys.version_info > (3,):
long = int
registry = Registry()
registry.load_datatypes()
set_datatypes_registry(registry)
db_url = get_config(sys.argv)['db_url']
sa_session = init('/tmp/', db_url).context
# Helper function for debugging sqlalchemy queries...
# http://stackoverflow.com/questions/5631078/sqlalchemy-print-the-actual-query
def printquery(statement, bind=None):
    """
    Print a query, with values filled in,
    for debugging purposes *only*.
    For security, you should always separate queries from their values;
    please also note that this function is quite slow.
    """
    import sqlalchemy.orm