当前位置: 首页>>代码示例>>Python>>正文


Python programs.docker_call函数代码示例

本文整理汇总了 Python 中 toil_scripts.lib.programs.docker_call 函数的典型用法代码示例。如果您正苦于以下问题:Python docker_call 函数的具体用法?Python docker_call 怎么用?Python docker_call 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了docker_call函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: create_reference_index

def create_reference_index(job, ref_id):
    """
    Uses Samtools to create reference index file (.fasta.fai)

    The reference is first read from the file store using ``ref_id`` as given. If that
    read fails, ``ref_id`` is treated as a mapping and the read is retried with
    ``ref_id['ref.fa']``; a failure of the retry is reported on stderr and re-raised.

    :param job: Job instance whose fileStore is used for all file transfers
    :param ref_id: The fileStore ID of the reference, or a mapping holding it under 'ref.fa'
    :return: The value returned by writeGlobalFile for the generated 'ref.fa.fai'
    """
    work_dir = job.fileStore.getLocalTempDir()
    ref_path = os.path.join(work_dir, 'ref.fa')
    # Retrieve file path to reference
    try:
        job.fileStore.readGlobalFile(ref_id, ref_path)
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and SystemExit
        # must propagate instead of silently starting a second read attempt.
        sys.stderr.write("Failed when reading global file %s to %s. Retrying with dict index." % (ref_id,
                                                                                                  ref_path))

        try:
            job.fileStore.readGlobalFile(ref_id['ref.fa'], ref_path)
        except Exception:
            sys.stderr.write("Reading %s on retry failed." % ref_id['ref.fa'])
            raise

    # Call: Samtools
    command = ['faidx', 'ref.fa']
    docker_call(work_dir=work_dir, parameters=command,
                tool='quay.io/ucsc_cgl/samtools:0.1.19--dd5ac549b95eb3e5d166a5e310417ef13651994e',
                inputs=['ref.fa'],
                outputs={'ref.fa.fai': None})
    output = os.path.join(work_dir, 'ref.fa.fai')
    assert os.path.exists(output)
    # Write to fileStore
    return job.fileStore.writeGlobalFile(output)
开发者ID:ImRichardLiu,项目名称:toil-scripts,代码行数:30,代码来源:gatk_preprocessing.py

示例2: start

    def start(self, fileStore):
        """
        Launch the Spark master container and the Hadoop master container.

        The output of ``hostname -f`` (minus its last character) is stored on
        self.IP, and the output of each docker_call (minus its last character)
        is stored on self.sparkContainerID and self.hdfsContainerID.

        :param fileStore: Unused
        :return: The value stored in self.IP
        """
        # Drop the final character of the command output (presumably the trailing newline)
        hostname_output = check_output(["hostname", "-f"])
        self.IP = hostname_output[:-1]

        _log.info("Started Spark master container.")
        spark_docker_options = [
            "--net=host",
            "-d",
            "-v", "/mnt/ephemeral/:/ephemeral/:rw",
            "-e", "SPARK_MASTER_IP=" + self.IP,
            "-e", "SPARK_LOCAL_DIRS=/ephemeral/spark/local",
            "-e", "SPARK_WORKER_DIR=/ephemeral/spark/work",
        ]
        spark_output = docker_call(tool="quay.io/ucsc_cgl/apache-spark-master:1.5.2",
                                   docker_parameters=spark_docker_options,
                                   rm=False,
                                   sudo=self.sudo,
                                   check_output=True,
                                   mock=False)
        self.sparkContainerID = spark_output[:-1]

        _log.info("Started HDFS Datanode.")
        hdfs_docker_options = ["--net=host", "-d"]
        hdfs_output = docker_call(tool="quay.io/ucsc_cgl/apache-hadoop-master:2.6.2",
                                  docker_parameters=hdfs_docker_options,
                                  parameters=[self.IP],
                                  rm=False,
                                  sudo=self.sudo,
                                  check_output=True,
                                  mock=False)
        self.hdfsContainerID = hdfs_output[:-1]

        return self.IP
开发者ID:jsteward2930,项目名称:toil-scripts,代码行数:31,代码来源:spawn_cluster.py

示例3: index

def index(job, shared_ids, input_args):
    """
    Index sample bam using samtools, calls haplotypeCaller.

    :param job: Job instance
    :param shared_ids: dictionary of shared file promises; receives the written
                       index under the key 'toil.bam.bai'
    :param input_args: dictionary of input arguments; the 'sudo' and 'cpu_count'
                       entries are read here
    """
    work_dir = job.fileStore.getLocalTempDir()
    # Called for its effect only (presumably staging 'toil.bam' into work_dir --
    # confirm against return_input_paths); the returned path is not needed here.
    return_input_paths(job, work_dir, shared_ids, 'toil.bam')
    output_path = os.path.join(work_dir, 'toil.bam.bai')
    # Call: index the normal.bam
    parameters = ['index', 'toil.bam']
    inputs = ['toil.bam']
    outputs = {'toil.bam.bai': None}
    docker_call(work_dir=work_dir,
                parameters=parameters,
                tool='quay.io/ucsc_cgl/samtools',
                inputs=inputs,
                outputs=outputs,
                sudo=input_args['sudo'])
    # Update FileStore and call child
    shared_ids['toil.bam.bai'] = job.fileStore.writeGlobalFile(output_path)
    job.addChildJobFn(haplotype_caller, shared_ids, input_args, cores=input_args['cpu_count'])
开发者ID:jsteward2930,项目名称:toil-scripts,代码行数:26,代码来源:germline.py

示例4: print_reads

def print_reads(job, cores, table, indel_bam, indel_bai, ref, ref_dict, fai, mem):
    """
    Runs GATK PrintReads to produce a BAM with recalibrated base quality scores

    :param JobFunctionWrappingJob job: passed automatically by Toil
    :param int cores: Maximum number of cores on host node
    :param str table: Recalibration table FileStoreID
    :param str indel_bam: Indel interval FileStoreID
    :param str indel_bai: Bam Index FileStoreID
    :param str ref: Reference genome FileStoreID
    :param str ref_dict: Reference dictionary FileStoreID
    :param str fai: Reference index FileStoreID
    :param str mem: Memory value to be passed to children. Needed for CI tests
    :return: FileStoreID for the processed bam
    :rtype: str
    """
    work_dir = job.fileStore.getLocalTempDir()
    # (FileStoreID, local file name) pairs, staged into work_dir in this order
    staged_inputs = [(ref, 'ref.fasta'),
                     (fai, 'ref.fasta.fai'),
                     (ref_dict, 'ref.dict'),
                     (table, 'sample.recal.table'),
                     (indel_bam, 'sample.indel.bam'),
                     (indel_bai, 'sample.indel.bai')]
    for file_store_id, local_name in staged_inputs:
        local_path = os.path.join(work_dir, local_name)
        job.fileStore.readGlobalFile(file_store_id, local_path)
    # Call: GATK -- PrintReads
    gatk_args = ['-T', 'PrintReads']
    gatk_args += ['-nct', str(cores)]
    gatk_args += ['-R', '/data/ref.fasta']
    gatk_args += ['--emit_original_quals']
    gatk_args += ['-I', '/data/sample.indel.bam']
    gatk_args += ['-BQSR', '/data/sample.recal.table']
    gatk_args += ['-o', '/data/sample.bqsr.bam']
    java_env = {'JAVA_OPTS': '-Xmx{}'.format(mem)}
    docker_call(tool='quay.io/ucsc_cgl/gatk:3.4--dd5ac549b95eb3e5d166a5e310417ef13651994e',
                work_dir=work_dir,
                parameters=gatk_args,
                env=java_env)
    # Write output to file store
    recalibrated_bam = os.path.join(work_dir, 'sample.bqsr.bam')
    return job.fileStore.writeGlobalFile(recalibrated_bam)
开发者ID:jsteward2930,项目名称:toil-scripts,代码行数:35,代码来源:exome_variant_pipeline.py

示例5: base_recalibration

def base_recalibration(job, shared_ids, input_args):
    """
    Builds the recalibration table used for Base Quality Score Recalibration

    :param job: Job instance
    :param shared_ids: dictionary-like store of shared files; receives the written
                       table under the key 'sample.recal.table'
    :param input_args: object whose cpu_count and memory attributes are read here
    """
    work_dir = job.fileStore.getLocalTempDir()
    staged_files = ['ref.fa', 'sample.indel.bam', 'dbsnp.vcf', 'ref.fa.fai',
                    'ref.dict', 'sample.indel.bam.bai']
    # Retrieve input file paths
    return_input_paths(job, work_dir, shared_ids, *staged_files)
    # Call: GATK -- BaseRecalibrator
    gatk_args = ['-U', 'ALLOW_SEQ_DICT_INCOMPATIBILITY']  # RISKY! (?) See #189
    gatk_args.extend(['-T', 'BaseRecalibrator'])
    gatk_args.extend(['-nct', str(input_args.cpu_count)])
    gatk_args.extend(['-R', 'ref.fa'])
    gatk_args.extend(['-I', 'sample.indel.bam'])
    gatk_args.extend(['-knownSites', 'dbsnp.vcf'])
    gatk_args.extend(['-o', 'sample.recal.table'])
    java_env = {'JAVA_OPTS': '-Xmx%sg' % input_args.memory}
    docker_call(tool='quay.io/ucsc_cgl/gatk:3.5--dba6dae49156168a909c43330350c6161dc7ecc2',
                work_dir=work_dir,
                parameters=gatk_args,
                inputs=staged_files,
                outputs={'sample.recal.table': None},
                env=java_env)
    # Write to fileStore
    table_path = os.path.join(work_dir, 'sample.recal.table')
    shared_ids['sample.recal.table'] = job.fileStore.writeGlobalFile(table_path)
    job.addChildJobFn(print_reads, shared_ids, input_args, cores=input_args.cpu_count)
开发者ID:ImRichardLiu,项目名称:toil-scripts,代码行数:32,代码来源:gatk_preprocessing.py

示例6: mark_dups_sample

def mark_dups_sample(job, shared_ids, input_args):
    """
    Runs picardtools MarkDuplicates on 'sample.sorted.bam'

    The resulting bam and its index are written to the file store and recorded in
    shared_ids under 'sample.mkdups.bam' and 'sample.mkdups.bam.bai'.

    :param job: Job instance
    :param shared_ids: dictionary-like store of shared files
    :param input_args: object whose memory and cpu_count attributes are read here
    """
    work_dir = job.fileStore.getLocalTempDir()
    # Retrieve file path
    read_from_filestore(job, work_dir, shared_ids, 'sample.sorted.bam')
    marked_bam = os.path.join(work_dir, 'sample.mkdups.bam')
    # Call: picardtools
    picard_args = ['MarkDuplicates']
    picard_args.append('INPUT=sample.sorted.bam')
    picard_args.append('OUTPUT=sample.mkdups.bam')
    picard_args.append('METRICS_FILE=metrics.txt')
    picard_args.append('ASSUME_SORTED=true')
    picard_args.append('CREATE_INDEX=true')
    java_env = {'JAVA_OPTS': '-Xmx%sg' % input_args.memory}
    expected_outputs = {'sample.mkdups.bam': None, 'sample.mkdups.bai': None}
    docker_call(work_dir=work_dir,
                parameters=picard_args,
                env=java_env,
                tool='quay.io/ucsc_cgl/picardtools:1.95--dd5ac549b95eb3e5d166a5e310417ef13651994e',
                inputs=['sample.sorted.bam'],
                outputs=expected_outputs)
    shared_ids['sample.mkdups.bam'] = job.fileStore.writeGlobalFile(marked_bam)

    # picard writes the index for file.bam at file.bai, not file.bam.bai
    _move_bai(marked_bam)
    marked_bai = marked_bam + ".bai"
    shared_ids['sample.mkdups.bam.bai'] = job.fileStore.writeGlobalFile(marked_bai)
    job.addChildJobFn(realigner_target_creator, shared_ids, input_args, cores=input_args.cpu_count)
开发者ID:ImRichardLiu,项目名称:toil-scripts,代码行数:26,代码来源:gatk_preprocessing.py

示例7: run_rsem_postprocess

def run_rsem_postprocess(job, uuid, rsem_gene_id, rsem_isoform_id):
    """
    Post-processes RSEM output and packages the results into two tarballs.

    Two containers are run in work_dir: 'jvivian/rsem_postprocess' followed by
    'jvivian/gencode_hugo_mapping'. The count tables and the renamed RSEM results
    are archived as 'rsem.tar.gz'; the '.hugo' variants of the count tables are
    archived as 'rsem_hugo.tar.gz'.

    :param JobFunctionWrappingJob job: passed automatically by Toil
    :param str uuid: UUID to mark the samples with
    :param str rsem_gene_id: FileStoreID of rsem_gene_ids
    :param str rsem_isoform_id: FileStoreID of rsem_isoform_ids
    :return: FileStoreIDs of the RSEM tarball and of the HUGO tarball
    :rtype: tuple
    """
    work_dir = job.fileStore.getLocalTempDir()

    def in_work_dir(name):
        # Absolute location of a file inside this job's working directory
        return os.path.join(work_dir, name)

    # I/O
    job.fileStore.readGlobalFile(rsem_gene_id, in_work_dir('rsem_gene.tab'))
    job.fileStore.readGlobalFile(rsem_isoform_id, in_work_dir('rsem_isoform.tab'))
    # Convert RSEM files into individual .tab files.
    docker_call(tool='jvivian/rsem_postprocess', parameters=[uuid], work_dir=work_dir)
    os.rename(in_work_dir('rsem_gene.tab'), in_work_dir('rsem_genes.results'))
    os.rename(in_work_dir('rsem_isoform.tab'), in_work_dir('rsem_isoforms.results'))
    output_files = ['rsem.genes.norm_counts.tab', 'rsem.genes.raw_counts.tab', 'rsem.isoform.norm_counts.tab',
                    'rsem.isoform.raw_counts.tab', 'rsem_genes.results', 'rsem_isoforms.results']
    # Perform HUGO gene / isoform name mapping
    genes = []
    isoforms = []
    for name in output_files:
        if 'rsem.genes' in name:
            genes.append(name)
        if 'rsem.isoform' in name:
            isoforms.append(name)
    command = ['-g'] + genes + ['-i'] + isoforms
    docker_call(tool='jvivian/gencode_hugo_mapping', parameters=command, work_dir=work_dir)
    # Each mapped file gets '.hugo' inserted before its extension
    hugo_files = []
    for name in genes + isoforms:
        stem, extension = os.path.splitext(name)
        hugo_files.append(stem + '.hugo' + extension)
    # Create tarballs for outputs
    tarball_files('rsem.tar.gz', file_paths=[in_work_dir(name) for name in output_files], output_dir=work_dir)
    tarball_files('rsem_hugo.tar.gz', [in_work_dir(name) for name in hugo_files], output_dir=work_dir)
    rsem_id = job.fileStore.writeGlobalFile(in_work_dir('rsem.tar.gz'))
    hugo_id = job.fileStore.writeGlobalFile(in_work_dir('rsem_hugo.tar.gz'))
    return rsem_id, hugo_id
开发者ID:jsteward2930,项目名称:toil-scripts,代码行数:35,代码来源:quantifiers.py

示例8: realigner_target_creator

def realigner_target_creator(job, shared_ids, input_args):
    """
    Runs GATK RealignerTargetCreator to build the 'sample.intervals' file

    :param job: Job instance
    :param shared_ids: dictionary-like store of shared files; receives the written
                       intervals file under the key 'sample.intervals'
    :param input_args: object whose cpu_count and memory attributes are read here
    """
    work_dir = job.fileStore.getLocalTempDir()
    staged_files = ['ref.fa', 'sample.mkdups.bam', 'ref.fa.fai', 'ref.dict',
                    'sample.mkdups.bam.bai', 'phase.vcf', 'mills.vcf']
    # Retrieve input file paths
    read_from_filestore(job, work_dir, shared_ids, *staged_files)

    # Call: GATK -- RealignerTargetCreator
    gatk_args = ['-U', 'ALLOW_SEQ_DICT_INCOMPATIBILITY']  # RISKY! (?) See #189
    gatk_args += ['-T', 'RealignerTargetCreator']
    gatk_args += ['-nt', str(input_args.cpu_count)]
    gatk_args += ['-R', 'ref.fa']
    gatk_args += ['-I', 'sample.mkdups.bam']
    gatk_args += ['-known', 'phase.vcf']
    gatk_args += ['-known', 'mills.vcf']
    gatk_args += ['--downsampling_type', 'NONE']
    gatk_args += ['-o', 'sample.intervals']

    java_env = {'JAVA_OPTS': '-Xmx%sg' % input_args.memory}
    docker_call(work_dir=work_dir,
                parameters=gatk_args,
                tool='quay.io/ucsc_cgl/gatk:3.5--dba6dae49156168a909c43330350c6161dc7ecc2',
                inputs=staged_files,
                outputs={'sample.intervals': None},
                env=java_env)
    # Write to fileStore and hand over to the next step
    intervals_path = os.path.join(work_dir, 'sample.intervals')
    shared_ids['sample.intervals'] = job.fileStore.writeGlobalFile(intervals_path)
    job.addChildJobFn(indel_realignment, shared_ids, input_args)
开发者ID:ImRichardLiu,项目名称:toil-scripts,代码行数:34,代码来源:gatk_preprocessing.py

示例9: call_adam

def call_adam(master_ip, arguments, memory=None, override_parameters=None):
    """
    Runs the ADAM container. ADAM lives at https://github.com/bigdatagenomics/adam.

    :param master_ip: The Spark leader IP address.
    :param arguments: Arguments to pass to ADAM.
    :param memory: Gigabytes of memory to provision for Spark driver/worker.
    :param override_parameters: Parameters passed by the user, that override our defaults.

    :type master_ip: MasterAddress
    :type arguments: list of string
    :type memory: int or None
    :type override_parameters: list of string or None
    """
    # set max result size to unlimited, see #177
    spark_defaults = ["--conf", "spark.driver.maxResultSize=0"]

    container_options = master_ip.docker_parameters(["--net=host"])
    adam_parameters = _make_parameters(master_ip,
                                       spark_defaults,
                                       memory,
                                       arguments,
                                       override_parameters)
    docker_call(rm=False,
                tool="quay.io/ucsc_cgl/adam:962-ehf--6e7085f8cac4b9a927dc9fb06b48007957256b80",
                docker_parameters=container_options,
                parameters=adam_parameters,
                mock=False)
开发者ID:ImRichardLiu,项目名称:toil-scripts,代码行数:25,代码来源:spark_tools.py

示例10: run_kallisto

def run_kallisto(job, cores, r1_id, r2_id, kallisto_index_url):
    """
    Quantifies RNA with Kallisto and returns the tarred output

    :param JobFunctionWrappingJob job: passed automatically by Toil
    :param int cores: Number of cores to run Kallisto with
    :param str r1_id: FileStoreID of fastq (pair 1)
    :param str r2_id: FileStoreID of fastq (pair 2 if applicable, otherwise pass None for single-end)
    :param str kallisto_index_url: URL the Kallisto index file is downloaded from
    :return: FileStoreID from Kallisto output
    :rtype: str
    """
    work_dir = job.fileStore.getLocalTempDir()
    download_url(url=kallisto_index_url, name='kallisto_hg38.idx', work_dir=work_dir)
    kallisto_args = ['quant']
    kallisto_args += ['-i', '/data/kallisto_hg38.idx']
    kallisto_args += ['-t', str(cores)]
    kallisto_args += ['-o', '/data/']
    kallisto_args += ['-b', '100']
    # Retrieve files: the first fastq is staged in both the paired and single-end case
    job.fileStore.readGlobalFile(r1_id, os.path.join(work_dir, 'R1_cutadapt.fastq'))
    paired = r1_id and r2_id
    if paired:
        job.fileStore.readGlobalFile(r2_id, os.path.join(work_dir, 'R2_cutadapt.fastq'))
        kallisto_args += ['/data/R1_cutadapt.fastq', '/data/R2_cutadapt.fastq']
    else:
        kallisto_args += ['--single', '-l', '200', '-s', '15', '/data/R1_cutadapt.fastq']

    # Call: Kallisto
    docker_call(tool='quay.io/ucsc_cgl/kallisto:0.42.4--35ac87df5b21a8e8e8d159f26864ac1e1db8cf86',
                work_dir=work_dir,
                parameters=kallisto_args)
    # Tar output files together and store in fileStore
    result_names = ['run_info.json', 'abundance.tsv', 'abundance.h5']
    result_paths = [os.path.join(work_dir, name) for name in result_names]
    tarball_files(tar_name='kallisto.tar.gz', file_paths=result_paths, output_dir=work_dir)
    tarball_path = os.path.join(work_dir, 'kallisto.tar.gz')
    return job.fileStore.writeGlobalFile(tarball_path)
开发者ID:jsteward2930,项目名称:toil-scripts,代码行数:35,代码来源:quantifiers.py

示例11: base_recalibration

def base_recalibration(job, cores, indel_bam, indel_bai, ref, ref_dict, fai, dbsnp, mem):
    """
    Builds the recal table used in Base Quality Score Recalibration, then schedules print_reads

    :param JobFunctionWrappingJob job: passed automatically by Toil
    :param int cores: Maximum number of cores on a worker node
    :param str indel_bam: Indel interval FileStoreID
    :param str indel_bai: Bam Index FileStoreID
    :param str ref: Reference genome FileStoreID
    :param str ref_dict: Reference dictionary FileStoreID
    :param str fai: Reference index FileStoreID
    :param str dbsnp: DBSNP VCF FileStoreID
    :param str mem: Memory value to be passed to children. Needed for CI tests
    :return: The rv() of the print_reads child job added here
    """
    work_dir = job.fileStore.getLocalTempDir()
    # (FileStoreID, local file name) pairs, staged into work_dir in this order
    staged_inputs = [(ref, 'ref.fasta'),
                     (fai, 'ref.fasta.fai'),
                     (ref_dict, 'ref.dict'),
                     (indel_bam, 'sample.indel.bam'),
                     (indel_bai, 'sample.indel.bai'),
                     (dbsnp, 'dbsnp.vcf')]
    for file_store_id, local_name in staged_inputs:
        local_path = os.path.join(work_dir, local_name)
        job.fileStore.readGlobalFile(file_store_id, local_path)
    # Call: GATK -- BaseRecalibrator
    gatk_args = ['-T', 'BaseRecalibrator']
    gatk_args += ['-nct', str(cores)]
    gatk_args += ['-R', '/data/ref.fasta']
    gatk_args += ['-I', '/data/sample.indel.bam']
    gatk_args += ['-knownSites', '/data/dbsnp.vcf']
    gatk_args += ['-o', '/data/sample.recal.table']
    java_env = {'JAVA_OPTS': '-Xmx{}'.format(mem)}
    docker_call(tool='quay.io/ucsc_cgl/gatk:3.4--dd5ac549b95eb3e5d166a5e310417ef13651994e',
                work_dir=work_dir,
                parameters=gatk_args,
                env=java_env)
    # Write output to file store
    table_path = os.path.join(work_dir, 'sample.recal.table')
    table = job.fileStore.writeGlobalFile(table_path)
    child = job.addChildJobFn(print_reads, cores, table, indel_bam, indel_bai, ref, ref_dict, fai, mem,
                              cores=cores, memory=mem, disk='25G')
    return child.rv()
开发者ID:jsteward2930,项目名称:toil-scripts,代码行数:34,代码来源:exome_variant_pipeline.py

示例12: call_conductor

def call_conductor(master_ip, src, dst, memory=None, override_parameters=None):
    """
    Runs the Conductor container to copy files between S3 and HDFS and vice versa.
    Conductor lives at https://github.com/BD2KGenomics/conductor.

    :param master_ip: The Spark leader IP address.
    :param src: URL of file to copy.
    :param dst: URL of location to copy file to.
    :param memory: Gigabytes of memory to provision for Spark driver/worker.
    :param override_parameters: Parameters passed by the user, that override our defaults.

    :type master_ip: MasterAddress
    :type src: string
    :type dst: string
    :type memory: int or None
    :type override_parameters: list of string or None
    """
    copy_arguments = ["--", "-C", src, dst]
    # no conductor specific spark configuration
    spark_defaults = []

    container_options = master_ip.docker_parameters(["--net=host"])
    conductor_parameters = _make_parameters(master_ip,
                                            spark_defaults,
                                            memory,
                                            copy_arguments,
                                            override_parameters)
    docker_call(rm=False,
                tool="quay.io/ucsc_cgl/conductor",
                docker_parameters=container_options,
                parameters=conductor_parameters,
                mock=False)
开发者ID:ImRichardLiu,项目名称:toil-scripts,代码行数:29,代码来源:spark_tools.py

示例13: create_reference_dict_hc

def create_reference_dict_hc(job, shared_ids, input_args):
    """
    Uses Picardtools to create sequence dictionary for reference genome.
    Calls next step in pipeline - spawn batch jobs

    :param job: Job instance
    :param shared_ids: dictionary of shared file promises; receives the written
                       dictionary under the key 'ref.dict'
    :param input_args: input arguments; the memory attribute is read here
    """
    # Unpack convenience variables for job
    work_dir = job.fileStore.getLocalTempDir()
    # Called for its effect only (presumably staging 'ref.fa' into work_dir --
    # confirm against return_input_paths); the returned path is not needed here.
    return_input_paths(job, work_dir, shared_ids, 'ref.fa')
    # Call: picardtools
    picard_output = os.path.join(work_dir, 'ref.dict')
    command = ['CreateSequenceDictionary', 'R=ref.fa', 'O=ref.dict']
    inputs = ['ref.fa']
    # NOTE(review): this passes the joined path as the output key, whereas the
    # sibling jobs pass the bare file name -- confirm docker_call accepts both.
    outputs = {picard_output: None}
    docker_call(work_dir=work_dir,
                env={'JAVA_OPTS': '-Xmx%sg' % input_args.memory},
                parameters=command,
                tool='quay.io/ucsc_cgl/picardtools',
                inputs=inputs,
                outputs=outputs)
    # Update fileStore for output
    shared_ids['ref.dict'] = job.fileStore.writeGlobalFile(picard_output)
    job.addChildJobFn(spawn_batch_variant_calling, shared_ids, input_args)
开发者ID:ImRichardLiu,项目名称:toil-scripts,代码行数:28,代码来源:germline.py

示例14: create_reference_index_hc

def create_reference_index_hc(job, shared_ids, input_args):
    """
    Uses samtools to create reference index file in working directory,
    spawns next job in pipeline - create reference dictionary

    :param job: Job instance
    :param shared_ids: dictionary of shared file promises; receives the written
                       index under the key 'ref.fa.fai'
    :param input_args: input arguments, passed through unchanged to the child job
    """
    # Unpack convenience variables for job
    work_dir = job.fileStore.getLocalTempDir()
    # Called for its effect only (presumably staging 'ref.fa' into work_dir --
    # confirm against return_input_paths); the returned value is not needed here.
    return_input_paths(job, work_dir, shared_ids, 'ref.fa')
    faidx_output = os.path.join(work_dir, 'ref.fa.fai')
    # Call: Samtools
    faidx_command = ['faidx', 'ref.fa']
    # Declare inputs as a list of file names, as the other docker_call sites do,
    # rather than forwarding whatever return_input_paths happened to return.
    inputs = ['ref.fa']
    outputs = {'ref.fa.fai': None}
    docker_call(work_dir=work_dir,
                parameters=faidx_command,
                tool='quay.io/ucsc_cgl/samtools',
                inputs=inputs,
                outputs=outputs)
    # Update fileStore for output
    shared_ids['ref.fa.fai'] = job.fileStore.writeGlobalFile(faidx_output)
    job.addChildJobFn(create_reference_dict_hc, shared_ids, input_args)
开发者ID:ImRichardLiu,项目名称:toil-scripts,代码行数:27,代码来源:germline.py

示例15: run_star

def run_star(job, cores, r1_id, r2_id, star_index_url, wiggle=False):
    """
    Performs alignment of fastqs to bam via STAR

    :param JobFunctionWrappingJob job: passed automatically by Toil
    :param int cores: Number of cores to run star with
    :param str r1_id: FileStoreID of fastq (pair 1)
    :param str r2_id: FileStoreID of fastq (pair 2 if applicable, else pass None)
    :param str star_index_url: STAR index tarball
    :param bool wiggle: If True, will output a wiggle file and return it
    :return: FileStoreIDs of the transcriptome bam and the sorted bam, followed by
             the wiggle file's FileStoreID when wiggle is True
    :rtype: tuple
    """
    work_dir = job.fileStore.getLocalTempDir()
    download_url(url=star_index_url, name='starIndex.tar.gz', work_dir=work_dir)
    index_tarball = os.path.join(work_dir, 'starIndex.tar.gz')
    subprocess.check_call(['tar', '-xvf', index_tarball, '-C', work_dir])
    os.remove(index_tarball)
    # Determine tarball structure - star index contents are either in a subdir or in the tarball itself
    extracted = os.listdir(work_dir)
    star_index = os.path.join('/data', extracted[0]) if len(extracted) == 1 else '/data'
    # Parameter handling for paired / single-end data
    parameters = ['--runThreadN', str(cores),
                  '--genomeDir', star_index,
                  '--outFileNamePrefix', 'rna',
                  '--outSAMtype', 'BAM', 'SortedByCoordinate',
                  '--outSAMunmapped', 'Within',
                  '--quantMode', 'TranscriptomeSAM',
                  '--outSAMattributes', 'NH', 'HI', 'AS', 'NM', 'MD',
                  '--outFilterType', 'BySJout',
                  '--outFilterMultimapNmax', '20',
                  '--outFilterMismatchNmax', '999',
                  '--outFilterMismatchNoverReadLmax', '0.04',
                  '--alignIntronMin', '20',
                  '--alignIntronMax', '1000000',
                  '--alignMatesGapMax', '1000000',
                  '--alignSJoverhangMin', '8',
                  '--alignSJDBoverhangMin', '1',
                  '--sjdbScore', '1']
    if wiggle:
        parameters.extend(['--outWigType', 'bedGraph',
                           '--outWigStrand', 'Unstranded',
                           '--outWigReferencesPrefix', 'chr'])
    # The first fastq must be staged under the same name STAR is told to read
    # ('/data/R1.fastq'); the single-end path previously staged it under a
    # different name, leaving STAR pointed at a file that was never written.
    job.fileStore.readGlobalFile(r1_id, os.path.join(work_dir, 'R1.fastq'))
    if r1_id and r2_id:
        job.fileStore.readGlobalFile(r2_id, os.path.join(work_dir, 'R2.fastq'))
        parameters.extend(['--readFilesIn', '/data/R1.fastq', '/data/R2.fastq'])
    else:
        parameters.extend(['--readFilesIn', '/data/R1.fastq'])
    # Call: STAR Mapping
    docker_call(tool='quay.io/ucsc_cgl/star:2.4.2a--bcbd5122b69ff6ac4ef61958e47bde94001cfe80',
                work_dir=work_dir, parameters=parameters)
    # Write to fileStore
    transcriptome_id = job.fileStore.writeGlobalFile(os.path.join(work_dir, 'rnaAligned.toTranscriptome.out.bam'))
    sorted_id = job.fileStore.writeGlobalFile(os.path.join(work_dir, 'rnaAligned.sortedByCoord.out.bam'))
    if wiggle:
        wiggle_id = job.fileStore.writeGlobalFile(os.path.join(work_dir, 'rnaSignal.UniqueMultiple.str1.out.bg'))
        return transcriptome_id, sorted_id, wiggle_id
    return transcriptome_id, sorted_id
开发者ID:ImRichardLiu,项目名称:toil-scripts,代码行数:59,代码来源:aligners.py


注:本文中的toil_scripts.lib.programs.docker_call函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。