本文整理汇总了Java中htsjdk.variant.vcf.VCFFileReader类的典型用法代码示例。如果您正苦于以下问题:Java VCFFileReader类的具体用法?Java VCFFileReader怎么用?Java VCFFileReader使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
VCFFileReader类属于htsjdk.variant.vcf包,在下文中一共展示了VCFFileReader类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: processVCF
import htsjdk.variant.vcf.VCFFileReader; //导入依赖的package包/类
/**
 * Opens a single VCF and hands it to the eligibility checker.
 *
 * <p>The report type is derived from {@code isGermline}. The first genotype sample listed in the
 * VCF header is used as the sample name. If htsjdk raises a {@link TribbleException} while the
 * file is being opened or processed, the error is logged and an empty collection is returned.
 *
 * @param patient     patient identifier, passed through to {@code eligibility.processVCF}
 * @param isGermline  {@code true} selects GERMLINE_MUTATION, {@code false} selects SOMATIC_MUTATION
 * @param vcf         the VCF file to read; opened with the reader's boolean flag set to {@code true}
 *                    (htsjdk's requireIndex argument)
 * @param eligibility the checker that produces the reports
 * @return the reports produced by {@code eligibility}, or an empty list on a TribbleException
 */
private static Collection<EligibilityReport> processVCF(final String patient, final boolean isGermline, final File vcf,
        final BachelorEligibility eligibility) {
    final EligibilityReport.ReportType type;
    if (isGermline) {
        type = EligibilityReport.ReportType.GERMLINE_MUTATION;
    } else {
        type = EligibilityReport.ReportType.SOMATIC_MUTATION;
    }

    LOGGER.info("process {} vcf: {}", type, vcf.getPath());

    try (final VCFFileReader reader = new VCFFileReader(vcf, true)) {
        // TODO: always correct? germline has R,T somatic has just T
        final String sample = reader.getFileHeader().getGenotypeSamples().get(0);
        return eligibility.processVCF(patient, sample, type, reader);
    } catch (final TribbleException tribbleFailure) {
        LOGGER.error("error with VCF file {}: {}", vcf.getPath(), tribbleFailure.getMessage());
        return Collections.emptyList();
    }
}
示例2: processVCF
import htsjdk.variant.vcf.VCFFileReader; //导入依赖的package包/类
/**
 * Collects eligibility reports for every variant overlapping one of the regions in {@code querySet}.
 *
 * <p>For each region the reader is queried by chromosome and gene start/end, and each returned
 * variant is passed to {@code processVariant}. The reader itself is not closed here; it remains
 * owned by the caller.
 *
 * @param patient patient identifier, forwarded to {@code processVariant}
 * @param sample  sample name, forwarded to {@code processVariant}
 * @param type    report type, forwarded to {@code processVariant}
 * @param reader  an open VCF reader that supports region queries
 * @return all reports produced for the queried variants, in query order
 */
@NotNull
Collection<EligibilityReport> processVCF(final String patient, final String sample, final EligibilityReport.ReportType type,
        final VCFFileReader reader) {
    final List<EligibilityReport> results = Lists.newArrayList();
    for (final HmfGenomeRegion region : querySet.values()) {
        // try-with-resources releases the query iterator even when processVariant throws;
        // previously close() was only reached on the happy path.
        try (final CloseableIterator<VariantContext> query =
                     reader.query(region.chromosome(), (int) region.geneStart(), (int) region.geneEnd())) {
            while (query.hasNext()) {
                final VariantContext variant = query.next();
                results.addAll(processVariant(variant, patient, sample, type));
            }
        }
    }
    return results;
}
示例3: processVariants
import htsjdk.variant.vcf.VCFFileReader; //导入依赖的package包/类
/**
 * Filters and simplifies the variants of a VCF, merges potential MNV regions and writes the
 * result to {@code outputVcf}.
 *
 * <p>Each variant accepted by the {@code StrelkaPostProcess} filter is simplified for
 * {@code sampleName} and offered to {@code MNVDetector.fitsMNVRegion}. Whenever a completed
 * region comes back it is merged by the validator and written. The region still pending after
 * the last variant is merged and written at the end.
 *
 * <p>Reader and writer are managed with try-with-resources so both are closed (writer first,
 * then reader) even if filtering, validation or writing throws.
 *
 * @param filePath             path of the input VCF; opened with the reader's boolean flag set
 *                             to {@code false} (htsjdk's requireIndex argument)
 * @param highConfidenceSlicer slicer handed to the {@code StrelkaPostProcess} filter
 * @param outputVcf            path of the VCF to write
 * @param sampleName           sample name used for the output header and variant simplification
 * @param tumorBam             tumor BAM handed to the MNV validator
 * @throws HartwigException declared by the original signature; propagated from the called helpers
 */
private static void processVariants(@NotNull final String filePath, @NotNull final Slicer highConfidenceSlicer,
        @NotNull final String outputVcf, @NotNull final String sampleName, @NotNull final String tumorBam) throws HartwigException {
    try (final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false)) {
        final VCFHeader outputHeader = generateOutputHeader(vcfReader.getFileHeader(), sampleName);
        try (final VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(outputVcf)
                .setReferenceDictionary(outputHeader.getSequenceDictionary())
                .build()) {
            writer.writeHeader(outputHeader);
            final MNVValidator validator = ImmutableMNVValidator.of(tumorBam);
            Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair =
                    ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());
            final VariantContextFilter filter = new StrelkaPostProcess(highConfidenceSlicer);
            for (final VariantContext variantContext : vcfReader) {
                if (filter.test(variantContext)) {
                    final VariantContext simplifiedVariant = StrelkaPostProcess.simplifyVariant(variantContext, sampleName);
                    final PotentialMNVRegion potentialMNV = outputPair.getLeft();
                    outputPair = MNVDetector.fitsMNVRegion(potentialMNV, simplifiedVariant);
                    // A present right-hand side is a finished region: merge and emit it now.
                    outputPair.getRight().ifPresent(mnvRegion -> validator.mergeVariants(mnvRegion).forEach(writer::add));
                }
            }
            // Flush the region that was still open when the input ran out.
            validator.mergeVariants(outputPair.getLeft()).forEach(writer::add);
        }
    }
    LOGGER.info("Written output variants to " + outputVcf);
}
示例4: processVariants
import htsjdk.variant.vcf.VCFFileReader; //导入依赖的package包/类
/**
 * Detects potential MNV regions in a VCF, merges them via the validator and writes the result
 * to {@code outputVcf} using the input file's header.
 *
 * <p>Every variant is offered to {@code MNVDetector.fitsMNVRegion}. Whenever a completed region
 * comes back it is merged and written. The region still pending after the last variant is
 * merged and written at the end.
 *
 * <p>Reader and writer are managed with try-with-resources so both are closed (writer first,
 * then reader) even if detection, validation or writing throws.
 *
 * @param filePath  path of the input VCF; opened with the reader's boolean flag set to
 *                  {@code false} (htsjdk's requireIndex argument)
 * @param outputVcf path of the VCF to write
 * @param tumorBam  tumor BAM handed to the MNV validator
 */
private static void processVariants(@NotNull final String filePath, @NotNull final String outputVcf, @NotNull final String tumorBam) {
    try (final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false);
         final VariantContextWriter vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVcf)
                 .setReferenceDictionary(vcfReader.getFileHeader().getSequenceDictionary())
                 .build()) {
        vcfWriter.writeHeader(vcfReader.getFileHeader());
        final MNVValidator validator = ImmutableMNVValidator.of(tumorBam);
        Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair =
                ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());
        for (final VariantContext variant : vcfReader) {
            final PotentialMNVRegion potentialMNV = outputPair.getLeft();
            outputPair = MNVDetector.fitsMNVRegion(potentialMNV, variant);
            // A present right-hand side is a finished region: merge and emit it now.
            outputPair.getRight().ifPresent(mnvRegion -> validator.mergeVariants(mnvRegion).forEach(vcfWriter::add));
        }
        // Flush the region that was still open when the input ran out.
        validator.mergeVariants(outputPair.getLeft()).forEach(vcfWriter::add);
    }
    LOGGER.info("Written output variants to " + outputVcf);
}
示例5: testGetCalls
import htsjdk.variant.vcf.VCFFileReader; //导入依赖的package包/类
/**
 * Checks that {@code VcfToVariant.getCalls} yields, for each variant of the test VCF, the call
 * list stored at the same position in {@code TRUTH}.
 *
 * <p>Note: HTSJDK cannot distinguish between PASS filters and
 * unfiltered for variant calls (it can for general filters).
 * As a result, all variant calls which PASS do not have filter
 * information applied.
 */
@Test
public void testGetCalls() throws Exception {
    File vcfFile =
            new File(VcfToVariant.class.getClassLoader().getResource(VALID_VCF_4_1).getFile());
    // Close the reader when the test finishes, including when an assertion fails.
    try (VCFFileReader vcfReader = new VCFFileReader(vcfFile, false)) {
        VCFHeader vcfHeader = vcfReader.getFileHeader();
        int currVariant = 0;
        for (final VariantContext vc : vcfReader) {
            List<VariantCall> callList = VcfToVariant.getCalls(vc, vcfHeader);
            assertEquals(callList, TRUTH.get(currVariant).getCallsList());
            currVariant++;
        }
    }
}
示例6: genotypeAsNumericInJS
import htsjdk.variant.vcf.VCFFileReader; //导入依赖的package包/类
/**
 * Applies two JS genotype filters on {@code {GT}} to the features in the test region and checks
 * that the printed matrix mentions sample1 but not sample2 in both cases.
 */
@Test
public void genotypeAsNumericInJS() throws ClassNotFoundException, IOException, InvalidGenomicCoordsException, InvalidRecordException, SQLException, InvalidColourException {
    // Only the header is needed; try-with-resources closes the reader even if reading it fails.
    final VCFHeader vcfHeader;
    try (VCFFileReader reader = new VCFFileReader(new File("test_data/info_formats.vcf.gz"))) {
        vcfHeader = reader.getFileHeader();
    }

    GenomicCoords gc = new GenomicCoords("1:17822074-17822184", 80, null, null);
    TrackIntervalFeature vcf = new TrackIntervalFeature("test_data/info_formats.vcf.gz", gc);
    List<IntervalFeature> linf = vcf.getIntervalFeatureList();
    GenotypeMatrix gm = new GenotypeMatrix();

    gm.setJsScriptFilter("{GT} == '1/0'");
    String x = gm.printToScreen(true, linf, 80, vcfHeader);
    assertTrue(x.contains("sample1"));
    assertTrue(!x.contains("sample2"));

    gm.setJsScriptFilter("{GT} == './.'");
    x = gm.printToScreen(true, linf, 80, vcfHeader);
    assertTrue(x.contains("sample1"));
    assertTrue(!x.contains("sample2"));
}
示例7: invalidJSscripts
import htsjdk.variant.vcf.VCFFileReader; //导入依赖的package包/类
/**
 * Checks that a JS filter which is rejected during {@code printToScreen} is reset to null, and
 * that printing works again afterwards.
 */
@Test
public void invalidJSscripts() throws ClassNotFoundException, IOException, InvalidGenomicCoordsException, InvalidRecordException, SQLException, InvalidColourException {
    // Only the header is needed; try-with-resources closes the reader even if reading it fails.
    final VCFHeader vcfHeader;
    try (VCFFileReader reader = new VCFFileReader(new File("test_data/info_formats.vcf.gz"))) {
        vcfHeader = reader.getFileHeader();
    }

    GenomicCoords gc = new GenomicCoords("1:17822074-17822184", 80, null, null);
    TrackIntervalFeature vcf = new TrackIntervalFeature("test_data/info_formats.vcf.gz", gc);
    List<IntervalFeature> linf = vcf.getIntervalFeatureList();
    GenotypeMatrix gm = new GenotypeMatrix();

    // JS script must return boolean
    gm.setJsScriptFilter("{DP} > 5 && 10 + 3");
    gm.printToScreen(true, linf, 80, vcfHeader);
    assertNull(gm.getJsScriptFilter()); // After faulty script reset to null.

    // Invalid JS syntax. E.g. when {TAG} does not exist.
    gm.setJsScriptFilter("{FOOBAR} > 10");
    gm.printToScreen(true, linf, 80, vcfHeader);
    assertNull(gm.getJsScriptFilter());

    // After exception the invalid filter has been removed
    gm.printToScreen(true, linf, 80, vcfHeader);
}
示例8: canFilterGenotypeWithJS
import htsjdk.variant.vcf.VCFFileReader; //导入依赖的package包/类
/**
 * Filters genotypes with the JS expression {@code {GT} == './.'} and checks that the printed
 * matrix mentions sample1 but not sample2.
 */
@Test
public void canFilterGenotypeWithJS() throws IOException, ClassNotFoundException, InvalidGenomicCoordsException, InvalidRecordException, SQLException, InvalidColourException {
    // Only the header is needed; try-with-resources closes the reader even if reading it fails.
    final VCFHeader vcfHeader;
    try (VCFFileReader reader = new VCFFileReader(new File("test_data/info_formats.vcf.gz"))) {
        vcfHeader = reader.getFileHeader();
    }

    GenomicCoords gc = new GenomicCoords("1:17822074-17822184", 80, null, null);
    TrackIntervalFeature vcf = new TrackIntervalFeature("test_data/info_formats.vcf.gz", gc);
    List<IntervalFeature> linf = vcf.getIntervalFeatureList();
    GenotypeMatrix gm = new GenotypeMatrix();

    // Missing alleles
    gm.setJsScriptFilter("{GT} == './.'");
    String x = gm.printToScreen(true, linf, 80, vcfHeader);
    assertTrue(x.contains("sample1"));
    assertTrue(!x.contains("sample2"));
}
示例9: overalappingSymbols
import htsjdk.variant.vcf.VCFFileReader; //导入依赖的package包/类
/**
 * Prints the genotype matrix for a region of the structural-variant test VCF and checks that
 * the output contains both the "O" and the "*" symbol.
 */
@Test
public void overalappingSymbols() throws IOException, InvalidGenomicCoordsException, ClassNotFoundException, InvalidRecordException, SQLException, InvalidColourException {
    // Only the header is needed; try-with-resources closes the reader even if reading it fails.
    final VCFHeader vcfHeader;
    try (VCFFileReader reader = new VCFFileReader(new File("test_data/ALL.wgs.mergedSV.v8.20130502.svs.genotypes.vcf.gz"))) {
        vcfHeader = reader.getFileHeader();
    }

    // Same genotype: no ambiguity.
    GenomicCoords gc = new GenomicCoords("1:199882-200100", 80, null, null);
    TrackIntervalFeature vcf = new TrackIntervalFeature("test_data/ALL.wgs.mergedSV.v8.20130502.svs.genotypes.vcf.gz", gc);
    List<IntervalFeature> linf = vcf.getIntervalFeatureList();
    GenotypeMatrix gm = new GenotypeMatrix();
    String x = gm.printToScreen(false, linf, 80, vcfHeader);
    assertTrue(x.contains("O")); // Two genotype at the same position: Both the same
    assertTrue(x.contains("*")); // Different
}
示例10: canFormatVCFLineStructVar
import htsjdk.variant.vcf.VCFFileReader; //导入依赖的package包/类
@Test
public void canFormatVCFLineStructVar() throws InvalidGenomicCoordsException, InvalidColourException, IOException, InvalidConfigException{
List<Double> rulerMap= new ArrayList<Double>();
for(int i= 1; i < 100; i++){
rulerMap.add((double)i);
}
// Prepare header
VCFFileReader reader = new VCFFileReader(new File("test_data/ALL.wgs.mergedSV.v8.20130502.svs.genotypes.vcf.gz"));
VCFHeader vcfHeader= reader.getFileHeader();
reader.close();
VCFCodec vcfCodec= new VCFCodec();
vcfCodec.setVCFHeader(vcfHeader, Utils.getVCFHeaderVersion(vcfHeader));
String vcfLine= "1 668630 DUP_delly_DUP20532 G <CN2> . PASS AC=64;AF=0.0127795;AFR_AF=0.0015;AMR_AF=0;AN=5008;CIEND=-150,150;CIPOS=-150,150;CS=DUP_delly;EAS_AF=0.0595;END=850204;EUR_AF=0.001;IMPRECISE;NS=2504;SAS_AF=0.001;SITEPOST=1;SVTYPE=DUP GT 0|0 0|0 0|0".replaceAll(" ", "\t");
IntervalFeature ift= new IntervalFeature(vcfLine, TrackFormat.VCF, vcfCodec);
ift.mapToScreen(rulerMap);
assertEquals(850204, ift.getTo());
assertEquals("|", ift.getIdeogram(true, true).get(0).format(true));
}
示例11: composeExpectedSegments
import htsjdk.variant.vcf.VCFFileReader; //导入依赖的package包/类
/**
 * Builds the expected segment records from a VCF: one record per genotype of every variant.
 *
 * <p>For each genotype the {@code CN}, {@code CNP} and {@code CNF} extended attributes are
 * parsed. The call and its exact log10 probability come from {@code expectedCall} and
 * {@code expectedExactLog10}. The two quality-like fields are computed as {@code -10 * log10}
 * values, and the remaining numeric fields are fixed to {@code 0.000} and {@code NaN}.
 *
 * @param vcf     the VCF to read; the reader is closed before this method returns
 * @param targets target collection used to count the targets overlapping each variant
 * @return the expected records in file order, genotypes in the order the variant reports them
 * @throws IOException declared by the original signature
 */
private List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> composeExpectedSegments(final File vcf, final TargetCollection<Target> targets) throws IOException {
    final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> result = new ArrayList<>();
    // try-with-resources: the reader was previously never closed.
    try (final VCFFileReader reader = new VCFFileReader(vcf, false)) {
        reader.iterator().forEachRemaining(vc -> {
            final int targetCount = targets.indexRange(vc).size();
            for (final Genotype genotype : vc.getGenotypes()) {
                final int cn = Integer.parseInt(genotype.getExtendedAttribute("CN").toString());
                // Strip '[' and ']' individually so a list-style "[a, b, c]" value parses too.
                // The former pattern "\\[\\]" only matched the literal two-character sequence "[]".
                // Double.parseDouble tolerates the blanks left after the commas.
                final double[] cnp = Stream.of(genotype.getExtendedAttribute("CNP").toString().replaceAll("[\\[\\]]", "").split(","))
                        .mapToDouble(Double::parseDouble).toArray();
                final double cnpSum = MathUtils.approximateLog10SumLog10(cnp);
                final CopyNumberTriState call = expectedCall(cn);
                final double exactLog10Prob = expectedExactLog10(call, cnp);
                final HiddenStateSegment<CopyNumberTriState, Target> expectedSegment = new HiddenStateSegment<>(
                        new SimpleInterval(vc), targetCount, Double.parseDouble(genotype.getExtendedAttribute("CNF").toString()),
                        0.000, call, -10.0 * exactLog10Prob, Double.NaN, Double.NaN, Double.NaN,
                        -10.0 * (cnp[ConvertGSVariantsToSegments.NEUTRAL_COPY_NUMBER_DEFAULT] - cnpSum)
                );
                result.add(new HiddenStateSegmentRecord<>(genotype.getSampleName(), expectedSegment));
            }
        });
    }
    return result;
}
开发者ID:broadinstitute,项目名称:gatk-protected,代码行数:24,代码来源:ConvertGSVariantsToSegmentsIntegrationTest.java
示例12: sortInputs
import htsjdk.variant.vcf.VCFFileReader; //导入依赖的package包/类
/**
 * Pours the records of every input reader into one SortingCollection, which merges and sorts them.
 * <p/>
 * NB: A merging iterator (as in MergeSamFiles) would perform better for pre-sorted inputs.
 * This method assumes the inputs are unsorted, so adding their VariantContexts one by one is
 * acceptable for now. MergeVcfs exists for simple merging of presorted inputs.
 * <p/>
 * Each reader is closed once all of its records have been added.
 *
 * @param readers      - a list of VCFFileReaders, one for each input VCF
 * @param outputHeader - The merged header whose information we intend to use in the final output file
 * @return the collection holding every record read from {@code readers}
 */
private SortingCollection<VariantContext> sortInputs(final List<VCFFileReader> readers, final VCFHeader outputHeader) {
    final ProgressLogger readProgress = new ProgressLogger(log, 25000, "read", "records");

    // NB: The default MAX_RECORDS_IN_RAM may not be appropriate here. VariantContexts are smaller than SamRecords
    // We would have to play around empirically to find an appropriate value. We are not performing this optimization at this time.
    final boolean notStrict = VALIDATION_STRINGENCY != ValidationStringency.STRICT;
    final SortingCollection<VariantContext> sorter = SortingCollection.newInstance(
            VariantContext.class,
            new VCFRecordCodec(outputHeader, notStrict),
            outputHeader.getVCFRecordComparator(),
            MAX_RECORDS_IN_RAM,
            TMP_DIR);

    // 1-based ordinal of the input currently being read; used for logging only.
    int inputNumber = 0;
    for (final VCFFileReader input : readers) {
        inputNumber++;
        log.info("Reading entries from input file " + inputNumber);
        for (final VariantContext record : input) {
            sorter.add(record);
            readProgress.record(record.getContig(), record.getStart());
        }
        input.close();
    }
    return sorter;
}
示例13: ensureSameVariantsReadAsSimpleVcfFileIterator
import htsjdk.variant.vcf.VCFFileReader; //导入依赖的package包/类
/**
 * Checks that the segment iterators produced by {@code VariantIteratorProducer.Threadsafe}
 * together yield the same set of variants (compared by {@code toString()}) as a plain
 * {@code VCFFileReader} iterating the same file.
 */
@Test
public void ensureSameVariantsReadAsSimpleVcfFileIterator() {
    final VariantIteratorProducer.Threadsafe iteratorFactory =
            new VariantIteratorProducer.Threadsafe(
                    VcfFileSegmentGenerator.byWholeContigSubdividingWithWidth(TEN_MILLION),
                    Arrays.asList(VCF_WITH_MULTI_ALLELIC_VARIANT_AT_POSITION_10MILLION)
            );
    final Set<String> observedVcs = new HashSet<String>();
    final Set<String> actual = new HashSet<String>();

    // Reference read: close the reader once its variants are collected (it was never closed before).
    try (final VCFFileReader actualVcs = new VCFFileReader(VCF_WITH_MULTI_ALLELIC_VARIANT_AT_POSITION_10MILLION)) {
        for (final VariantContext actualVc : actualVcs) {
            actual.add(actualVc.toString());
        }
    }

    for (final CloseableIterator<VariantContext> i : iteratorFactory.iterators()) {
        while (i.hasNext()) {
            observedVcs.add(i.next().toString());
        }
    }

    Assert.assertEquals(actual, observedVcs);
}
示例14: validateSortingResults
import htsjdk.variant.vcf.VCFFileReader; //导入依赖的package包/类
/**
 * Checks the ordering and total number of variant context entries in the specified output VCF file.
 * Does NOT check explicitly that the VC genomic positions match exactly those from the inputs. We assume this behavior from other tests.
 * <p/>
 * Ordering is verified with the record comparator taken from the output file's own header. The
 * reader and its iterator are closed even when an assertion fails.
 *
 * @param output VCF file representing the output of SortVCF
 * @param expectedVariantContextCount the total number of variant context entries from all input files that were merged/sorted
 */
private void validateSortingResults(final File output, final int expectedVariantContextCount) {
    int variantContextCount = 0;
    // Resources close in reverse order: iterator first, then the reader.
    try (final VCFFileReader outputReader = new VCFFileReader(output, false);
         final CloseableIterator<VariantContext> iterator = outputReader.iterator()) {
        final VariantContextComparator outputComparator = outputReader.getFileHeader().getVCFRecordComparator();
        VariantContext last = null;
        while (iterator.hasNext()) {
            final VariantContext outputContext = iterator.next();
            if (last != null) {
                Assert.assertTrue(outputComparator.compare(last, outputContext) <= 0);
            }
            last = outputContext;
            variantContextCount++;
        }
    }
    Assert.assertEquals(variantContextCount, expectedVariantContextCount);
}
示例15: validateSnpAndIndelResults
import htsjdk.variant.vcf.VCFFileReader; //导入依赖的package包/类
/**
 * Make sure that the order of the output file is identical to the order
 * of the input files by iterating through the output, making sure that,
 * if the context is an indel (snp), the next genomic position in the indel
 * (snp) queue is the same. Also make sure that the context is in the order
 * specified by the input files.
 * <p/>
 * Both queues are consumed by this method and must be empty once the output has been read.
 * The reader and its iterator are closed even when an assertion fails.
 *
 * @param output               the VCF file to validate
 * @param indelContigPositions expected contig positions of the indels, in output order
 * @param snpContigPositions   expected contig positions of the SNPs, in output order
 */
private void validateSnpAndIndelResults(final File output, final Queue<String> indelContigPositions, final Queue<String> snpContigPositions) {
    // Resources close in reverse order: iterator first, then the reader.
    try (final VCFFileReader outputReader = new VCFFileReader(output, false);
         final CloseableIterator<VariantContext> iterator = outputReader.iterator()) {
        final VariantContextComparator outputComparator = outputReader.getFileHeader().getVCFRecordComparator();
        VariantContext last = null;
        while (iterator.hasNext()) {
            final VariantContext outputContext = iterator.next();
            if (outputContext.isIndel()) {
                Assert.assertEquals(getContigPosition(outputContext), indelContigPositions.poll());
            }
            if (outputContext.isSNP()) {
                Assert.assertEquals(getContigPosition(outputContext), snpContigPositions.poll());
            }
            if (last != null) {
                Assert.assertTrue(outputComparator.compare(last, outputContext) <= 0);
            }
            last = outputContext;
        }
    }

    // We should have polled everything off the indel (snp) queues
    Assert.assertEquals(indelContigPositions.size(), 0);
    Assert.assertEquals(snpContigPositions.size(), 0);
}