Java FastaReader类代码示例

本文整理汇总了Java中org.biojava.nbio.core.sequence.io.FastaReader类的典型用法代码示例。如果您正苦于以下问题：Java FastaReader类的具体用法？Java FastaReader怎么用？Java FastaReader使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。

FastaReader类属于org.biojava.nbio.core.sequence.io包，在下文中一共展示了FastaReader类的13个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getRawParentSequence

import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Fetches a FASTA record from the NCBI efetch service (nuccore database) and parses it
 * as a DNA sequence using the ambiguity DNA compound set.
 *
 * @param accessId value substituted into the {@code id} query parameter of the efetch URL
 * @return the parsed sequence when the response holds exactly one record, otherwise {@code null}
 * @throws IOException if fetching or parsing the response fails
 */
private DNASequence getRawParentSequence(String accessId) throws IOException {
	String seqUrlTemplate = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=%s&rettype=fasta&retmode=text";
	URL url = new URL(String.format(seqUrlTemplate, accessId));

	logger.trace("Getting parent DNA sequence from URL: {}", url.toString());

	// try-with-resources: the stream is released even when parsing throws,
	// which the previous trailing is.close() did not guarantee.
	try (InputStream is = url.openConnection().getInputStream()) {
		FastaReader<DNASequence, NucleotideCompound> parentReader
				= new FastaReader<DNASequence, NucleotideCompound>(is,
						new PlainFastaHeaderParser<DNASequence, NucleotideCompound>(),
						new DNASequenceCreator(AmbiguityDNACompoundSet.getDNACompoundSet()));
		LinkedHashMap<String, DNASequence> seq = parentReader.process();

		// Anything other than exactly one record is treated as "no parent sequence".
		if (seq.size() == 1) {
			return seq.values().iterator().next();
		}
		return null;
	}
}

开发者ID:biojava，项目名称:biojava，代码行数:23，代码来源:ProteinSequence.java

示例2: testProcessAll

import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Reads the classpath FASTA resource at {@code path} in a single {@code process()} call and
 * checks that it yields exactly one sequence, keyed {@code P02768}, of length 609.
 *
 * @param path classpath location of the FASTA resource
 * @throws Exception if the resource cannot be read or parsed
 */
private void testProcessAll(String path) throws Exception {
    ClasspathResource resource = new ClasspathResource(path);
    // Declared outside the try so the finally clause can close it.
    FastaReader<ProteinSequence, AminoAcidCompound> reader = null;
    try (InputStream fastaStream = resource.getInputStream()) {
        GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound> headerParser =
                new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>();
        ProteinSequenceCreator creator =
                new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet());
        reader = new FastaReader<ProteinSequence, AminoAcidCompound>(fastaStream, headerParser, creator);

        LinkedHashMap<String, ProteinSequence> parsed = reader.process();
        assertThat(parsed, is(notNullValue()));
        assertThat(parsed.size(), is(1));
        assertThat(parsed.containsKey("P02768"), is(true));
        assertThat(parsed.get("P02768").getLength(), is(609));
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}

开发者ID:biojava，项目名称:biojava，代码行数:18，代码来源:TestFASTAReader.java

示例3: testProcess1

import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Reads the classpath FASTA resource at {@code path} one record at a time and checks that the
 * first {@code process(1)} call yields {@code P02768} (length 609) and the second yields
 * {@code null}.
 *
 * @param path classpath location of the FASTA resource
 * @throws Exception if the resource cannot be read or parsed
 */
private void testProcess1(String path) throws Exception {
    ClasspathResource resource = new ClasspathResource(path);
    // Declared outside the try so the finally clause can close it.
    FastaReader<ProteinSequence, AminoAcidCompound> reader = null;
    try (InputStream fastaStream = resource.getInputStream()) {
        GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound> headerParser =
                new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>();
        ProteinSequenceCreator creator =
                new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet());
        reader = new FastaReader<ProteinSequence, AminoAcidCompound>(fastaStream, headerParser, creator);

        // First record.
        LinkedHashMap<String, ProteinSequence> firstBatch = reader.process(1);
        assertThat(firstBatch, is(notNullValue()));
        assertThat(firstBatch.size(), is(1));
        assertThat(firstBatch.containsKey("P02768"), is(true));
        assertThat(firstBatch.get("P02768").getLength(), is(609));

        // Nothing left to read: the reader is expected to return null.
        LinkedHashMap<String, ProteinSequence> secondBatch = reader.process(1);
        assertThat(secondBatch, is(nullValue()));
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}

开发者ID:biojava，项目名称:biojava，代码行数:20，代码来源:TestFASTAReader.java

示例4: testProcess2

import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Reads the classpath FASTA resource at {@code path} one record at a time and checks that
 * successive {@code process(1)} calls yield {@code P02768} (length 609), then {@code P00698}
 * (length 147), then {@code null}.
 *
 * @param path classpath location of the FASTA resource
 * @throws Exception if the resource cannot be read or parsed
 */
private void testProcess2(String path) throws Exception {
    ClasspathResource resource = new ClasspathResource(path);
    // Declared outside the try so the finally clause can close it.
    FastaReader<ProteinSequence, AminoAcidCompound> reader = null;
    try (InputStream fastaStream = resource.getInputStream()) {
        GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound> headerParser =
                new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>();
        ProteinSequenceCreator creator =
                new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet());
        reader = new FastaReader<ProteinSequence, AminoAcidCompound>(fastaStream, headerParser, creator);

        // First record.
        LinkedHashMap<String, ProteinSequence> firstBatch = reader.process(1);
        assertThat(firstBatch, is(notNullValue()));
        assertThat(firstBatch.size(), is(1));
        assertThat(firstBatch.containsKey("P02768"), is(true));
        assertThat(firstBatch.get("P02768").getLength(), is(609));

        // Second record.
        LinkedHashMap<String, ProteinSequence> secondBatch = reader.process(1);
        assertThat(secondBatch, is(notNullValue()));
        assertThat(secondBatch.size(), is(1));
        assertThat(secondBatch.containsKey("P00698"), is(true));
        assertThat(secondBatch.get("P00698").getLength(), is(147));

        // Nothing left to read: the reader is expected to return null.
        LinkedHashMap<String, ProteinSequence> thirdBatch = reader.process(1);
        assertThat(thirdBatch, is(nullValue()));
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}

开发者ID:biojava，项目名称:biojava，代码行数:25，代码来源:TestFASTAReader.java

示例5: processing

import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Parses every protein record in {@code fastaFile} and passes each one to the callback as a
 * JSON tree built from a {@code ProteinObj}.
 *
 * <p>The original header is split on {@code '|'}; the second field becomes the accession and the
 * third the definition line. Headers with fewer than three fields are logged and skipped.
 *
 * @param callback receives one JSON node per successfully parsed record
 * @throws IOException if the FASTA file cannot be opened or read
 */
public void processing(Callback callback) throws IOException {
   ObjectMapper mapper = new ObjectMapper();

   // try-with-resources: the file stream was previously never closed.
   try (FileInputStream inStream = new FileInputStream( fastaFile )) {
       FastaReader<ProteinSequence,AminoAcidCompound> fastaReader =
               new FastaReader<>(
                       inStream,
                       new GenericFastaHeaderParser<>(),
                       new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
       LinkedHashMap<String, ProteinSequence> b = fastaReader.process();

       for (ProteinSequence protein : b.values()) {
           String header = protein.getOriginalHeader();
           String[] parts = header.split("\\|");

           if (parts.length < 3) {
               logger.error("faste parsing error " + header);
               continue;
           }

           // Only allocate the record once the header is known to be usable.
           ProteinObj obj = new ProteinObj();
           obj.setAcxn(parts[1]);
           obj.setDefline(parts[2]);
           obj.setSequence(protein.getSequenceAsString());

           JsonNode node = mapper.valueToTree(obj);
           callback.processSingleJSONRecord(node);
       }
   }
}

开发者ID:NCBI-Hackathons，项目名称:seqr，代码行数:31，代码来源:FastaStreamParser.java

示例6: main

import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Reads the FASTA file at {@code BASE_PATH} as protein sequences and prints the original
 * header of every record to standard output.
 *
 * @param args unused
 * @throws IOException if the file cannot be opened or read
 */
public static void main(String[] args) throws IOException {

        //Try reading with the FastaReader
        // try-with-resources: the file stream was previously never closed.
        try (FileInputStream inStream = new FileInputStream(BASE_PATH)) {
            FastaReader<ProteinSequence,AminoAcidCompound> fastaReader =
                    new FastaReader<>(inStream,
                            new GenericFastaHeaderParser<ProteinSequence,AminoAcidCompound>(),
                            new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
            LinkedHashMap<String, ProteinSequence> b = fastaReader.process();
            for (ProteinSequence protein : b.values()) {
                System.out.println(protein.getOriginalHeader());
            }
        }
    }

开发者ID:CameronTolooee，项目名称:Mendel，代码行数:14，代码来源:BioJavaTest.java

示例7: FastaStructureParser

import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Convenience constructor: wraps the stream in a {@link FastaReader} built from the given
 * header parser and sequence creator, then delegates to the reader-based constructor.
 *
 * @param is stream supplying the FASTA data
 * @param headerParser parser handed to the {@link FastaReader}
 * @param sequenceCreator sequence factory handed to the {@link FastaReader}
 * @param cache passed through unchanged to the delegated constructor
 */
public FastaStructureParser(
		InputStream is,
		SequenceHeaderParserInterface<ProteinSequence, AminoAcidCompound> headerParser,
		SequenceCreatorInterface<AminoAcidCompound> sequenceCreator,
		AtomCache cache)
{
	this(
			new FastaReader<ProteinSequence, AminoAcidCompound>(is, headerParser, sequenceCreator),
			cache);
}

开发者ID:biojava，项目名称:biojava，代码行数:9，代码来源:FastaStructureParser.java

示例8: cpFastaToAfpChain

import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Takes a structure and sequence corresponding to an alignment between a structure or sequence and itself (or even a structure with a sequence), where the result has a circular permutation site
 * {@code cpSite} residues to the right.
 *
 * <p>Only the first two sequences in the file are used; if the file holds fewer than two, the
 * iterator's {@code next()} call fails.
 *
 * @param fastaFile A FASTA file containing exactly 2 sequences, the first unpermuted and the second permuted
 * @param structure
 *            The structure passed through to the sequence-based {@code cpFastaToAfpChain} overload
 * @param cpSite
 *            The number of residues from the beginning of the sequence at which the circular permutation site occurs; can be positive or negative; values greater than the length of the sequence
 *            are acceptable
 * @return the result of the sequence-based {@code cpFastaToAfpChain} overload
 * @throws IOException
 * @throws StructureException
 */
public static AFPChain cpFastaToAfpChain(File fastaFile, Structure structure, int cpSite) throws IOException, StructureException {
	LinkedHashMap<String, ProteinSequence> sequences;
	// try-with-resources: the file is closed even when parsing throws,
	// which the previous trailing inStream.close() did not guarantee.
	try (InputStream inStream = new FileInputStream(fastaFile)) {
		SequenceCreatorInterface<AminoAcidCompound> creator = new CasePreservingProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet());
		SequenceHeaderParserInterface<ProteinSequence, AminoAcidCompound> headerParser = new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>();
		FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(inStream, headerParser, creator);
		sequences = fastaReader.process();
	}
	Iterator<ProteinSequence> iter = sequences.values().iterator();
	ProteinSequence first = iter.next();
	ProteinSequence second = iter.next();
	return cpFastaToAfpChain(first, second, structure, cpSite);
}

开发者ID:biojava，项目名称:biojava，代码行数:24，代码来源:FastaAFPChainConverter.java

示例9: fastaFileToAfpChain

import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Reads the file {@code fastaFile}, expecting exactly two sequences which give a pairwise alignment. Uses this and two structures to create an AFPChain corresponding to the alignment. Uses a
 * {@link CasePreservingProteinSequenceCreator} and assumes that a residue is aligned if and only if it is given by an uppercase letter.
 *
 * @param fastaFile the FASTA file to read
 * @param structure1 first structure, passed through to the map-based {@code fastaToAfpChain} overload
 * @param structure2 second structure, passed through to the map-based {@code fastaToAfpChain} overload
 * @return the result of the map-based {@code fastaToAfpChain} overload
 * @see #fastaToAfpChain(ProteinSequence, ProteinSequence, Structure, Structure)
 * @throws IOException
 * @throws StructureException
 */
public static AFPChain fastaFileToAfpChain(File fastaFile, Structure structure1, Structure structure2)
		throws IOException, StructureException {
	LinkedHashMap<String, ProteinSequence> sequences;
	// try-with-resources: the file is closed even when parsing throws,
	// which the previous trailing inStream.close() did not guarantee.
	try (InputStream inStream = new FileInputStream(fastaFile)) {
		SequenceCreatorInterface<AminoAcidCompound> creator = new CasePreservingProteinSequenceCreator(
				AminoAcidCompoundSet.getAminoAcidCompoundSet());
		SequenceHeaderParserInterface<ProteinSequence, AminoAcidCompound> headerParser = new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>();
		FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(
				inStream, headerParser, creator);
		sequences = fastaReader.process();
	}
	return fastaToAfpChain(sequences, structure1, structure2);
}

开发者ID:biojava，项目名称:biojava，代码行数:21，代码来源:FastaAFPChainConverter.java

示例10: indexData

import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Reads every protein sequence from {@code dbFile}, cuts each into overlapping 30-residue
 * windows, and sends the windows to the configured nodes in batches of at most 500, choosing
 * the destination node round-robin per batch. The whole run is timed with {@code indexTimer}.
 *
 * @param dbFile path of the FASTA file to index
 * @param client used to store each batch at its destination
 * @param indexTimer started before reading and stopped (and printed) after the last batch
 * @throws IOException if the nodes file or the FASTA file cannot be read
 */
private static void indexData(String dbFile, ExpectationCalc client,
                              PerformanceTimer indexTimer) throws IOException {

    // NOTE(review): hard-coded, machine-specific path; consider making it configurable.
    NetworkInfo network = NetworkConfig.readNodesFile(new File("/s/chopin/k/grad/ctolooee/Research/Mendel/conf/nodes"));

    List<NodeInfo> list = network.getAllNodes();

    indexTimer.start();
    int count = 0;

    List<mendel.vptree.types.ProteinSequence> batch = new ArrayList<>();

    LinkedHashMap<String, ProteinSequence> b;
    // try-with-resources: the file stream was previously never closed.
    try (FileInputStream inStream = new FileInputStream(dbFile)) {
        FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
                new FastaReader<>(inStream,
                        new GenericFastaHeaderParser<>(),
                        new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
        b = fastaReader.process();
    }

    int window_size = 30;
    for (Map.Entry<String, ProteinSequence> entry : b.entrySet()) {
        String seq = entry.getValue().getSequenceAsString();
        String header = entry.getValue().getOriginalHeader();

        int len = seq.length();
        // Only the inner loop advances i. The previous outer for-loop incremented it a
        // second time after each flush, silently dropping one window per full batch.
        // NOTE(review): the strict '<' bound excludes the final window starting at
        // len - window_size; kept as-is, confirm whether that is intended.
        int i = 0;
        while (i + window_size < len) {
            while (i + window_size < len && batch.size() < 500) {
                mendel.vptree.types.ProteinSequence sequence =
                        new mendel.vptree.types.ProteinSequence(
                                seq.substring(i, i + window_size));
                sequence.setSequenceID(header);
                sequence.setWholeSequence(seq);
                batch.add(sequence);
                ++i;
            }
            // Flush the batch (full, or the tail of this sequence) to the next node.
            NodeInfo info = list.get(count++ % list.size());
            NetworkDestination dest = new NetworkDestination(info.getHostname(), info.getPort());
            client.store(batch, dest);
            batch.clear();
        }
    }
    indexTimer.stopAndPrint();
}

开发者ID:CameronTolooee，项目名称:Mendel，代码行数:44，代码来源:ExpectationCalc.java

示例11: main

import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Demo: loads an example alignment from the classpath FASTA resource, computes a
 * dissimilarity-score distance matrix with BLOSUM62, builds a neighbor-joining tree from it,
 * and prints the Newick string, the elapsed time and the value returned by
 * {@code DistanceTreeEvaluator.evaluate}.
 *
 * @param args unused
 * @throws Exception if the resource cannot be read or any computation step fails
 */
public static void main(String[] args) throws Exception {

		// 0. This is just to load an example MSA from a FASTA file
		LinkedHashMap<String, ProteinSequence> proteinSequences;
		// try-with-resources: the stream is closed even when parsing throws.
		try (InputStream inStream = TreeConstructor.class
				.getResourceAsStream("/PF00104_small.fasta")) {

			FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
					new FastaReader<ProteinSequence, AminoAcidCompound>(
					inStream,
					new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
					new ProteinSequenceCreator(AminoAcidCompoundSet
							.getAminoAcidCompoundSet()));

			proteinSequences = fastaReader.process();
		}

		MultipleSequenceAlignment<ProteinSequence, AminoAcidCompound> msa =
				new MultipleSequenceAlignment<ProteinSequence, AminoAcidCompound>();

		for (ProteinSequence proteinSequence : proteinSequences.values()) {
			msa.addAlignedSequence(proteinSequence);
		}

		// Timing starts after the input has been loaded, so it covers steps 1 and 2 only.
		long readT = System.currentTimeMillis();

		// 1. Calculate the evolutionary distance matrix (can take long)
		SubstitutionMatrix<AminoAcidCompound> M = SubstitutionMatrixHelper
				.getBlosum62();
		DistanceMatrix DM = DistanceMatrixCalculator
				.dissimilarityScore(msa, M);

		// 2. Construct a distance tree using the NJ algorithm
		Phylogeny phylo = TreeConstructor.distanceTree(
				(BasicSymmetricalDistanceMatrix) DM, TreeConstructorType.NJ);

		long treeT = System.currentTimeMillis();
		String newick = ForesterWrapper.getNewickString(phylo, true);
		System.out.println(newick);
		System.out.println("Tree Construction: " + (treeT - readT) + " ms.");

		// 3. Evaluate the goodness of fit of the tree
		double cv = DistanceTreeEvaluator.evaluate(phylo, DM);
		System.out.println("CV of the tree: " + (int) (cv * 100) + " %");

	}

开发者ID:biojava，项目名称:biojava，代码行数:48，代码来源:DemoDistanceTree.java

示例12: testMSAconversion

import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Loads an alignment from the classpath FASTA resource and checks that writing it back with
 * {@code FastaWriter}, and converting it with {@code ForesterWrapper.convert}, both reproduce
 * the same FASTA text as the parsed input.
 *
 * @throws Exception if the resource cannot be read or any conversion fails
 */
@Test
public void testMSAconversion() throws Exception {

	// Load the msa FASTA file into a BioJava MSA object
	LinkedHashMap<String, ProteinSequence> proteinSequences;
	// try-with-resources: the stream is closed even when parsing throws.
	try (InputStream inStream = TestForesterWrapper.class
			.getResourceAsStream("/1u6d_symm.fasta")) {

		FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
				new FastaReader<ProteinSequence, AminoAcidCompound>(
				inStream,
				new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
				new ProteinSequenceCreator(AminoAcidCompoundSet
						.getAminoAcidCompoundSet()));

		proteinSequences = fastaReader.process();
	}

	MultipleSequenceAlignment<ProteinSequence, AminoAcidCompound> msa =
			new MultipleSequenceAlignment<ProteinSequence, AminoAcidCompound>();

	// Build the expected FASTA text with a StringBuilder instead of repeated String +=.
	StringBuilder expectedBuilder = new StringBuilder();
	for (ProteinSequence proteinSequence : proteinSequences.values()) {
		msa.addAlignedSequence(proteinSequence);
		expectedBuilder.append(">").append(proteinSequence.getOriginalHeader()).append("\n")
				.append(proteinSequence.toString()).append("\n");
	}
	String expected = expectedBuilder.toString();

	// Convert the biojava MSA to a FASTA String
	OutputStream os = new ByteArrayOutputStream();
	FastaWriter<ProteinSequence, AminoAcidCompound> fastaW =
			new FastaWriter<ProteinSequence, AminoAcidCompound>(os,
			msa.getAlignedSequences(),
			new FastaHeaderFormatInterface<ProteinSequence, AminoAcidCompound>() {
				@Override
				public String getHeader(ProteinSequence sequence) {
					return sequence.getAccession().toString();
				}
			});
	fastaW.process();
	String biojava = os.toString();

	// Convert the biojava MSA to a forester Msa
	Msa fMsa = ForesterWrapper.convert(msa);

	StringBuilder sb = new StringBuilder();
	for (int i = 0; i < fMsa.getNumberOfSequences(); i++) {
		sb.append(">").append(fMsa.getIdentifier(i)).append("\n");
		sb.append(fMsa.getSequenceAsString(i)).append("\n");
	}
	String forester = sb.toString();

	// Assert that all FASTA files are equal
	assertEquals(expected, biojava);
	assertEquals(expected, forester);

}

开发者ID:biojava，项目名称:biojava，代码行数:59，代码来源:TestForesterWrapper.java

示例13: main

import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Prints heap statistics, then parses the FASTA file given as the first argument in chunks of
 * 100 sequences, reporting progress every 100000 sequences and the total count and elapsed
 * time at the end.
 *
 * <p>e.g. download ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz
 * and pass in path to local location of file
 *
 * @param args {@code args[0]} is the path to the FASTA file
 */
public static void main(String[] args) {

	int mb = 1024*1024;

	//Getting the runtime reference from system
	Runtime runtime = Runtime.getRuntime();

	System.out.println("##### Heap utilization statistics [MB] #####");

	//Print used memory
	System.out.println("Used Memory:"
			+ (runtime.totalMemory() - runtime.freeMemory()) / mb);

	//Print free memory
	System.out.println("Free Memory:"
			+ runtime.freeMemory() / mb);

	//Print total available memory
	System.out.println("Total Memory:" + runtime.totalMemory() / mb);

	//Print Maximum available memory
	System.out.println("Max Memory:" + runtime.maxMemory() / mb);


	if ( args.length < 1) {
		System.err.println("First argument needs to be path to fasta file");
		return;
	}

	File f = new File(args[0]);

	if ( ! f.exists()) {
		System.err.println("File does not exist " + args[0]);
		return;
	}

	long timeS = System.currentTimeMillis();

	// automatically uncompress files using InputStreamProvider
	// try-with-resources: the stream was previously never closed; failures while opening
	// it are still handled by the catch clause below.
	try (InputStream inStream = new InputStreamProvider().getInputStream(f)) {

		FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(
				inStream,
				new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
				new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));

		LinkedHashMap<String, ProteinSequence> b;

		int nrSeq = 0;

		// Read in chunks so the whole file never has to be held in memory at once;
		// process(100) returning null ends the loop.
		while ((b = fastaReader.process(100)) != null) {
			for (String key : b.keySet()) {
				nrSeq++;
				if ( nrSeq % 100000 == 0) {
					System.out.println(nrSeq );
				}
			}

		}
		long timeE = System.currentTimeMillis();
		System.out.println("parsed a total of " + nrSeq + " TREMBL sequences! in " + (timeE - timeS));
	} catch (Exception ex) {
		Logger.getLogger(ParseFastaFileDemo.class.getName()).log(Level.SEVERE, null, ex);
	}
}

开发者ID:biojava，项目名称:biojava，代码行数:76，代码来源:ParseFastaFileDemo.java

注：本文中的org.biojava.nbio.core.sequence.io.FastaReader类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。