本文整理汇总了Java中org.biojava.nbio.core.sequence.io.FastaReader类的典型用法代码示例。如果您正苦于以下问题:Java FastaReader类的具体用法?Java FastaReader怎么用?Java FastaReader使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
FastaReader类属于org.biojava.nbio.core.sequence.io包,在下文中一共展示了FastaReader类的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getRawParentSequence
import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Fetches the record identified by {@code accessId} from the NCBI E-utilities
 * {@code efetch} endpoint as FASTA text and parses it into a {@link DNASequence}.
 *
 * @param accessId value substituted into the {@code id} query parameter of the efetch URL
 * @return the parsed sequence when the response contains exactly one sequence,
 *         otherwise {@code null}
 * @throws IOException if the connection cannot be opened or the response cannot be read
 */
private DNASequence getRawParentSequence(String accessId) throws IOException {
    String seqUrlTemplate = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=%s&rettype=fasta&retmode=text";
    URL url = new URL(String.format(seqUrlTemplate, accessId));
    logger.trace("Getting parent DNA sequence from URL: {}", url.toString());

    // try-with-resources: the network stream is released even when parsing throws.
    try (InputStream is = url.openConnection().getInputStream()) {
        FastaReader<DNASequence, NucleotideCompound> parentReader
                = new FastaReader<DNASequence, NucleotideCompound>(is,
                        new PlainFastaHeaderParser<DNASequence, NucleotideCompound>(),
                        new DNASequenceCreator(AmbiguityDNACompoundSet.getDNACompoundSet()));
        LinkedHashMap<String, DNASequence> seq = parentReader.process();

        // Defensive: a null result is treated the same as "not exactly one sequence".
        if (seq != null && seq.size() == 1) {
            return seq.values().iterator().next();
        }
        return null;
    }
}
示例2: testProcessAll
import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Parses the complete FASTA resource in a single {@code process()} call and checks that the
 * result holds exactly one entry, keyed {@code P02768}, whose length is 609.
 *
 * @param path path handed to {@link ClasspathResource} to locate the FASTA input
 * @throws Exception if the resource cannot be opened or parsed
 */
private void testProcessAll(String path) throws Exception {
    final ClasspathResource resource = new ClasspathResource(path);
    FastaReader<ProteinSequence, AminoAcidCompound> reader = null;
    try (InputStream in = resource.getInputStream()) {
        final GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound> headerParser =
                new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>();
        final ProteinSequenceCreator creator =
                new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet());
        reader = new FastaReader<ProteinSequence, AminoAcidCompound>(in, headerParser, creator);

        final LinkedHashMap<String, ProteinSequence> parsed = reader.process();
        assertThat(parsed, is(notNullValue()));
        assertThat(parsed.size(), is(1));
        assertThat(parsed.containsKey("P02768"), is(true));
        assertThat(parsed.get("P02768").getLength(), is(609));
    } finally {
        // The reader is closed separately from the stream it wraps.
        if (reader != null) {
            reader.close();
        }
    }
}
示例3: testProcess1
import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Reads the FASTA resource one record at a time via {@code process(1)}. The first call must
 * yield the single entry {@code P02768} (length 609); the second call must yield {@code null}.
 *
 * @param path path handed to {@link ClasspathResource} to locate the FASTA input
 * @throws Exception if the resource cannot be opened or parsed
 */
private void testProcess1(String path) throws Exception {
    final ClasspathResource resource = new ClasspathResource(path);
    FastaReader<ProteinSequence, AminoAcidCompound> reader = null;
    try (InputStream in = resource.getInputStream()) {
        final GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound> headerParser =
                new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>();
        final ProteinSequenceCreator creator =
                new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet());
        reader = new FastaReader<ProteinSequence, AminoAcidCompound>(in, headerParser, creator);

        // First record.
        final LinkedHashMap<String, ProteinSequence> firstBatch = reader.process(1);
        assertThat(firstBatch, is(notNullValue()));
        assertThat(firstBatch.size(), is(1));
        assertThat(firstBatch.containsKey("P02768"), is(true));
        assertThat(firstBatch.get("P02768").getLength(), is(609));

        // Nothing left to read: the reader is expected to signal this with null.
        final LinkedHashMap<String, ProteinSequence> secondBatch = reader.process(1);
        assertThat(secondBatch, is(nullValue()));
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}
示例4: testProcess2
import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Reads the FASTA resource one record at a time via {@code process(1)}. The first call must
 * yield {@code P02768} (length 609), the second {@code P00698} (length 147), and the third
 * must yield {@code null}.
 *
 * @param path path handed to {@link ClasspathResource} to locate the FASTA input
 * @throws Exception if the resource cannot be opened or parsed
 */
private void testProcess2(String path) throws Exception {
    final ClasspathResource resource = new ClasspathResource(path);
    FastaReader<ProteinSequence, AminoAcidCompound> reader = null;
    try (InputStream in = resource.getInputStream()) {
        final GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound> headerParser =
                new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>();
        final ProteinSequenceCreator creator =
                new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet());
        reader = new FastaReader<ProteinSequence, AminoAcidCompound>(in, headerParser, creator);

        // First record.
        final LinkedHashMap<String, ProteinSequence> firstBatch = reader.process(1);
        assertThat(firstBatch, is(notNullValue()));
        assertThat(firstBatch.size(), is(1));
        assertThat(firstBatch.containsKey("P02768"), is(true));
        assertThat(firstBatch.get("P02768").getLength(), is(609));

        // Second record.
        final LinkedHashMap<String, ProteinSequence> secondBatch = reader.process(1);
        assertThat(secondBatch, is(notNullValue()));
        assertThat(secondBatch.size(), is(1));
        assertThat(secondBatch.containsKey("P00698"), is(true));
        assertThat(secondBatch.get("P00698").getLength(), is(147));

        // Input exhausted: the reader is expected to signal this with null.
        final LinkedHashMap<String, ProteinSequence> thirdBatch = reader.process(1);
        assertThat(thirdBatch, is(nullValue()));
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}
示例5: processing
import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Parses every protein record in {@code fastaFile} and forwards each usable record to
 * {@code callback} as a JSON tree.
 *
 * <p>A record is usable when its original header splits on {@code |} into at least three
 * parts; part 1 is stored as the accession and part 2 as the defline. Records with fewer
 * parts are logged as errors and skipped.
 *
 * @param callback receives one {@link JsonNode} per usable record
 * @throws IOException if the FASTA file cannot be opened or read
 */
public void processing(Callback callback) throws IOException {
    ObjectMapper mapper = new ObjectMapper();

    LinkedHashMap<String, ProteinSequence> sequences;
    // try-with-resources: the file handle is released even when parsing throws.
    try (FileInputStream inStream = new FileInputStream(fastaFile)) {
        FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
                new FastaReader<>(
                        inStream,
                        new GenericFastaHeaderParser<>(),
                        new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
        sequences = fastaReader.process();
    }

    for (ProteinSequence protein : sequences.values()) {
        String header = protein.getOriginalHeader();
        String[] parts = header.split("\\|");
        if (parts.length < 3) {
            logger.error("faste parsing error " + header);
            continue;
        }

        // Only build the transfer object for records that will actually be emitted.
        ProteinObj obj = new ProteinObj();
        obj.setAcxn(parts[1]);
        obj.setDefline(parts[2]);
        obj.setSequence(protein.getSequenceAsString());
        JsonNode node = mapper.valueToTree(obj);
        callback.processSingleJSONRecord(node);
    }
}
示例6: main
import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Demo entry point: parses the FASTA file at {@code BASE_PATH} and prints the original
 * header of every sequence to standard output.
 *
 * @param args ignored
 * @throws IOException if the file cannot be opened or read
 */
public static void main(String[] args) throws IOException {
    //Try reading with the FastaReader
    LinkedHashMap<String, ProteinSequence> sequences;
    // try-with-resources: the file handle is released even when parsing throws.
    try (FileInputStream inStream = new FileInputStream(BASE_PATH)) {
        FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
                new FastaReader<>(inStream,
                        new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
                        new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
        sequences = fastaReader.process();
    }

    for (ProteinSequence sequence : sequences.values()) {
        System.out.println(sequence.getOriginalHeader());
    }
}
示例7: FastaStructureParser
import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Convenience constructor: wraps {@code is}, {@code headerParser} and {@code sequenceCreator}
 * in a new {@link FastaReader} and delegates to the constructor of this class that takes a
 * reader and a cache.
 *
 * @param is              stream supplying the FASTA text
 * @param headerParser    parser applied to the FASTA header lines
 * @param sequenceCreator factory used to build the protein sequences
 * @param cache           passed through unchanged to the delegate constructor
 */
public FastaStructureParser(
        InputStream is,
        SequenceHeaderParserInterface<ProteinSequence, AminoAcidCompound> headerParser,
        SequenceCreatorInterface<AminoAcidCompound> sequenceCreator,
        AtomCache cache) {
    this(new FastaReader<>(is, headerParser, sequenceCreator), cache);
}
示例8: cpFastaToAfpChain
import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Takes a structure and sequence corresponding to an alignment between a structure or
 * sequence and itself (or even a structure with a sequence), where the result has a circular
 * permutation site {@code cpSite} residues to the right.
 *
 * <p>Only the first two sequences returned by the reader are used; they are forwarded,
 * together with {@code structure} and {@code cpSite}, to the four-argument
 * {@code cpFastaToAfpChain} overload.
 *
 * @param fastaFile A FASTA file containing exactly 2 sequences, the first unpermuted and the
 *            second permuted
 * @param structure the structure passed through to the four-argument overload
 * @param cpSite
 *            The number of residues from the beginning of the sequence at which the circular
 *            permutation site occurs; can be positive or negative; values greater than the
 *            length of the sequence are acceptable
 * @return the {@link AFPChain} produced by the four-argument overload
 * @throws IOException if the file cannot be opened or read
 * @throws StructureException propagated from the four-argument overload
 * @throws java.util.NoSuchElementException if fewer than two sequences were parsed
 */
public static AFPChain cpFastaToAfpChain(File fastaFile, Structure structure, int cpSite) throws IOException, StructureException {
    SequenceCreatorInterface<AminoAcidCompound> creator = new CasePreservingProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet());
    SequenceHeaderParserInterface<ProteinSequence, AminoAcidCompound> headerParser = new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>();

    LinkedHashMap<String, ProteinSequence> sequences;
    // try-with-resources: the file handle is released even when parsing throws.
    try (InputStream inStream = new FileInputStream(fastaFile)) {
        FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(inStream, headerParser, creator);
        sequences = fastaReader.process();
    }

    Iterator<ProteinSequence> iter = sequences.values().iterator();
    ProteinSequence first = iter.next();
    ProteinSequence second = iter.next();
    return cpFastaToAfpChain(first, second, structure, cpSite);
}
示例9: fastaFileToAfpChain
import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Reads the file {@code fastaFile}, expecting exactly two sequences which give a pairwise
 * alignment. Uses this and two structures to create an AFPChain corresponding to the
 * alignment. Uses a {@link CasePreservingProteinSequenceCreator} and assumes that a residue
 * is aligned if and only if it is given by an uppercase letter.
 *
 * @param fastaFile  FASTA file holding the two aligned sequences
 * @param structure1 first structure, passed through to {@code fastaToAfpChain}
 * @param structure2 second structure, passed through to {@code fastaToAfpChain}
 * @return the {@link AFPChain} built by {@code fastaToAfpChain} from the parsed sequences
 * @see #fastaToAfpChain(ProteinSequence, ProteinSequence, Structure, Structure)
 * @throws IOException if the file cannot be opened or read
 * @throws StructureException propagated from {@code fastaToAfpChain}
 */
public static AFPChain fastaFileToAfpChain(File fastaFile, Structure structure1, Structure structure2)
        throws IOException, StructureException {
    SequenceCreatorInterface<AminoAcidCompound> creator = new CasePreservingProteinSequenceCreator(
            AminoAcidCompoundSet.getAminoAcidCompoundSet());
    SequenceHeaderParserInterface<ProteinSequence, AminoAcidCompound> headerParser = new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>();

    LinkedHashMap<String, ProteinSequence> sequences;
    // try-with-resources: the file handle is released even when parsing throws.
    try (InputStream inStream = new FileInputStream(fastaFile)) {
        FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(
                inStream, headerParser, creator);
        sequences = fastaReader.process();
    }
    return fastaToAfpChain(sequences, structure1, structure2);
}
示例10: indexData
import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Parses the FASTA file {@code dbFile}, cuts every protein sequence into overlapping
 * 30-character windows (step 1) and sends them to the configured nodes in batches of up to
 * 500 windows, choosing the target node round-robin.
 *
 * <p>Batches are never shared between sequences: the remaining windows of one sequence are
 * flushed before the next sequence starts.
 *
 * @param dbFile     path of the FASTA file to index
 * @param client     client whose {@code store} method receives each batch and its destination
 * @param indexTimer started before the file is read and stopped (with printing) after the
 *                   last batch was sent
 * @throws IOException if the nodes file or the FASTA file cannot be read
 */
private static void indexData(String dbFile, ExpectationCalc client,
        PerformanceTimer indexTimer) throws IOException {
    // NOTE(review): hard-coded absolute path to the cluster node list — confirm this
    // should not be configurable.
    NetworkInfo network = NetworkConfig.readNodesFile(new File("/s/chopin/k/grad/ctolooee/Research/Mendel/conf/nodes"));
    List<NodeInfo> list = network.getAllNodes();
    indexTimer.start();
    int count = 0;
    List<mendel.vptree.types.ProteinSequence> batch = new ArrayList<>();

    LinkedHashMap<String, ProteinSequence> b;
    // try-with-resources: the file handle is released even when parsing throws.
    try (FileInputStream inStream = new FileInputStream(dbFile)) {
        FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
                new FastaReader<>(inStream,
                        new GenericFastaHeaderParser<>(),
                        new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
        b = fastaReader.process();
    }

    final int windowSize = 30;
    final int maxBatchSize = 500;
    for (ProteinSequence protein : b.values()) {
        String seq = protein.getSequenceAsString();
        String header = protein.getOriginalHeader();
        int len = seq.length();

        // The window start is advanced ONLY inside the fill loop. The previous
        // for-loop incremented it a second time after each flush, which silently
        // dropped one window per full batch.
        // NOTE(review): the strict '<' means the window that ends exactly at the last
        // residue (start + windowSize == len) is never emitted — confirm this is intended.
        int start = 0;
        while (start + windowSize < len) {
            while (start + windowSize < len && batch.size() < maxBatchSize) {
                mendel.vptree.types.ProteinSequence sequence =
                        new mendel.vptree.types.ProteinSequence(
                                seq.substring(start, start + windowSize));
                sequence.setSequenceID(header);
                sequence.setWholeSequence(seq);
                batch.add(sequence);
                ++start;
            }
            // Round-robin over the known nodes.
            NodeInfo info = list.get(count++ % list.size());
            NetworkDestination dest = new NetworkDestination(info.getHostname(), info.getPort());
            client.store(batch, dest);
            batch.clear();
        }
    }
    indexTimer.stopAndPrint();
}
示例11: main
import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Demo entry point: loads the example alignment {@code /PF00104_small.fasta} from the
 * classpath, computes a dissimilarity-score distance matrix with BLOSUM62, builds a
 * neighbor-joining tree from it, prints the tree in Newick format together with the elapsed
 * time, and finally prints the value returned by {@code DistanceTreeEvaluator.evaluate}
 * as a percentage.
 *
 * @param args ignored
 * @throws Exception if the resource cannot be read or any computation step fails
 */
public static void main(String[] args) throws Exception {

    // 0. This is just to load an example MSA from a FASTA file
    LinkedHashMap<String, ProteinSequence> proteinSequences;
    // try-with-resources: the stream is released even when parsing throws.
    try (InputStream inStream = TreeConstructor.class
            .getResourceAsStream("/PF00104_small.fasta")) {
        FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
                new FastaReader<ProteinSequence, AminoAcidCompound>(
                        inStream,
                        new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
                        new ProteinSequenceCreator(AminoAcidCompoundSet
                                .getAminoAcidCompoundSet()));
        proteinSequences = fastaReader.process();
    }

    MultipleSequenceAlignment<ProteinSequence, AminoAcidCompound> msa =
            new MultipleSequenceAlignment<ProteinSequence, AminoAcidCompound>();
    for (ProteinSequence proteinSequence : proteinSequences.values()) {
        msa.addAlignedSequence(proteinSequence);
    }

    // Timing starts after the input has been read, so it covers steps 1 and 2 only.
    long readT = System.currentTimeMillis();

    // 1. Calculate the evolutionary distance matrix (can take long)
    SubstitutionMatrix<AminoAcidCompound> M = SubstitutionMatrixHelper
            .getBlosum62();
    DistanceMatrix DM = DistanceMatrixCalculator
            .dissimilarityScore(msa, M);

    // 2. Construct a distance tree using the NJ algorithm
    Phylogeny phylo = TreeConstructor.distanceTree(
            (BasicSymmetricalDistanceMatrix) DM, TreeConstructorType.NJ);
    long treeT = System.currentTimeMillis();

    String newick = ForesterWrapper.getNewickString(phylo, true);
    System.out.println(newick);
    System.out.println("Tree Construction: " + (treeT - readT) + " ms.");

    // 3. Evaluate the goodness of fit of the tree
    double cv = DistanceTreeEvaluator.evaluate(phylo, DM);
    System.out.println("CV of the tree: " + (int) (cv * 100) + " %");
}
示例12: testMSAconversion
import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Loads the alignment {@code /1u6d_symm.fasta}, then checks that both writing it back with
 * {@link FastaWriter} and converting it to a forester {@code Msa} reproduce the same FASTA
 * text (header line {@code >originalHeader}, followed by the sequence).
 *
 * @throws Exception if the resource cannot be read or the conversion fails
 */
@Test
public void testMSAconversion() throws Exception {

    // Load the msa FASTA file into a BioJava MSA object
    LinkedHashMap<String, ProteinSequence> proteinSequences;
    // try-with-resources: the stream is released even when parsing throws.
    try (InputStream inStream = TestForesterWrapper.class
            .getResourceAsStream("/1u6d_symm.fasta")) {
        FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
                new FastaReader<ProteinSequence, AminoAcidCompound>(
                        inStream,
                        new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
                        new ProteinSequenceCreator(AminoAcidCompoundSet
                                .getAminoAcidCompoundSet()));
        proteinSequences = fastaReader.process();
    }

    MultipleSequenceAlignment<ProteinSequence, AminoAcidCompound> msa =
            new MultipleSequenceAlignment<ProteinSequence, AminoAcidCompound>();

    // Build the reference FASTA text while filling the alignment.
    StringBuilder expectedBuilder = new StringBuilder();
    for (ProteinSequence proteinSequence : proteinSequences.values()) {
        msa.addAlignedSequence(proteinSequence);
        expectedBuilder.append(">").append(proteinSequence.getOriginalHeader()).append("\n")
                .append(proteinSequence.toString()).append("\n");
    }
    String expected = expectedBuilder.toString();

    // Convert the biojava MSA to a FASTA String
    OutputStream os = new ByteArrayOutputStream();
    FastaWriter<ProteinSequence, AminoAcidCompound> fastaW =
            new FastaWriter<ProteinSequence, AminoAcidCompound>(os,
                    msa.getAlignedSequences(),
                    new FastaHeaderFormatInterface<ProteinSequence, AminoAcidCompound>() {
                        @Override
                        public String getHeader(ProteinSequence sequence) {
                            return sequence.getAccession().toString();
                        }
                    });
    fastaW.process();
    String biojava = os.toString();

    // Convert the biojava MSA to a forester Msa
    Msa fMsa = ForesterWrapper.convert(msa);

    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < fMsa.getNumberOfSequences(); i++) {
        sb.append(">" + fMsa.getIdentifier(i) + "\n");
        sb.append(fMsa.getSequenceAsString(i) + "\n");
    }
    String forester = sb.toString();

    // Assert that all FASTA files are equal
    assertEquals(expected, biojava);
    assertEquals(expected, forester);
}
示例13: main
import org.biojava.nbio.core.sequence.io.FastaReader; //导入依赖的package包/类
/**
 * Demo entry point: prints heap statistics, then parses a (possibly compressed) FASTA file
 * in chunks of 100 sequences and reports how many sequences were read and the elapsed time.
 *
 * <p>Example input: download
 * ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz
 * and pass in the path to the local copy of the file.
 *
 * @param args {@code args[0]} is the path to the FASTA file
 */
public static void main(String[] args) {

    int mb = 1024*1024;

    //Getting the runtime reference from system
    Runtime runtime = Runtime.getRuntime();

    System.out.println("##### Heap utilization statistics [MB] #####");

    //Print used memory
    System.out.println("Used Memory:"
            + (runtime.totalMemory() - runtime.freeMemory()) / mb);

    //Print free memory
    System.out.println("Free Memory:"
            + runtime.freeMemory() / mb);

    //Print total available memory
    System.out.println("Total Memory:" + runtime.totalMemory() / mb);

    //Print Maximum available memory
    System.out.println("Max Memory:" + runtime.maxMemory() / mb);

    if (args.length < 1) {
        System.err.println("First argument needs to be path to fasta file");
        return;
    }

    File f = new File(args[0]);
    if (!f.exists()) {
        System.err.println("File does not exist " + args[0]);
        return;
    }

    long timeS = System.currentTimeMillis();

    // automatically uncompress files using InputStreamProvider.
    // try-with-resources: the stream was previously never closed; it is now released on
    // both the normal and the exceptional path.
    try (InputStream inStream = new InputStreamProvider().getInputStream(f)) {

        FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(
                inStream,
                new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
                new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));

        LinkedHashMap<String, ProteinSequence> b;
        int nrSeq = 0;

        // process(100) is called until it returns null, which ends the loop.
        while ((b = fastaReader.process(100)) != null) {
            for (int i = 0, chunkSize = b.size(); i < chunkSize; i++) {
                nrSeq++;
                // Progress indicator every 100000 sequences.
                if (nrSeq % 100000 == 0) {
                    System.out.println(nrSeq);
                }
            }
        }

        long timeE = System.currentTimeMillis();
        System.out.println("parsed a total of " + nrSeq + " TREMBL sequences! in " + (timeE - timeS));
    } catch (Exception ex) {
        Logger.getLogger(ParseFastaFileDemo.class.getName()).log(Level.SEVERE, null, ex);
    }
}