本文整理汇总了Java中org.biojava.nbio.core.sequence.ProteinSequence类的典型用法代码示例。如果您正苦于以下问题：Java ProteinSequence类的具体用法？Java ProteinSequence怎么用？Java ProteinSequence使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
ProteinSequence类属于org.biojava.nbio.core.sequence包,在下文中一共展示了ProteinSequence类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: makeMultifasta
import org.biojava.nbio.core.sequence.ProteinSequence; //导入依赖的package包/类
/**
 * Builds a single multi-FASTA file from a list of FASTA files.
 *
 * <p>Every input file is passed to {@code readProtFasta} together with one shared
 * sequence list. A file that makes the reader throw is reported on standard output
 * (the exception, then the file name) and skipped. The shared list is then handed to
 * {@code writeProtFasta} and a confirmation line is printed.
 *
 * @param fileNames
 *            names of the FASTA files to merge
 * @param outputName
 *            name of the resulting multi-FASTA file
 * @return {@code outputName}, unchanged
 */
public static String makeMultifasta(ArrayList<String> fileNames, String outputName) {
    ArrayList<ProteinSequence> collected = new ArrayList<>();
    for (String fastaName : fileNames) {
        try {
            readProtFasta(fastaName, collected);
        } catch (Exception failure) {
            // Best effort: report the offending file and carry on with the others.
            System.out.println(failure);
            System.out.println(fastaName);
        }
    }
    writeProtFasta(outputName, collected);
    System.out.println("Le fichier multifasta a été créé : " + outputName);
    return outputName;
}
示例2: readProtFasta
import org.biojava.nbio.core.sequence.ProteinSequence; //导入依赖的package包/类
/**
 * Reads a FASTA file of protein sequences and records an accession-derived identifier.
 *
 * <p>For each sequence returned by the reader, the text of its accession is split on
 * single spaces and the first token is stored in {@code enzymeNcbiId}. The field is
 * overwritten on every iteration, so after the call it holds the token of the last
 * sequence iterated (and is left untouched when the reader returns no sequences).
 * An {@link IOException} raised while reading is printed as a stack trace and not
 * rethrown.
 *
 * @param filename
 *            name of the FASTA file to read from
 */
public void readProtFasta(String filename) {
    try {
        LinkedHashMap<String, ProteinSequence> sequencesByHeader =
                FastaReaderHelper.readFastaProteinSequence(new File(filename));
        for (ProteinSequence sequence : sequencesByHeader.values()) {
            String accessionText = sequence.getAccession().toString();
            // Keep only what precedes the first space.
            enzymeNcbiId = accessionText.split(" ")[0];
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
示例3: preprocessPDBsequences
import org.biojava.nbio.core.sequence.ProteinSequence; //导入依赖的package包/类
/**
 * Preprocesses the PDB sequences downloaded from PDB
 * (ftp://ftp.rcsb.org/pub/pdb/derived_data/pdb_seqres.txt.gz), keeping only the
 * protein entries.
 *
 * <p>An entry is kept when the second whitespace-separated token of its original
 * header equals {@code mol:protein}. Each kept entry is written as a {@code >header}
 * line followed by the whole sequence on one line. A header with fewer than two
 * tokens is skipped instead of aborting the run. The output is assembled in memory
 * and written only after the input has been read completely. Any exception is logged
 * and not rethrown.
 *
 * @param infileName
 *            downloaded, gunzipped sequence file
 * @param outfileName
 *            file to write; intended as input for makeblastdb
 */
public void preprocessPDBsequences(String infileName, String outfileName) {
    try {
        log.info("[Preprocessing] Preprocessing PDB sequences... ");
        LinkedHashMap<String, ProteinSequence> a = FastaReaderHelper.readFastaProteinSequence(new File(infileName));
        // Only this method touches the buffer, so the unsynchronized builder is enough.
        StringBuilder sb = new StringBuilder();
        for (Entry<String, ProteinSequence> entry : a.entrySet()) {
            ProteinSequence sequence = entry.getValue();
            String header = sequence.getOriginalHeader().toString();
            String[] tmp = header.split("\\s+");
            // Guard the index: a header with a single token cannot be a protein entry.
            if (tmp.length > 1 && tmp[1].equals("mol:protein")) {
                // one line contains all AA
                sb.append('>').append(header).append('\n')
                        .append(sequence.getSequenceAsString()).append('\n');
            }
        }
        // try-with-resources closes the writer even when write() throws.
        try (FileWriter fw = new FileWriter(new File(outfileName))) {
            fw.write(sb.toString());
        }
        log.info("[Preprocessing] PDB sequences Ready ... ");
    } catch (Exception ex) {
        log.error("[Preprocessing] Fatal Error: Could not Successfully Preprocessing PDB sequences");
        log.error(ex.getMessage());
        ex.printStackTrace();
    }
}
示例4: preprocessPDBsequencesUpdate
import org.biojava.nbio.core.sequence.ProteinSequence; //导入依赖的package包/类
/**
 * Preprocesses a single PDB sequence update file.
 *
 * <p>For every sequence read from {@code infileName}, the original header is split on
 * {@code '|'}, every {@code ':'} in the first part is replaced with {@code '_'}, and
 * the result is written as a {@code >header} line followed by the whole sequence on
 * one line. The output is assembled in memory and written only after the input has
 * been read completely. Any exception is logged and not rethrown.
 *
 * @param infileName
 *            FASTA file to read
 * @param outfileName
 *            file to write
 */
public void preprocessPDBsequencesUpdate(String infileName, String outfileName) {
    try {
        log.info("[Preprocessing] Preprocessing PDB sequences... ");
        LinkedHashMap<String, ProteinSequence> a = FastaReaderHelper.readFastaProteinSequence(new File(infileName));
        // Only this method touches the buffer, so the unsynchronized builder is enough.
        StringBuilder sb = new StringBuilder();
        for (Entry<String, ProteinSequence> entry : a.entrySet()) {
            ProteinSequence sequence = entry.getValue();
            String[] tmp = sequence.getOriginalHeader().toString().split("\\|");
            // Literal character replacement; no regular expression needed.
            String outstr = tmp[0].replace(':', '_');
            // one line contains all AA
            sb.append('>').append(outstr).append('\n')
                    .append(sequence.getSequenceAsString()).append('\n');
        }
        // try-with-resources closes the writer even when write() throws.
        try (FileWriter fw = new FileWriter(new File(outfileName))) {
            fw.write(sb.toString());
        }
    } catch (Exception ex) {
        log.error("[Preprocessing] Fatal Error: Could not Successfully Preprocessing PDB sequences");
        log.error(ex.getMessage());
        ex.printStackTrace();
    }
}
示例5: preprocessUniqSeqEnsembl
import org.biojava.nbio.core.sequence.ProteinSequence; //导入依赖的package包/类
/**
 * For Ensembl: collapses redundant sequences by keying on the sequence string and
 * joining the identifiers of identical sequences with ";".
 *
 * <p>Each FASTA key is mapped through {@code getUniqueSeqIDEnsembl}. When the sequence
 * string is already a key of {@code outHm}, the new identifier is appended to the
 * stored value after a ";"; otherwise a new entry is created. Any exception is logged
 * and not rethrown, leaving {@code outHm} with whatever was added up to that point.
 *
 * @param infilename
 *            FASTA file to read
 * @param outHm
 *            map from sequence string to joined identifiers; modified in place
 * @return {@code outHm}, the same instance that was passed in
 */
HashMap<String, String> preprocessUniqSeqEnsembl(String infilename, HashMap<String, String> outHm) {
    try {
        File fastaFile = new File(infilename);
        LinkedHashMap<String, ProteinSequence> parsed = FastaReaderHelper.readFastaProteinSequence(fastaFile);
        for (Entry<String, ProteinSequence> record : parsed.entrySet()) {
            String residues = record.getValue().getSequenceAsString();
            if (!outHm.containsKey(residues)) {
                // First occurrence of this sequence.
                outHm.put(residues, getUniqueSeqIDEnsembl(record.getKey()));
                continue;
            }
            // Duplicate sequence: extend the identifier list already stored.
            outHm.put(residues, outHm.get(residues) + ";" + getUniqueSeqIDEnsembl(record.getKey()));
        }
    } catch (Exception ex) {
        log.error(ex.getMessage());
        ex.printStackTrace();
    }
    return outHm;
}
示例6: preprocessUniqSeqUniprot
import org.biojava.nbio.core.sequence.ProteinSequence; //导入依赖的package包/类
/**
 * For Uniprot: collapses redundant sequences by keying on the sequence string and
 * joining the identifiers of identical sequences with ";".
 *
 * <p>Each FASTA key is mapped through {@code getUniqueSeqIDUniprot} together with
 * {@code accMap}. When the sequence string is already a key of {@code outHm}, the new
 * identifier is appended to the stored value after a ";"; otherwise a new entry is
 * created. Any exception is logged and not rethrown, leaving {@code outHm} with
 * whatever was added up to that point.
 *
 * @param infilename
 *            FASTA file to read
 * @param accMap
 *            map handed unchanged to {@code getUniqueSeqIDUniprot}
 * @param outHm
 *            map from sequence string to joined identifiers; modified in place
 * @return {@code outHm}, the same instance that was passed in
 */
HashMap<String, String> preprocessUniqSeqUniprot(String infilename, HashMap<String, String> accMap,
        HashMap<String, String> outHm) {
    try {
        File fastaFile = new File(infilename);
        LinkedHashMap<String, ProteinSequence> parsed = FastaReaderHelper.readFastaProteinSequence(fastaFile);
        for (Entry<String, ProteinSequence> record : parsed.entrySet()) {
            String residues = record.getValue().getSequenceAsString();
            if (!outHm.containsKey(residues)) {
                // First occurrence of this sequence.
                outHm.put(residues, getUniqueSeqIDUniprot(record.getKey(), accMap));
                continue;
            }
            // Duplicate sequence: extend the identifier list already stored.
            outHm.put(residues, outHm.get(residues) + ";" + getUniqueSeqIDUniprot(record.getKey(), accMap));
        }
    } catch (Exception ex) {
        log.error(ex.getMessage());
        ex.printStackTrace();
    }
    return outHm;
}
示例7: filterSequenceSimilar
import org.biojava.nbio.core.sequence.ProteinSequence; //导入依赖的package包/类
/**
 * Filters a {@link SegmentDataRDD} by minimum sequence similarity to a reference sequence.
 *
 * <p>Every segment sequence is aligned locally against the reference with a BLOSUM65
 * matrix, a gap-open penalty of 8 and a gap-extension penalty of 1. A segment is
 * dropped only when the aligner's similarity is strictly below {@code minSimilarity}.
 *
 * @param segmentDataRDD the {@link SegmentDataRDD} to filter
 * @param inputSequence the reference sequence to compare
 * @param minSimilarity the minimum similarity (as a double between 0.00 and 1.00)
 * @return a new {@link SegmentDataRDD} wrapping the filtered segments
 * @throws CompoundNotFoundException if Biojava cannot accurately convert the String sequence to a {@link ProteinSequence}
 */
public static SegmentDataRDD filterSequenceSimilar(SegmentDataRDD segmentDataRDD, String inputSequence, double minSimilarity) throws CompoundNotFoundException {
    ProteinSequence reference = new ProteinSequence(inputSequence);

    // Scoring scheme shared by every pairwise alignment below.
    SubstitutionMatrix<AminoAcidCompound> blosum65 = SubstitutionMatrixHelper.getBlosum65();
    int gapOpen = 8;
    int gapExtend = 1;
    GapPenalty gapPenalty = new SimpleGapPenalty();
    gapPenalty.setOpenPenalty(gapOpen);
    gapPenalty.setExtensionPenalty(gapExtend);

    return new SegmentDataRDD(segmentDataRDD.getSegmentRDD().filter(segment -> {
        ProteinSequence candidate = new ProteinSequence(segment._2.getSequence());
        PairwiseSequenceAligner<ProteinSequence, AminoAcidCompound> aligner = Alignments.getPairwiseAligner(
                reference, candidate, PairwiseSequenceAlignerType.LOCAL, gapPenalty, blosum65);
        // Negated "<" rather than ">=" so a NaN similarity is still kept, as before.
        return !(aligner.getSimilarity() < minSimilarity);
    }));
}
示例8: swAlignment
import org.biojava.nbio.core.sequence.ProteinSequence; //导入依赖的package包/类
/**
 * Aligns {@code query} against {@code subject} locally and stores the outcome.
 *
 * <p>Both strings are turned into {@link ProteinSequence}s labelled "Query" and
 * "Subject", aligned with BLOSUM62 and a default-constructed {@code SimpleGapPenalty}.
 * The pairwise alignment is stored in {@code alignment} and its fractional similarity
 * score in {@code score}.
 *
 * @throws CompoundNotFoundException if either string cannot be converted to a {@link ProteinSequence}
 */
private void swAlignment() throws CompoundNotFoundException {
    ProteinSequence querySeq = new ProteinSequence(query);
    ProteinSequence subjectSeq = new ProteinSequence(subject);
    querySeq.setAccession(new AccessionID("Query"));
    subjectSeq.setAccession(new AccessionID("Subject"));

    SubstitutionMatrix<AminoAcidCompound> blosum62 = SimpleSubstitutionMatrix.getBlosum62();
    alignment = Alignments.getPairwiseAlignment(
            querySeq,
            subjectSeq,
            Alignments.PairwiseSequenceAlignerType.LOCAL,
            new SimpleGapPenalty(),
            blosum62);

    FractionalSimilarityScorer<ProteinSequence, AminoAcidCompound> similarity =
            new FractionalSimilarityScorer<>(alignment);
    score = similarity.getScore();
}
示例9: setLowercaseToNull
import org.biojava.nbio.core.sequence.ProteinSequence; //导入依赖的package包/类
/**
 * Takes a {@link ProteinSequence} which was created by a
 * {@link CasePreservingProteinSequenceCreator}. Uses the case info
 * stored in the user collection to modify the output array.
 *
 * <p>Sets elements of the output array which correspond to lowercase letters
 * to null. An element is treated as lowercase when the entry at the same
 * iteration position of the user collection is {@code false}; all other
 * elements of {@code out} are left untouched.
 *
 * @param seq Input sequence with case stored as the user collection
 * @param out Array to modify in place; must have as many elements as the
 *            user collection of {@code seq}
 * @throws IllegalArgumentException if {@code seq} has no user collection, or
 *             its size differs from {@code out.length}
 */
public static void setLowercaseToNull( ProteinSequence seq,
        Object[] out) {
    // should have been set by seq creator
    Collection<Object> userCollection = seq.getUserCollection();
    if (userCollection == null) {
        throw new IllegalArgumentException("Sequence doesn't contain valid case info");
    }
    if (userCollection.size() != out.length) {
        throw new IllegalArgumentException("Sequence length doesn't match output array length");
    }
    int pos = 0;
    for (Object isAligned : userCollection) {
        assert (isAligned instanceof Boolean);
        if (!(Boolean) isAligned) {
            out[pos] = null;
        }
        pos++;
    }
}
示例10: fastaToAfpChain
import org.biojava.nbio.core.sequence.ProteinSequence; //导入依赖的package包/类
/**
 * Uses two sequences each with a corresponding structure to create an AFPChain corresponding to the alignment. Provided only for convenience since FastaReaders return such maps.
 *
 * <p>The first sequence in the map's iteration order is paired with {@code structure1}
 * and the second with {@code structure2}.
 *
 * @param sequences
 *            A Map containing exactly two entries from sequence names as Strings to gapped ProteinSequences; the name is ignored
 * @param structure1
 *            structure passed alongside the first sequence; must not be null
 * @param structure2
 *            structure passed alongside the second sequence; must not be null
 * @return the result of {@link #fastaToAfpChain(ProteinSequence, ProteinSequence, Structure, Structure)}
 * @see #fastaToAfpChain(ProteinSequence, ProteinSequence, Structure, Structure)
 * @throws StructureException
 * @throws IllegalArgumentException
 *             if the map does not hold exactly two sequences or a structure is null
 */
public static AFPChain fastaToAfpChain(Map<String, ProteinSequence> sequences, Structure structure1,
        Structure structure2) throws StructureException {
    if (sequences.size() != 2) {
        throw new IllegalArgumentException("There must be exactly 2 sequences, but there were " + sequences.size());
    }
    if (structure1 == null || structure2 == null) {
        throw new IllegalArgumentException("A structure is null");
    }
    // The names (map keys) are ignored, so only the values are copied out.
    List<ProteinSequence> seqs = new ArrayList<ProteinSequence>(sequences.values());
    return fastaToAfpChain(seqs.get(0), seqs.get(1), structure1, structure2);
}
示例11: setup
import org.biojava.nbio.core.sequence.ProteinSequence; //导入依赖的package包/类
/**
 * Builds the fixture: four four-residue proteins wrapped as single-sequence profiles,
 * two profile-profile alignments (1 with 2, 3 with 4) and a third alignment of the
 * two resulting pairs, all scored with BLOSUM62 and {@code SimpleGapPenalty(2, 1)}.
 */
@Before
public void setup() throws CompoundNotFoundException {
    // Scoring scheme shared by all three aligners.
    gaps = new SimpleGapPenalty(2, 1);
    blosum62 = SubstitutionMatrixHelper.getBlosum62();

    // First pair: two identical sequences.
    protein1 = new ProteinSequence("ARND");
    protein2 = new ProteinSequence("ARND");
    prof1 = new SimpleProfile<ProteinSequence, AminoAcidCompound>(protein1);
    prof2 = new SimpleProfile<ProteinSequence, AminoAcidCompound>(protein2);
    sppa1 = new SimpleProfileProfileAligner<ProteinSequence, AminoAcidCompound>(prof1, prof2, gaps, blosum62);
    pp1 = sppa1.getPair();

    // Second pair: two different sequences.
    protein3 = new ProteinSequence("HILK");
    protein4 = new ProteinSequence("ANDR");
    prof3 = new SimpleProfile<ProteinSequence, AminoAcidCompound>(protein3);
    prof4 = new SimpleProfile<ProteinSequence, AminoAcidCompound>(protein4);
    sppa2 = new SimpleProfileProfileAligner<ProteinSequence, AminoAcidCompound>(prof3, prof4, gaps, blosum62);
    pp2 = sppa2.getPair();

    // Alignment of the two pairs produced above.
    sppa3 = new SimpleProfileProfileAligner<ProteinSequence, AminoAcidCompound>(pp1, pp2, gaps, blosum62);
    all = sppa3.getPair();
}
示例12: testFromFasta
import org.biojava.nbio.core.sequence.ProteinSequence; //导入依赖的package包/类
/**
 * Converts a gapped alignment of 1w0p and 1qdm into an AFPChain, checks its
 * equivalent-residue count and alignment length, and compares its XML form with the
 * stored expected file.
 */
@Test
public void testFromFasta() throws IOException, StructureException, CompoundNotFoundException {
    Structure s1 = cache.getStructure("1w0p");
    Structure s2 = cache.getStructure("1qdm");
    ProteinSequence seq1 = new ProteinSequence("GWGG----SEL--YRRNTSLNS--QQDW-------QSNAKIRIVDGAA-----NQIQ");
    ProteinSequence seq2 = new ProteinSequence("WMQNQLAQNKT--QDLILDYVNQLCNRL---PSPMESAV----DCGSLGSMPDIEFT");
    AFPChain afpChain = FastaAFPChainConverter.fastaToAfpChain(seq1, seq2, s1, s2);
    assertEquals("Wrong number of EQRs", 33, afpChain.getNrEQR());
    assertEquals("Wrong number of alnLength",53,afpChain.getAlnLength());
    String xml = AFPChainXMLConverter.toXML(afpChain);
    File expected = new File("src/test/resources/1w0p_1qdm.xml");
    File x = File.createTempFile("1w0p_1qdm_output", "xml.tmp");
    x.deleteOnExit();
    // try-with-resources flushes and closes the writer even when write() throws.
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(x))) {
        bw.write(xml);
    }
    boolean match = compareXml(expected, x);
    if (!match) {
        // Dump the generated XML to make the mismatch diagnosable.
        System.err.println(xml);
        fail("AFPChain is wrong");
    }
}
示例13: testGetProteinSequenceForStructure
import org.biojava.nbio.core.sequence.ProteinSequence; //导入依赖的package包/类
/**
 * Checks that the sequence extracted from {@code struct1} equals {@code seq1} and that
 * every sequence index is mapped to a group with the expected PDB residue number and
 * one-letter amino acid code.
 */
@Test
public void testGetProteinSequenceForStructure() {
    Map<Integer,Group> groupByIndex = new HashMap<Integer,Group>();
    ProteinSequence extracted = StructureSequenceMatcher.getProteinSequenceForStructure(struct1, groupByIndex);
    final int expectedLength = seq1.length();

    // Returned sequence
    assertEquals("Unreported residues", expectedLength, extracted.getLength() );
    assertEquals("Modified residues",seq1, extracted.toString());

    // Index-to-group mapping
    assertEquals("Missing residues in mapping",expectedLength,groupByIndex.size());
    for (int idx = 0; idx < expectedLength; idx++) {
        assertTrue("no mapping for group "+idx,groupByIndex.containsKey(idx));
        Group group = groupByIndex.get(idx);
        ResidueNumber residueNumber = group.getResidueNumber();
        Character oneLetter = StructureTools.get1LetterCodeAmino(group.getPDBName());
        assertEquals("Wrong PDB number at pos "+idx,pdbNum1[idx],residueNumber.toString());
        assertEquals("Wrong Amino acid at pos "+idx,
                Character.valueOf(seq1.charAt(idx)),oneLetter);
    }
}
示例14: testGetAlignedSequencesSArray
import org.biojava.nbio.core.sequence.ProteinSequence; //导入依赖的package包/类
/**
 * Checks {@code getAlignedSequences} with several sequences on the {@code global},
 * {@code local} and {@code single} fixtures: the number of rows returned and the
 * gapped text of each row.
 */
@Test
public void testGetAlignedSequencesSArray() {
    // JUnit's assertEquals takes (expected, actual); keeping that order makes a
    // failure report the right value as "expected".
    List<AlignedSequence<ProteinSequence, AminoAcidCompound>> list = global.getAlignedSequences(query, query,
            target);
    assertEquals(3, list.size());
    assertEquals("ARND-", list.get(0).toString());
    assertEquals("ARND-", list.get(1).toString());
    assertEquals("-R-DG", list.get(2).toString());
    list = local.getAlignedSequences(target, query, target);
    assertEquals(3, list.size());
    assertEquals("R-D", list.get(0).toString());
    assertEquals("RND", list.get(1).toString());
    assertEquals("R-D", list.get(2).toString());
    list = single.getAlignedSequences(query, query);
    assertEquals(2, list.size());
    assertEquals("ARND", list.get(0).toString());
    assertEquals("ARND", list.get(1).toString());
}
示例15: testConstructor
import org.biojava.nbio.core.sequence.ProteinSequence; //导入依赖的package包/类
/**
 * Checks that a sequence built by {@link CasePreservingProteinSequenceCreator} carries
 * one Boolean per input character in its user collection, and that each Boolean equals
 * {@link Character#isUpperCase(char)} of the corresponding input character.
 */
@Test
public void testConstructor() throws CompoundNotFoundException {
    String seq = "aCDEfgHI-Jkl";
    CasePreservingProteinSequenceCreator creator = new CasePreservingProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet());
    ProteinSequence prot = (ProteinSequence) creator.getSequence(seq, 0);
    Collection<Object> caseInfo = prot.getUserCollection();

    // Test some assumptions. Hopefully these hold on non-English locales too?
    assertFalse(Character.isUpperCase('-'));
    assertFalse(Character.isUpperCase('.'));
    assertEquals("Lengths differ",seq.length(),caseInfo.size());

    Object[] flags = caseInfo.toArray();
    for (int idx = 0; idx < flags.length; idx++) {
        Object flag = flags[idx];
        assertTrue("Not a Boolean",flag instanceof Boolean);
        Boolean isUpper = (Boolean) flag;
        assertEquals("Doesn't match case of "+seq.charAt(idx),Character.isUpperCase(seq.charAt(idx)),isUpper);
    }
}