当前位置: 首页>>代码示例>>C++>>正文


C++ FASTAReader::ReadAllSequences方法代码示例

本文整理汇总了C++中FASTAReader::ReadAllSequences方法的典型用法代码示例。如果您正苦于以下问题:C++ FASTAReader::ReadAllSequences方法的具体用法?C++ FASTAReader::ReadAllSequences怎么用?C++ FASTAReader::ReadAllSequences使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在FASTAReader的用法示例。


在下文中一共展示了FASTAReader::ReadAllSequences方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: main

int main(int argc, char* argv[]) {
  if (argc < 3) {
    cout << "usage: testRandomSequence genome.fa ntries " << endl;
    exit(0);
  }
  string inFile = argv[1];
  int nSamples = atoi(argv[2]);

  if (nSamples == 0) {
    return 0;
  }

  FASTAReader reader;
  reader.Initialize(inFile);
  vector<FASTASequence> genome;
  reader.ReadAllSequences(genome);
  
  int i;
  cout << "title pos" << endl;
  for (i = 0; i < nSamples; i++) {
    DNALength chrIndex, chrPos;
    FindRandomPos(genome, chrIndex, chrPos);
    cout << genome[chrIndex].title << " " << chrPos << endl;
  }

  return 0;
}
开发者ID:EichlerLab,项目名称:blasr,代码行数:27,代码来源:TestRandomSequence.cpp

示例2: main

int main(int argc, char* argv[]) {
  
  CommandLineParser clp;
  string readsFileName;
  string alignmentsFileName;
  string outputFileName;
  float minMergeIdentity = 0.70;
  clp.RegisterStringOption("reads", &readsFileName, "Reads used for alignments.");
  clp.RegisterStringOption("alignments", &alignmentsFileName, "SAM formatted alignments.");
  clp.RegisterIntOption("k", &vertexSize, "Minimum match length", CommandLineParser::PositiveInteger);
  clp.RegisterStringOption("outfile", &outputFileName, "Alignment output.");
  clp.RegisterPreviousFlagsAsHidden();
  clp.RegisterFlagOption("v", &verbose, "");
  clp.RegisterFloatOption("minMergeIdentity", 
                          &minMergeIdentity, 
                          "Minimum identity to merge paths.", CommandLineParser::PositiveFloat);
  
  clp.ParseCommandLine(argc, argv);

  if (minMergeIdentity < 0 or minMergeIdentity > 1) {
    cout << "ERROR. minMergeIdentity must be between 0 and 1" << endl;
    exit(1);
  }
  
  vector<FASTASequence> reads;

  FASTAReader fastaReader;
  fastaReader.Initialize(readsFileName);
  fastaReader.ReadAllSequences(reads);

  //
  // It is necessary to go from read title to index in the list of reads. 
  //
  map<string, int> readNameToIndex;
  BuildReadNameToIndexMap(reads, readNameToIndex);

  ReadWordMatchVector readWordMatches;
  InitializeFromReads(reads, readWordMatches);
  
  //
  // Get ready to read in the alignments.
  //
  SAMReader<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> samReader;
  samReader.Initialize(alignmentsFileName);
  AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> alignmentSet;
  samReader.ReadHeader(alignmentSet);
  
  SAMAlignment samAlignment;
  AlignmentCandidate<> alignment;
  int numAlignedBases = 0;
  int alignmentIndex = 0;
  while ( samReader.GetNextAlignment( samAlignment ) ) {
    vector<AlignmentCandidate<> > alignments;
    SAMAlignmentsToCandidates(samAlignment,
                              reads,
                              readNameToIndex,
                              alignments, false, true);

    int i;
    ++alignmentIndex;
    int a;
    for (a = 0; a < alignments.size();a++) {
      if (alignments[a].qName != alignments[a].tName) {
        MarkMatches(alignments[a], readNameToIndex, vertexSize, readWordMatches);
      }
    }
    if (alignmentIndex % 1000 == 0) {
      cout << alignmentIndex << endl;
    }
  }


  int numMatches = 0;
  int parentIndex = 1;
  int r;
  for (r = 0; r < readWordMatches.size(); r++) {
    readWordMatches[r].CreateParents();
    numMatches += readWordMatches[r].pos.size();
  }

  vector<int> parentIndices;
  parentIndices.resize(2*numMatches + 1);
  fill(parentIndices.begin(), parentIndices.end(), 0);
  //
  // Start indexing off at 1 so that 0 does not need to be treated in
  // a special case.
  //
  int curParentIndex = 1;
  cout << "There are " << numMatches << " matches." << endl;

  samReader.Close();
  samReader.Initialize(alignmentsFileName);
  AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> alignmentSet2;
  samReader.ReadHeader(alignmentSet2);
  
  numAlignedBases = 0;
  alignmentIndex = 0;
  while ( samReader.GetNextAlignment( samAlignment ) ) {
    vector<AlignmentCandidate<> > alignments;
    SAMAlignmentsToCandidates(samAlignment,
//.........这里部分代码省略.........
开发者ID:BioinformaticsArchive,项目名称:blasr,代码行数:101,代码来源:BuildEBruijnGraph.cpp

示例3: main

int main(int argc, char* argv[]) {


	string refFileName, notNormalFileName, normalFileName;

	if (argc < 4) {
		cout << "usage: normalizeGCContent ref source dest " << endl
				 << "       flips the C/Gs in source randomly until they are the same gc content as ref." << endl;
		exit(1);
	}
		
	refFileName = argv[1];
	notNormalFileName = argv[2];
	normalFileName = argv[3];


	FASTAReader reader;
	FASTAReader queryReader;
	FASTASequence ref;
	vector<FASTASequence> querySequences;
	int queryTotalLength;
	reader.Initialize(refFileName);
	reader.ReadAllSequencesIntoOne(ref);

	queryReader.Initialize(notNormalFileName);
	int refCounts[5], queryCounts[5];
	int s;
	refCounts[0] = refCounts[1] =refCounts[2] = refCounts[3] = refCounts[4] = 0;
	queryCounts[0] = queryCounts[1] =queryCounts[2] = queryCounts[3] = queryCounts[4] = 0;
	
	queryReader.ReadAllSequences(querySequences);
	ofstream normOut;
	CrucialOpen(normalFileName, normOut);

	CountNucs(ref, refCounts);
	
	float refGC = (1.0*refCounts[TwoBit['c']] + refCounts[TwoBit['g']]) / (refCounts[TwoBit['a']] + refCounts[TwoBit['c']] + refCounts[TwoBit['g']] + refCounts[TwoBit['t']]);

	int q;
	for (q = 0; q < querySequences.size(); q++) {
		CountNucs(querySequences[q], queryCounts);
	}

	float queryGC = (1.0*queryCounts[TwoBit['c']] + queryCounts[TwoBit['g']]) / (queryCounts[TwoBit['a']] + queryCounts[TwoBit['c']] + queryCounts[TwoBit['g']] + queryCounts[TwoBit['t']]);

	
	float gcToat = 0.0;
	float atTogc = 0.0;
	if (refGC > queryGC) {
		atTogc = (refGC - queryGC);
	}
	else {
		gcToat = (queryGC - refGC);
	}

	
	DNALength queryGenomeLength = queryCounts[0] +  queryCounts[1] + queryCounts[2] + queryCounts[3] + queryCounts[4];

	DNALength unmaskedQueryLength = queryCounts[0] +  queryCounts[1] + queryCounts[2] + queryCounts[3];

	DNALength ngc2at = unmaskedQueryLength * gcToat;
	DNALength nat2gc = unmaskedQueryLength * atTogc;
	cout << refGC << " " << queryGC << " " << gcToat << " " << atTogc << " " << ngc2at << " " << nat2gc << endl;

	vector<FASTASequence> normalized;

	normalized.resize(querySequences.size());
	vector<DNALength> cumLengths;
	
	cumLengths.resize(normalized.size()+1);
	cumLengths[0] = 0;
	for (q = 0; q < querySequences.size(); q++) {
		normalized[q]   = querySequences[q];
		cumLengths[q+1] = cumLengths[q] + querySequences[q].length;
	}
	
	DNALength i;

																
	for (i = 0; i < ngc2at; i+=2) {
		DNALength pos, chr;
		FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'G', chr, pos);
		normalized[chr].seq[pos] = 'A';
		FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'C', chr, pos);
		normalized[chr].seq[pos] = 'T';		
	}
	
	for (i = 0; i < nat2gc; i+=2) {
		DNALength pos, chr;
		FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'A', chr, pos);
		normalized[chr].seq[pos] = 'g';
		FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'T', chr, pos);
		normalized[chr].seq[pos] = 'c';		
	}

	for (q = 0; q < normalized.size(); q++ ){
		normalized[q].PrintSeq(normOut);
	}

}
开发者ID:EichlerLab,项目名称:blasr,代码行数:100,代码来源:NormalizeGCContent.cpp

示例4: main

int main(int argc, char* argv[]) {
  string program = "samtoh5";
  string versionString = VERSION;
  AppendPerforceChangelist(PERFORCE_VERSION_STRING, versionString);
  string samFileName, cmpFileName, refFileName;
  bool parseSmrtTitle = false;
  bool useShortRefName = false;
  CommandLineParser clp;
  string readType = "standard";
  int verbosity = 0;

  clp.SetProgramName(program);
  clp.SetProgramSummary("Converts in.sam file to out.cmp.h5 file.");
  clp.SetVersion(versionString);

  clp.RegisterStringOption("in.sam", &samFileName, 
                           "Input SAM file.", true);
  clp.RegisterStringOption("reference.fasta", &refFileName, 
                           "Reference used to generate reads.", true);
  clp.RegisterStringOption("out.cmp.h5", &cmpFileName, 
                           "Output cmp.h5 file.", true);
  clp.RegisterPreviousFlagsAsHidden();
  clp.RegisterFlagOption("smrtTitle", &parseSmrtTitle, 
                         "Use this option when converting alignments "
                         "generated from reads produced by the "
                         "pls2fasta from bas.h5 files by parsing read "
                         "coordinates from the SMRT read title.  The title " 
                         "is in the format /name/hole/coordinates, where "
                         "coordinates are in the format \\d+_\\d+, and "
                         "represent the interval of the read that was "
                         "aligned.");
  clp.RegisterStringOption("readType", &readType, 
                         "Set the read type: 'standard', 'strobe', 'CCS', "
                         "or 'cDNA'");
  clp.RegisterIntOption("verbosity", &verbosity, 
                         "Set desired verbosity.", 
                         CommandLineParser::PositiveInteger);
  clp.RegisterFlagOption("useShortRefName", &useShortRefName, 
                         "Use abbreviated reference names obtained "
                         "from file.sam instead of using full names "
                         "from reference.fasta.");
  string description = ("Because SAM has optional tags that have different "
    "meanings in different programs, careful usage is required in order to "
    "have proper output. The \"xs\" tag in bwa-sw is used to show the "
    "suboptimal score, but in PacBio SAM (blasr) it is defined as the start "
    "in the query sequence of the alignment.\nWhen \"-smrtTitle\" is "
    "specified, the xs tag is ignored, but when it is not specified, the "
    "coordinates given by the xs and xe tags are used to define the interval "
    "of a read that is aligned. The CIGAR string is relative to this interval.");
  clp.SetExamples(description);

  clp.ParseCommandLine(argc, argv);

  if (readType != "standard" and readType != "strobe" and 
      readType != "cDNA" and readType != "CCS") {
    cout << "ERROR. Read type '" << readType 
         << "' must be one of either 'standard', 'strobe', 'cDNA' or 'CCS'." 
         << endl;
    exit(1);
  }
    
  cerr << "[INFO] " << GetTimestamp() << " [" << program << "] started." << endl;

  SAMReader<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> samReader;
  FASTAReader fastaReader;
  HDFCmpFile<AlignmentCandidate<FASTASequence, FASTASequence> > cmpFile;

  //
  // Initialize input/output files.
  //
  samReader.Initialize(samFileName);
  fastaReader.Initialize(refFileName);
  cmpFile.Create(cmpFileName);

  //
  // Configure the file log.
  //
  string command;
  CommandLineParser::CommandLineToString(argc, argv, command);
  string log = "Convert sam to cmp.h5";
  cmpFile.fileLogGroup.AddEntry(command, log, program, GetTimestamp(), versionString);

  //
  // Set the readType
  //
  cmpFile.SetReadType(readType);

  //
  // Read necessary input.
  //

  vector<FASTASequence> references;
  fastaReader.ReadAllSequences(references);
  
  //
  // This should probably be handled by the alignmentSetAdapter, but
  // time constraints...
  //
  AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> alignmentSet;
  samReader.ReadHeader(alignmentSet);
//.........这里部分代码省略.........
开发者ID:macmanes,项目名称:blasr,代码行数:101,代码来源:SamToCmpH5.cpp

示例5: main

int main(int argc, char* argv[]) {
  string gencodeGffFileName, genomeFileName, genesOutFileName;
  string geneType = "protein_coding";
  bool randomSplicing = false;
  int numRandomSplicing = 1;
  float pSkip = 0.5;
  if (argc < 4) {
    cout << "Usage: extractGenes gencodeGTFFile genomeFile genesOutFileName [-geneType type (protein_coding)] [-randomSplicing] [-numRandomSplicing n] [-pSkip prob (0-1, default:0.5)]" << endl;
    exit(1);
  }

  gencodeGffFileName = argv[1];
  genomeFileName     = argv[2];
  genesOutFileName   = argv[3];

  int argi = 4;
  string coordinatesFileName;

  while (argi < argc) {
    if (strcmp(argv[argi], "-geneType") == 0) {
      geneType = argv[++argi];
    }
    else if (strcmp(argv[argi], "-randomSplicing") == 0) {
      randomSplicing = true;
    }
    else if (strcmp(argv[argi], "-numRandomSplicing") == 0) {
      numRandomSplicing = atoi(argv[++argi]);
    }
    else if (strcmp(argv[argi], "-pSkip") == 0) {
      pSkip = atof(argv[++argi]);
    }
    else {
      cout << "ERROR, bad option  " << argv[argi] << endl;
      exit(1);
    }
    ++argi;
  }

  coordinatesFileName = genesOutFileName;
  coordinatesFileName.append(".pos");
  FASTAReader reader;
  reader.Initialize(genomeFileName);

  ofstream outFile, coordsFile;
  CrucialOpen(genesOutFileName, outFile, std::ios::out);

  string coordsFileName = genesOutFileName + ".coords";
  CrucialOpen(coordsFileName, coordsFile, std::ios::out);

  vector<FASTASequence> referenceSequences;
  reader.ReadAllSequences(referenceSequences);
  int i;
  map<string, int> titleToIndex;
  for (i = 0; i < referenceSequences.size(); i++) {
    titleToIndex[referenceSequences[i].title] = i;
  }

  GencodeGFFFile gencodeFile;
  gencodeFile.ReadAll(gencodeGffFileName);
  
  vector<GencodeGFFGene> genes;
  IndexGencodeGenes(gencodeFile, genes, geneType);

  for (i = 0; i < genes.size(); i++) {
    genes[i].OrderExonsByStart();
  }

  int e;
  for (i = 0; i < genes.size(); i++) {
    FASTASequence geneSequence;
    geneSequence.CopyTitle(genes[i].geneName);
    if (titleToIndex.find(genes[i].chromosome) == titleToIndex.end()) {
      continue;
    }
    int chrIndex = titleToIndex[genes[i].chromosome];
    string sequence = "";
    //
    // Do nothing with 0 length exons.
    //
    if (genes[i].exons.size() == 0) {
      continue;
    }
    vector<FASTASequence> geneSequences;
    vector<GeneCoordinates> geneCoordinates;
    genes[i].GenerateGeneSequences(referenceSequences[chrIndex], geneSequences, geneCoordinates, randomSplicing);
    int gi;
    for (gi = 0; gi < geneSequences.size(); gi++) {
      if (genes[i].GetStrand() == '+') {
        geneSequences[gi].PrintSeq(outFile);
      }
      else {
        FASTASequence rc;
        geneSequences[gi].MakeRC(rc);
        rc.PrintSeq(outFile);
        rc.Free();
      }
      coordsFile << geneSequences[gi].title << " " << geneCoordinates[gi].chromosome << " " << geneCoordinates[gi].exonCoordinates.size() << " " << geneCoordinates[gi].strand;
      int i;
      for (i = 0; i < geneCoordinates[gi].exonCoordinates.size(); i++) {
        coordsFile << " " 
//.........这里部分代码省略.........
开发者ID:BioinformaticsArchive,项目名称:blasr,代码行数:101,代码来源:ExtractGenes.cpp

示例6: main

int main(int argc, char* argv[]) {
    string inFileName, readsFileName;
    DNALength readLength;
    float coverage = 0;
    bool noRandInit = false;
    int numReads = -1;
    CommandLineParser clp;
    int qualityValue = 20;
    bool printFastq = false;
    int stratify = 0;
    string titleType = "pacbio";
    string fastqType = "illumina"; // or "sanger"
    clp.RegisterStringOption("inFile", &inFileName, "Reference sequence", 0);
    clp.RegisterPreviousFlagsAsHidden();
    clp.RegisterIntOption("readLength", (int*) &readLength, "The length of reads to simulate.  The length is fixed.",
                          CommandLineParser::PositiveInteger, "Length of every read.", 0);
    clp.RegisterFloatOption("coverage", &coverage, "Total coverage (from which the number of reads is calculated",
                            CommandLineParser::PositiveFloat, 0);
    clp.RegisterFlagOption("nonRandInit", &noRandInit, "Skip initializing the random number generator with time.");
    clp.RegisterIntOption("nReads", &numReads, "Total number of reads (from which coverage is calculated)", CommandLineParser::PositiveInteger, 0);
    clp.RegisterStringOption("readsFile", &readsFileName, "Reads output file", 0);
    clp.RegisterFlagOption("fastq", &printFastq, "Fake fastq output with constant quality value (20)");
    clp.RegisterIntOption("quality", &qualityValue, "Value to use for fastq quality", CommandLineParser::PositiveInteger);
    clp.RegisterIntOption("stratify", &stratify, "Sample a read every 'stratify' bases, rather than randomly.", CommandLineParser::PositiveInteger);
    clp.RegisterStringOption("titleType", &titleType, "Set the name of the title: 'pacbio'|'illumina'");
    clp.RegisterStringOption("fastqType", &fastqType, "Set the type of fastq: 'illumina'|'sanger'");
    vector<string> leftovers;
    clp.ParseCommandLine(argc, argv, leftovers);

    if (!noRandInit) {
        InitializeRandomGeneratorWithTime();
    }

    FASTAReader inReader;
    inReader.Init(inFileName);
    vector<FASTASequence> reference;

    inReader.ReadAllSequences(reference);
    ofstream readsFile;
    if (readsFileName == "") {
        cout << "ERROR.  You must specify a reads file." << endl;
        exit(0);
    }
    CrucialOpen(readsFileName, readsFile, std::ios::out);

    ofstream sangerFastqFile;
    if (fastqType == "sanger") {
        string sangerFastqFileName = readsFileName + ".fastq";
        CrucialOpen(sangerFastqFileName, sangerFastqFile, std::ios::out);
    }

    DNALength refLength = 0;
    int i;
    for (i = 0; i < reference.size(); i++) {
        refLength += reference[i].length;
    }
    if (numReads == -1 and coverage == 0 and stratify == 0) {
        cout << "Error, you must specify either coverage, nReads, or stratify." << endl;
        exit(1);
    }
    else if (numReads == -1) {
        numReads = (refLength / readLength) * coverage;
    }

    if (stratify) {
        if (!readLength) {
            cout << "ERROR. If you are using stratification, a read length must be specified." << endl;
            exit(1);
        }
    }

    DNASequence sampleSeq;
    sampleSeq.length = readLength;
    int maxRetry = 10000000;
    int retryNumber = 0;
    DNALength seqIndex, seqPos;
    if (stratify) {
        seqIndex = 0;
        seqPos   = 0;
    }
    DNALength origReadLength = readLength;
    for (i = 0; stratify or i < numReads; i++) {
        if (stratify == 0) {
            FindRandomPos(reference, seqIndex, seqPos, readLength );
        }
        else {
            //
            // find the next start pos, or bail if done
            //
            if (seqPos >= reference[seqIndex].length) {
                if (seqIndex == reference.size() - 1) {
                    break;
                }
                else {
                    seqIndex = seqIndex + 1;
                    seqPos   = 0;
                    continue;
                }
            }
            readLength = min(reference[seqIndex].length - seqPos, origReadLength);
//.........这里部分代码省略.........
开发者ID:ylipacbio,项目名称:ForReferenceOnly,代码行数:101,代码来源:SimpleShredder.cpp

示例7: main

int main(int argc, char* argv[]) {
	
	if (argc < 4) {
		PrintUsage();
		exit(0);
	}

	string rgFileName, vertexSeqFileName, scaffoldDirName;
	
	rgFileName         = argv[1];
	vertexSeqFileName  = argv[2];
	scaffoldDirName    = argv[3];

	string repeatFileName = "";
	bool printRepeatsSeparately = false;
	int argi = 4;
	bool printSeparate=false;
	while (argi < argc) {
		if (strcmp(argv[argi], "-separate") == 0) {
			printSeparate=true;
		}
		else if (strcmp(argv[argi], "-repeats") == 0) {
			printRepeatsSeparately = true;
			repeatFileName = argv[++argi];
		}
		else {
			cout << "bad option: " << argv[argi] << endl;
			PrintUsage();
			exit(1);
		}
		++argi;
	}
	
	FASTAReader vertexSequenceReader;
	vertexSequenceReader.Init(vertexSeqFileName);

	//
	// Input necessary data
	//
	vector<FASTASequence> vertexSequences;
	vertexSequenceReader.ReadAllSequences(vertexSequences);
	RepeatGraph<string> rg;
	rg.ReadGraph(rgFileName);

	vector<FASTASequence> vertexRCSequences;
	VectorIndex vertexIndex;	
	vertexRCSequences.resize(vertexSequences.size());
	for (vertexIndex = 0; vertexIndex < vertexSequences.size(); vertexIndex++ ){
		vertexSequences[vertexIndex].MakeRC(vertexRCSequences[vertexIndex]);
	}
	
	VectorIndex outEdgeIndex;
	int scaffoldIndex = 0;
	ofstream scaffoldOut;

	if (printSeparate==false) {
		// scaffold dir name is really a file name here.
		CrucialOpen(scaffoldDirName, scaffoldOut, std::ios::out);
	}
	for (vertexIndex = 0; vertexIndex < rg.vertices.size(); vertexIndex++ ){
		rg.vertices[vertexIndex].traversed = false;
	}

	//
	// Set up flow for calling multiplicity.
	//
	/*
		Test all this out later.
	AssignMinimumFlowToEdges(rg, 2);
	AssignVertexFlowBalance(rg);
	BalanceKirchhoffFlow(rg);

	UInt edgeIndex;
	for (edgeIndex = 0; edgeIndex < rg.edges.size(); edgeIndex++) {
		if (rg.edges[edgeIndex].flow > 1) {
			cout << edgeIndex << " " << rg.edges[edgeIndex].flow << endl;
		}
	}
	*/

	int numPrintedVertices = 0;
	for (vertexIndex = 0; vertexIndex < rg.vertices.size(); vertexIndex++ ){
		//
		// Look to see if this vertex is a branching vertex.
		//
		if ((rg.vertices[vertexIndex].inEdges.size() != 1 or
				 rg.vertices[vertexIndex].outEdges.size() != 1) and
				rg.vertices[vertexIndex].traversed == false) {

			//
			// This is a branching vertex.  Print all paths from this vertex, but not the vertex
			// itself if it appears repetitive. 
			//
			VectorIndex outEdgeIndex;
			bool printedThisVertex = false;
			for (outEdgeIndex = 0; outEdgeIndex < rg.vertices[vertexIndex].outEdges.size(); outEdgeIndex++ ){
				//
				// This is a branching vertex.
				// 

//.........这里部分代码省略.........
开发者ID:jcombs1,项目名称:blasr,代码行数:101,代码来源:PrintScaffolds.cpp

示例8: main

int main(int argc, char* argv[]) {
    string program = "samtom4";
    string versionString = VERSION;
    AppendPerforceChangelist(PERFORCE_VERSION_STRING, versionString);

    string samFileName, refFileName, outFileName;
    bool printHeader = false;
    bool parseSmrtTitle = false;
    bool useShortRefName = false;

    CommandLineParser clp;
    clp.SetProgramName(program);
    clp.SetVersion(versionString);
    clp.SetProgramSummary("Converts a SAM file generated by blasr to M4 format.");
    clp.RegisterStringOption("in.sam",        &samFileName,
                             "Input SAM file, which is produced by blasr.");
    clp.RegisterStringOption("reference.fasta", &refFileName,
                             "Reference used to generate file.sam.");
    clp.RegisterStringOption("out.m4",          &outFileName,
                             "Output in blasr M4 format.");
    clp.RegisterPreviousFlagsAsHidden();
    clp.RegisterFlagOption("header",            &printHeader,
                           "Print M4 header.");
    clp.RegisterFlagOption("useShortRefName",   &useShortRefName, 
                           "Use abbreviated reference names obtained "
                           "from file.sam instead of using full names "
                           "from reference.fasta.");
    //clp.SetExamples(program + " file.sam reference.fasta out.m4");

    clp.ParseCommandLine(argc, argv);

    ostream * outFilePtr = &cout;
	ofstream outFileStrm;
	if (outFileName != "") {
		CrucialOpen(outFileName, outFileStrm, std::ios::out);
		outFilePtr = &outFileStrm;
	}

    SAMReader<SAMFullReferenceSequence, SAMReadGroup, SAMAlignment> samReader;
    FASTAReader fastaReader;

    //
    // Initialize samReader and fastaReader.
    //
    samReader.Initialize(samFileName);
    fastaReader.Initialize(refFileName);

    //
    // Configure the file log.
    //
    string command;
    CommandLineParser::CommandLineToString(argc, argv, command);

    //
    // Read necessary input.
    //
    vector<FASTASequence> references;
    fastaReader.ReadAllSequences(references);

    AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMAlignment> alignmentSet;
    samReader.ReadHeader(alignmentSet); 

    //
    // The order of references in vector<FASTASequence> references and
    // AlignmentSet<, , >alignmentSet.references can be different.
    // Rearrange alignmentSet.references such that it is ordered in
    // exactly the same way as vector<FASTASequence> references.
    //
    alignmentSet.RearrangeReferences(references);

    //
    // Map short names for references obtained from file.sam to 
    // full names obtained from reference.fasta
    //
    map<string, string> shortRefNameToFull;
    map<string, string>::iterator it;
    assert(references.size() == alignmentSet.references.size());
    if (!useShortRefName) {
        for (size_t i = 0; i < references.size(); i++) {
            string shortRefName = alignmentSet.references[i].GetSequenceName();
            string fullRefName(references[i].title); 
            if (shortRefNameToFull.find(shortRefName) != shortRefNameToFull.end()) {
                cout << "ERROR, Found more than one reference " << shortRefName << "in sam header" << endl;
                exit(1);
            } 
            shortRefNameToFull[shortRefName] = fullRefName;
            alignmentSet.references[i].sequenceName = fullRefName;
        }
    }

    // Map reference name obtained from SAM file to indices
    map<string, int> refNameToIndex;
    for (size_t i = 0; i < references.size(); i++) {
        string refName = alignmentSet.references[i].GetSequenceName();
        refNameToIndex[refName] = i;
    }

    //
    // Store the alignments.
    //
//.........这里部分代码省略.........
开发者ID:Coryza,项目名称:blasr,代码行数:101,代码来源:SamToM4.cpp

示例9: main

int main(int argc, char* argv[]) {
	string rgInName, rgOutName;
	int minPathLength;
	string vertexSequenceFileName;
	if (argc < 5) {
		cout << "usage: trimShortEnds in.rg  vertexSequences minPathLength out.rg" << endl;
		exit(1);
	}

	rgInName      = argv[1];
	vertexSequenceFileName = argv[2];
	minPathLength = atoi(argv[3]);
	rgOutName     = argv[4];

	ofstream rgOut;
	CrucialOpen(rgOutName, rgOut, std::ios::out);
	FASTAReader vertexSequenceReader;
	vertexSequenceReader.Init(vertexSequenceFileName);

	RepeatGraph<string> rg;
	vector<FASTASequence> vertexSequences;
	rg.ReadGraph(rgInName);
	vertexSequenceReader.ReadAllSequences(vertexSequences);

	VectorIndex vertexIndex;
	VectorIndex outEdgeIndex;
	VectorIndex edgeIndex;
	
	if (rg.edges.size() == 0) {
		cout << "LIKELY INVALID GRAPH. There are no edges." << endl;
		return 0;
	}
	//
	// At first, any edge that exists is connected to a vertex. This
	// will change as low coverage edges are deleted and replaced by
	// high coverage edges from the end of the array.
	//
	for (edgeIndex = 0; edgeIndex < rg.edges.size(); edgeIndex++) {
		rg.edges[edgeIndex].connected = true;
	}
	set<std::pair<VectorIndex, VectorIndex> > srcDestToRemove;
	
	for (vertexIndex = 0; vertexIndex < rg.vertices.size(); vertexIndex++) {
		if (rg.vertices[vertexIndex].inEdges.size() == 0 and
				rg.vertices[vertexIndex].outEdges.size() == 1) {
			//
			// This is a source.  Traverse this until a branching vertex or the end is found.
			//
			vector<VectorIndex> path;
			path.push_back(vertexIndex);
			int pathLength = 0;
			VectorIndex pathVertex;
			VectorIndex pathEdge;
			pathEdge = rg.vertices[vertexIndex].outEdges[0];
			pathVertex = rg.edges[pathEdge].dest;
			while (rg.vertices[pathVertex].inEdges.size() == 1 and
						 rg.vertices[pathVertex].outEdges.size() == 1) {
				path.push_back(pathVertex);
				pathEdge   =  rg.vertices[pathVertex].outEdges[0];
				pathVertex =  rg.edges[pathEdge].dest;
				pathLength += vertexSequences[pathVertex/2].length;
			}
			pathLength += vertexSequences[pathVertex/2].length;
			path.push_back(pathVertex);
			if (pathLength < minPathLength and path.size() < 3) {
				//
				// Remove this path, it is too short.
				// Also remove the complement.
				//
				cout << "trimming path of " << path.size() << " is of sequence length " << pathLength << endl;

				VectorIndex pathIndex;
				for (pathIndex = 0; pathIndex < path.size() - 1; pathIndex++) {
					srcDestToRemove.insert(pair<VectorIndex, VectorIndex>(path[pathIndex], path[pathIndex+1]));
					srcDestToRemove.insert(pair<VectorIndex, VectorIndex>(2*(path[pathIndex+1]/2) + !(path[pathIndex+1]%2),
																																2*(path[pathIndex]/2) + !(path[pathIndex]%2)));
				}
			}
		}
	}

	MarkEdgePairsForRemoval(srcDestToRemove, rg.vertices, rg.edges);
	RemoveUnconnectedEdges(rg.vertices, rg.edges);

	rg.WriteGraph(rgOut);
	return 0;
}
开发者ID:BioinformaticsArchive,项目名称:blasr,代码行数:87,代码来源:TrimShortEnds.cpp

示例10: main

int main(int argc, char* argv[]) {


	FASTAReader reader;
	FASTASequence read;
	int maxLength = 100;
	if (argc < 3) {
		cout << "usage: pairAlignAllContigs inFile maxLength equivalencies [-minIdent i]" << endl; 
		exit(0);
	}
	string readsFileName, equivalenciesFileName;
	readsFileName = argv[1];
	maxLength = atoi(argv[2]);
	equivalenciesFileName = argv[3];
	int argi = 4;
  float minIdentity = 80;
	while (argi < argc) {
		if (strcmp(argv[argi], "-minIdent") == 0) {
			minIdentity = atoi(argv[++argi]);
		}
		++argi;
	}
	vector<FASTASequence> reads, readsRC;;
	reader.Init(readsFileName); 
  reader.ReadAllSequences(reads);
	readsRC.resize(reads.size());
	int r;
	for (r =0; r < reads.size();r++) {
	  reads[r].MakeRC(readsRC[r]);
  }
	ofstream equivOut;
	CrucialOpen(equivalenciesFileName, equivOut);

	Matrix<int> alignScores;
	Matrix<float> alignIdentities;
	alignScores.Resize(reads.size(), reads.size());
	alignIdentities.Resize(reads.size(), reads.size());
	vector<int> scoreMat;
	vector<Arrow> pathMat;
	int i, j;
	int alignScore;
	FASTASequence readi, readj;
	FASTASequence rcReadi, rcReadj;
	
	for (i = 0; i < reads.size(); i++) {
		float maxFrontIdent, maxEndIdent;
		int   maxFrontIdentIndex, maxEndIdentIndex;
		maxFrontIdent = 0; maxEndIdent = 0;
		maxFrontIdentIndex = 0;
		maxEndIdentIndex   = 0;
		int maxFrontIdentLength = 0;
		int maxEndIdentLength  = 0;
		int maxFrontLength     = 0;
		int	maxEndLength       = 0;
		int nmaxFrontLengthIndex = 0;
		int maxEndLengthIndex  = 0;
		float maxFrontLengthIdent = 0;
		float maxEndLengthIdent = 0;
		int maxFrontLengthIndex = 0;
		equivOut << reads[i].GetName();
		for (j = 0; j < reads.size(); j++ ){
			// 
			// Store the two ends of the alignment.
			//
			alignScore = 0;
			int rcAlignScore;
			Alignment alignment;
			Alignment rcAlignment;
			Alignment *optAlignment;
			if (i != j) {
  	  	if (reads[i].length < maxLength and reads[j].length < maxLength) {
  				alignScore = SWAlign(reads[i], reads[j], SMRTDistanceMatrix, 3, scoreMat, pathMat, alignment, Global);
  			}
				if (reads[i].length < maxLength and reads[j].length < maxLength) {
          rcAlignScore = SWAlign(reads[i], readsRC[j], SMRTDistanceMatrix, 3, scoreMat, pathMat, rcAlignment, Global);
        }	
  			ComputeAlignmentStats(alignment, reads[i].seq, reads[j].seq, SMRTDistanceMatrix, 3,3 );
        ComputeAlignmentStats(rcAlignment, reads[i].seq, readsRC[j].seq, SMRTDistanceMatrix, 3,3 );

  			if (alignment.pctSimilarity > minIdentity or rcAlignment.pctSimilarity > minIdentity) {
  				equivOut << " " << reads[j].GetName();
  			}	
      }
	}
		equivOut << endl;	
	}

	return 0;
}
开发者ID:jcombs1,项目名称:blasr,代码行数:89,代码来源:PairAlignAllShortContigs.cpp

示例11: main


//.........这里部分代码省略.........
	ofstream outFileStrm;
	if (outFileName != "") {
		CrucialOpen(outFileName, outFileStrm, std::ios::out);
		outFilePtr = &outFileStrm;
	}
    
    GFFFile adapterGffFile;
    if (adapterGffFileName != "")
        adapterGffFile.ReadAll(adapterGffFileName);
    
    SAMReader<SAMFullReferenceSequence, SAMReadGroup, SAMAlignment> samReader;
    FASTAReader fastaReader;

    //
    // Initialize samReader and fastaReader.
    //
    samReader.Initialize(samFileName);
    fastaReader.Initialize(refFileName);

    //
    // Configure the file log.
    //
    string command;
    CommandLineParser::CommandLineToString(argc, argv, command);
    string log = "Filter sam hits.";
    string program = "samFilter";
    string versionString = VERSION;
    AppendPerforceChangelist(PERFORCE_VERSION_STRING, versionString);

    //
    // Read necessary input.
    //
    vector<FASTASequence> references;
    fastaReader.ReadAllSequences(references);

    // If the SAM file is generated by blasr with -titleTable,
    // then references in the SAM are represented by 
    // their corresponding indices in the title table.
    // In that case, we need to convert reference titles in fasta file
    // to their corresponding indices in the title table, such that
    // references in both SAM and fasta files are represented
    // by title table indices and therefore can match.
    if (titleTableName != "") {
        ConvertTitlesToTitleTableIndices(references, titleTableName);
    }
 
    AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMAlignment> alignmentSet;
    vector<string> allHeaders = samReader.ReadHeader(alignmentSet); 

    // Process SAM Header.
    string commandLineString;
    clp.CommandLineToString(argc, argv, commandLineString);
    allHeaders.push_back("@PG\tID:SAMFILTER\tVN:" + versionString + \
                         "\tCL:" + program + " " + commandLineString);
    for (int i = 0; i < allHeaders.size(); i++) {
        outFileStrm << allHeaders[i] << endl;
    }

    //
    // The order of references in vector<FASTASequence> references and
    // AlignmentSet<, , >alignmentSet.references can be different.
    // Rearrange alignmentSet.references such that they are ordered in
    // exactly the same way as vector<FASTASequence> references.
    //
    alignmentSet.RearrangeReferences(references);
开发者ID:Mondale-tw,项目名称:blasr,代码行数:66,代码来源:SamFilter.cpp


注:本文中的FASTAReader::ReadAllSequences方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。