当前位置: 首页>>代码示例>>C++>>正文


C++ FASTAReader::Initialize方法代码示例

本文整理汇总了C++中FASTAReader::Initialize方法的典型用法代码示例。如果您正苦于以下问题:C++ FASTAReader::Initialize方法的具体用法?C++ FASTAReader::Initialize怎么用?C++ FASTAReader::Initialize使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在FASTAReader的用法示例。


在下文中一共展示了FASTAReader::Initialize方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: main

int main(int argc, char* argv[]) {
	string refFileName, queryFileName;
	int maxHammingDistance;
	if (argc < 4) {
		cout << "usage: hammer ref query maxHam " << endl;
		exit(1);
	}
	refFileName = argv[1];
	queryFileName = argv[2];
	maxHammingDistance = atoi(argv[3]);

	FASTAReader reader;
	reader.Initialize(refFileName);
	FASTASequence ref, refRC;
	reader.GetNext(ref);
	ref.MakeRC(refRC);
	
	FASTAReader queryReader;
	queryReader.Initialize(queryFileName);
	FASTASequence query;
	queryReader.GetNext(query);
	DNALength p;
	for(p=0; p < ref.length-query.length-1; p++ ){
		DNASequence subseq;
		subseq.seq = &ref.seq[p];
		subseq.length = query.length;
		//		cout << "t "; subseq.PrintSeq(cout);
		//		cout << "q "; ((DNASequence*)&query)->PrintSeq(cout);
		if (HammingDistance(&subseq.seq[0], &query.seq[0], query.length) < maxHammingDistance) {
			cout << ">" << p << endl;
			subseq.PrintSeq(cout);
		}
		int i;
		for (i =0; i < query.length; i++) {
			subseq.seq[i] = toupper(subseq.seq[i]);
		}
	}

	for(p=0; p < ref.length-query.length-1; p++ ){
		DNASequence subseq;
		subseq.seq = &refRC.seq[p];
		subseq.length = query.length;
		if (HammingDistance(&subseq.seq[0], &query.seq[0], query.length) < maxHammingDistance) {
			cout << ">" << p << "rc" << endl;
			subseq.PrintSeq(cout);
		}
		int i;
		for (i =0; i < query.length; i++) {
			subseq.seq[i] = toupper(subseq.seq[i]);
		}
	}

}
开发者ID:EichlerLab,项目名称:blasr,代码行数:53,代码来源:Hammer.cpp

示例2: main

int main(int argc, char* argv[]) {
  string inFileName, outFileName;
  int length;
  inFileName = argv[1];
  outFileName = argv[2];
  length = atoi(argv[3]);

  int argi = 4;
  int stride = 0;
  float coverage = 0;
  while (argi < argc) {
    if (strcmp(argv[argi], "-stride")) {
      stride = atoi(argv[++argi]);
    }
    else if (strcmp(argv[argi], "-coverage")) {
      coverage = atof(argv[++argi]);
    }
    ++argi;
  }

  FASTAReader reader;
  reader.Initialize(inFileName);
  FASTASequence genome;
  reader.GetNext(genome);
  if (stride == 0 and coverage == 0) {
    cout << "ERROR, must provide stride or coverage. " << endl;
    exit(1);
  }
  if (stride == 0) {
    stride = genome.length * coverage / length;
  }
开发者ID:asifemon,项目名称:blasr,代码行数:31,代码来源:FileGenome.cpp

示例3: main

int main(int argc, char* argv[]) {
  if (argc < 3) {
    cout << "usage: testRandomSequence genome.fa ntries " << endl;
    exit(0);
  }
  string inFile = argv[1];
  int nSamples = atoi(argv[2]);

  if (nSamples == 0) {
    return 0;
  }

  FASTAReader reader;
  reader.Initialize(inFile);
  vector<FASTASequence> genome;
  reader.ReadAllSequences(genome);
  
  int i;
  cout << "title pos" << endl;
  for (i = 0; i < nSamples; i++) {
    DNALength chrIndex, chrPos;
    FindRandomPos(genome, chrIndex, chrPos);
    cout << genome[chrIndex].title << " " << chrPos << endl;
  }

  return 0;
}
开发者ID:EichlerLab,项目名称:blasr,代码行数:27,代码来源:TestRandomSequence.cpp

示例4: main

int main(int argc, char* argv[]) {

	if (argc < 4) {
		PrintUsage();
		exit(1);
	}
	int argi = 1;
	string saInFile = argv[argi++];
	string genomeFileName = argv[argi++];
	string saOutFile = argv[argi++];
	vector<string> inFiles;
	
	int doBLT = 0;
	int doBLCP = 0;
	int bltPrefixLength = 0;
	int lcpLength = 0;
	int parsingOptions = 0;
	
	while (argi < argc) {
		if (strcmp(argv[argi], "-blt") == 0) {
			doBLT = 1;
			bltPrefixLength = atoi(argv[++argi]);
		}
		else if (strcmp(argv[argi], "-blcp") == 0) {
			doBLCP = 1;
				lcpLength = atoi(argv[++argi]);
		}
		else {
			PrintUsage();
			cout << "Bad option: " << argv[argi] << endl;
			exit(1);
		}
		++argi;
	}

	//
	// Read the suffix array to modify.
	//

	DNASuffixArray  sa;
	sa.Read(saInFile);

	FASTAReader reader;
	reader.Initialize(genomeFileName);
	FASTASequence seq;
	reader.ReadAllSequencesIntoOne(seq);

	
	if (doBLT) {
		sa.BuildLookupTable(seq.seq, seq.length, bltPrefixLength);
	}

	if (doBLCP) {
		cout << "LCP Table not yet implemented." << endl;
	}

	sa.Write(saOutFile);

}
开发者ID:BioinformaticsArchive,项目名称:blasr,代码行数:59,代码来源:SAModify.cpp

示例5: main

int main(int argc, char* argv[])
{
    std::string seqInName, seqOutName, dotOutName;
    if (argc < 4) {
        std::cout << "usage: exciseRepeats inName repMaskOutFile outName" << std::endl;
        std::exit(EXIT_FAILURE);
    }

    seqInName = argv[1];
    dotOutName = argv[2];
    seqOutName = argv[3];
    FASTAReader reader;
    reader.Initialize(seqInName);
    FASTASequence origSeq;
    reader.GetNext(origSeq);

    std::ifstream dotOutFile;
    CrucialOpen(dotOutName, dotOutFile);
    std::ofstream seqOutFile;
    std::ofstream seqOut;
    CrucialOpen(seqOutName, seqOut, std::ios::out);
    std::string dotOutLine;
    getline(dotOutFile, dotOutLine);
    getline(dotOutFile, dotOutLine);
    getline(dotOutFile, dotOutLine);
    while (getline(dotOutFile, dotOutLine)) {
        std::stringstream lineStrm(dotOutLine);
        int swScore;
        float pctDiv, pctDel, pctIns;
        std::string query;
        DNALength qPosBegin, qPosEnd;
        std::string left;
        char strand;
        std::string matchingRepeat;
        std::string repClass;
        std::string repPos, repEnd, repLeft;
        int id;
        lineStrm >> swScore >> pctDiv >> pctDel >> pctIns >> query >> qPosBegin >> qPosEnd >>
            left >> strand >> matchingRepeat >> repClass >> repPos >> repEnd >> repLeft >> id;
        for (DNALength seqPos = qPosBegin; seqPos < qPosEnd; seqPos++) {
            origSeq.seq[seqPos] = 'X';
        }
    }

    DNALength seqPos, unexPos;
    unexPos = 0;
    for (seqPos = 0; seqPos < origSeq.length; seqPos++) {
        if (origSeq.seq[seqPos] != 'X') {
            origSeq.seq[unexPos] = origSeq.seq[seqPos];
            unexPos++;
        }
    }
    origSeq.length = unexPos;

    origSeq.PrintSeq(seqOut);
    return 0;
}
开发者ID:Jiason2017,项目名称:blasr,代码行数:57,代码来源:ExciseRepeats.cpp

示例6: main

int main(int argc, char* argv[1]) {
	if (argc < 3) {
		cout << "Usage: findUnique genome.fasta query.fasta effective_k [options]" << endl;
		cout << "  genome.fasta.sa must exist." << endl;
		cout << "  Finds sequences at least effective_k in length that are unique." << endl;
		cout << "  -max m       Allow up to m matches" << endl;
		cout << "  -minLength l Ensure the length of the match is at least this." << endl;
		cout << "  -prefix p n  Allow up to n matches across a prefix of length p" << endl;
		cout << "  -suffix s n  Allow up to n matches across a suffix of length s" << endl;
		cout << "               Prefix and suffix options override max." << endl;
		cout << "  -out file    Print queries to this output file (query.fasta.queries)" << endl;
		exit(0);
	}

	DNASuffixArray sarray;
	
	string genomeFileName = argv[1];
	string suffixArrayFileName = genomeFileName + ".sa";
	
	FASTAReader reader;
	FASTASequence genome;

	int maxN = 0;

	int prefix = 0;
	int suffix = 0;
	int prefixN = 0;
	int suffixN = 0;
	int argi = 4;
	string outputFileName = "";
	int minLength = 0;
	while (argi < argc) {
		if (strcmp(argv[argi], "-max") == 0) {
			++argi;
			maxN = atoi(argv[argi]);
		}
		else if (strcmp(argv[argi], "-prefix") == 0) {
			++argi;
			prefix = atoi(argv[argi]);
			++argi;
			prefixN = atoi(argv[argi]);
		}
		else if (strcmp(argv[argi], "-suffix") == 0) {
			++argi;
			suffix = atoi(argv[argi]);
			++argi;
			suffixN = atoi(argv[argi]);
		}
		else if (strcmp(argv[argi], "-out") == 0) {
			++argi;
			outputFileName = argv[argi];
		}
		else if (strcmp(argv[argi], "-minLength") == 0) {
			++argi;
			minLength = atoi(argv[argi]);
		}
		++argi;
	}

	reader.Initialize(genomeFileName);
	reader.ReadAllSequencesIntoOne(genome);
	sarray.Read(suffixArrayFileName);

	FASTAReader queryReader;
	FASTASequence querySequence;
	string queryFileName = argv[2];
	int maxLength = atoi(argv[3]);
	string summaryTableFileName = queryFileName + ".summary";
	if (outputFileName == "") {
		outputFileName = queryFileName + ".queries";
	}
		
	
	ofstream summaryTable(summaryTableFileName.c_str());
	ofstream outputFile(outputFileName.c_str());

	queryReader.Initialize(queryFileName);

	while (queryReader.GetNext(querySequence)) {
		int i;
		cerr << "searching " << querySequence.title << endl;
		if (querySequence.length < maxLength) {
			continue;
		}

		int nMatches = 0;
		querySequence.ToUpper();
		int localMax;
		for (i = 0; i < querySequence.length - maxLength + 1; i++) {
			if ((i + 1) % 100000 == 0) {
				cerr << "processed: " << i + 1 << endl;
			}

			int lcpLength;
			vector<SAIndex> lcpLeftBounds, lcpRightBounds;
			vector<SAIndex> rclcpLeftBounds, rclcpRightBounds;
			localMax = maxN;
			if (i < prefix) {
				localMax = prefixN;
			}
//.........这里部分代码省略.........
开发者ID:JinfengChen,项目名称:chm1_scripts,代码行数:101,代码来源:FindUnique.cpp

示例7: main

int main(int argc, char* argv[]) {
  
  CommandLineParser clp;
  string readsFileName;
  string alignmentsFileName;
  string outputFileName;
  float minMergeIdentity = 0.70;
  clp.RegisterStringOption("reads", &readsFileName, "Reads used for alignments.");
  clp.RegisterStringOption("alignments", &alignmentsFileName, "SAM formatted alignments.");
  clp.RegisterIntOption("k", &vertexSize, "Minimum match length", CommandLineParser::PositiveInteger);
  clp.RegisterStringOption("outfile", &outputFileName, "Alignment output.");
  clp.RegisterPreviousFlagsAsHidden();
  clp.RegisterFlagOption("v", &verbose, "");
  clp.RegisterFloatOption("minMergeIdentity", 
                          &minMergeIdentity, 
                          "Minimum identity to merge paths.", CommandLineParser::PositiveFloat);
  
  clp.ParseCommandLine(argc, argv);

  if (minMergeIdentity < 0 or minMergeIdentity > 1) {
    cout << "ERROR. minMergeIdentity must be between 0 and 1" << endl;
    exit(1);
  }
  
  vector<FASTASequence> reads;

  FASTAReader fastaReader;
  fastaReader.Initialize(readsFileName);
  fastaReader.ReadAllSequences(reads);

  //
  // It is necessary to go from read title to index in the list of reads. 
  //
  map<string, int> readNameToIndex;
  BuildReadNameToIndexMap(reads, readNameToIndex);

  ReadWordMatchVector readWordMatches;
  InitializeFromReads(reads, readWordMatches);
  
  //
  // Get ready to read in the alignments.
  //
  SAMReader<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> samReader;
  samReader.Initialize(alignmentsFileName);
  AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> alignmentSet;
  samReader.ReadHeader(alignmentSet);
  
  SAMAlignment samAlignment;
  AlignmentCandidate<> alignment;
  int numAlignedBases = 0;
  int alignmentIndex = 0;
  while ( samReader.GetNextAlignment( samAlignment ) ) {
    vector<AlignmentCandidate<> > alignments;
    SAMAlignmentsToCandidates(samAlignment,
                              reads,
                              readNameToIndex,
                              alignments, false, true);

    int i;
    ++alignmentIndex;
    int a;
    for (a = 0; a < alignments.size();a++) {
      if (alignments[a].qName != alignments[a].tName) {
        MarkMatches(alignments[a], readNameToIndex, vertexSize, readWordMatches);
      }
    }
    if (alignmentIndex % 1000 == 0) {
      cout << alignmentIndex << endl;
    }
  }


  int numMatches = 0;
  int parentIndex = 1;
  int r;
  for (r = 0; r < readWordMatches.size(); r++) {
    readWordMatches[r].CreateParents();
    numMatches += readWordMatches[r].pos.size();
  }

  vector<int> parentIndices;
  parentIndices.resize(2*numMatches + 1);
  fill(parentIndices.begin(), parentIndices.end(), 0);
  //
  // Start indexing off at 1 so that 0 does not need to be treated in
  // a special case.
  //
  int curParentIndex = 1;
  cout << "There are " << numMatches << " matches." << endl;

  samReader.Close();
  samReader.Initialize(alignmentsFileName);
  AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> alignmentSet2;
  samReader.ReadHeader(alignmentSet2);
  
  numAlignedBases = 0;
  alignmentIndex = 0;
  while ( samReader.GetNextAlignment( samAlignment ) ) {
    vector<AlignmentCandidate<> > alignments;
    SAMAlignmentsToCandidates(samAlignment,
//.........这里部分代码省略.........
开发者ID:BioinformaticsArchive,项目名称:blasr,代码行数:101,代码来源:BuildEBruijnGraph.cpp

示例8: main

int main(int argc, char* argv[]) {


	string refFileName, notNormalFileName, normalFileName;

	if (argc < 4) {
		cout << "usage: normalizeGCContent ref source dest " << endl
				 << "       flips the C/Gs in source randomly until they are the same gc content as ref." << endl;
		exit(1);
	}
		
	refFileName = argv[1];
	notNormalFileName = argv[2];
	normalFileName = argv[3];


	FASTAReader reader;
	FASTAReader queryReader;
	FASTASequence ref;
	vector<FASTASequence> querySequences;
	int queryTotalLength;
	reader.Initialize(refFileName);
	reader.ReadAllSequencesIntoOne(ref);

	queryReader.Initialize(notNormalFileName);
	int refCounts[5], queryCounts[5];
	int s;
	refCounts[0] = refCounts[1] =refCounts[2] = refCounts[3] = refCounts[4] = 0;
	queryCounts[0] = queryCounts[1] =queryCounts[2] = queryCounts[3] = queryCounts[4] = 0;
	
	queryReader.ReadAllSequences(querySequences);
	ofstream normOut;
	CrucialOpen(normalFileName, normOut);

	CountNucs(ref, refCounts);
	
	float refGC = (1.0*refCounts[TwoBit['c']] + refCounts[TwoBit['g']]) / (refCounts[TwoBit['a']] + refCounts[TwoBit['c']] + refCounts[TwoBit['g']] + refCounts[TwoBit['t']]);

	int q;
	for (q = 0; q < querySequences.size(); q++) {
		CountNucs(querySequences[q], queryCounts);
	}

	float queryGC = (1.0*queryCounts[TwoBit['c']] + queryCounts[TwoBit['g']]) / (queryCounts[TwoBit['a']] + queryCounts[TwoBit['c']] + queryCounts[TwoBit['g']] + queryCounts[TwoBit['t']]);

	
	float gcToat = 0.0;
	float atTogc = 0.0;
	if (refGC > queryGC) {
		atTogc = (refGC - queryGC);
	}
	else {
		gcToat = (queryGC - refGC);
	}

	
	DNALength queryGenomeLength = queryCounts[0] +  queryCounts[1] + queryCounts[2] + queryCounts[3] + queryCounts[4];

	DNALength unmaskedQueryLength = queryCounts[0] +  queryCounts[1] + queryCounts[2] + queryCounts[3];

	DNALength ngc2at = unmaskedQueryLength * gcToat;
	DNALength nat2gc = unmaskedQueryLength * atTogc;
	cout << refGC << " " << queryGC << " " << gcToat << " " << atTogc << " " << ngc2at << " " << nat2gc << endl;

	vector<FASTASequence> normalized;

	normalized.resize(querySequences.size());
	vector<DNALength> cumLengths;
	
	cumLengths.resize(normalized.size()+1);
	cumLengths[0] = 0;
	for (q = 0; q < querySequences.size(); q++) {
		normalized[q]   = querySequences[q];
		cumLengths[q+1] = cumLengths[q] + querySequences[q].length;
	}
	
	DNALength i;

																
	for (i = 0; i < ngc2at; i+=2) {
		DNALength pos, chr;
		FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'G', chr, pos);
		normalized[chr].seq[pos] = 'A';
		FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'C', chr, pos);
		normalized[chr].seq[pos] = 'T';		
	}
	
	for (i = 0; i < nat2gc; i+=2) {
		DNALength pos, chr;
		FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'A', chr, pos);
		normalized[chr].seq[pos] = 'g';
		FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'T', chr, pos);
		normalized[chr].seq[pos] = 'c';		
	}

	for (q = 0; q < normalized.size(); q++ ){
		normalized[q].PrintSeq(normOut);
	}

}
开发者ID:EichlerLab,项目名称:blasr,代码行数:100,代码来源:NormalizeGCContent.cpp

示例9: main

int main(int argc, char* argv[]) {
  string program = "samtoh5";
  string versionString = VERSION;
  AppendPerforceChangelist(PERFORCE_VERSION_STRING, versionString);
  string samFileName, cmpFileName, refFileName;
  bool parseSmrtTitle = false;
  bool useShortRefName = false;
  CommandLineParser clp;
  string readType = "standard";
  int verbosity = 0;

  clp.SetProgramName(program);
  clp.SetProgramSummary("Converts in.sam file to out.cmp.h5 file.");
  clp.SetVersion(versionString);

  clp.RegisterStringOption("in.sam", &samFileName, 
                           "Input SAM file.", true);
  clp.RegisterStringOption("reference.fasta", &refFileName, 
                           "Reference used to generate reads.", true);
  clp.RegisterStringOption("out.cmp.h5", &cmpFileName, 
                           "Output cmp.h5 file.", true);
  clp.RegisterPreviousFlagsAsHidden();
  clp.RegisterFlagOption("smrtTitle", &parseSmrtTitle, 
                         "Use this option when converting alignments "
                         "generated from reads produced by the "
                         "pls2fasta from bas.h5 files by parsing read "
                         "coordinates from the SMRT read title.  The title " 
                         "is in the format /name/hole/coordinates, where "
                         "coordinates are in the format \\d+_\\d+, and "
                         "represent the interval of the read that was "
                         "aligned.");
  clp.RegisterStringOption("readType", &readType, 
                         "Set the read type: 'standard', 'strobe', 'CCS', "
                         "or 'cDNA'");
  clp.RegisterIntOption("verbosity", &verbosity, 
                         "Set desired verbosity.", 
                         CommandLineParser::PositiveInteger);
  clp.RegisterFlagOption("useShortRefName", &useShortRefName, 
                         "Use abbreviated reference names obtained "
                         "from file.sam instead of using full names "
                         "from reference.fasta.");
  string description = ("Because SAM has optional tags that have different "
    "meanings in different programs, careful usage is required in order to "
    "have proper output. The \"xs\" tag in bwa-sw is used to show the "
    "suboptimal score, but in PacBio SAM (blasr) it is defined as the start "
    "in the query sequence of the alignment.\nWhen \"-smrtTitle\" is "
    "specified, the xs tag is ignored, but when it is not specified, the "
    "coordinates given by the xs and xe tags are used to define the interval "
    "of a read that is aligned. The CIGAR string is relative to this interval.");
  clp.SetExamples(description);

  clp.ParseCommandLine(argc, argv);

  if (readType != "standard" and readType != "strobe" and 
      readType != "cDNA" and readType != "CCS") {
    cout << "ERROR. Read type '" << readType 
         << "' must be one of either 'standard', 'strobe', 'cDNA' or 'CCS'." 
         << endl;
    exit(1);
  }
    
  cerr << "[INFO] " << GetTimestamp() << " [" << program << "] started." << endl;

  SAMReader<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> samReader;
  FASTAReader fastaReader;
  HDFCmpFile<AlignmentCandidate<FASTASequence, FASTASequence> > cmpFile;

  //
  // Initialize input/output files.
  //
  samReader.Initialize(samFileName);
  fastaReader.Initialize(refFileName);
  cmpFile.Create(cmpFileName);

  //
  // Configure the file log.
  //
  string command;
  CommandLineParser::CommandLineToString(argc, argv, command);
  string log = "Convert sam to cmp.h5";
  cmpFile.fileLogGroup.AddEntry(command, log, program, GetTimestamp(), versionString);

  //
  // Set the readType
  //
  cmpFile.SetReadType(readType);

  //
  // Read necessary input.
  //

  vector<FASTASequence> references;
  fastaReader.ReadAllSequences(references);
  
  //
  // This should probably be handled by the alignmentSetAdapter, but
  // time constraints...
  //
  AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> alignmentSet;
  samReader.ReadHeader(alignmentSet);
//.........这里部分代码省略.........
开发者ID:macmanes,项目名称:blasr,代码行数:101,代码来源:SamToCmpH5.cpp

示例10: main

int main(int argc, char* argv[]) {
  string gencodeGffFileName, genomeFileName, genesOutFileName;
  string geneType = "protein_coding";
  bool randomSplicing = false;
  int numRandomSplicing = 1;
  float pSkip = 0.5;
  if (argc < 4) {
    cout << "Usage: extractGenes gencodeGTFFile genomeFile genesOutFileName [-geneType type (protein_coding)] [-randomSplicing] [-numRandomSplicing n] [-pSkip prob (0-1, default:0.5)]" << endl;
    exit(1);
  }

  gencodeGffFileName = argv[1];
  genomeFileName     = argv[2];
  genesOutFileName   = argv[3];

  int argi = 4;
  string coordinatesFileName;

  while (argi < argc) {
    if (strcmp(argv[argi], "-geneType") == 0) {
      geneType = argv[++argi];
    }
    else if (strcmp(argv[argi], "-randomSplicing") == 0) {
      randomSplicing = true;
    }
    else if (strcmp(argv[argi], "-numRandomSplicing") == 0) {
      numRandomSplicing = atoi(argv[++argi]);
    }
    else if (strcmp(argv[argi], "-pSkip") == 0) {
      pSkip = atof(argv[++argi]);
    }
    else {
      cout << "ERROR, bad option  " << argv[argi] << endl;
      exit(1);
    }
    ++argi;
  }

  coordinatesFileName = genesOutFileName;
  coordinatesFileName.append(".pos");
  FASTAReader reader;
  reader.Initialize(genomeFileName);

  ofstream outFile, coordsFile;
  CrucialOpen(genesOutFileName, outFile, std::ios::out);

  string coordsFileName = genesOutFileName + ".coords";
  CrucialOpen(coordsFileName, coordsFile, std::ios::out);

  vector<FASTASequence> referenceSequences;
  reader.ReadAllSequences(referenceSequences);
  int i;
  map<string, int> titleToIndex;
  for (i = 0; i < referenceSequences.size(); i++) {
    titleToIndex[referenceSequences[i].title] = i;
  }

  GencodeGFFFile gencodeFile;
  gencodeFile.ReadAll(gencodeGffFileName);
  
  vector<GencodeGFFGene> genes;
  IndexGencodeGenes(gencodeFile, genes, geneType);

  for (i = 0; i < genes.size(); i++) {
    genes[i].OrderExonsByStart();
  }

  int e;
  for (i = 0; i < genes.size(); i++) {
    FASTASequence geneSequence;
    geneSequence.CopyTitle(genes[i].geneName);
    if (titleToIndex.find(genes[i].chromosome) == titleToIndex.end()) {
      continue;
    }
    int chrIndex = titleToIndex[genes[i].chromosome];
    string sequence = "";
    //
    // Do nothing with 0 length exons.
    //
    if (genes[i].exons.size() == 0) {
      continue;
    }
    vector<FASTASequence> geneSequences;
    vector<GeneCoordinates> geneCoordinates;
    genes[i].GenerateGeneSequences(referenceSequences[chrIndex], geneSequences, geneCoordinates, randomSplicing);
    int gi;
    for (gi = 0; gi < geneSequences.size(); gi++) {
      if (genes[i].GetStrand() == '+') {
        geneSequences[gi].PrintSeq(outFile);
      }
      else {
        FASTASequence rc;
        geneSequences[gi].MakeRC(rc);
        rc.PrintSeq(outFile);
        rc.Free();
      }
      coordsFile << geneSequences[gi].title << " " << geneCoordinates[gi].chromosome << " " << geneCoordinates[gi].exonCoordinates.size() << " " << geneCoordinates[gi].strand;
      int i;
      for (i = 0; i < geneCoordinates[gi].exonCoordinates.size(); i++) {
        coordsFile << " " 
//.........这里部分代码省略.........
开发者ID:BioinformaticsArchive,项目名称:blasr,代码行数:101,代码来源:ExtractGenes.cpp

示例11: main

int main(int argc, char* argv[]) {
    string program = "samtom4";
    string versionString = VERSION;
    AppendPerforceChangelist(PERFORCE_VERSION_STRING, versionString);

    string samFileName, refFileName, outFileName;
    bool printHeader = false;
    bool parseSmrtTitle = false;
    bool useShortRefName = false;

    CommandLineParser clp;
    clp.SetProgramName(program);
    clp.SetVersion(versionString);
    clp.SetProgramSummary("Converts a SAM file generated by blasr to M4 format.");
    clp.RegisterStringOption("in.sam",        &samFileName,
                             "Input SAM file, which is produced by blasr.");
    clp.RegisterStringOption("reference.fasta", &refFileName,
                             "Reference used to generate file.sam.");
    clp.RegisterStringOption("out.m4",          &outFileName,
                             "Output in blasr M4 format.");
    clp.RegisterPreviousFlagsAsHidden();
    clp.RegisterFlagOption("header",            &printHeader,
                           "Print M4 header.");
    clp.RegisterFlagOption("useShortRefName",   &useShortRefName, 
                           "Use abbreviated reference names obtained "
                           "from file.sam instead of using full names "
                           "from reference.fasta.");
    //clp.SetExamples(program + " file.sam reference.fasta out.m4");

    clp.ParseCommandLine(argc, argv);

    ostream * outFilePtr = &cout;
	ofstream outFileStrm;
	if (outFileName != "") {
		CrucialOpen(outFileName, outFileStrm, std::ios::out);
		outFilePtr = &outFileStrm;
	}

    SAMReader<SAMFullReferenceSequence, SAMReadGroup, SAMAlignment> samReader;
    FASTAReader fastaReader;

    //
    // Initialize samReader and fastaReader.
    //
    samReader.Initialize(samFileName);
    fastaReader.Initialize(refFileName);

    //
    // Configure the file log.
    //
    string command;
    CommandLineParser::CommandLineToString(argc, argv, command);

    //
    // Read necessary input.
    //
    vector<FASTASequence> references;
    fastaReader.ReadAllSequences(references);

    AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMAlignment> alignmentSet;
    samReader.ReadHeader(alignmentSet); 

    //
    // The order of references in vector<FASTASequence> references and
    // AlignmentSet<, , >alignmentSet.references can be different.
    // Rearrange alignmentSet.references such that it is ordered in
    // exactly the same way as vector<FASTASequence> references.
    //
    alignmentSet.RearrangeReferences(references);

    //
    // Map short names for references obtained from file.sam to 
    // full names obtained from reference.fasta
    //
    map<string, string> shortRefNameToFull;
    map<string, string>::iterator it;
    assert(references.size() == alignmentSet.references.size());
    if (!useShortRefName) {
        for (size_t i = 0; i < references.size(); i++) {
            string shortRefName = alignmentSet.references[i].GetSequenceName();
            string fullRefName(references[i].title); 
            if (shortRefNameToFull.find(shortRefName) != shortRefNameToFull.end()) {
                cout << "ERROR, Found more than one reference " << shortRefName << "in sam header" << endl;
                exit(1);
            } 
            shortRefNameToFull[shortRefName] = fullRefName;
            alignmentSet.references[i].sequenceName = fullRefName;
        }
    }

    // Map reference name obtained from SAM file to indices
    map<string, int> refNameToIndex;
    for (size_t i = 0; i < references.size(); i++) {
        string refName = alignmentSet.references[i].GetSequenceName();
        refNameToIndex[refName] = i;
    }

    //
    // Store the alignments.
    //
//.........这里部分代码省略.........
开发者ID:Coryza,项目名称:blasr,代码行数:101,代码来源:SamToM4.cpp

示例12: main

int main(int argc, char* argv[]) {

	string cmpFileName;
	string refFileName;
	string readsFileName;
  string mapqvTrackName;
	if (argc < 2) {
		cout << "  printMapqvTrack: print a gff file of the average mapping quality value" << endl;
		exit(1);
	}
	vector<int> refPositions;
	cmpFileName = argv[1];
	refFileName = argv[2];
  mapqvFileName = argv[3];

	CmpFile cmpFile;
	FASTASequence ref;
	FASTAReader reader;

	reader.Initialize(refFileName);
	reader.GetNext(ref);

	HDFBasReader basReader;

	SMRTSequence seq, *seqPtr;

	vector<int> refCoverage;
	refCoverage.resize(ref.length);
	std::fill(refCoverage.begin(), refCoverage.end(), 0);
	/*
	 * These guys pull information from the same pls file.
	 */
	HDFCmpReader<CmpAlignment> cmpReader;


	if (cmpReader.Initialize(cmpFileName) == 0) {
		cout << "ERROR, could not open the cmp file." << endl;
		exit(1);
	}
	
	
	cmpReader.Read(cmpFile);
	UInt alignmentIndex;

	//	movieIndexSets.resize(nMovies);
	for (alignmentIndex = 0; alignmentIndex < cmpFile.alnInfo.alignments.size(); alignmentIndex++) {
		int refSeqId    = cmpFile.alnInfo.alignments[alignmentIndex].GetRefSeqId();
		int readGroupId = cmpFile.alnInfo.alignments[alignmentIndex].GetReadGroupId();
		int refSeqIdIndex;
		if (cmpFile.refSeqTable.GetIndexOfId(refSeqId, refSeqIdIndex) == false) {
			//
			// Sanity check -- we're only looking at alignments to references in the cmp file.
			//
			cout << "ERROR, ref seq id: " << refSeqId << " should exist in the cmp file but it does not." << endl;
			assert(0);
		}

		int readGroupIdIndex;
		cmpFile.readGroupTable.GetIndexOfId(readGroupId, readGroupIdIndex);
		
		string readGroupPath    = cmpFile.readGroupTable.names[readGroupIdIndex];
		string readGroup        = cmpReader.readGroupPathToReadGroup[readGroupPath];
		int readGroupArrayIndex = cmpReader.refAlignGroups[refSeqIdIndex]->experimentNameToIndex[readGroup];
		vector<char> alignedSequence, alignedTarget;

		//
		// This read overlaps one of the ref positions.
		
		UInt offsetEnd, offsetBegin;
				
		offsetEnd   = cmpFile.alnInfo.alignments[alignmentIndex].GetOffsetEnd();
		offsetBegin = cmpFile.alnInfo.alignments[alignmentIndex].GetOffsetBegin();
		vector<unsigned char> byteAlignment;
		int alignedSequenceLength = offsetEnd - offsetBegin;
		if (alignedSequenceLength >= 0) {
			alignedSequence.resize(alignedSequenceLength);
			alignedTarget.resize(alignedSequenceLength);
			byteAlignment.resize(alignedSequenceLength);
		}

		cmpReader.refAlignGroups[refSeqIdIndex]->readGroups[readGroupArrayIndex]->alignmentArray.Read(offsetBegin, offsetEnd, &byteAlignment[0]);
		UInt refStart = cmpFile.alnInfo.alignments[alignmentIndex].GetRefStart();
		UInt refEnd   = cmpFile.alnInfo.alignments[alignmentIndex].GetRefEnd();
		UInt readStart= cmpFile.alnInfo.alignments[alignmentIndex].GetQueryStart();
		UInt readEnd  = cmpFile.alnInfo.alignments[alignmentIndex].GetQueryEnd();
		//
		// Read the alignment string.
		//
		if (refSeqIdIndex > 0) continue;


		

		
		//
		// Convert to something we can compare easily.
		//
		alignedSequence[alignedSequence.size()-1]= '\0';
		ByteAlignmentToQueryString(&byteAlignment[0], byteAlignment.size(), &alignedSequence[0]);
		ByteAlignmentToRefString(&byteAlignment[0], byteAlignment.size(), &alignedTarget[0]);
//.........这里部分代码省略.........
开发者ID:csuliweilong,项目名称:blasr,代码行数:101,代码来源:PrintMapqvTrack.cpp

示例13: main


//.........这里部分代码省略.........
    string errMsg;
    if (not filterCriteria.MakeSane(errMsg)) {
        cout << errMsg << endl;
        exit(1);
    }

    // Parse hole number ranges. 
    if (holeNumberStr.size() != 0) {
        if (not holeNumberRanges.setRanges(holeNumberStr)) {
            cout << "Could not parse hole number ranges: "
                 << holeNumberStr << "." << endl;
            exit(1);
        } 
    }

    // Open output file.
    ostream * outFilePtr = &cout;
	ofstream outFileStrm;
	if (outFileName != "") {
		CrucialOpen(outFileName, outFileStrm, std::ios::out);
		outFilePtr = &outFileStrm;
	}
    
    GFFFile adapterGffFile;
    if (adapterGffFileName != "")
        adapterGffFile.ReadAll(adapterGffFileName);
    
    SAMReader<SAMFullReferenceSequence, SAMReadGroup, SAMAlignment> samReader;
    FASTAReader fastaReader;

    //
    // Initialize samReader and fastaReader.
    //
    samReader.Initialize(samFileName);
    fastaReader.Initialize(refFileName);

    //
    // Configure the file log.
    //
    string command;
    CommandLineParser::CommandLineToString(argc, argv, command);
    string log = "Filter sam hits.";
    string program = "samFilter";
    string versionString = VERSION;
    AppendPerforceChangelist(PERFORCE_VERSION_STRING, versionString);

    //
    // Read necessary input.
    //
    vector<FASTASequence> references;
    fastaReader.ReadAllSequences(references);

    // If the SAM file is generated by blasr with -titleTable,
    // then references in the SAM are represented by 
    // their corresponding indices in the title table.
    // In that case, we need to convert reference titles in fasta file
    // to their corresponding indices in the title table, such that
    // references in both SAM and fasta files are represented
    // by title table indices and therefore can match.
    if (titleTableName != "") {
        ConvertTitlesToTitleTableIndices(references, titleTableName);
    }
 
    AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMAlignment> alignmentSet;
    vector<string> allHeaders = samReader.ReadHeader(alignmentSet); 
开发者ID:Mondale-tw,项目名称:blasr,代码行数:66,代码来源:SamFilter.cpp


注:本文中的FASTAReader::Initialize方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。