本文整理汇总了C++中StringArray::AddTokens方法的典型用法代码示例。如果您正苦于以下问题：C++ StringArray::AddTokens方法的具体用法？C++ StringArray::AddTokens怎么用？C++ StringArray::AddTokens使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类StringArray的用法示例。
在下文中一共展示了StringArray::AddTokens方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: addLineFromVcf
/// Parses one tab-separated data line of an annotated VCF and hands the
/// variant to addGeneFromVcf() for each gene named in its ANNOFULL entry.
///
/// The 8th column (INFO) is split on ';' and the first item starting with
/// "ANNOFULL" is used. Its value is split on '|' into alternatives, and each
/// alternative is split on ":/=" so that token 0 is the gene name.
///
/// When func_upper is non-empty, a gene is added only if checkPatternMatch()
/// accepts the alternative for at least one entry of func_upper. Otherwise a
/// gene is added unless its upper-cased name contains "INTERGENIC".
///
/// @param buffer One line of the VCF file (header lines must be filtered out
///               by the caller).
void GroupFromAnnotation::addLineFromVcf( String & buffer )
{
    // sample:
    // ANNO=Nonsynonymous:ASB16;
    // ANNOFULL=ASB16/NM_080863:+:Nonsynonymous(CCC/Pro/P->ACC/Thr/T:Base1310/1362:Codon437/454:Exon5/5):Exon
    // |C17orf65/NM_178542:-:Intron
    StringArray vfield;
    vfield.AddTokens(buffer, "\t");
    if ( vfield.Length() < 8 ) {
        error("Annotation vcf only has %d columns!\n", vfield.Length());
        // NOTE(review): error() presumably terminates the program; the return
        // only protects the vfield[7] access below in case it does not.
        return;
    }

    StringArray info_semicolon;
    info_semicolon.AddTokens( vfield[7],";" );

    // find ANNOFULL first
    int annofull_index = -1;
    for( int i=0; i<info_semicolon.Length(); i++ ) {
        String iheader = info_semicolon[i].SubStr(0,8);
        if (iheader == "ANNOFULL") {
            annofull_index = i;
            break;
        }
    }
    if (annofull_index == -1) {
        // Report the variant by its CHROM and POS columns. The INFO items
        // printed previously are not a position, and the second item is not
        // guaranteed to exist.
        printf("warning: no ANNOFULL field at chr%s:%s. Variant won't included in groups!\n", vfield[0].c_str(), vfield[1].c_str());
        return;
    }

    // remove ANNOFULL=
    String anno_full_str = info_semicolon[annofull_index].SubStr(9);

    // check each alternative field
    StringArray alts;
    alts.AddTokens( anno_full_str, "|" );
    for( int a=0; a<alts.Length(); a++ ) {
        StringArray sub;
        sub.AddTokens( alts[a], ":/=");
        if ( sub.Length() == 0 ) // nothing to name a gene with
            continue;

        if (func_upper.Length() != 0) { // match before add
            for(int f =0;f<func_upper.Length();f++) {
                bool pattern_match = checkPatternMatch( sub, func_upper[f] );
                if ( pattern_match ) {
                    addGeneFromVcf( vfield, sub[0] );
                    break; // one match is enough; do not add the gene twice
                }
            }
        }
        else { // no pattern to match: check if intergenic first
            String upper_name = sub[0].ToUpper();
            // SlowFind() is assumed to return -1 when the pattern is absent,
            // like Find(). The former `!SlowFind(...)` was true only for a
            // match at index 0, i.e. it kept exactly the intergenic entries.
            if ( upper_name.SlowFind( "INTERGENIC" ) == -1 )
                addGeneFromVcf( vfield, sub[0] );
        }
    }
}
示例2: loadMatrix
/// Reads a whitespace-delimited table of integers from a text file into `a`.
///
/// Each line that contains at least one token becomes one matrix row; blank
/// lines are skipped. Every row must have the same number of columns as the
/// matrix already has (once it has any).
///
/// @param a        Matrix to fill. It is zeroed first and grown as needed.
/// @param fileName Path of the file to read.
/// @return 0 on success, -1 if a line has a different column count.
///
/// NOTE(review): values are converted with atol(), so any fractional part in
/// the file is dropped. A failure to open the file is not detected here.
int loadMatrix(Matrix& a, String& fileName) {
    a.Zero();
    IFILE ifile(fileName.c_str(), "r");
    String line;
    StringArray array;
    int lineNo = 0; // physical line number, used only for diagnostics
    int row = 0;    // index of the next matrix row to fill
    while (!ifeof(ifile)) {
        line.ReadLine(ifile);
        lineNo++;
        if (line.Length() == 0) continue;

        array.Clear(); // AddTokens appends, so start from an empty array
        array.AddTokens(line);
        if (array.Length() == 0) continue; // whitespace-only line

        if (a.cols != 0 && a.cols != array.Length()) {
            fprintf(stderr, "Wrong column size at line %d!\n", lineNo);
            array.Print();
            line.Write(stdout);
            return -1;
        }
        a.GrowTo(a.rows, array.Length());

        // Rows are indexed by the number of data lines seen, not by the file
        // line number; otherwise a skipped blank line would push the write
        // past the last allocated row.
        if (a.rows <= row) {
            a.GrowTo(row + 1, a.cols);
        }
        for (int i = 0; i < array.Length(); i++) {
            a[row][i] = atol(array[i]);
        }
        row++;
    }
    // a.Print(stdout);
    return 0;
}
示例3: loadVector
/// Reads one integer per line from a text file into `a`.
///
/// Blank and whitespace-only lines are skipped. A line with more than one
/// token is reported on stderr and aborts the load.
///
/// @param a        Vector to fill. It is zeroed first and grown as needed.
/// @param fileName Path of the file to read.
/// @return 0 on success, -1 if a line holds more than one value.
///
/// NOTE(review): values are converted with atol(), so any fractional part in
/// the file is dropped. A failure to open the file is not detected here.
int loadVector(Vector& a, String& fileName) {
    a.Zero();
    IFILE ifile(fileName.c_str(), "r");
    String line;
    StringArray array;
    int lineNo = 0; // physical line number, used only for diagnostics
    int slot = 0;   // index of the next vector element to fill
    while (!ifeof(ifile)) {
        line.ReadLine(ifile);
        lineNo++;
        if (line.Length() == 0) continue;

        array.Clear(); // AddTokens appends, so start from an empty array
        array.AddTokens(line);
        if (array.Length() == 0) continue; // whitespace only: no array[0] to read

        if (array.Length() > 1) {
            fprintf(stderr, "Warning: column size at line %d!\n", lineNo);
            array.Print();
            line.Write(stdout);
            return -1;
        }

        // Elements are indexed by the number of values read, not by the file
        // line number; otherwise a skipped blank line would push the write
        // past the end of the vector.
        if (a.dim <= slot) {
            a.GrowTo(slot + 1);
        }
        a[slot] = atol(array[0]);
        slot++;
    }
    // a.Print(stdout);
    return 0;
}
示例4: LoadRegions
/// Loads a region list and marks the covered reference positions in
/// regionIndicator.
///
/// Each usable line has at least three whitespace-separated fields: a
/// chromosome name, a start position and an end position. Empty lines, lines
/// starting with '#', lines with fewer than three fields, and regions whose
/// start cannot be located in the reference are skipped.
///
/// @param regionsFile  Path of the region list; an empty path is a no-op.
/// @param genome       Loaded reference used to translate chromosome and
///                     position into an index into regionIndicator.
/// @param invertRegion When true, every flag is flipped after loading.
void GCContent::LoadRegions(String & regionsFile, GenomeSequence &genome, bool invertRegion)
{
    if(regionsFile.Length()==0) return;
    if(genome.sequenceLength()==0) error("No reference genome loaded!\n");

    IFILE fhRegions;
    fhRegions = ifopen(regionsFile.c_str(),"r");
    if(fhRegions==NULL)
        error("Open regions file %s failed!\n", regionsFile.c_str());

    regionIndicator.resize(genome.sequenceLength());

    StringArray tokens;
    String buffer;
    fprintf(stderr, "Loading region list...");
    while (!ifeof(fhRegions)){
        // Reset per-line state up front. AddTokens appends, and clearing only
        // at the bottom of the loop let every `continue` below leave the
        // rejected line's tokens in place, where they shadowed later lines.
        tokens.Clear();
        buffer.Clear();

        buffer.ReadLine(fhRegions);
        if (buffer.IsEmpty() || buffer[0] == '#') continue;
        tokens.AddTokens(buffer, WHITESPACE);
        if(tokens.Length() < 3) continue;

        const int startPosition = tokens[1].AsInteger();
        const int endPosition = tokens[2].AsInteger();

        // use chromosome name (token[0]) and position (token[1]) to query genome index.
        genomeIndex_t startGenomeIndex = genome.getGenomePosition(tokens[0].c_str(), startPosition);
        if(startGenomeIndex >= regionIndicator.size() ) {
            //fprintf(stderr, "WARNING: region list section %s position %u is not found in the reference and skipped...\n", tokens[0].c_str(), startPosition);
            continue;
        }

        const int len = endPosition - startPosition + 1;
        if (len <= 0) continue; // end before start: nothing to mark

        // Compute the exclusive end in 64 bits and clamp it to the table so a
        // region running past the reference cannot write out of bounds.
        uint64_t stopIndex = static_cast<uint64_t>(startGenomeIndex) + static_cast<uint64_t>(len);
        if (stopIndex > regionIndicator.size())
            stopIndex = regionIndicator.size();

        for(uint64_t i=startGenomeIndex; i<stopIndex; i++)
            regionIndicator[i] = true;
    }

    if (invertRegion) {
        fprintf(stderr, " invert region...");
        for (uint32_t i = 0; i < regionIndicator.size(); i++) {
            regionIndicator[i] = !regionIndicator[i];
        }
    }

    ifclose(fhRegions);
    fprintf(stderr, "DONE!\n");
}
示例5: StringToArray
void StringToArray(const String & input, IntArray & values, int desired)
{
StringArray tokens;
tokens.AddTokens(input, ',');
values.Dimension(desired);
values.Zero();
if (tokens.Length())
for (int i = 0; i < desired; i++)
values[i] = tokens[i % tokens.Length()].AsInteger();
}
示例6: GetGroupFromVCF
// partition 8th column only
void GroupFromAnnotation::GetGroupFromVCF()
{
printf("Parsing annotations from annotated VCF file ...\n");
StringArray func;
func.AddTokens(function,"/");
vcfInitialize(); // set size of the tables
FILE * inFile;
inFile = fopen(vcfInput,"r");
StringIntHash groupHash;
int geneCount=0;
// add all genes to group hash first
while (!feof(inFile))
{
String buffer;
buffer.ReadLine(inFile);
if ( buffer[0] == '#' )
continue;
addLineFromVcf( buffer );
}
fclose(inFile);
// sort SNPlist and SNPNoAllele
for( int g=0; g<geneCount; g++ ) {
if ( SNPlist[g].Length()>1 ) {
Vector order;
setOrderFromSortedPositions( g, order);
StringArray cp_SNPlist,cp_SNPNoAllele;
cp_SNPlist.Dimension(SNPlist[g].Length());
cp_SNPNoAllele.Dimension(SNPNoAllele[g].Length());
for(int l=0;l<SNPlist[g].Length();l++) {
cp_SNPlist[l] = SNPlist[g][l];
cp_SNPNoAllele[l] = SNPNoAllele[g][l];
}
for(int i=0;i<order.Length();i++) {
SNPlist[g][i] = cp_SNPlist[order[i]];
SNPNoAllele[g][i] = cp_SNPNoAllele[order[i]] ;
}
}
}
// print test.groupfile
printf("done!\n");
String grp_filename = "test.groupfile";
printGroupFile( grp_filename );
}
示例7: execute
//.........这里部分代码省略.........
int refLen = refInfo.getReferenceLength(i);
if(refLen >= maxRefLen)
{
maxRefLen = refLen + 1;
}
}
dbsnpListPtr = new PosList(refInfo.getNumEntries(),maxRefLen);
if(fdbSnp==NULL)
{
std::cerr << "Open dbSNP file " << dbsnp.c_str() << " failed!\n";
}
else if(dbsnpListPtr == NULL)
{
std::cerr << "Failed to init the memory allocation for the dbsnpList.\n";
}
else
{
// Read the dbsnp file.
StringArray tokens;
String buffer;
int position = 0;
int refID = 0;
// Loop til the end of the file.
while (!ifeof(fdbSnp))
{
// Read the next line.
buffer.ReadLine(fdbSnp);
// If it does not have at least 2 columns,
// continue to the next line.
if (buffer.IsEmpty() || buffer[0] == '#') continue;
tokens.AddTokens(buffer);
if(tokens.Length() < 2) continue;
if(!tokens[1].AsInteger(position))
{
std::cerr << "Improperly formatted region line, start position "
<< "(2nd column) is not an integer: "
<< tokens[1]
<< "; Skipping to the next line.\n";
continue;
}
// Look up the reference name.
refID = samHeader.getReferenceID(tokens[0]);
if(refID != SamReferenceInfo::NO_REF_ID)
{
// Reference id was found, so add it to the dbsnp
dbsnpListPtr->addPosition(refID, position);
}
tokens.Clear();
buffer.Clear();
}
}
ifclose(fdbSnp);
}
// Read the sam records.
SamRecord samRecord;
int numReads = 0;
//////////////////////
示例8: ReadModelsFromFile
bool RegressionAnalysis::ReadModelsFromFile()
{
StringArray models;
models.Read(modelsFile);
if (models.Length() == 0)
return false;
regress = new FancyRegression[models.Length()];
printf("Retrieving analysis models from file [%s]...\n",
(const char *) modelsFile);
modelCount = 0;
StringArray tokens;
for (int i = 0, line = 0; i < models.Length(); i++)
{
models[i].Trim();
// Skip comments
if (models[i][0] == '#') continue;
// Divide each line into tokens
tokens.Clear();
tokens.AddTokens(models[i]);
// Skip blank lines
if (tokens.Length() == 0) continue;
// Print message for tracing...
printf(" Input: %s\n", (const char *) models[i], line++);
// Need a minimum of four tokens per line
if (tokens.Length() < 4)
{
printf(" Skipped: Trait name, mean, variance and heritability required.\n");
continue;
}
regress[modelCount].trait = ped.LookupTrait(tokens[0]);
if (regress[modelCount].trait < 0)
{
printf(line == 1 ? " Skipped: Appears to be a header line\n" :
" Skipped: Trait %s not listed in the data file\n",
(const char *) tokens[0]);
continue;
}
// First check that mean, variance and heritability are valid numbers
bool fail = false;
for (int j = 1; j <= 3; j++)
{
char * ptr = NULL;
strtod(tokens[j], &ptr);
fail |= ptr[0] != 0;
}
// If one of the values is not a valid number, skip
if (fail)
{
printf(line == 1 ? " Skipped: Appears to be a header line\n" :
" Skipped: Invalid numeric format\n");
continue;
}
regress[modelCount].mean = tokens[1];
regress[modelCount].variance = tokens[2];
regress[modelCount].heritability = tokens[3];
if (tokens.Length() > 4)
{
regress[modelCount].label = tokens[4];
for (int j = 5; j < tokens.Length(); j++)
{
regress[modelCount].label += " ";
regress[modelCount].label += tokens[j];
}
}
else
regress[modelCount].label.printf("Model %d", modelCount + 1);
regress[modelCount].shortLabel = regress[modelCount].label;
regress[modelCount].testRetestCorrel = testRetestCorrel;
regress[modelCount].bounded = !unrestricted;
printf(" Model loaded and labelled %s\n", (const char *) regress[modelCount].label);
modelCount++;
}
if (modelCount == 0)
{
printf("No valid models, default model will be used\n\n");
return false;
}
printf("Table processed. %d models recognized\n\n", modelCount);
//.........这里部分代码省略.........
示例9: vcfInitialize
void GroupFromAnnotation::vcfInitialize()
{
// func_upper
if ( function != "" ) {
func_upper.AddTokens( function, "/" );
for( int i=0; i<func_upper.Length(); i++ )
func_upper[i] = func_upper[i].ToUpper();
}
FILE * inFile;
inFile = fopen(vcfInput,"r");
while (!feof(inFile)) {
String buffer;
buffer.ReadLine( inFile);
if ( buffer[0] == '#' )
continue;
StringArray vfield;
vfield.AddTokens(buffer, "\t");
if ( vfield.Length() < 8 )
error("Annotation vcf only has %d columns!\n", vfield.Length());
StringArray info_semicolon;
info_semicolon.AddTokens( vfield[7],";" );
int annofull_index = -1;
for( int i=0; i<info_semicolon.Length(); i++ ) {
String iheader = info_semicolon[i].SubStr(0,8);
if (iheader == "ANNOFULL") {
annofull_index = i;
break;
}
}
if (annofull_index == -1)
continue;
String anno_full_str = info_semicolon[annofull_index].SubStr(9);
StringArray alts;
alts.AddTokens( anno_full_str, "|" );
for( int a=0; a<alts.Length(); a++ ) {
StringArray sub;
sub.AddTokens( alts[a], ":/=");
if (func_upper.Length() != 0) { // match before add
for(int f =0;f<func_upper.Length();f++) {
bool pattern_match = checkPatternMatch( sub, func_upper[f] );
if ( pattern_match ) {
chrom.Push( vfield[0] );
addGeneToGroupHash( sub[0] );
break;
}
}
}
else { // no pattern to match
chrom.Push( vfield[0] );
addGeneToGroupHash( sub[0] );
}
}
}
// vectors
SNPlist = new StringArray [geneCount];
SNPNoAllele = new StringArray [geneCount];
pos = new Vector [geneCount];
}
示例10: GetGeneMap
void GroupFromAnnotation::GetGeneMap(String path)
{
IFILE genemap;
genemap = ifopen(mapFile,"r");
if(genemap==NULL)
{
if(mapFile=="../data/refFlat_hg19.txt")
{
mapFile += ".gz";
genemap = ifopen(mapFile,"r");
if(genemap==NULL)
{
int loc = path.Find("bin");
if(loc!=-1)
{
mapFile = path.Left(loc-1);
mapFile += "/data/refFlat_hg19.txt";
}
else
{
mapFile += "../data/refFlat_hg19.txt";
}
genemap = ifopen(mapFile,"r");
}
if(genemap==NULL)
{
mapFile += ".gz";
genemap = ifopen(mapFile,"r");
}
if(genemap==NULL)
error("Cannot open gene mapping file %s.\n",mapFile.c_str());
}
else
error("Cannot open gene mapping file %s.\n",mapFile.c_str());
}
StringIntHash GeneLocHash;
StringArray strand;
int gene_idx =0;
while(!ifeof(genemap))
{
String buffer;
buffer.ReadLine(genemap);
StringArray record;
record.AddTokens(buffer,"\t");
int loc = GeneLocHash.Integer(record[0]);
if(loc==-1)
{
GeneLocHash.SetInteger(record[0],gene_idx);
//save chr, start and end positions
StringArray gene_chr;
if(record[2][2]=='r' || record[2][2]=='R')
record[2] = record[2].SubStr(3);
gene_chr.AddTokens(record[2],"_,;.");
if(gene_chr[0].Find("Un")!=-1)
continue;
/*
if(ChrLocHash.Integer(gene_chr[0])==-1)
{
chr_count++;
unique_chr.Push(gene_chr[0]);
ChrLocHash.SetInteger(gene_chr[0],chr_count);
}
*/
chr.Push(gene_chr[0]);
//printf("%d\t%s\t%s\n",idx,record[0].c_str(),gene_chr[0].c_str());
start_pos.Push(record[4].AsInteger());
end_pos.Push(record[5].AsInteger());
strand.Push(record[3]);
genename.Push(record[0]);
gene_idx++;
}
else
{
//get the current chr
StringArray gene_chr;
if(record[2][2]=='r' || record[2][2]=='R')
record[2] = record[2].SubStr(3);
gene_chr.AddTokens(record[2],"_,;.");
if(gene_chr[0].Find("Un")!=-1)
continue;
//check if strand and chr are consistent with previous record
if(chr[loc]!=gene_chr[0])
//if(strand[loc]!=record[3] || chr[loc]!=gene_chr[0])
// printf("Gene %s in %s has multiple records in different chromosome or strand.\n",record[0].c_str(),mapFile.c_str());
continue;
//update start and end position
if(record[4].AsInteger()<start_pos[loc])
start_pos[loc] = record[4].AsInteger();
if(record[5].AsInteger()>end_pos[loc])
end_pos[loc] = record[5].AsInteger();
}
}
ifclose(genemap);
//ifclose(genemap);
chr_idx.Index(chr);
String chr_=chr[chr_idx[0]];
for(int i=1;i<chr.Length();i++)
//.........这里部分代码省略.........
示例11: GetGroupFromFile
void GroupFromAnnotation::GetGroupFromFile(FILE * log)
{
//Fill in annoGroups.
StringArray tmp;
FILE * file = fopen(groupFile,"r");
if(file==NULL)
{
printf("ERROR! Cannot open group file %s.\n",groupFile.c_str());
error("ERROR! Cannot open group file %s.\n",groupFile.c_str());
}
String buffer;
int line = 0;
while (!feof(file))
{
buffer.ReadLine(file);
tmp.Clear();
tmp.AddTokens(buffer, SEPARATORS);
if(tmp.Length()==0)
continue;
annoGroups.Push(tmp[0]);
chrom.Push(tmp[1]);
line++;
}
fclose(file);
//Fill in SNPlist.
SNPlist = new StringArray [line];
SNPNoAllele = new StringArray [line];
FILE * samefile = fopen(groupFile,"r");
line = 0;
Vector pos;
while (!feof(samefile))
{
buffer.ReadLine(samefile);
tmp.Clear();
pos.Clear();
tmp.AddTokens(buffer, "\t ");
SNPlist[line].Dimension(0);
SNPNoAllele[line].Dimension(0);
for(int i=1;i<tmp.Length();i++)
{
SNPlist[line].Push(tmp[i]);
StringArray sub;
sub.Clear();
sub.AddTokens(tmp[i],":_/");
if(sub.Length()!=4)
{
printf("Warning: group %s has a variant %s that has invalid format. The correct format should be chr:pos:allele1:allele2.\n",tmp[0].c_str(),tmp[i].c_str());
fprintf(log,"Warning: group %s has a variant %s that has invalid format. The correct format should be chr:pos:allele1:allele2.\n",tmp[0].c_str(),tmp[i].c_str());
continue;
}
pos.Push(sub[1].AsInteger());
SNPNoAllele[line].Push(sub[0] + ":" + sub[1]);
}
//sort SNPlist[line] and SNPNoAllele[line]
if(SNPlist[line].Length()>1)
{
Vector sorted_pos,order;
sorted_pos.Copy(pos);
sorted_pos.Sort();
order.Dimension(pos.Length());
for(int i=0;i<sorted_pos.Length();i++)
{
for(int j=0;j<pos.Length();j++)
{
if(sorted_pos[i]==pos[j])
{
order[i]=j;
break;
}
}
}
StringArray cp_SNPlist,cp_SNPNoAllele;
cp_SNPlist.Dimension(SNPlist[line].Length());
cp_SNPNoAllele.Dimension(SNPNoAllele[line].Length());
for(int l=0;l<SNPlist[line].Length();l++)
{
cp_SNPlist[l] = SNPlist[line][l];
cp_SNPNoAllele[l] = SNPNoAllele[line][l];
}
for(int i=0;i<order.Length();i++)
{
SNPlist[line][i] = cp_SNPlist[order[i]];
//printf("%s\t",SNPlist[line][i].c_str());
SNPNoAllele[line][i] = cp_SNPNoAllele[order[i]] ;
}
//printf("\n");
}
line++;
}
fclose(samefile);
}