本文整理汇总了C++中CDocument::add_token方法的典型用法代码示例。如果您正苦于以下问题:C++ CDocument::add_token方法的具体用法?C++ CDocument::add_token怎么用?C++ CDocument::add_token使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类CDocument的用法示例。
在下文中一共展示了CDocument::add_token方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: parse_document
//.........这里部分代码省略.........
_timestamp = timestp;
}
}
}
else
{
cout << "Couldn't open file " << filename << endl;
}
ifs.close();
// start parsing the file_contents
cout << "file contents size = " << file_contents.size() << endl;
status = parse(file_contents);
ofstream ofs;
ofs.open(sem_wiki_fname.c_str(),ios::trunc);
ofs<<"<<#WikiFileId>>\n";
ofs<<_semwiki_wiki_id<<"\n";
ofs<<"<<#Article Title>>\n";
ofs<<_article_title<<"\n";
ofs<<"<<#Author>>\n";
std::set<std::string>::iterator ait;
for(ait=_authors.begin();ait!=_authors.end();++ait)
{
if (_author_map.find(*ait)==_author_map.end())
_author_map[*ait]=++_author_id;
ofs<<(*ait)<<" $ ";
doc._author_ids.push_back(_author_map[*ait]);
}
ofs<<"\n<<#Timestamp>>\n";
ofs<<_timestamp<<"\n";
ofs<<"<<#Infobox>>\n";
set<string>::iterator i_itr;
for ( i_itr = _infobox_details.begin(); i_itr != _infobox_details.end(); ++i_itr)
{
ofs<<*i_itr;
}
ofs<<"\n<<#Sections>>\n";
list<string>::iterator sec_itr;
list<string>::iterator sec_det_itr;
string temp1="";
for(sec_itr=_section_header.begin(),sec_det_itr=_section_details.begin();sec_itr!=_section_header.end()&&sec_det_itr!=_section_details.end();++sec_itr,++sec_det_itr)
{
temp1=*sec_itr+" $";
ofs<<temp1;
ofs<<(*sec_det_itr);
ofs<<"\n";
}
ofs<<"<<#LINKS>>"<<"\n";
string temp ="";
set<string>::iterator s_itr;
size_t link_counter = 1;
for ( s_itr = _outgoing_link_set.begin(); s_itr != _outgoing_link_set.end(); ++s_itr, link_counter++ )
{
//cout << "linking to ==> (#" << link_counter << ") " << *s_itr << endl;
temp=*s_itr+" $";
ofs<<temp;
_link_set_map.insert(pair<std::string,std::string>(_article_title,temp));
temp="";
}
ofs<<"\n<<#Categories>>\n";
map<string,int>::iterator cat_map;
set<string>::iterator cat;
for(cat = _categories.begin();cat!=_categories.end();++cat)
{
if (_category_map.find(*cat)==_category_map.end())
_category_map[*cat]=++_category_id;
ofs<<*cat<<" $ ";
doc._category_ids.push_back(_category_map[*cat]);
}
ofs.close();
_authors.clear();
_categories.clear();
_timestamp = "";
CUtilities::tokenize(file_contents, token_vec, delim);
//cout << "Total number of raw tokens = " << token_vec.size() << endl;
vector<std::string>::iterator raw_token_it;
CBasicTokenProcessor* cbtp = new CBasicTokenProcessor();
for(raw_token_it = token_vec.begin();raw_token_it!=token_vec.end();++raw_token_it)
{
//cout<<"Token:"<<*raw_token_it<<endl;
if(CDocument::_raw_token_id_map.find((*raw_token_it))==CDocument::_raw_token_id_map.end())
CDocument::_raw_token_id_map[(*raw_token_it)]=CDocument::_raw_token_id++;
doc.add_token(*raw_token_it);
}
doc.process_token_list(*cbtp, doc);
token_vec.clear();
return status;
}