当前位置: 首页>>代码示例>>C++>>正文


C++ tokenizer类代码示例

本文整理汇总了C++中tokenizer的典型用法代码示例。如果您正苦于以下问题：C++ tokenizer类的具体用法？C++ tokenizer怎么用？C++ tokenizer使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了tokenizer类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: read_scene

// Reads a "Scene" chunk from the token stream into doc.scene.
//
// Expects an opening "{" and then consumes tokens until a "}" token is read.
//   - "amb":       three values read with expect_float(0,1) become the ambient
//                  red/green/blue; alpha is set to 1; a linefeed must follow.
//   - "dirlights": one more token is consumed, the chunk is skipped with
//                  skip_chunk, then the rest of the line is skipped.
//   - otherwise:   the rest of the line is skipped.
// Every token read by the loop is echoed to the debugger output.
void read_scene(tokenizer& t,document_type& doc)
{
    OutputDebugStringA("Scene>>>>");
    t.expect_literal( "{" );

    while( true ) {
        substr entry = t();
        OutputDebugStringA((entry.str() + "\n").c_str());

        if( entry == "}" ) {
            break;
        }

        if( entry == "amb" ) {
            doc.scene.ambient.red = t.expect_float(0,1);
            doc.scene.ambient.green = t.expect_float(0,1);
            doc.scene.ambient.blue = t.expect_float(0,1);
            doc.scene.ambient.alpha = 1;
            t.expect_linefeed();
            continue;
        }

        if( entry == "dirlights" ) {
            // consume the token that follows the keyword, then drop the chunk
            t();
            skip_chunk(t);
        }

        // both "dirlights" and unrecognised entries end by discarding the line
        skip_to_linefeed(t);
    }

    OutputDebugStringA("Scene<<<<");
}
开发者ID:jonigata,项目名称:partix,代码行数:25,代码来源:mqoreader.cpp

示例2: recognize_vertical

// Runs named-entity recognition over `is` paragraph by paragraph and writes
// one line per recognized entity to `os`:
//
//   <comma-separated token numbers> TAB <entity type> TAB <space-joined forms>
//
// Token numbers are `running offset + index in sentence + 1`; the running
// offset grows by (sentence length + 1) after every sentence.
// The stream is flushed after each sentence.
void recognize_vertical(istream& is, ostream& os, const ner& recognizer, tokenizer& tokenizer) {
  string paragraph;
  vector<string_piece> sentence;
  vector<named_entity> found;
  string ids, text;
  unsigned token_offset = 0;

  while (getpara(is, paragraph)) {
    tokenizer.set_text(paragraph);

    while (tokenizer.next_sentence(&sentence, nullptr)) {
      recognizer.recognize(sentence, found);
      sort_entities(found);

      for (auto&& entity : found) {
        ids.clear();
        text.clear();

        const auto first = entity.start;
        const auto past_last = entity.start + entity.length;
        for (auto i = first; i < past_last; i++) {
          // separators go between tokens only, never before the first one
          if (i != first) {
            ids += ',';
            text += ' ';
          }
          ids += to_string(token_offset + i + 1);
          text.append(sentence[i].str, sentence[i].len);
        }

        os << ids << '\t' << entity.type << '\t' << text << '\n';
      }

      os << flush;
      token_offset += sentence.size() + 1;
    }
  }
}
开发者ID:ufal,项目名称:nametag,代码行数:32,代码来源:run_ner.cpp

示例3: extract_identifier

template <typename R> bool extract_identifier(R& result,tokenizer& tokenizer)
{
 result.clear();
 
 //buffer
 
 array<ascii> buffer;
 array<ascii> current;
 
 //identifier

 if(!tokenizer.identifier(current))
  return false;
  
 buffer.append(current);
  
 //word
 
 if(tokenizer.word(current))
  buffer.append(current);
  
 //delimited
 
 if(!tokenizer.is_delimited())
  return false;
  
 //commit

 result=buffer;
 
 update(symbols()._identifier,buffer);
 
 return true;
}
开发者ID:vmorgulys,项目名称:sandbox,代码行数:34,代码来源:class.token.h.extract.cpp

示例4: extract_word

template <typename R> bool extract_word(R& result,tokenizer& tokenizer)
{
 result.clear();
 
 //buffer
 
 array<ascii> buffer;

 //word
 
 if(!tokenizer.word(buffer))
  return false;
  
 //delimited
 
 if(!tokenizer.is_delimited())
  return false;
  
 //commit
 
 result=buffer;
 
 update(symbols()._word,buffer);
 
 return true;
}
开发者ID:vmorgulys,项目名称:sandbox,代码行数:26,代码来源:class.token.h.extract.cpp

示例5: tag_xml

// Tags the text read from `is` and writes it to `os` as XML.
//
// Each paragraph is tokenized into sentences; every sentence is tagged with
// `guesser`, each tag is passed through `tagset_converter` and its lemma
// through `derivation`. Tokens are written as
//   <token lemma="..." tag="...">form</token>
// wrapped in <sentence>...</sentence>. The text lying between tokens, and
// after the last token of the paragraph, is written XML-encoded around the
// markup. The stream is flushed after every paragraph.
void tag_xml(istream& is, ostream& os, const tagger& tagger, tokenizer& tokenizer, const tagset_converter& tagset_converter, const derivation_formatter& derivation, morpho::guesser_mode guesser) {
  string paragraph;
  vector<string_piece> forms;
  vector<tagged_lemma> tags;

  while (getpara(is, paragraph)) {
    tokenizer.set_text(paragraph);

    // First character of the paragraph that has not been written out yet.
    const char* pending = paragraph.c_str();

    while (tokenizer.next_sentence(&forms, nullptr)) {
      tagger.tag(forms, tags, guesser);

      for (unsigned i = 0; i < forms.size(); i++) {
        auto& tagged = tags[i];
        const auto& form = forms[i];
        const bool is_first = !i;
        const bool is_last = i + 1 == forms.size();

        tagset_converter.convert(tagged);
        derivation.format_derivation(tagged.lemma);

        // Text that precedes this token goes out before any markup.
        os << xml_encoded(string_piece(pending, form.str - pending));
        if (is_first) os << "<sentence>";
        os << "<token lemma=\"" << xml_encoded(tagged.lemma, true) << "\" tag=\"" << xml_encoded(tagged.tag, true) << "\">"
           << xml_encoded(form) << "</token>";
        if (is_last) os << "</sentence>";

        pending = form.str + form.len;
      }
    }

    // Whatever follows the last token of the paragraph.
    os << xml_encoded(string_piece(pending, paragraph.c_str() + paragraph.size() - pending)) << flush;
  }
}
开发者ID:ufal,项目名称:morphodita,代码行数:27,代码来源:run_tagger.cpp

示例6: spellEvent

// Builds a spell event from the tokenizer; the wowEvent base is constructed
// from the same tokenizer first.
//
// Token 9 is converted with asInt into spellID, token 10 (after trimQuotes)
// is recorded in `spells` under that id, and token 11 is converted with
// asuIntFromHexa into spellSchool.
spellEvent::spellEvent(tokenizer& t) : wowEvent(t)
{
	spellID = asInt(t.token(9));

	string label = t.token(10);
	trimQuotes(label);
	spells[spellID] = label;

	spellSchool = asuIntFromHexa(t.token(11));
}
开发者ID:alhunor,项目名称:projects,代码行数:7,代码来源:events.cpp

示例7: is_next

// Reports whether a number starts at lookahead offset `i`.
//
// Any run of '-', '+' and '.' characters is skipped first; the result is true
// when the character that follows the run is a decimal digit.
// `data` is not used here.
bool number::is_next(tokenizer &tokens, int i, void *data)
{
	for (;;)
	{
		// stop at the first character that is neither a sign nor a dot
		if (tokens.peek_char(i) != '-' && tokens.peek_char(i) != '+' && tokens.peek_char(i) != '.')
			break;
		i++;
	}

	if (!(tokens.peek_char(i) >= '0'))
		return false;

	return tokens.peek_char(i) <= '9';
}
开发者ID:yuchien302,项目名称:skeleton,代码行数:7,代码来源:number.cpp

示例8: is_next

// Reports whether a statement can start at the current position: either
// node_id::is_next accepts it, or the tokenizer reports one of the keywords
// "subgraph", "graph", "node" or "edge" as next. The checks run in that order
// and stop at the first one that succeeds.
bool statement::is_next(tokenizer &tokens, int i, void *data)
{
	if (node_id::is_next(tokens, i, data))
		return true;

	if (tokens.is_next("subgraph"))
		return true;

	if (tokens.is_next("graph"))
		return true;

	if (tokens.is_next("node"))
		return true;

	if (tokens.is_next("edge"))
		return true;

	return false;
}
开发者ID:nbingham1,项目名称:parse_dot,代码行数:8,代码来源:statement.cpp

示例9: tokenize_vertical

// Tokenizes the text read from `is` and writes it to `os` in vertical form:
// one token per line, with an empty line after every sentence. The stream is
// flushed after each sentence.
void tokenize_vertical(istream& is, ostream& os, tokenizer& tokenizer) {
  string paragraph;
  vector<string_piece> sentence;

  while (getpara(is, paragraph)) {
    tokenizer.set_text(paragraph);

    while (tokenizer.next_sentence(&sentence, nullptr)) {
      for (const string_piece& token : sentence)
        os << token << '\n';

      // the empty line terminates the sentence
      os << '\n';
      os << flush;
    }
  }
}
开发者ID:ufal,项目名称:nametag,代码行数:14,代码来源:run_tokenizer.cpp

示例10: stmt_def_field

	inline stmt_def_field(const statement&parent,const token&tk,tokenizer&t):
		statement{parent,tk},
		ident_{t.next_token()}
	{
		if(ident_.is_name(""))
			throw compiler_error(ident_,"expected field name");

		if(!t.is_next_char('{'))
			throw compiler_error(ident_,"expected '{' initial value   then '}' ",ident_.name());

		while(true){
			if(t.is_next_char('}'))break;
			tokens_.push_back(t.next_token());
		}
	}
开发者ID:calint,项目名称:compiler-2,代码行数:15,代码来源:stmt_def_field.hpp

示例11: extract_control

template <typename R> bool extract_control(R& result,tokenizer& tokenizer)
{
 result.clear();
 
 //controls
 
 dictionary<string,id<string>> controls=
 {
  "\r",symbols()._cr,
  "\n",symbols()._lf
 };

 //buffer
 
 array<ascii> buffer;

 //any
 
 if(!tokenizer.any(buffer,controls.keys()))
  return false;
  
 //commit
 
 result=buffer;

 update(controls[buffer.join("")],buffer);
 
 return true;
}
开发者ID:vmorgulys,项目名称:sandbox,代码行数:29,代码来源:class.token.h.extract.cpp

示例12: stmt_def_func_param

	// Parses a function parameter: the next token becomes this statement's
	// token, optionally followed by one or more ':'-introduced keyword tokens
	// that are stored in keywords_.
	//
	// Throws compiler_error when the stream ends where a keyword token is
	// expected after ':'.
	inline stmt_def_func_param(const statement&parent,tokenizer&t):
		statement{parent,t.next_token()}
	{
		assert(!tok().is_name(""));

		if(t.is_next_char(':')){
			// one keyword per ':'; keep going while another ':' follows
			do{
				if(t.is_eos())
					throw compiler_error(*this,"unexpected end of stream",tok().name_copy());
				keywords_.push_back(t.next_token());
			}while(t.is_next_char(':'));
		}
	}
开发者ID:calint,项目名称:compiler-2,代码行数:16,代码来源:stmt_def_func_param.hpp

示例13: parse

// Parses a sequence of assignment_list items into `attributes`.
//
// Between syntax_start/syntax_end the loop repeatedly announces an
// assignment_list to the tokenizer (increment(false) + expect<>()), asks
// decrement() whether to proceed, and if so parses one assignment_list and
// appends it. Parsing stops at the first decrement() that returns false.
// NOTE(review): increment(false)/expect/decrement look like the tokenizer's
// "optional expectation" protocol - confirm against the tokenizer class.
void attribute_list::parse(tokenizer &tokens, void *data)
{
	tokens.syntax_start(this);

	for (;;)
	{
		tokens.increment(false);
		tokens.expect<assignment_list>();

		if (!tokens.decrement(__FILE__, __LINE__, data))
			break;

		attributes.push_back(assignment_list(tokens, data));
	}

	tokens.syntax_end(this);
}
开发者ID:nbingham1,项目名称:parse_dot,代码行数:17,代码来源:attribute_list.cpp

示例14: read_scene

// Reads a "Scene" chunk from the token stream into doc.scene.
//
// Expects an opening "{" and then consumes tokens until a "}" token is read.
// For an "amb" entry, three values read with expect_float(0,1) become the
// ambient red/green/blue, alpha is set to 1, and a linefeed must follow.
// Every other entry is skipped up to the end of its line.
void read_scene(tokenizer& t,document_type& doc)
{
        t.expect_literal( "{" );

        while( true ) {
                substr entry = t();

                if( entry == "}" ) {
                        break;
                }

                if( !( entry == "amb" ) ) {
                        // unsupported entry: ignore the rest of the line
                        skip_to_linefeed(t);
                        continue;
                }

                doc.scene.ambient.red = t.expect_float(0,1);
                doc.scene.ambient.green = t.expect_float(0,1);
                doc.scene.ambient.blue = t.expect_float(0,1);
                doc.scene.ambient.alpha = 1;
                t.expect_linefeed();
        }
}
开发者ID:jonigata,项目名称:yamadumi,代码行数:18,代码来源:mqoreader.cpp

示例15: tokenize_xml

// Tokenizes the text read from `is` and writes it to `os` as XML.
//
// Tokens are written as <token>form</token> and each sentence is wrapped in
// <sentence>...</sentence>. Non-empty stretches of text between tokens, and
// after the last token of a paragraph, are written XML-encoded around the
// markup. The stream is flushed after every paragraph.
static void tokenize_xml(istream& is, ostream& os, tokenizer& tokenizer) {
  string paragraph;
  vector<string_piece> forms;

  while (getpara(is, paragraph)) {
    tokenizer.set_text(paragraph);

    // First character of the paragraph that has not been written out yet.
    const char* pending = paragraph.c_str();

    while (tokenizer.next_sentence(&forms, nullptr)) {
      for (unsigned i = 0; i < forms.size(); i++) {
        const auto& form = forms[i];

        // Text preceding this token, if there is any.
        if (pending < form.str)
          os << xml_encoded(string_piece(pending, form.str - pending));

        if (!i)
          os << "<sentence>";

        os << "<token>" << xml_encoded(form) << "</token>";

        if (i + 1 == forms.size())
          os << "</sentence>";

        pending = form.str + form.len;
      }
    }

    // Text following the last token of the paragraph, if there is any.
    if (pending < paragraph.c_str() + paragraph.size())
      os << xml_encoded(string_piece(pending, paragraph.c_str() + paragraph.size() - pending));

    os << flush;
  }
}
开发者ID:ufal,项目名称:nametag,代码行数:20,代码来源:run_tokenizer.cpp


注:本文中的tokenizer类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。