当前位置: 首页>>代码示例>>C++>>正文


C++ Alphabet::begin方法代码示例

本文整理汇总了C++中Alphabet::begin方法的典型用法代码示例。如果您正苦于以下问题:C++ Alphabet::begin方法的具体用法是什么?C++ Alphabet::begin怎么用?有哪些C++ Alphabet::begin的使用例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Alphabet的用法示例。


下文共展示了Alphabet::begin方法的1个代码示例,示例默认按受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: CreateTagDictionary

// Builds the tag dictionary and derives the table of allowed tag bigrams.
//
// Visible steps (the tail of this function is not shown in this excerpt):
//   1. Delegates to SequenceDictionary::CreateTagDictionary(reader).
//   2. Logs every tag in tag_alphabet_ and collects the distinct entity names
//      obtained by splitting each tag with EntityInstance::SplitEntityTag.
//   3. Fills allowed_bigrams_ with `true`, then clears the entries that the
//      configured tagging scheme (BIO or BILOU) forbids.
//   4. Logs every bigram for which IsAllowedBigram() returns true, plus the
//      total count.
//   5. Calls ReadGazetteerFiles().
//
// allowed_bigrams_ is indexed as [1 + tag][1 + left_tag]. The +1 offset
// reserves index 0 for the tag id -1, which the logging code below prints as
// "START" when it is the left tag and as "STOP" when it is the current tag.
//
// `reader` is only forwarded to the base-class implementation here.
void EntityDictionary::CreateTagDictionary(SequenceReader *reader) {
  SequenceDictionary::CreateTagDictionary(reader);

  // TODO: the SplitEntityTag function should probably be elsewhere and not on
  // EntityInstance.
  // `instance` is used only as a receiver for SplitEntityTag; `entities`
  // accumulates the entity names found in the tags.
  EntityInstance instance;
  Alphabet entities;

  // Display information about the entity tags.
  LOG(INFO) << "Found " << tag_alphabet_.size() << " entity tags:";
  for (Alphabet::iterator it = tag_alphabet_.begin();
  it != tag_alphabet_.end(); ++it) {
    std::string entity_tag = it->first;
    LOG(INFO) << entity_tag;

    // Split the tag into a prefix and an entity name; tags that yield an empty
    // entity name contribute nothing to `entities`.
    std::string prefix, entity;
    instance.SplitEntityTag(it->first, &prefix, &entity);
    if (entity != "") entities.Insert(entity);
  }

  LOG(INFO) << "Entities:";
  for (Alphabet::iterator it = entities.begin(); it != entities.end(); ++it) {
    LOG(INFO) << it->first;
  }

  LOG(INFO) << "Computing allowed bigrams...";
  // Every bigram is allowed by default.
  // The table is square with one extra row/column for the -1 (START/STOP) id.
  allowed_bigrams_.assign(1 + tag_alphabet_.size(),
                          std::vector<bool>(1 + tag_alphabet_.size(), true));
  // Now add the BIO-like constraints.
  // NOTE(review): the loops below compare an int starting at -1 against
  // tag_alphabet_.size(). If size() returns an unsigned type, -1 would be
  // converted to a huge value and the loops would never run -- confirm that
  // Alphabet::size() returns a signed int.
  // NOTE(review): a negative Lookup() result is treated as "tag not in the
  // alphabet". If that result is exactly -1 it coincides with the START/STOP
  // id, so the `!=` comparisons below would leave the corresponding START/STOP
  // bigram allowed when a B-/I-/L- tag is missing -- verify this is intended.
  for (Alphabet::iterator it = entities.begin(); it != entities.end(); ++it) {
    std::string entity = it->first;
    LOG(INFO) << "Processing entity " << entity << "...";
    if (static_cast<EntityPipe*>(pipe_)->GetEntityOptions()->tagging_scheme() ==
        EntityTaggingSchemes::BIO) {
      int tag_begin = tag_alphabet_.Lookup("B-" + entity);
      int tag_inside = tag_alphabet_.Lookup("I-" + entity);
      // Nothing to constrain when this entity has no I-tag.
      if (tag_inside < 0) continue;
      // An I-tag can only occur after a B-tag or another I-tag of the same
      // entity.
      for (int left_tag = -1; left_tag < tag_alphabet_.size(); ++left_tag) {
        if (left_tag != tag_begin && left_tag != tag_inside) {
          allowed_bigrams_[1 + tag_inside][1 + left_tag] = false;
        }
      }
    } else if (static_cast<EntityPipe*>(pipe_)->GetEntityOptions()->
               tagging_scheme() == EntityTaggingSchemes::BILOU) {
      int tag_begin = tag_alphabet_.Lookup("B-" + entity);
      int tag_inside = tag_alphabet_.Lookup("I-" + entity);
      int tag_last = tag_alphabet_.Lookup("L-" + entity);
      // I-tags and L-tags can only occur after a B-tag or an I-tag of the same
      // entity.
      for (int left_tag = -1; left_tag < tag_alphabet_.size(); ++left_tag) {
        if (left_tag != tag_begin && left_tag != tag_inside) {
          // Each row is only touched when the corresponding tag exists.
          if (tag_inside >= 0) {
            allowed_bigrams_[1 + tag_inside][1 + left_tag] = false;
          }
          if (tag_last >= 0) {
            allowed_bigrams_[1 + tag_last][1 + left_tag] = false;
          }
        }
      }
      // I-tags and B-tags can only occur before an I-tag or an L-tag of the
      // same entity.
      for (int right_tag = -1; right_tag < tag_alphabet_.size(); ++right_tag) {
        if (right_tag != tag_last && right_tag != tag_inside) {
          // Here the I-/B-tag is the left element, so it indexes the column.
          if (tag_inside >= 0) {
            allowed_bigrams_[1 + right_tag][1 + tag_inside] = false;
          }
          if (tag_begin >= 0) {
            allowed_bigrams_[1 + right_tag][1 + tag_begin] = false;
          }
        }
      }
    }
  }

  tag_alphabet_.BuildNames(); // Just to be able to plot readable information...
  // Enumerate every (left_tag, tag) pair, including the -1 START/STOP id, and
  // log the ones reported as allowed.
  int num_allowed_bigrams = 0;
  for (int tag = -1; tag < tag_alphabet_.size(); ++tag) {
    for (int left_tag = -1; left_tag < tag_alphabet_.size(); ++left_tag) {
      if (IsAllowedBigram(left_tag, tag)) {
        // -1 is shown as "START" on the left side and "STOP" on the right.
        std::string left_tag_name = (left_tag >= 0) ?
          tag_alphabet_.GetName(left_tag) : "START";
        std::string tag_name = (tag >= 0) ?
          tag_alphabet_.GetName(tag) : "STOP";

        LOG(INFO) << "Allowed bigram: "
          << left_tag_name
          << " -> "
          << tag_name;

        ++num_allowed_bigrams;
      }
    }
  }

  LOG(INFO) << "Total allowed bigrams: " << num_allowed_bigrams;

  ReadGazetteerFiles();
//......... part of the code is omitted here .........
开发者ID:Priberam,项目名称:TurboParser,代码行数:101,代码来源:EntityDictionary.cpp


注:本文中的Alphabet::begin方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。