C++ GetWordHash函数代码示例

本文整理汇总了C++中GetWordHash函数的典型用法代码示例。如果您正苦于以下问题：C++ GetWordHash函数的具体用法？C++ GetWordHash怎么用？C++ GetWordHash使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了GetWordHash函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: SortVocab

// Sorts the vocabulary by frequency using word counts, discards words seen
// fewer than min_count times, and rebuilds the hash table for the new order.
// Side effects: rewrites vocab, vocab_size, vocab_hash and train_words, and
// allocates the per-word code/point buffers used for binary tree construction.
void SortVocab() {
  int a, size;
  unsigned int hash;
  struct vocab_word *resized;
  // Sort the vocabulary and keep </s> at the first position
  qsort(&vocab[1], vocab_size - 1, sizeof(struct vocab_word), VocabCompare);
  // Every stored index is stale after sorting, so start from an empty table
  for (a = 0; a < vocab_hash_size; a++) vocab_hash[a] = -1;
  size = vocab_size;
  train_words = 0;
  for (a = 0; a < size; a++) {
    // Words occurring less than min_count times are discarded from the vocab.
    // Removal frees the current tail entry rather than vocab[a], which relies
    // on the sort having moved rare words to the end (presumably VocabCompare
    // orders by descending count -- confirm). Entry 0 is exempt: it is not
    // part of the sort, so dropping the tail on its behalf could free a word
    // that is hashed later in this loop (use-after-free).
    if ((vocab[a].cn < min_count) && (a != 0)) {
      vocab_size--;
      free(vocab[vocab_size].word);
    } else {
      // Hash will be re-computed, as after the sorting it is not actual
      hash = GetWordHash(vocab[a].word);
      // Linear probing: advance to the next free slot on collision
      while (vocab_hash[hash] != -1) hash = (hash + 1) % vocab_hash_size;
      vocab_hash[hash] = a;
      train_words += vocab[a].cn;
    }
  }
  // Resize the array to the surviving entries. If realloc fails the previous
  // buffer is still valid and already holds entries 0..vocab_size-1, so keep it
  // instead of overwriting vocab with NULL.
  resized = (struct vocab_word *)realloc(vocab, (vocab_size + 1) * sizeof(struct vocab_word));
  if (resized != NULL) vocab = resized;
  // Allocate memory for the binary tree construction
  for (a = 0; a < vocab_size; a++) {
    vocab[a].code = (char *)calloc(MAX_CODE_LENGTH, sizeof(char));
    vocab[a].point = (int *)calloc(MAX_CODE_LENGTH, sizeof(int));
  }
}

开发者ID:zweiein，项目名称:kaldi，代码行数:29，代码来源:rnnlm.c

示例2: SortVocab

// Sorts the vocabulary by word frequency, discards words seen fewer than
// min_count times, and rebuilds the hash table for the new order.
// Side effects: rewrites vocab, vocab_size, vocab_hash and train_words, and
// allocates the per-word code/point buffers.
void SortVocab() {
    int a, size;
    unsigned int hash;
    struct vocab_word *resized;
    // Sort the vocabulary (qsort) while keeping </s> at the first position
    qsort(&vocab[1], vocab_size - 1, sizeof(struct vocab_word), VocabCompare);
    // Sorting moved the words, so every index stored in the hash table is
    // stale; clear the table here and rebuild it below
    for (a = 0; a < vocab_hash_size; a++) vocab_hash[a] = -1;
    size = vocab_size;
    train_words = 0; // total number of training words (sum of word counts)
    for (a = 0; a < size; a++) {
        // Drop very low-frequency words. Removal frees the current tail entry
        // rather than vocab[a], which relies on the sort having moved rare
        // words to the end (presumably VocabCompare orders by descending
        // count -- confirm). Entry 0 is exempt: it is not part of the sort, so
        // dropping the tail on its behalf could free a word that is hashed
        // later in this loop (use-after-free).
        if ((vocab[a].cn < min_count) && (a != 0)) {
            vocab_size--;
            free(vocab[vocab_size].word);
        } else {
            // The old hash position is invalid and must be recomputed
            hash = GetWordHash(vocab[a].word);
            // Linear probing: advance to the next free slot on collision
            while (vocab_hash[hash] != -1) hash = (hash + 1) % vocab_hash_size;
            vocab_hash[hash] = a;
            train_words += vocab[a].cn;
        }
    }
    // Resize the array to the surviving entries. If realloc fails the previous
    // buffer is still valid and already holds entries 0..vocab_size-1, so keep
    // it instead of overwriting vocab with NULL.
    resized = (struct vocab_word *)realloc(vocab, (vocab_size + 1) * sizeof(struct vocab_word));
    if (resized != NULL) vocab = resized;
    // Allocate space for each word's Huffman code and for the vocabulary
    // indices along its path
    for (a = 0; a < vocab_size; a++) {
        vocab[a].code = (char *)calloc(MAX_CODE_LENGTH, sizeof(char));
        vocab[a].point = (int *)calloc(MAX_CODE_LENGTH, sizeof(int));
    }
}

开发者ID:LilySpark，项目名称:Algorithm，代码行数:30，代码来源:word2vec_note.c

示例3: ReduceVocab

// Shrinks the vocabulary in place: entries whose count exceeds min_reduce are
// packed toward the front, the words of all other entries are freed, and the
// hash table is rebuilt from scratch. min_reduce is raised by one per call.
void ReduceVocab() {
    int src, kept = 0;
    unsigned int slot;

    // Compaction pass: after it, the first `kept` entries are the survivors
    for (src = 0; src < vocab_size; src++) {
        if (!(vocab[src].cn > min_reduce)) {
            free(vocab[src].word);
            continue;
        }
        vocab[kept].cn = vocab[src].cn;
        vocab[kept].word = vocab[src].word;
        kept++;
    }
    vocab_size = kept;

    // Indices changed, so empty the table and re-insert every survivor
    for (src = 0; src < vocab_hash_size; src++) {
        vocab_hash[src] = -1;
    }
    for (src = 0; src < vocab_size; src++) {
        // Linear probing from the word's home slot to the first free slot
        for (slot = GetWordHash(vocab[src].word);
             vocab_hash[slot] != -1;
             slot = (slot + 1) % vocab_hash_size) {
        }
        vocab_hash[slot] = src;
    }

    fflush(stdout);
    min_reduce++;
}

开发者ID:LilySpark，项目名称:Algorithm，代码行数:25，代码来源:word2vec_note.c

示例4: ReduceVocab

// Drops infrequent tokens from the vocabulary. Entries with a count above
// min_reduce are packed toward the front of the array, the words of the
// remaining entries are freed, and the hash table is rebuilt afterwards.
// Each call raises min_reduce by one.
void
ReduceVocab ()
{
  int src, kept = 0;
  unsigned int slot;

  // Compaction pass: after it, the first `kept` entries are the survivors
  for (src = 0; src < vocab_size; src++)
    {
      if (!(vocab[src].cn > min_reduce))
        {
          free (vocab[src].word);
          continue;
        }
      vocab[kept].cn = vocab[src].cn;
      vocab[kept].word = vocab[src].word;
      kept++;
    }
  vocab_size = kept;

  // Stored indices are stale now: empty the table, then re-insert everything
  for (src = 0; src < vocab_hash_size; src++)
    {
      vocab_hash[src] = -1;
    }
  for (src = 0; src < vocab_size; src++)
    {
      // Linear probing from the word's home slot to the first free slot
      for (slot = GetWordHash (vocab[src].word);
           vocab_hash[slot] != -1;
           slot = (slot + 1) % vocab_hash_size)
        {
        }
      vocab_hash[slot] = src;
    }

  fflush (stdout);
  min_reduce++;
}

开发者ID:Halo9Pan，项目名称:word2vec，代码行数:29，代码来源:word2phrase.c

示例5: SortVocab

// Sorts the vocabulary by frequency using word counts, discards words seen
// fewer than min_count times, and rebuilds the hash table for the new order.
// Side effects: rewrites vocab, vocab_size and vocab_hash.
void
SortVocab ()
{
  int a, size;
  unsigned int hash;
  struct vocab_word *resized;
  // Sort the vocabulary and keep </s> at the first position
  qsort (&vocab[1], vocab_size - 1, sizeof(struct vocab_word), VocabCompare);
  // Every stored index is stale after sorting, so start from an empty table
  for (a = 0; a < vocab_hash_size; a++)
	vocab_hash[a] = -1;
  // Iterate over the ORIGINAL number of entries. vocab_size is decremented
  // inside the loop, so using it as the bound would stop the scan halfway
  // through the rare tail and leave unhashed, unfreed rare words in the vocab.
  size = vocab_size;
  for (a = 0; a < size; a++)
	{
	  // Words occurring less than min_count times are discarded from the
	  // vocab. Removal frees the current tail entry rather than vocab[a],
	  // which relies on the sort having moved rare words to the end
	  // (presumably VocabCompare orders by descending count -- confirm).
	  // Entry 0 is exempt: it is not part of the sort, so dropping the tail
	  // on its behalf could free a word that is hashed later in this loop.
	  if ((vocab[a].cn < min_count) && (a != 0))
		{
		  vocab_size--;
		  free (vocab[vocab_size].word);
		}
	  else
		{
		  // Hash will be re-computed, as after the sorting it is not actual
		  hash = GetWordHash (vocab[a].word);
		  // Linear probing: advance to the next free slot on collision
		  while (vocab_hash[hash] != -1)
			hash = (hash + 1) % vocab_hash_size;
		  vocab_hash[hash] = a;
		}
	}
  // Resize the array to the surviving entries. If realloc fails the previous
  // buffer is still valid and already holds them, so keep it rather than
  // overwriting vocab with NULL.
  resized = (struct vocab_word *) realloc (vocab, vocab_size * sizeof(struct vocab_word));
  if (resized != NULL)
	vocab = resized;
}

开发者ID:Halo9Pan，项目名称:word2vec，代码行数:29，代码来源:word2phrase.c

示例6: SearchVocab

// Looks up `word` in the vocabulary hash table.
// Probing starts at the slot given by GetWordHash(word) and moves forward one
// slot at a time (wrapping at vocab_hash_size). An empty slot (-1) ends the
// search with "not found".
// Returns the word's index in vocab, or -1 if it is absent.
int SearchVocab(char *word) {
  unsigned int slot;
  for (slot = GetWordHash(word);
       vocab_hash[slot] != -1;
       slot = (slot + 1) % vocab_hash_size) {
    if (strcmp(word, vocab[vocab_hash[slot]].word) == 0) {
      return vocab_hash[slot];
    }
  }
  return -1;
}

开发者ID:zweiein，项目名称:kaldi，代码行数:10，代码来源:rnnlm.c

示例7: SearchVocab

// Looks up `word` in the hash table of vocabulary `v`.
// Probing starts at the slot given by GetWordHash(v, word) and moves forward
// one slot at a time (wrapping at vocab_hash_size). An empty slot (-1) ends
// the search with "not found".
// Returns the word's index in v->vocab, or -1 if it is absent.
int SearchVocab(struct vocabulary *v, char *word) {
  unsigned int slot;
  for (slot = GetWordHash(v, word);
       v->vocab_hash[slot] != -1;
       slot = (slot + 1) % vocab_hash_size) {
    if (strcmp(word, v->vocab[v->vocab_hash[slot]].word) == 0) {
      return v->vocab_hash[slot];
    }
  }
  return -1;
}

开发者ID:stephenroller，项目名称:word2vecfz，代码行数:10，代码来源:vocab.c

示例8: AddWordToVocab

// Appends `word` to the vocabulary with a count of 0 and registers it in the
// hash table. The stored copy holds at most MAX_STRING - 1 characters.
// Returns the index of the new entry in vocab.
// NOTE(review): the calloc/realloc results are not checked here.
int AddWordToVocab(char * word){
    unsigned int hash, length = strlen(word) + 1;
    if (length > MAX_STRING) length = MAX_STRING;
    vocab[vocab_size].word = (char *) calloc(length, sizeof(char));
    // Bounded copy: the buffer is only `length` bytes, so copying the whole
    // word (as strcpy would) overflows it when the word was clamped above.
    // calloc zero-filled the buffer, so the final byte remains the terminator.
    memcpy(vocab[vocab_size].word, word, length - 1);
    vocab[vocab_size].cn = 0;
    vocab_size++;
    if (vocab_size + 2 >= vocab_max_size ) {
        vocab_max_size += 1000; // grow by 1000 entries at a time
        vocab = (struct vocab_word *)realloc(vocab, vocab_max_size * sizeof(struct vocab_word));
    }
    // Hash the stored text (identical to `word` unless it was truncated) so the
    // slot agrees with the string that lookups compare against.
    hash = GetWordHash(vocab[vocab_size - 1].word);
    while (vocab_hash[hash] != -1)  hash = (hash + 1) % vocab_hash_size; // linear probing
    vocab_hash[hash] = vocab_size - 1; // remember where the word lives in vocab
    return  vocab_size - 1; // index of the newly added word
}

开发者ID:LilySpark，项目名称:Algorithm，代码行数:16，代码来源:word2vec_note.c

示例9: AddWordToVocab

// Adds a word to the vocabulary with a count of 0 and registers it in the
// hash table. The stored copy holds at most MAX_STRING - 1 characters.
// Returns the index of the new entry in vocab.
// NOTE(review): the calloc/realloc results are not checked here.
int AddWordToVocab(char *word) {
  unsigned int hash, length = strlen(word) + 1;
  if (length > MAX_STRING) length = MAX_STRING;
  vocab[vocab_size].word = (char *)calloc(length, sizeof(char));
  // Bounded copy: the buffer is only `length` bytes, so copying the whole word
  // (as strcpy would) overflows it when the word was clamped above. calloc
  // zero-filled the buffer, so the final byte remains the terminator.
  memcpy(vocab[vocab_size].word, word, length - 1);
  vocab[vocab_size].cn = 0;
  vocab_size++;
  // Reallocate memory if needed
  if (vocab_size + 2 >= vocab_max_size) {
    vocab_max_size *= 1.5; // was += 1000, modified to have fewer reallocations
    vocab = (struct vocab_word *)realloc(vocab, vocab_max_size * sizeof(struct vocab_word));
  }
  // Hash the stored text (identical to `word` unless it was truncated) so the
  // slot agrees with the string that lookups compare against.
  hash = GetWordHash(vocab[vocab_size - 1].word);
  // Linear probing: advance to the next free slot on collision
  while (vocab_hash[hash] != -1) hash = (hash + 1) % vocab_hash_size;
  vocab_hash[hash] = vocab_size - 1;
  return vocab_size - 1;
}

开发者ID:zweiein，项目名称:kaldi，代码行数:18，代码来源:rnnlm.c

示例10: AddWordToVocab

// Adds a word to the vocabulary with a count of 0 and registers it in the
// hash table. The stored copy holds at most MAX_STRING - 1 characters.
// Returns the index of the new entry in vocab.
// NOTE(review): the calloc/realloc results are not checked here.
int AddWordToVocab(char *word) {
  unsigned int hash, length = strlen(word) + 1;
  if (length > MAX_STRING) length = MAX_STRING; // stored length is capped at MAX_STRING bytes
  vocab[vocab_size].word = (char *)calloc(length, sizeof(char));
  // Bounded copy: the buffer is only `length` bytes, so copying the whole word
  // (as strcpy would) overflows it when the word was clamped above. calloc
  // zero-filled the buffer, so the final byte remains the terminator.
  memcpy(vocab[vocab_size].word, word, length - 1);
  vocab[vocab_size].cn = 0;  // initial word count is 0
  vocab_size++;
  // Reallocate memory if needed
  if (vocab_size + 2 >= vocab_max_size) {
    vocab_max_size += 1000;
    vocab = (struct vocab_word *)realloc(vocab, vocab_max_size * sizeof(struct vocab_word));
  }
  // Hash the stored text (identical to `word` unless it was truncated) so the
  // slot agrees with the string that lookups compare against.
  hash = GetWordHash(vocab[vocab_size - 1].word);
  // On a collision, use open addressing with linear probing: step forward
  // until a free slot is found
  while (vocab_hash[hash] != -1) hash = (hash + 1) % vocab_hash_size;
  vocab_hash[hash] = vocab_size - 1;
  return vocab_size - 1;
}

开发者ID:adrianhust，项目名称:RepresentationLearning，代码行数:19，代码来源:word2vec.c

示例11: AddWordToVocab

// Adds a word to the vocabulary with a count of 0 and registers it in the
// hash table. The stored copy holds at most MAX_STRING - 1 characters.
// Returns the index of the new entry in vocab.
// NOTE(review): the calloc/realloc results are not checked here.
int AddWordToVocab(char *word) {
  unsigned int hash, length = strlen(word) + 1; // +1 leaves room for the terminating NUL
  if (length > MAX_STRING) length = MAX_STRING;
  vocab[vocab_size].word = (char *)calloc(length, sizeof(char));
  // Bounded copy: the buffer is only `length` bytes, so copying the whole word
  // (as strcpy would) overflows it when the word was clamped above. calloc
  // zero-filled the buffer, so the final byte remains the terminator.
  memcpy(vocab[vocab_size].word, word, length - 1);
  vocab[vocab_size].cn = 0;
  vocab_size++; // number of words in the vocabulary
  // Reallocate memory if needed
  if (vocab_size + 2 >= vocab_max_size) {
    vocab_max_size += 1000;
    // realloc may move the array to a new heap block; the old block is released
    vocab = (struct vocab_word *)realloc(vocab, vocab_max_size * sizeof(struct vocab_word));
  }
  // Hash the stored text (identical to `word` unless it was truncated) so the
  // slot agrees with the string that lookups compare against.
  hash = GetWordHash(vocab[vocab_size - 1].word);
  // A slot that is not -1 is occupied (collision): use open addressing with
  // linear probing and step forward to the first unoccupied slot
  while (vocab_hash[hash] != -1) hash = (hash + 1) % vocab_hash_size;
  vocab_hash[hash] = vocab_size - 1; // the hash table stores the word's index in vocab
  return vocab_size - 1;
}

开发者ID:ningyuwhut，项目名称:word2vec，代码行数:18，代码来源:word2vec.c

示例12: AddWordToVocab

// Adds a word to the vocabulary with a count of 0 and registers it in the
// hash table. The stored copy holds at most MAX_STRING - 1 characters.
// Returns the index of the new entry in vocab.
// NOTE(review): the calloc/realloc results are not checked here.
int AddWordToVocab(char *word)
{
  unsigned int hash, length = strlen(word) + 1;
  if (length > MAX_STRING)
	  length = MAX_STRING;
  vocab[vocab_size].word = (char *)calloc(length, sizeof(char));
  // Bounded copy: the buffer is only `length` bytes, so copying the whole word
  // (as strcpy would) overflows it when the word was clamped above. calloc
  // zero-filled the buffer, so the final byte remains the terminator.
  memcpy(vocab[vocab_size].word, word, length - 1);
  vocab[vocab_size].cn = 0;
  vocab_size++;
  // Reallocate memory if needed
  if (vocab_size + 2 >= vocab_max_size)
  {
    vocab_max_size += 1000;
    vocab = (struct vocab_word *)realloc(vocab, vocab_max_size * sizeof(struct vocab_word));
  }
  // Hash the stored text (identical to `word` unless it was truncated) so the
  // slot agrees with the string that lookups compare against.
  hash = GetWordHash(vocab[vocab_size - 1].word);
  while (vocab_hash[hash] != -1) // the slot is taken: hash collision
	  hash = (hash + 1) % vocab_hash_size; // resolve it with open addressing
  vocab_hash[hash] = vocab_size - 1; // maps the word's hash to its position in vocab
  return vocab_size - 1;
}

开发者ID:lvchigo，项目名称:ads_text，代码行数:22，代码来源:Demo_word2vec.cpp

示例13: AddWordToVocab

// Adds a word to vocabulary `v` with a count of 0 and registers it in the
// vocabulary's hash table. The stored copy holds at most MAX_STRING - 1
// characters.
// Returns the index of the new entry in v->vocab.
// NOTE(review): the calloc/realloc results are not checked here.
int AddWordToVocab(struct vocabulary *v, char *word) {
  unsigned int hash, length = strlen(word) + 1;
  if (length > MAX_STRING) length = MAX_STRING;
  v->vocab[v->vocab_size].word = (char *)calloc(length, sizeof(char));
  // Bounded copy: the buffer is only `length` bytes, so copying the whole word
  // (as strcpy would) overflows it when the word was clamped above. calloc
  // zero-filled the buffer, so the final byte remains the terminator.
  memcpy(v->vocab[v->vocab_size].word, word, length - 1);
  v->vocab[v->vocab_size].cn = 0;
  v->vocab_size++;
  // Reallocate memory if needed
  if (v->vocab_size + 2 >= v->vocab_max_size) {
    v->vocab_max_size += 1000;
    v->vocab = (struct vocab_word *)realloc(v->vocab, v->vocab_max_size * sizeof(struct vocab_word));
  }
  // Hash the stored text (identical to `word` unless it was truncated) so the
  // slot agrees with the string that lookups compare against.
  hash = GetWordHash(v, v->vocab[v->vocab_size - 1].word);
  // Linear probing: advance to the next free slot on collision
  while (v->vocab_hash[hash] != -1) hash = (hash + 1) % vocab_hash_size;
  v->vocab_hash[hash] = v->vocab_size - 1;
  return v->vocab_size - 1;
}

开发者ID:stephenroller，项目名称:word2vecfz，代码行数:22，代码来源:vocab.c

示例14: ReduceVocab

// Drops infrequent tokens from vocabulary `v`. Entries with a count above the
// function-local threshold min_reduce are packed toward the front of the
// array, the words of the remaining entries are freed, and the hash table is
// rebuilt. The threshold starts at 1 and rises by one on every call.
void ReduceVocab(struct vocabulary *v) {
  static int min_reduce = 1;
  printf("reducevocab\n");
  int src, kept = 0;
  unsigned int slot;

  // Compaction pass: after it, the first `kept` entries are the survivors
  for (src = 0; src < v->vocab_size; src++) {
    if (!(v->vocab[src].cn > min_reduce)) {
      free(v->vocab[src].word);
      continue;
    }
    v->vocab[kept].cn = v->vocab[src].cn;
    v->vocab[kept].word = v->vocab[src].word;
    kept++;
  }
  v->vocab_size = kept;

  // Stored indices are stale now: empty the table, then re-insert everything
  for (src = 0; src < vocab_hash_size; src++) {
    v->vocab_hash[src] = -1;
  }
  for (src = 0; src < v->vocab_size; src++) {
    // Linear probing from the word's home slot to the first free slot
    for (slot = GetWordHash(v, v->vocab[src].word);
         v->vocab_hash[slot] != -1;
         slot = (slot + 1) % vocab_hash_size) {
    }
    v->vocab_hash[slot] = src;
  }

  fflush(stdout);
  min_reduce++;
}

开发者ID:stephenroller，项目名称:word2vecfz，代码行数:22，代码来源:vocab.c

示例15: SortAndReduceVocab

// Sorts vocabulary `v` by frequency using word counts, discards words seen
// fewer than `min_count` times, and rebuilds the hash table for the new order.
// Side effects: rewrites v->vocab, v->vocab_size, v->vocab_hash and
// v->word_count (the sum of the counts of the retained words).
void SortAndReduceVocab(struct vocabulary *v, int min_count) {
  int a, size;
  unsigned int hash;
  struct vocab_word *resized;
  // Sort the vocabulary and keep </s> at the first position
  qsort(&(v->vocab[1]), v->vocab_size - 1, sizeof(struct vocab_word), VocabCompare);
  // Every stored index is stale after sorting, so start from an empty table
  for (a = 0; a < vocab_hash_size; a++) v->vocab_hash[a] = -1;
  size = v->vocab_size;
  v->word_count = 0;
  for (a = 0; a < size; a++) {
    // Words occurring less than min_count times are discarded from the vocab.
    // Removal frees the current tail entry rather than vocab[a], which relies
    // on the sort having moved rare words to the end (presumably VocabCompare
    // orders by descending count -- confirm). Entry 0 is exempt: it is not
    // part of the sort, so dropping the tail on its behalf could free a word
    // that is hashed later in this loop (use-after-free).
    if ((v->vocab[a].cn < min_count) && (a != 0)) {
      v->vocab_size--;
      free(v->vocab[v->vocab_size].word);
    } else {
      // Hash will be re-computed, as after the sorting it is not actual
      hash = GetWordHash(v, v->vocab[a].word);
      // Linear probing: advance to the next free slot on collision
      while (v->vocab_hash[hash] != -1) hash = (hash + 1) % vocab_hash_size;
      v->vocab_hash[hash] = a;
      v->word_count += v->vocab[a].cn;
    }
  }
  // Resize the array to the surviving entries. If realloc fails the previous
  // buffer is still valid and already holds them, so keep it rather than
  // overwriting v->vocab with NULL.
  resized = (struct vocab_word *)realloc(v->vocab, (v->vocab_size + 1) * sizeof(struct vocab_word));
  if (resized != NULL) v->vocab = resized;
}

开发者ID:stephenroller，项目名称:word2vecfz，代码行数:24，代码来源:vocab.c

注：本文中的GetWordHash函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。