本文整理汇总了C++中CDictionary::GetFrequency方法的典型用法代码示例。如果您正苦于以下问题:C++ CDictionary::GetFrequency方法的具体用法?C++ CDictionary::GetFrequency怎么用?C++ CDictionary::GetFrequency使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类CDictionary的用法示例。
在下文中一共展示了CDictionary::GetFrequency方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: BiGraphGenerate
// Builds the word-pair (bigram) net for the words held in aWord.
//
// For every word in aWord and every word that starts where it ends
// (next->row == cur->col), a smoothed cost
//     -log{a*P(Ci-1)+(1-a)*P(Ci|Ci-1)},  0<a<1
// is computed and stored into aBinaryWordNet at the pair's indices in the
// position map table.
//
// Parameters:
//   aWord          : the words array
//   aBinaryWordNet : the net between words (output, filled via SetElement)
//   dSmoothingPara : the parameter of data smoothing ("a" in the formula)
//   DictBinary     : the binary dictionary, looked up with "w1<sep>w2" keys
//   DictCore       : the core dictionary, used for words whose nPOS is negative
// Returns: always true.
// Side effects: overwrites m_nWordCount and frees/re-allocates
//   m_npWordPosMapTable.
bool CSegment::BiGraphGenerate(CDynamicArray &aWord, CDynamicArray &aBinaryWordNet,double dSmoothingPara,CDictionary &DictBinary,CDictionary &DictCore)
{
    PARRAY_CHAIN pTail,pCur,pNextWords;//Temp buffer
    unsigned int nWordIndex=0,nTwoWordsFreq=0,nCurWordIndex,nNextWordIndex;
    //nWordIndex: the index number of current word
    double dCurFreqency,dValue;
    const double dTemp=(double)1/MAX_FREQUENCE;//Loop invariant, computed once
    char sTwoWords[WORD_MAXLENGTH];

    m_nWordCount=aWord.GetTail(&pTail);//Get tail element and return the words count
    if(m_npWordPosMapTable)
    {//free buffer
        delete [] m_npWordPosMapTable;
        m_npWordPosMapTable=0;
    }
    if(m_nWordCount>0)//Word count is greater than 0
    {
        m_npWordPosMapTable=new int[m_nWordCount];//Record the position of possible words
        memset(m_npWordPosMapTable,0,m_nWordCount*sizeof(int));
    }

    //Set the position map of words. The index is bounded by the allocated
    //size so the table is never written out of range (or through a null
    //pointer when the reported word count is 0).
    pCur=aWord.GetHead();
    while(pCur!=NULL&&nWordIndex<m_nWordCount)
    {
        m_npWordPosMapTable[nWordIndex++]=pCur->row*MAX_SENTENCE_LEN+pCur->col;
        pCur=pCur->next;
    }

    pCur=aWord.GetHead();
    while(pCur!=NULL)
    {
        if(pCur->nPOS>=0)//It's not an unknown words
            dCurFreqency=pCur->value;
        else//Unknown words
            dCurFreqency=DictCore.GetFrequency(pCur->sWord,2);

        aWord.GetElement(pCur->col,-1,pCur,&pNextWords);//Get next words which begin with pCur->col
        while(pNextWords&&pNextWords->row==pCur->col)//Next words
        {
            //Build the lookup key "cur<sep>next". Every append is limited to
            //the space left in sTwoWords, so an over-long pair is truncated
            //instead of overflowing the stack buffer.
            sTwoWords[0]=0;
            strncat(sTwoWords,pCur->sWord,sizeof(sTwoWords)-1);
            strncat(sTwoWords,WORD_SEGMENTER,sizeof(sTwoWords)-strlen(sTwoWords)-1);
            strncat(sTwoWords,pNextWords->sWord,sizeof(sTwoWords)-strlen(sTwoWords)-1);

            //Two linked Words frequency
            nTwoWordsFreq=DictBinary.GetFrequency(sTwoWords,3);

            //Smoothing
            //-log{a*P(Ci-1)+(1-a)P(Ci|Ci-1)} Note 0<a<1
            dValue=-log(dSmoothingPara*(1+dCurFreqency)/(MAX_FREQUENCE+80000)+(1-dSmoothingPara)*((1-dTemp)*nTwoWordsFreq/(1+dCurFreqency)+dTemp));
            if(pCur->nPOS<0)//Unknown words: P(Wi|Ci);while known words:1
                dValue+=pCur->value;

            //Get the position index of current word in the position map table
            nCurWordIndex=BinarySearch(pCur->row*MAX_SENTENCE_LEN+pCur->col,m_npWordPosMapTable,m_nWordCount);
            nNextWordIndex=BinarySearch(pNextWords->row*MAX_SENTENCE_LEN+pNextWords->col,m_npWordPosMapTable,m_nWordCount);
            aBinaryWordNet.SetElement(nCurWordIndex,nNextWordIndex,dValue,pCur->nPOS);

            pNextWords=pNextWords->next;//Get next word
        }
        pCur=pCur->next;
    }
    return true;
}
示例2: ChineseNameSplit
bool CResult::ChineseNameSplit(char *sPersonName, char *sSurname, char *sSurname2, char *sGivenName, CDictionary &personDict)
{
int nSurNameLen=4,nLen=strlen(sPersonName),nFreq,i=0,nCharType,nFreqGiven;
char sTemp[3];
if(nLen<3||nLen>8)//Not a traditional Chinese person name
return false;
while(i<nLen)//No Including non-CHinese char
{
nCharType=charType((unsigned char*)sPersonName+i);
if(nCharType!=CT_CHINESE&&nCharType!=CT_OTHER)
return false;
i+=2;
}
sSurname2[0]=0;//init
strncpy(sSurname,sPersonName,nSurNameLen);
sSurname[nSurNameLen]=0;
if(!personDict.IsExist(sSurname,1))
{
nSurNameLen=2;
sSurname[nSurNameLen]=0;
if(!personDict.IsExist(sSurname,1))
{
nSurNameLen=0;
sSurname[nSurNameLen]=0;
}
}
strcpy(sGivenName,sPersonName+nSurNameLen);
if(nLen>6)
{
strncpy(sTemp,sPersonName+nSurNameLen,2);
sTemp[2]=0;//Get the second possible surname
if(personDict.IsExist(sTemp,1))
{//Hongkong women's name: Surname+surname+given name
strcpy(sSurname2,sTemp);
strcpy(sGivenName,sPersonName+nSurNameLen+2);
}
}
nFreq=personDict.GetFrequency(sSurname,1);
strncpy(sTemp,sGivenName,2);
sTemp[2]=0;
nFreqGiven=personDict.GetFrequency(sTemp,2);
if(nSurNameLen!=4&&((nSurNameLen==0&&nLen>4)||strlen(sGivenName)>4||(GetForeignCharCount(sPersonName)>=3&&nFreq<personDict.GetFrequency("张",1)/40&&nFreqGiven<personDict.GetFrequency("华",2)/20)||(nFreq<10&&GetForeignCharCount(sGivenName)==(nLen-nSurNameLen)/2)))
return false;
if(nLen==4&&m_uPerson.IsGivenName(sPersonName))
{//Single Surname+given name
return false;
}
return true;
}
示例3: POSTagging
// POS tagging with Hidden Markov Model.
//
// Walks pWordItems span by span until an item with an empty sWord is reached
// (or GetFrom returns a negative index). For each span it calls GetFrom and
// GetBestPOS, then acts according to m_tagType.
//
// Parameters:
//   pWordItems  : word items, terminated by an item whose sWord is empty
//   dictCore    : core dictionary
//   dictUnknown : unknown-word recognition dictionary
// Returns: always true.
bool CSpan::POSTagging(PWORD_RESULT pWordItems,CDictionary &dictCore,CDictionary &dictUnknown)
{
    int nNext=0;
    Reset(false);
    while(nNext>-1&&pWordItems[nNext].sWord[0]!=0)
    {
        const int nSpanStart=nNext;//Start position of this span
        nNext=GetFrom(pWordItems,nSpanStart,dictCore,dictUnknown);
        GetBestPOS();

        if(m_tagType==TT_NORMAL)
        {//Normal POS tagging: store the best tag of every item in the span
            for(int j=1;m_nBestTag[j]!=-1&&j<m_nCurLength;++j)
            {
                PWORD_RESULT pItem=&pWordItems[j+nSpanStart-1];
                pItem->nHandle=m_nBestTag[j];
                //Word exists in the core dictionary: replace its positive
                //value by one derived from its frequency under the chosen tag
                if(pItem->dValue>0&&dictCore.IsExist(pItem->sWord,-1))
                    pItem->dValue=LOG_MAX_FRQUENCE-log((double)dictCore.GetFrequency(pItem->sWord,m_nBestTag[j])+1);
            }
        }
        else if(m_tagType==TT_PERSON)
        {//Person recognition
            SplitPersonPOS(dictUnknown);//Split persons POS
            PersonRecognize(dictUnknown);//Person recognition with the person recognition dictionary
        }
        else if(m_tagType==TT_PLACE)
        {//Place name recognition
            PlaceRecognize(dictCore,dictUnknown);
        }
        else if(m_tagType==TT_TRANS)
        {//Transliteration
            TransRecognize(dictCore,dictUnknown);
        }

        Reset();
    }
    return true;
}
示例4: ComputePossibility
// Sums, over the items [nStartPos, nStartPos+nLength), the term
//     log(m_context.GetFrequency(0,tag)+1) - log(dict.GetFrequency(word,tag)+1)
// where tag is m_nBestTag[i] and word is m_sWords[i].
// Only this per-item term is accumulated; no context-transition term is added.
//
// Parameters:
//   nStartPos : index of the first item
//   nLength   : number of items
//   dict      : dictionary giving the frequency of a word under a tag
// Returns: the accumulated sum (0 when nLength is not positive).
ELEMENT_TYPE CSpan::ComputePossibility(int nStartPos,int nLength,CDictionary &dict)
{
    ELEMENT_TYPE dTotal=0;
    const int nStop=nStartPos+nLength;
    for(int idx=nStartPos;idx<nStop;++idx)
    {
        //Frequency of the word being tagged with its best POS
        const int nWordTagFreq=dict.GetFrequency(m_sWords[idx],m_nBestTag[idx]);
        const ELEMENT_TYPE dTerm=log((double)(m_context.GetFrequency(0,m_nBestTag[idx])+1))-log((double)(nWordTagFreq+1));
        dTotal+=dTerm;
    }
    return dTotal;
}
示例5: PersonRecognize
bool CSpan::PersonRecognize(CDictionary &personDict)
{
char sPOS[MAX_WORDS_PER_SENTENCE]="z",sPersonName[100];
//0 1 2 3 4 5
char sPatterns[][5]={ "BBCD","BBC","BBE","BBZ","BCD","BEE","BE",
"BG", "BXD","BZ", "CDCD","CD","EE",
"FB", "Y","XD",""};
double dFactor[]={0.0011,0.0011,0.0011,0.0011,0.7614,0.0011,0.2055,
0.0160,0.0011,0.0011,0,0.0160,0.0011,
0.0160,0.0011,0.0011,0 };
//About parameter:
/*
Given Name: 486 0.0160
Surname+postfix:484 0.0160
m_lPerson2Num:6265 0.2055
m_lPerson3Num: 23184 0.7614
m_lPerson4Num:32 0.0011
*/
//The person recognition patterns set
//BBCD:姓+姓+名1+名2;
//BBE: 姓+姓+单名;
//BBZ: 姓+姓+双名成词;
//BCD: 姓+名1+名2;
//BE: 姓+单名;
//BEE: 姓+单名+单名;韩磊磊
//BG: 姓+后缀
//BXD: 姓+姓双名首字成词+双名末字
//BZ: 姓+双名成词;
//B: 姓
//CD: 名1+名2;
//EE: 单名+单名;
//FB: 前缀+姓
//XD: 姓双名首字成词+双名末字
//Y: 姓单名成词
int nPatternLen[]={4,3,3,3,3,3,2,2,3,2,4,2,2,2,1,2,0};
int i;
for(i=1;m_nBestTag[i]>-1;i++)//Convert to string from POS
sPOS[i]=m_nBestTag[i]+'A';
sPOS[i]=0;
int j=1,k,nPos;//Find the proper pattern from the first POS
int nLittleFreqCount;//Counter for the person name role with little frequecy
bool bMatched=false;
while(j<i)
{
bMatched=false;
for(k=0;!bMatched&&nPatternLen[k]>0;k++)
{
if(strncmp(sPatterns[k],sPOS+j,nPatternLen[k])==0&&strcmp(m_sWords[j-1],"·")!=0&&strcmp(m_sWords[j+nPatternLen[k]],"·")!=0)
{//Find the proper pattern k
if(strcmp(sPatterns[k],"FB")==0&&(sPOS[j+2]=='E'||sPOS[j+2]=='C'||sPOS[j+2]=='G'))
{//Rule 1 for exclusion:前缀+姓+名1(名2): 规则(前缀+姓)失效;
continue;
}
/* if((strcmp(sPatterns[k],"BEE")==0||strcmp(sPatterns[k],"EE")==0)&&strcmp(m_sWords[j+nPatternLen[k]-1],m_sWords[j+nPatternLen[k]-2])!=0)
{//Rule 2 for exclusion:姓+单名+单名:单名+单名 若EE对应的字不同,规则失效.如:韩磊磊
continue;
}
if(strcmp(sPatterns[k],"B")==0&&m_nBestTag[j+1]!=12)
{//Rule 3 for exclusion: 若姓后不是后缀,规则失效.如:江主席、刘大娘
continue;
}
*/ //Get the possible name
nPos=j;//Record the person position in the tag sequence
sPersonName[0]=0;
nLittleFreqCount=0;//Record the number of role with little frequency
while(nPos<j+nPatternLen[k])
{//Get the possible person name
//
if(m_nBestTag[nPos]<4&&personDict.GetFrequency(m_sWords[nPos],m_nBestTag[nPos])<LITTLE_FREQUENCY)
nLittleFreqCount++;//The counter increase
strcat(sPersonName,m_sWords[nPos]);
nPos+=1;
}
if(IsAllForeign(sPersonName)&&personDict.GetFrequency(m_sWords[j],1)<LITTLE_FREQUENCY)
{//Exclusion foreign name
//Rule 2 for exclusion:若均为外国人名用字 规则(名1+名2)失效
j+=nPatternLen[k]-1;
continue;
}
if(strcmp(sPatterns[k],"CDCD")==0)
{//Rule for exclusion
//规则(名1+名2+名1+名2)本身是排除规则:女高音歌唱家迪里拜尔演唱
//Rule 3 for exclusion:含外国人名用字 规则适用
//否则,排除规则失效:黑妞白妞姐俩拔了头筹。
if(GetForeignCharCount(sPersonName)>0)
j+=nPatternLen[k]-1;
continue;
}
if(strcmp(sPatterns[k],"CD")==0&&IsAllForeign(sPersonName))
{//
j+=nPatternLen[k]-1;
continue;
}
if(nLittleFreqCount==nPatternLen[k]||nLittleFreqCount==3)
//马哈蒂尔;小扎耶德与他的中国阿姨胡彩玲受华黎明大使之邀,
//The all roles appear with two lower frequecy,we will ignore them
continue;
m_nUnknownWords[m_nUnknownIndex][0]=m_nWordPosition[j];
//.........这里部分代码省略.........