当前位置: 首页>>代码示例>>C++>>正文


C++ PhraseAlignment::match方法代码示例

本文整理汇总了C++中PhraseAlignment::match方法的典型用法代码示例。如果您正苦于以下问题:C++ PhraseAlignment::match方法的具体用法?C++ PhraseAlignment::match怎么用?C++ PhraseAlignment::match使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类PhraseAlignment的用法示例。


在下文中一共展示了PhraseAlignment::match方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: computeCountOfCounts

/**
 * Reads the extract file line by line, merges consecutive lines that describe
 * the same phrase pair, and records in countOfCounts[c] how many distinct
 * phrase pairs have a (rounded-up) count of c, for c <= GT_MAX. Afterwards
 * fills discountFactor[1..GT_MAX-1] from neighbouring counts-of-counts and
 * logs the result to stderr.
 *
 * Terminates the process with exit(1) if the file cannot be opened.
 *
 * @param fileNameExtract path of the extract file to read
 */
void computeCountOfCounts( char* fileNameExtract )
{
	cerr << "computing counts of counts";
	for(int i=1;i<=GT_MAX;i++) countOfCounts[i] = 0;

	ifstream extractFile;
	extractFile.open( fileNameExtract );
	if (extractFile.fail()) {
		cerr << "ERROR: could not open extract file " << fileNameExtract << endl;
		exit(1);
	}
	istream &extractFileP = extractFile;

	// loop through all extracted phrase translations
	int i=0;
	char line[LINE_MAX_LENGTH],lastLine[LINE_MAX_LENGTH];
	lastLine[0] = '\0';
	PhraseAlignment *lastPhrasePair = NULL; // pair currently being accumulated
	while(true) {
		if (extractFileP.eof()) break;
		if (++i % 100000 == 0) cerr << "." << flush;
		SAFE_GETLINE((extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
		// NOTE(review): a read that reaches end-of-file is discarded here;
		// presumably the input always ends with a newline - confirm.
		if (extractFileP.eof())	break;

		// identical to last line? just add count
		// (guarded: before any pair exists, lastLine is "" and an empty first
		// line would otherwise dereference a NULL lastPhrasePair)
		if (lastPhrasePair != NULL && strcmp(line,lastLine) == 0)
		{
			lastPhrasePair->addToCount( line );
			continue;
		}
		strcpy( lastLine, line );

		// create new phrase pair
		PhraseAlignment *phrasePair = new PhraseAlignment();
		phrasePair->create( line, i );

		// very first pair: nothing to compare against yet
		if (lastPhrasePair == NULL)
		{
			lastPhrasePair = phrasePair;
			continue;
		}

		// only differs in count? just add count
		if (lastPhrasePair->match( *phrasePair ))
		{
			lastPhrasePair->count += phrasePair->count;
			phrasePair->clear();
			delete(phrasePair);
			continue;
		}

		// periodically house cleaning
		if (phrasePair->GetSource() != lastPhrasePair->GetSource())
		{
			phraseTableT.clear(); // these would get too big
			phraseTableS.clear(); // these would get too big
			// process line again, since phrase tables flushed
			phrasePair->clear();
			phrasePair->create( line, i );
		}

		// the previous pair is complete: tally its count, rounded up
		int count = static_cast<int>(lastPhrasePair->count + 0.99999);
		if(count <= GT_MAX)
			countOfCounts[ count ]++;
		lastPhrasePair->clear();
		delete( lastPhrasePair );
		lastPhrasePair = phrasePair;
	}

	// The pair still pending when the input ends is complete as well; tally
	// it and release it the same way the loop does.
	if (lastPhrasePair != NULL)
	{
		int count = static_cast<int>(lastPhrasePair->count + 0.99999);
		if(count <= GT_MAX)
			countOfCounts[ count ]++;
		lastPhrasePair->clear();
		delete( lastPhrasePair );
		lastPhrasePair = NULL;
	}

	discountFactor[0] = 0.01; // floor
	cerr << "\n";
	for(int i=1;i<GT_MAX;i++)
	{
		// (i+1)/i * (n[i+1]+0.1)/(n[i]+0.1); the 0.1 terms avoid division by zero
		discountFactor[i] = ((float)i+1)/(float)i*(((float)countOfCounts[i+1]+0.1) / ((float)countOfCounts[i]+0.1));
		cerr << "count " << i << ": " << countOfCounts[ i ] << ", discount factor: " << discountFactor[i];
		// some smoothing: cap at 1 and keep the factors non-decreasing in i
		if (discountFactor[i]>1)
			discountFactor[i] = 1;
		if (discountFactor[i]<discountFactor[i-1])
			discountFactor[i] = discountFactor[i-1];
		cerr << " -> " << discountFactor[i]*i << endl;
	}
}
开发者ID:poetzhangzi,项目名称:test,代码行数:85,代码来源:score.cpp

示例2: computeCountOfCounts

/**
 * Reads the extract file line by line, merges consecutive lines that describe
 * the same phrase pair, and records in countOfCounts[c] how many distinct
 * phrase pairs have a (rounded-up) count of c, for c <= GT_MAX. Afterwards
 * fills discountFactor[1..GT_MAX-1] from neighbouring counts-of-counts and
 * logs the result to stderr.
 *
 * Terminates the process with exit(9) when the file name is "-" (piped
 * input), which this function rejects.
 *
 * @param fileNameExtract path of the extract file to read
 */
void computeCountOfCounts(const string& fileNameExtract) {
	if (fileNameExtract == "-") {
		cerr << "The ‘GoodTuring Discounting’ option may not be used with piped input!" << endl;
		exit(9);
	}

	cerr << "computing counts of counts";
	for (size_t i=1; i<=GT_MAX; ++i) countOfCounts[i] = 0;

	Bz2LineReader extractFile(fileNameExtract);

	// loop through all extracted phrase translations
	int i=0;
	string lastLine;
	PhraseAlignment *lastPhrasePair = NULL; // pair currently being accumulated

	// the loop ends at the first empty string returned by readLine()
	for (string line = extractFile.readLine(); !line.empty(); line = extractFile.readLine()) {
		if ((++i)%10000000 == 0) cerr << "[" << i << "]" << endl;
		else if (i % 100000 == 0) cerr << "," << flush;

		// identical to last line? just add count
		if (lastPhrasePair != NULL && line == lastLine) {
			lastPhrasePair->addToCount(line);
			continue;
		}
		lastLine = line;

		// create new phrase pair
		PhraseAlignment *phrasePair = new PhraseAlignment();
		vector<string> lineVector = tokenize(line.c_str());
		phrasePair->create(lineVector, i);

		// very first pair: nothing to compare against yet
		if (lastPhrasePair == NULL) {
			lastPhrasePair = phrasePair;
			continue;
		}

		// only differs in count? just add count
		if (lastPhrasePair->match( *phrasePair )) {
			lastPhrasePair->count += phrasePair->count;
			phrasePair->clear();
			delete(phrasePair);
			continue;
		}

		// periodically house cleaning
		if (phrasePair->source != lastPhrasePair->source) {
			phraseTableT.clear(); // these would get too big
			phraseTableS.clear(); // these would get too big
			// process line again, since phrase tables flushed
			phrasePair->clear();
			phrasePair->create(lineVector, i);
		}

		// the previous pair is complete: tally its count, rounded up
		int count = static_cast<int>(lastPhrasePair->count + 0.99999);
		if(count <= GT_MAX)
			++countOfCounts[ count ];
		lastPhrasePair->clear();
		delete( lastPhrasePair );
		lastPhrasePair = phrasePair;
	}

	// The pair still pending when the input ends is complete as well; tally
	// it and release it the same way the loop does (it was previously leaked
	// and left out of the statistics).
	if (lastPhrasePair != NULL) {
		int count = static_cast<int>(lastPhrasePair->count + 0.99999);
		if(count <= GT_MAX)
			++countOfCounts[ count ];
		lastPhrasePair->clear();
		delete( lastPhrasePair );
		lastPhrasePair = NULL;
	}

	discountFactor[0] = 0.01; // floor
	cerr << "\n";
	for(int i=1;i<GT_MAX; ++i) {
		// (i+1)/i * (n[i+1]+0.1)/(n[i]+0.1); the 0.1 terms avoid division by zero
		discountFactor[i] = ((float)i+1)/(float)i*(((float)countOfCounts[i+1]+0.1) / ((float)countOfCounts[i]+0.1));
		cerr << "count " << i << ": " << countOfCounts[ i ] << ", discount factor: " << discountFactor[i];
		// some smoothing: cap at 1 and keep the factors non-decreasing in i
		if (discountFactor[i]>1)
			discountFactor[i] = 1;
		if (discountFactor[i]<discountFactor[i-1])
			discountFactor[i] = discountFactor[i-1];
		cerr << " -> " << discountFactor[i]*i << endl;
	}
}
开发者ID:svetakrasikova,项目名称:ADSKMosesTraining,代码行数:76,代码来源:score.cpp


注:本文中的PhraseAlignment::match方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。