本文整理汇总了C++中Moses::OutputFileStream类的典型用法代码示例。如果您正苦于以下问题：C++ OutputFileStream类的具体用法？C++ OutputFileStream怎么用？C++ OutputFileStream使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了OutputFileStream类的10个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: writeCountOfCounts
/**
 * Writes the count-of-counts statistics to a file.
 *
 * Output layout: the first line holds totalDistinct, followed by one line
 * for each of countOfCounts[1] .. countOfCounts[COC_MAX].
 * If the file cannot be opened, an error is printed to stderr and the
 * function returns without writing anything.
 *
 * @param fileNameCountOfCounts path of the count-of-counts file to create
 */
void writeCountOfCounts( const string &fileNameCountOfCounts )
{
  // open file
  Moses::OutputFileStream countOfCountsFile;
  if (!countOfCountsFile.Open(fileNameCountOfCounts.c_str())) {
    cerr << "ERROR: could not open count-of-counts file "
         << fileNameCountOfCounts << endl;
    return;
  }

  // Kneser-Ney needs the total number of phrase pairs
  // ('\n' instead of endl: no need to force a flush after every single
  // line, the stream is explicitly closed right after the loop)
  countOfCountsFile << totalDistinct << '\n';

  // write out counts
  for (int i = 1; i <= COC_MAX; ++i) {
    countOfCountsFile << countOfCounts[ i ] << '\n';
  }

  countOfCountsFile.Close();
}
示例2: processFiles
void processFiles( const std::string& fileNameDirect,
const std::string& fileNameIndirect,
const std::string& fileNameConsolidated,
const std::string& fileNameCountOfCounts,
const std::string& fileNameSourceLabelSet,
const std::string& fileNamePartsOfSpeechVocabulary )
{
if (goodTuringFlag || kneserNeyFlag)
loadCountOfCounts( fileNameCountOfCounts );
// open input files
Moses::InputFileStream fileDirect(fileNameDirect);
UTIL_THROW_IF2(fileDirect.fail(), "could not open phrase table file " << fileNameDirect);
Moses::InputFileStream fileIndirect(fileNameIndirect);
UTIL_THROW_IF2(fileIndirect.fail(), "could not open phrase table file " << fileNameIndirect);
// open output file: consolidated phrase table
Moses::OutputFileStream fileConsolidated;
bool success = fileConsolidated.Open(fileNameConsolidated);
UTIL_THROW_IF2(!success, "could not open output file " << fileNameConsolidated);
// create properties consolidator
// (in case any additional phrase property requires further processing)
MosesTraining::PropertiesConsolidator propertiesConsolidator = MosesTraining::PropertiesConsolidator();
if (sourceLabelsFlag) {
propertiesConsolidator.ActivateSourceLabelsProcessing(fileNameSourceLabelSet);
}
if (partsOfSpeechFlag) {
propertiesConsolidator.ActivatePartsOfSpeechProcessing(fileNamePartsOfSpeechVocabulary);
}
// loop through all extracted phrase translations
int i=0;
while(true) {
i++;
if (i%100000 == 0) std::cerr << "." << std::flush;
std::vector< std::string > itemDirect, itemIndirect;
if (! getLine(fileIndirect, itemIndirect) ||
! getLine(fileDirect, itemDirect))
break;
// direct: target source alignment probabilities
// indirect: source target probabilities
// consistency checks
UTIL_THROW_IF2(itemDirect[0].compare( itemIndirect[0] ) != 0,
"target phrase does not match in line " << i << ": '" << itemDirect[0] << "' != '" << itemIndirect[0] << "'");
UTIL_THROW_IF2(itemDirect[1].compare( itemIndirect[1] ) != 0,
"source phrase does not match in line " << i << ": '" << itemDirect[1] << "' != '" << itemIndirect[1] << "'");
// SCORES ...
std::string directScores, directSparseScores, indirectScores, indirectSparseScores;
breakdownCoreAndSparse( itemDirect[3], directScores, directSparseScores );
breakdownCoreAndSparse( itemIndirect[3], indirectScores, indirectSparseScores );
std::vector<std::string> directCounts;
Moses::Tokenize( directCounts, itemDirect[4] );
std::vector<std::string> indirectCounts;
Moses::Tokenize( indirectCounts, itemIndirect[4] );
float countF = Moses::Scan<float>(directCounts[0]);
float countE = Moses::Scan<float>(indirectCounts[0]);
float countEF = Moses::Scan<float>(indirectCounts[1]);
float n1_F, n1_E;
if (kneserNeyFlag) {
n1_F = Moses::Scan<float>(directCounts[2]);
n1_E = Moses::Scan<float>(indirectCounts[2]);
}
// Good Turing discounting
float adjustedCountEF = countEF;
if (goodTuringFlag && countEF+0.99999 < goodTuringDiscount.size()-1)
adjustedCountEF *= goodTuringDiscount[(int)(countEF+0.99998)];
float adjustedCountEF_indirect = adjustedCountEF;
// Kneser Ney discounting [Foster et al, 2006]
if (kneserNeyFlag) {
float D = kneserNey_D3;
if (countEF < 2) D = kneserNey_D1;
else if (countEF < 3) D = kneserNey_D2;
if (D > countEF) D = countEF - 0.01; // sanity constraint
float p_b_E = n1_E / totalCount; // target phrase prob based on distinct
float alpha_F = D * n1_F / countF; // available mass
adjustedCountEF = countEF - D + countF * alpha_F * p_b_E;
// for indirect
float p_b_F = n1_F / totalCount; // target phrase prob based on distinct
float alpha_E = D * n1_E / countE; // available mass
adjustedCountEF_indirect = countEF - D + countE * alpha_E * p_b_F;
}
// drop due to MinScore thresholding
if ((minScore0 > 0 && adjustedCountEF_indirect/countE < minScore0) ||
(minScore2 > 0 && adjustedCountEF /countF < minScore2)) {
continue;
}
// output phrase pair
fileConsolidated << itemDirect[0] << " ||| ";
//.........这里部分代码省略.........
示例3: processFiles
void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameConsolidated, char* fileNameCountOfCounts )
{
if (goodTuringFlag || kneserNeyFlag)
loadCountOfCounts( fileNameCountOfCounts );
// open input files
Moses::InputFileStream fileDirect(fileNameDirect);
Moses::InputFileStream fileIndirect(fileNameIndirect);
if (fileDirect.fail()) {
cerr << "ERROR: could not open phrase table file " << fileNameDirect << endl;
exit(1);
}
istream &fileDirectP = fileDirect;
if (fileIndirect.fail()) {
cerr << "ERROR: could not open phrase table file " << fileNameIndirect << endl;
exit(1);
}
istream &fileIndirectP = fileIndirect;
// open output file: consolidated phrase table
Moses::OutputFileStream fileConsolidated;
bool success = fileConsolidated.Open(fileNameConsolidated);
if (!success) {
cerr << "ERROR: could not open output file " << fileNameConsolidated << endl;
exit(1);
}
// loop through all extracted phrase translations
int i=0;
while(true) {
i++;
if (i%100000 == 0) cerr << "." << flush;
vector< string > itemDirect, itemIndirect;
if (! getLine(fileIndirectP,itemIndirect) ||
! getLine(fileDirectP, itemDirect ))
break;
// direct: target source alignment probabilities
// indirect: source target probabilities
// consistency checks
if (itemDirect[0].compare( itemIndirect[0] ) != 0) {
cerr << "ERROR: target phrase does not match in line " << i << ": '"
<< itemDirect[0] << "' != '" << itemIndirect[0] << "'" << endl;
exit(1);
}
if (itemDirect[1].compare( itemIndirect[1] ) != 0) {
cerr << "ERROR: source phrase does not match in line " << i << ": '"
<< itemDirect[1] << "' != '" << itemIndirect[1] << "'" << endl;
exit(1);
}
// output hierarchical phrase pair (with separated labels)
fileConsolidated << itemDirect[0] << " ||| " << itemDirect[1] << " |||";
// SCORES ...
string directScores, directSparseScores, indirectScores, indirectSparseScores;
breakdownCoreAndSparse( itemDirect[3], directScores, directSparseScores );
breakdownCoreAndSparse( itemIndirect[3], indirectScores, indirectSparseScores );
vector<string> directCounts = tokenize(itemDirect[4].c_str());
vector<string> indirectCounts = tokenize(itemIndirect[4].c_str());
float countF = atof(directCounts[0].c_str());
float countE = atof(indirectCounts[0].c_str());
float countEF = atof(indirectCounts[1].c_str());
float n1_F, n1_E;
if (kneserNeyFlag) {
n1_F = atof(directCounts[2].c_str());
n1_E = atof(indirectCounts[2].c_str());
}
// Good Turing discounting
float adjustedCountEF = countEF;
if (goodTuringFlag && countEF+0.99999 < goodTuringDiscount.size()-1)
adjustedCountEF *= goodTuringDiscount[(int)(countEF+0.99998)];
float adjustedCountEF_indirect = adjustedCountEF;
// Kneser Ney discounting [Foster et al, 2006]
if (kneserNeyFlag) {
float D = kneserNey_D3;
if (countEF < 2) D = kneserNey_D1;
else if (countEF < 3) D = kneserNey_D2;
if (D > countEF) D = countEF - 0.01; // sanity constraint
float p_b_E = n1_E / totalCount; // target phrase prob based on distinct
float alpha_F = D * n1_F / countF; // available mass
adjustedCountEF = countEF - D + countF * alpha_F * p_b_E;
// for indirect
float p_b_F = n1_F / totalCount; // target phrase prob based on distinct
float alpha_E = D * n1_E / countE; // available mass
adjustedCountEF_indirect = countEF - D + countE * alpha_E * p_b_F;
}
// prob indirect
if (!onlyDirectFlag) {
//.........这里部分代码省略.........
示例4: main
//.........这里部分代码省略.........
#ifdef WITH_THREADS
thread_count = atoi(argv[++i]);
#else
cerr << "thread support not compiled in." << '\n';
exit(1);
#endif
} else if (strcmp(argv[i], "--SentenceOffset") == 0) {
if (i+1 >= argc || argv[i+1][0] < '0' || argv[i+1][0] > '9') {
cerr << "extract: syntax error, used switch --SentenceOffset without a number" << endl;
exit(1);
}
sentenceOffset = atoi(argv[++i]);
} else if (strcmp(argv[i],"--BoundaryRules") == 0) {
options.boundaryRules = true;
} else {
cerr << "extract: syntax error, unknown option '" << string(argv[i]) << "'\n";
exit(1);
}
}
cerr << "extracting hierarchical rules" << endl;
// open input files
Moses::InputFileStream tFile(fileNameT);
Moses::InputFileStream sFile(fileNameS);
Moses::InputFileStream aFile(fileNameA);
istream *tFileP = &tFile;
istream *sFileP = &sFile;
istream *aFileP = &aFile;
// open output files
string fileNameExtractInv = fileNameExtract + ".inv" + (options.gzOutput?".gz":"");
Moses::OutputFileStream extractFile;
Moses::OutputFileStream extractFileInv;
Moses::OutputFileStream extractFileContext;
Moses::OutputFileStream extractFileContextInv;
extractFile.Open((fileNameExtract + (options.gzOutput?".gz":"")).c_str());
if (!options.onlyDirectFlag)
extractFileInv.Open(fileNameExtractInv.c_str());
if (options.flexScoreFlag) {
string fileNameExtractContext = fileNameExtract + ".context" + (options.gzOutput?".gz":"");
extractFileContext.Open(fileNameExtractContext.c_str());
if (!options.onlyDirectFlag) {
string fileNameExtractContextInv = fileNameExtract + ".context.inv" + (options.gzOutput?".gz":"");
extractFileContextInv.Open(fileNameExtractContextInv.c_str());
}
}
// stats on labels for glue grammar and unknown word label probabilities
set< string > targetLabelCollection, sourceLabelCollection;
map< string, int > targetTopLabelCollection, sourceTopLabelCollection;
// loop through all sentence pairs
size_t i=sentenceOffset;
string targetString, sourceString, alignmentString;
while(getline(*tFileP, targetString)) {
i++;
getline(*sFileP, sourceString);
getline(*aFileP, alignmentString);
if (i%1000 == 0) cerr << i << " " << flush;
示例5: main
//.........这里部分代码省略.........
//else if (strcmp(argv[i],"--ZipFiles") == 0) {
// zipFiles = true;
//}
// if an source phrase is paired with two target phrases, then count(t|s) = 0.5
else if (strcmp(argv[i],"--NoFractionalCounting") == 0) {
options.fractionalCounting = false;
} else if (strcmp(argv[i],"--OutputNTLengths") == 0) {
options.outputNTLengths = true;
#ifdef WITH_THREADS
} else if (strcmp(argv[i],"-threads") == 0 ||
strcmp(argv[i],"--threads") == 0 ||
strcmp(argv[i],"--Threads") == 0) {
thread_count = atoi(argv[++i]);
#endif
} else {
cerr << "extract: syntax error, unknown option '" << string(argv[i]) << "'\n";
exit(1);
}
}
cerr << "extracting hierarchical rules" << endl;
// open input files
Moses::InputFileStream tFile(fileNameT);
Moses::InputFileStream sFile(fileNameS);
Moses::InputFileStream aFile(fileNameA);
istream *tFileP = &tFile;
istream *sFileP = &sFile;
istream *aFileP = &aFile;
// open output files
string fileNameExtractInv = fileNameExtract + ".inv" + (options.gzOutput?".gz":"");
Moses::OutputFileStream extractFile;
Moses::OutputFileStream extractFileInv;
extractFile.Open((fileNameExtract + (options.gzOutput?".gz":"")).c_str());
if (!options.onlyDirectFlag)
extractFileInv.Open(fileNameExtractInv.c_str());
// output into file
Moses::OutputCollector* extractCollector = new Moses::OutputCollector(&extractFile);
Moses::OutputCollector* extractCollectorInv = new Moses::OutputCollector(&extractFileInv);
// stats on labels for glue grammar and unknown word label probabilities
set< string > targetLabelCollection, sourceLabelCollection;
map< string, int > targetTopLabelCollection, sourceTopLabelCollection;
#ifdef WITH_THREADS
// set up thread pool
Moses::ThreadPool pool(thread_count);
pool.SetQueueLimit(1000);
#endif
// loop through all sentence pairs
size_t i=0;
while(true) {
i++;
if (i%1000 == 0) cerr << "." << flush;
if (i%10000 == 0) cerr << ":" << flush;
if (i%100000 == 0) cerr << "!" << flush;
char targetString[LINE_MAX_LENGTH];
char sourceString[LINE_MAX_LENGTH];
char alignmentString[LINE_MAX_LENGTH];
SAFE_GETLINE((*tFileP), targetString, LINE_MAX_LENGTH, '\n', __FILE__);
if (tFileP->eof()) break;
SAFE_GETLINE((*sFileP), sourceString, LINE_MAX_LENGTH, '\n', __FILE__);
示例6: main
//.........这里部分代码省略.........
}
else if (strcmp(argv[i], "--GZOutput") == 0) {
global->gzOutput = true;
}
else if (strcmp(argv[i],"--MaxSpan") == 0) {
// ignore
++i;
}
else if (strcmp(argv[i],"--SentenceOffset") == 0) {
if (i+1 >= argc || argv[i+1][0] < '0' || argv[i+1][0] > '9') {
cerr << "extract: syntax error, used switch --SentenceOffset without a number" << endl;
exit(1);
}
sentenceOffset = atoi(argv[++i]);
}
else {
cerr << "extract: syntax error, unknown option '" << string(argv[i]) << "'\n";
exit(1);
}
}
// open input files
Moses::InputFileStream tFile(fileNameT);
Moses::InputFileStream sFile(fileNameS);
Moses::InputFileStream aFile(fileNameA);
// open output files
string fileNameExtractInv = fileNameExtract + ".inv";
if (global->gzOutput) {
fileNameExtract += ".gz";
fileNameExtractInv += ".gz";
}
Moses::OutputFileStream extractFile;
Moses::OutputFileStream extractFileInv;
extractFile.Open(fileNameExtract.c_str());
extractFileInv.Open(fileNameExtractInv.c_str());
// loop through all sentence pairs
int i = sentenceOffset;
while(true) {
i++;
if (i % 1000 == 0) {
cerr << i << " " << flush;
}
string targetString;
string sourceString;
string alignmentString;
bool ok = getline(tFile, targetString);
if (!ok)
break;
getline(sFile, sourceString);
getline(aFile, alignmentString);
//cerr << endl << targetString << endl << sourceString << endl << alignmentString << endl;
//time_t currTime = time(NULL);
//cerr << "A " << (currTime - starttime) << endl;
SentenceAlignment sentencePair;
if (sentencePair.Create( targetString, sourceString, alignmentString, i, *global ))
{
//cerr << sentence.sourceTree << endl;
//cerr << sentence.targetTree << endl;
sentencePair.FindTunnels(*g_global);
//cerr << "C " << (time(NULL) - starttime) << endl;
//cerr << sentencePair << endl;
sentencePair.CreateLattice(*g_global);
//cerr << "D " << (time(NULL) - starttime) << endl;
//cerr << sentencePair << endl;
sentencePair.CreateRules(*g_global);
//cerr << "E " << (time(NULL) - starttime) << endl;
//cerr << sentence.lattice->GetRules().GetSize() << endl;
sentencePair.GetLattice().GetRules().Output(extractFile);
sentencePair.GetLattice().GetRules().OutputInv(extractFileInv);
}
}
tFile.Close();
sFile.Close();
aFile.Close();
extractFile.Close();
extractFileInv.Close();
if (global->glueGrammarFlag) {
writeGlueGrammar(fileNameGlueGrammar, *global, targetLabelCollection, targetTopLabelCollection);
}
delete global;
}
示例7: main
//.........这里部分代码省略.........
// load domain information
if (domainFlag) {
if (inverseFlag) {
domainFlag = false;
includeSentenceIdFlag = false;
}
else {
domain = new Domain;
domain->load( fileNameDomain );
}
}
// compute count of counts for Good Turing discounting
if (goodTuringFlag || kneserNeyFlag) {
for(int i=1; i<=COC_MAX; i++) countOfCounts[i] = 0;
}
// sorted phrase extraction file
Moses::InputFileStream extractFile(fileNameExtract);
if (extractFile.fail()) {
cerr << "ERROR: could not open extract file " << fileNameExtract << endl;
exit(1);
}
istream &extractFileP = extractFile;
// output file: phrase translation table
ostream *phraseTableFile;
if (fileNamePhraseTable == "-") {
phraseTableFile = &cout;
}
else {
Moses::OutputFileStream *outputFile = new Moses::OutputFileStream();
bool success = outputFile->Open(fileNamePhraseTable);
if (!success) {
cerr << "ERROR: could not open file phrase table file "
<< fileNamePhraseTable << endl;
exit(1);
}
phraseTableFile = outputFile;
}
// loop through all extracted phrase translations
float lastCount = 0.0f;
float lastPcfgSum = 0.0f;
vector< PhraseAlignment > phrasePairsWithSameF;
bool isSingleton = true;
int i=0;
char line[LINE_MAX_LENGTH],lastLine[LINE_MAX_LENGTH];
lastLine[0] = '\0';
PhraseAlignment *lastPhrasePair = NULL;
while(true) {
if (extractFileP.eof()) break;
if (++i % 100000 == 0) cerr << "." << flush;
SAFE_GETLINE((extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
if (extractFileP.eof()) break;
// identical to last line? just add count
if (strcmp(line,lastLine) == 0) {
lastPhrasePair->count += lastCount;
lastPhrasePair->pcfgSum += lastPcfgSum;
continue;
}
strcpy( lastLine, line );
示例8: main
int main(int argc, char* argv[])
{
cerr << "PhraseExtract v1.4, written by Philipp Koehn\n"
<< "phrase extraction from an aligned parallel corpus\n";
if (argc < 6) {
cerr << "syntax: extract en de align extract max-length [orientation [ --model [wbe|phrase|hier]-[msd|mslr|mono] ] ";
cerr<<"| --OnlyOutputSpanInfo | --NoTTable | --GZOutput | --IncludeSentenceId | --SentenceOffset n | --InstanceWeights filename ]\n";
exit(1);
}
Moses::OutputFileStream extractFileOrientation;
const char* const &fileNameE = argv[1];
const char* const &fileNameF = argv[2];
const char* const &fileNameA = argv[3];
const string fileNameExtract = string(argv[4]);
PhraseExtractionOptions options(atoi(argv[5]));
for(int i=6; i<argc; i++) {
if (strcmp(argv[i],"--OnlyOutputSpanInfo") == 0) {
options.initOnlyOutputSpanInfo(true);
} else if (strcmp(argv[i],"orientation") == 0 || strcmp(argv[i],"--Orientation") == 0) {
options.initOrientationFlag(true);
} else if (strcmp(argv[i],"--FlexibilityScore") == 0) {
options.initFlexScoreFlag(true);
} else if (strcmp(argv[i],"--NoTTable") == 0) {
options.initTranslationFlag(false);
} else if (strcmp(argv[i], "--IncludeSentenceId") == 0) {
options.initIncludeSentenceIdFlag(true);
} else if (strcmp(argv[i], "--SentenceOffset") == 0) {
if (i+1 >= argc || argv[i+1][0] < '0' || argv[i+1][0] > '9') {
cerr << "extract: syntax error, used switch --SentenceOffset without a number" << endl;
exit(1);
}
sentenceOffset = atoi(argv[++i]);
} else if (strcmp(argv[i], "--GZOutput") == 0) {
options.initGzOutput(true);
} else if (strcmp(argv[i], "--InstanceWeights") == 0) {
if (i+1 >= argc) {
cerr << "extract: syntax error, used switch --InstanceWeights without file name" << endl;
exit(1);
}
options.initInstanceWeightsFile(argv[++i]);
} else if (strcmp(argv[i], "--Debug") == 0) {
options.debug = true;
} else if (strcmp(argv[i], "--MinPhraseLength") == 0) {
options.minPhraseLength = atoi(argv[++i]);
} else if (strcmp(argv[i], "--Separator") == 0) {
options.separator = argv[++i];
} else if(strcmp(argv[i],"--model") == 0) {
if (i+1 >= argc) {
cerr << "extract: syntax error, no model's information provided to the option --model " << endl;
exit(1);
}
char* modelParams = argv[++i];
char* modelName = strtok(modelParams, "-");
char* modelType = strtok(NULL, "-");
// REO_MODEL_TYPE intModelType;
if(strcmp(modelName, "wbe") == 0) {
options.initWordModel(true);
if(strcmp(modelType, "msd") == 0)
options.initWordType(REO_MSD);
else if(strcmp(modelType, "mslr") == 0)
options.initWordType(REO_MSLR);
else if(strcmp(modelType, "mono") == 0 || strcmp(modelType, "monotonicity") == 0)
options.initWordType(REO_MONO);
else {
cerr << "extract: syntax error, unknown reordering model type: " << modelType << endl;
exit(1);
}
} else if(strcmp(modelName, "phrase") == 0) {
options.initPhraseModel(true);
if(strcmp(modelType, "msd") == 0)
options.initPhraseType(REO_MSD);
else if(strcmp(modelType, "mslr") == 0)
options.initPhraseType(REO_MSLR);
else if(strcmp(modelType, "mono") == 0 || strcmp(modelType, "monotonicity") == 0)
options.initPhraseType(REO_MONO);
else {
cerr << "extract: syntax error, unknown reordering model type: " << modelType << endl;
exit(1);
}
} else if(strcmp(modelName, "hier") == 0) {
options.initHierModel(true);
if(strcmp(modelType, "msd") == 0)
options.initHierType(REO_MSD);
else if(strcmp(modelType, "mslr") == 0)
options.initHierType(REO_MSLR);
else if(strcmp(modelType, "mono") == 0 || strcmp(modelType, "monotonicity") == 0)
options.initHierType(REO_MONO);
else {
cerr << "extract: syntax error, unknown reordering model type: " << modelType << endl;
exit(1);
}
} else {
cerr << "extract: syntax error, unknown reordering model: " << modelName << endl;
exit(1);
}
//.........这里部分代码省略.........
示例9: main
int main(int argc, char* argv[])
{
cerr << "Starting..." << endl;
char* &fileNameDirect = argv[1];
Moses::InputFileStream fileDirect(fileNameDirect);
//fileDirect.open(fileNameDirect);
if (fileDirect.fail()) {
cerr << "ERROR: could not open extract file " << fileNameDirect << endl;
exit(1);
}
istream &fileDirectP = fileDirect;
char* &fileNameConsolidated = argv[2];
ostream *fileConsolidated;
if (strcmp(fileNameConsolidated, "-") == 0) {
fileConsolidated = &cout;
} else {
Moses::OutputFileStream *outputFile = new Moses::OutputFileStream();
bool success = outputFile->Open(fileNameConsolidated);
if (!success) {
cerr << "ERROR: could not open file phrase table file "
<< fileNameConsolidated << endl;
exit(1);
}
fileConsolidated = outputFile;
}
int i=0;
while(true) {
i++;
if (i%1000 == 0) cerr << "." << flush;
if (i%10000 == 0) cerr << ":" << flush;
if (i%100000 == 0) cerr << "!" << flush;
vector< string > itemDirect;
if (! getLine(fileDirectP, itemDirect ))
break;
(*fileConsolidated) << itemDirect[0] << " ||| " << itemDirect[1] << " ||| ";
// output alignment and probabilities
(*fileConsolidated) << itemDirect[2] // prob direct
<< " 2.718" // phrase count feature
<< " ||| " << itemDirect[3]; // alignment
// counts
(*fileConsolidated) << "||| 0 " << itemDirect[4]; // indirect
(*fileConsolidated) << endl;
}
fileConsolidated->flush();
if (fileConsolidated != &cout) {
delete fileConsolidated;
}
cerr << "Finished" << endl;
}
示例10: main
int main(int argc, char* argv[])
{
cerr << "PhraseExtract v1.4, written by Philipp Koehn\n"
<< "phrase extraction from an aligned parallel corpus\n";
if (argc < 6) {
cerr << "syntax: extract en de align extract max-length [orientation [ --model [wbe|phrase|hier]-[msd|mslr|mono] ] | --OnlyOutputSpanInfo | --NoTTable | --SentenceId]\n";
exit(1);
}
char* &fileNameE = argv[1];
char* &fileNameF = argv[2];
char* &fileNameA = argv[3];
string fileNameExtract = string(argv[4]);
maxPhraseLength = atoi(argv[5]);
for(int i=6; i<argc; i++) {
if (strcmp(argv[i],"--OnlyOutputSpanInfo") == 0) {
onlyOutputSpanInfo = true;
} else if (strcmp(argv[i],"orientation") == 0 || strcmp(argv[i],"--Orientation") == 0) {
orientationFlag = true;
} else if (strcmp(argv[i],"--NoTTable") == 0) {
translationFlag = false;
} else if (strcmp(argv[i], "--SentenceId") == 0) {
sentenceIdFlag = true;
} else if (strcmp(argv[i], "--GZOutput") == 0) {
gzOutput = true;
} else if(strcmp(argv[i],"--model") == 0) {
if (i+1 >= argc) {
cerr << "extract: syntax error, no model's information provided to the option --model " << endl;
exit(1);
}
char* modelParams = argv[++i];
char* modelName = strtok(modelParams, "-");
char* modelType = strtok(NULL, "-");
REO_MODEL_TYPE intModelType;
if(strcmp(modelName, "wbe") == 0) {
wordModel = true;
if(strcmp(modelType, "msd") == 0)
wordType = REO_MSD;
else if(strcmp(modelType, "mslr") == 0)
wordType = REO_MSLR;
else if(strcmp(modelType, "mono") == 0 || strcmp(modelType, "monotonicity") == 0)
wordType = REO_MONO;
else {
cerr << "extract: syntax error, unknown reordering model type: " << modelType << endl;
exit(1);
}
} else if(strcmp(modelName, "phrase") == 0) {
phraseModel = true;
if(strcmp(modelType, "msd") == 0)
phraseType = REO_MSD;
else if(strcmp(modelType, "mslr") == 0)
phraseType = REO_MSLR;
else if(strcmp(modelType, "mono") == 0 || strcmp(modelType, "monotonicity") == 0)
phraseType = REO_MONO;
else {
cerr << "extract: syntax error, unknown reordering model type: " << modelType << endl;
exit(1);
}
} else if(strcmp(modelName, "hier") == 0) {
hierModel = true;
if(strcmp(modelType, "msd") == 0)
hierType = REO_MSD;
else if(strcmp(modelType, "mslr") == 0)
hierType = REO_MSLR;
else if(strcmp(modelType, "mono") == 0 || strcmp(modelType, "monotonicity") == 0)
hierType = REO_MONO;
else {
cerr << "extract: syntax error, unknown reordering model type: " << modelType << endl;
exit(1);
}
} else {
cerr << "extract: syntax error, unknown reordering model: " << modelName << endl;
exit(1);
}
allModelsOutputFlag = true;
} else {
cerr << "extract: syntax error, unknown option '" << string(argv[i]) << "'\n";
exit(1);
}
}
// default reordering model if no model selected
// allows for the old syntax to be used
if(orientationFlag && !allModelsOutputFlag) {
wordModel = true;
wordType = REO_MSD;
}
// open input files
Moses::InputFileStream eFile(fileNameE);
Moses::InputFileStream fFile(fileNameF);
Moses::InputFileStream aFile(fileNameA);
istream *eFileP = &eFile;
istream *fFileP = &fFile;
istream *aFileP = &aFile;
//.........这里部分代码省略.........