本文整理汇总了C++中PhraseTable类的典型用法代码示例。如果您正苦于以下问题:C++ PhraseTable类的具体用法?C++ PhraseTable怎么用?C++ PhraseTable使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了PhraseTable类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: cky
// Fills the span chart bottom-up, shortest spans first.
//
// For every span [left, right] of the sentence the cell's score (.second)
// becomes the maximum of:
//   - its current value,
//   - the sum of the scores of the two sub-spans for every split point, and
//   - the score of every rule the phrase table returns for the cell's
//     phrase (.first), as computed by calculateRuleScore().
//
// pt     - phrase table queried for rules matching each span's phrase.
// weight - feature weights forwarded to calculateRuleScore().
void
AuxSearchSpace::
cky(PhraseTable& pt, Features& weight)
{
    const int numWords=_sentence.size();
    for(int span=0; span<numWords; span++)
    {
        for(int left=0; left+span<numWords; left++)
        {
            const int right=left+span;
            pair<string,double>& cell=(*this)[left][right];

            // Best way of composing this span from two adjacent sub-spans.
            for(int split=left; split<right; split++)
            {
                double combined=(*this)[left][split].second+(*this)[split+1][right].second;
                cell.second=max(combined,cell.second);
            }

            // Rules that cover the whole span directly.
            vector<PhraseRuleEntry*>* matches=pt.queryRulesVec(cell.first);
            if(matches==NULL)
                continue;
            for(vector<PhraseRuleEntry*>::iterator it=matches->begin(); it!=matches->end(); ++it)
            {
                double ruleScore=calculateRuleScore(**it, weight);
                cell.second=max(ruleScore,cell.second);
            }
        }
    }
}
示例2: copeUNK
// Adds a pass-through rule for every sentence word the phrase table has no
// rules for: the word translates to itself with all four scores set to -100.
// The new entry is stored in the table and a pointer to the stored copy is
// appended to the table's per-source rule vector.
//
// pt - phrase table that is queried and, for unknown words, extended.
void
SearchSpace::
copeUNK(PhraseTable& pt)
{
    for(size_t pos=0; pos<_sentence.size(); pos++)
    {
        string word=_sentence[pos];
        if(pt.queryRulesVec(word)!=NULL)
            continue;   // the table already has rules for this word

        PhraseRuleEntry unk;
        unk.s2tScore=-100;
        unk.t2sScore=-100;
        unk.t2sLexScore=-100;
        unk.s2tLexScore=-100;
        unk.tarPhrase=word;
        unk.srcPhrase=word;
        unk.tarRepresent.push_back(word);

        pt.add(word,word,unk);
        // Reference the copy owned by the table, not the local entry.
        PhraseRuleEntry* stored=&pt.data()[word][word];
        pt.rulesInVec()[word].push_back(stored);
    }
}
示例3: rule_refinement
// Re-scales the scores of a rule file and prints the result to stdout.
//
// The file named by option "i" is read twice:
//   pass 1 - sums s2tScore per source phrase and per target phrase;
//   pass 2 - divides each rule's s2tScore by its source-phrase total and its
//            t2sScore by its target-phrase total, collecting rules in a
//            PhraseTable that is printed and cleared whenever a new source
//            phrase starts.
// Reading stops at the first empty line or at end of file.
//
// NOTE(review): the target-phrase total is accumulated from s2tScore, not
// t2sScore - confirm this is intended.
//
// args - parsed command line; usage() is called when "i" is missing.
void rule_refinement(JKArgs& args)
{
    if(!args.is_set("i"))usage();
    ifstream is(args.value("i").c_str());
    map<string,EntryInfo> srcPhrases,tarPhrases;

    // Pass 1: accumulate the per-phrase totals.
    while(is.good())
    {
        string curline="";
        getline(is,curline);
        if(curline=="")break;
        PhraseRuleEntry pre;
        pre.read(curline);

        map<string,EntryInfo>::iterator srcIt=srcPhrases.find(pre.srcPhrase);
        if(srcIt==srcPhrases.end())
        {
            EntryInfo ei;
            ei.ind=(int)srcPhrases.size();
            ei.score=pre.s2tScore;
            srcPhrases[pre.srcPhrase]=ei;
        }
        else
            srcIt->second.score+=pre.s2tScore;

        map<string,EntryInfo>::iterator tarIt=tarPhrases.find(pre.tarPhrase);
        if(tarIt==tarPhrases.end())
        {
            EntryInfo ei;
            ei.ind=(int)tarPhrases.size();
            ei.score=pre.s2tScore;
            tarPhrases[pre.tarPhrase]=ei;
        }
        else
            tarIt->second.score+=pre.s2tScore;
    }

    // Rewind for the second pass (clear() resets eof/fail from pass 1).
    is.clear();
    is.seekg(0,ios::beg);

    // Pass 2: normalise and emit one source-phrase group at a time.
    PhraseTable pt;
    bool flushed=false;
    while(is.good())
    {
        string curline="";
        getline(is,curline);
        if(curline=="")
        {
            pt.print(cout);
            flushed=true;
            break;
        }
        PhraseRuleEntry pre;
        pre.read(curline);
        pre.s2tScore/=srcPhrases[pre.srcPhrase].score;
        pre.t2sScore/=tarPhrases[pre.tarPhrase].score;
        if(pt.size()>0&&pt.data().find(pre.srcPhrase)==pt.data().end())
        {
            pt.print(cout);
            pt.clear();
        }
        pt.add(pre.srcPhrase,pre.tarPhrase,pre);
    }
    // If the file does not end with a newline the loop ends on eof right
    // after the last rule, without ever seeing an empty line; the final
    // group must still be written out.
    if(!flushed&&pt.size()>0)
        pt.print(cout);
}
示例4: AddTargetPhrases
// Records the target phrases that phrase table `pt` produced for this input
// path, in the slot given by pt.GetPtInd(). When `tps` is non-null its size
// is added to m_numRules; a null `tps` is stored as-is and adds nothing.
void InputPath::AddTargetPhrases(const PhraseTable &pt,
                                 const TargetPhrases *tps)
{
    targetPhrases[pt.GetPtInd()] = tps;
    if (!tps) {
        return;
    }
    m_numRules += tps->GetSize();
}
示例5: decode
// Decoder entry point: loads a phrase table and translates stdin line by
// line, printing the first-ranked translation of each non-empty line.
//
// Command line (from the original usage string):
//   pbmt -decode -pt=[phrase table file] -dmax=[max distortion limit]
//        -beam=[beam size] -weights=[d:d:d...]
// Options read here: pt (required, usage() is called otherwise), dmax
// (default 4), beam (default 100), weights (default: s2t=1, distort=1,
// everything else 0), tlimit (default 10), debug (flag).
void decode(JKArgs& args)
{
    if(!args.is_set("pt"))usage();
    //pt.print(cout);
    int dmax=4;
    int beamsize=100;
    Features weights;
    weights.s2t=weights.t2s=weights.s2tLex=weights.t2sLex=weights.length=weights.distort=0;
    weights.s2t=1;
    weights.distort=1;
    if(args.is_set("dmax"))dmax=atoi(args.value("dmax").c_str());
    if(args.is_set("beam"))beamsize=atoi(args.value("beam").c_str());
    if(args.is_set("weights"))weights.init(args.value("weights"));
    bool debug=false;
    int tlimit=10;
    if(args.is_set("tlimit"))tlimit=atoi(args.value("tlimit").c_str());
    if(args.is_set("debug")) debug=true;

    PhraseTable pt;
    pt.load(args.value("pt"));
    pt.logrize(10);
    pt.makeVector();
    pt.sortVector(weights);

    // Drive the loop with the getline result: testing only eof() would spin
    // forever on empty reads if cin entered a failed state without eofbit.
    string curline;
    while(getline(cin,curline))
    {
        if(curline=="")continue;
        SearchSpace space;
        space.init(curline);
        space.setupAuxSpace(pt,weights);
        // Unknown words get pass-through rules before the search runs.
        space.copeUNK(pt);
        space.beamSearch(pt,weights,beamsize,dmax,tlimit,debug);
        cout<<space.getNthTranslation(1)<<endl;
    }
}
示例6: PhraseTable
// Streams rules from `fin` into a PhraseTable constructed over `lex` and
// `fout`. Each input line must split on " ||| " into exactly three fields;
// other lines are reported on stderr and skipped. After the last line a
// dummy "####" entry is inserted because, per the original note, the table
// still holds unprocessed phrase pairs at that point.
void PhraseTransTblBuilder::BuildOneDirection(ifstream &fin, ofstream &fout, LexicalTable &lex)
{
    PhraseTable phraseTable = PhraseTable(lex, fout);

    for (string ruleLine; getline(fin, ruleLine); ) {
        vector<string> fields = BasicMethod::Split(ruleLine, " ||| ");
        if (fields.size() == 3) {
            PhraseTblEntry entry;
            entry.CreatePhraseTblEntry(fields[0], fields[1], fields[2]);
            phraseTable.Insert(entry);
        } else {
            cerr << "Bad rule form detected:" << ruleLine << endl;
        }
    }

    // Sentinel entry that pushes the remaining phrase pairs through.
    string marker("####");
    PhraseTblEntry sentinel;
    sentinel.CreatePhraseTblEntry(marker, marker, marker);
    phraseTable.Insert(sentinel);
}
示例7: Save
// Serialises `Data` to `path` as a Boost binary archive and writes its trie
// to `path + ".trie"`. Prints an error and exits with status 1 if `path`
// cannot be opened for writing.
void Save( const PhraseTable &Data, const std::string &path) {
    // Binary archives must go through a binary-mode stream; in text mode
    // some platforms translate newline bytes and corrupt the archive.
    ofstream fout( path.c_str(), std::ios::out | std::ios::binary );
    if(! fout ) {
        cerr << "Error: cannot open file " << path << endl;
        exit(1);
    }
    {
        // Scoped so the archive is destroyed before the stream is closed.
        boost::archive::binary_oarchive oa(fout);
        oa << Data;
    }
    fout.close();
    // save trie
    Data.save_trie( path + ".trie" );
    return;
}
示例8: Load
// Restores `Data` from the Boost binary archive at `path` and then loads its
// trie from `path + ".trie"`, logging progress to stderr. Prints an error
// and exits with status 1 if `path` cannot be opened for reading.
void Load( PhraseTable &Data, const std::string & path) {
    // Binary archives must be read through a binary-mode stream; in text
    // mode some platforms translate newline bytes and corrupt the data.
    ifstream fin( path.c_str(), std::ios::in | std::ios::binary );
    if(! fin ) {
        cerr << "Error: cannot open file " << path << endl;
        exit(1);
    }
    cerr << "loading mapping file..." << endl;
    {
        // Scoped so the archive is destroyed before the stream is closed.
        boost::archive::binary_iarchive ia(fin);
        ia >> Data;
    }
    fin.close();
    cerr << "loading trie..." << endl;
    Data.load_trie( path + ".trie" );
    cerr << "done" << endl;
    return;
}
示例9: main
int main(int argc, char **argv) {
Lexicon* src_lex = new Lexicon();
Lexicon* dst_lex = new Lexicon();
Lexicon* unt_lex = new Lexicon();
int prune = 30;
// n beste Übersetzungen
int n = 10;
// Parameter überprüfen
string help = string("usage: ") + string(argv[0]) + string(" [-n 10] [-p 30] <config> <table.gz> <text>");
if(argc < 4 || argc % 2 == 1) {
cerr << help << endl;
return 1;
}
for(int i = 1; i < argc; i++) {
if(!string(argv[i]).compare("-n") && argc >= 6) {
i++;
n = atoi(argv[i]);
} else {
if(!string(argv[i]).compare("-p") && argc >= 6) {
i++;
prune = atoi(argv[i]);
}
}
}
if(n <= 0) {
cerr << help << endl;
return 1;
}
char *cnf_name = argv[argc-3];
char *tbl_name = argv[argc-2];
char *txt_name = argv[argc-1];
if(getenv("LOG_D")) {
cerr << "[main] Opening files ..." << endl;
}
// Dateien öffnen:
ifstream f_cnf(cnf_name);
if(!f_cnf.good()) {
cerr << "ERROR: opening file '" << tbl_name << "' failed!" << endl << help << endl;
return 1;
}
igzstream f_tbl(tbl_name);
if(!f_tbl.good()) {
cerr << "ERROR: opening file '" << tbl_name << "' failed!" << endl << help << endl;
return 1;
}
ifstream f_txt(txt_name);
if(!f_txt.good()) {
cerr << "ERROR: opening file '" << txt_name << "' failed!" << endl << help << endl;
return 1;
}
Config *cnf = new Config(&f_cnf);
if(getenv("LOG_D")) {
cerr << "[main] Generating PhraseTable ..." << endl;
}
PhraseTable* phraseTable = new PhraseTable(&f_tbl, src_lex, dst_lex, unt_lex, prune);
string f_line;
unt_lex->add("[wordKNOTfound]");
unt_lex->add("[wordnotfound]");
// für jeden Satz
Astar *algo;
while(getline(f_txt, f_line)){
istringstream f_iss(f_line); // wir lesen ein
vector<int> sentence;
string currWord;
while(f_iss >> currWord) {
// einlesen
int id = src_lex->get_id(currWord);
if(id == -1){
id = - (unt_lex->add(currWord));
}
sentence.push_back(id);
}
if(getenv("LOG_D")) {
cerr << "[main] Translating sentence \"" << src_lex->sent2string(sentence) << "\" ..." << endl;
}
HypothesisNode *currTrans = phraseTable->translateSentence(sentence, cnf->lambda);
Sentence trans;
algo = new Astar(currTrans, prune);
double transCost;
Scores score;
for(int i = 0; i < n && algo->getnext(trans, transCost, score); i++) {
cout << dst_lex->sent2string(trans,unt_lex) << endl;
}
delete algo;
algo = 0;
}
delete cnf;
//.........这里部分代码省略.........
示例10: extractPhrase
// Extracts phrase pairs from a sentence-aligned corpus and writes the
// resulting phrase table.
//
// Required options: src, tar, align (usage() is called otherwise).
// Optional options read here:
//   o                       output file for the phrase table
//   w                       per-sentence weight file
//   threshold               sentences with weight 0 or below this are skipped
//   s2tLex / t2sLex         lexical dictionaries; built on the fly via
//                           makeLexDic() unless both are given
//   moore=true              force every surviving sentence weight to 1
//   reverse                 swap source and target phrase of every rule
//   normalize=true          call phraseTable.normalize() before printing
//   srcLengthLimit (8), tarLengthLimit (12), maxNumOfUnAlignedWords (2)
//   range=A-B               only process sentences number A..B (1-based)
//   printTopEntrys=N        dump the top N entries to "topEntrys.txt"
//   onlyonefeat             forwarded to phraseTable.print()
//   signal                  name of an empty file created at the end
//                           (default "mftsignal")
void extractPhrase(JKArgs& args)
{
    if(!args.is_set("src")||!args.is_set("tar")||!args.is_set("align"))
        usage();
    ifstream fsrc(args.value("src").c_str()),ftar(args.value("tar").c_str()),falign(args.value("align").c_str());
    ifstream fweight;
    ofstream os;
    double threshold=0;
    if(args.is_set("threshold"))threshold=atof(args.value("threshold").c_str());
    if(args.is_set("o"))os.open(args.value("o").c_str());
    if(args.is_set("w"))fweight.open(args.value("w").c_str());

    Dic<double> s2tLexDic,t2sLexDic;
    if(!args.is_set("s2tLex")||!args.is_set("t2sLex"))
    {
        cerr<<"making lex dic on the fly"<<endl;
        makeLexDic(args.value("src"),args.value("tar"),args.value("align"),args.value("w"),s2tLexDic,t2sLexDic);
    }
    else
    {
        s2tLexDic.load(args.value("s2tLex"));
        t2sLexDic.load(args.value("t2sLex"));
    }

    bool reverse=false,normalize=false;
    bool moore=false;
    if(args.is_set("moore"))if(args.value("moore")=="true")moore=true;
    if(args.is_set("reverse"))
        reverse=true;
    if(args.is_set("normalize"))
        if(args.value("normalize")=="true")
            normalize=true;

    int srcLengthLimit=8,tarLengthLimit=12;
    if(args.is_set("srcLengthLimit"))
        srcLengthLimit=atoi(args.value("srcLengthLimit").c_str());
    if(args.is_set("tarLengthLimit"))
        tarLengthLimit=atoi(args.value("tarLengthLimit").c_str());
    int maxNumOfUnAlignedWords=2;
    if(args.is_set("maxNumOfUnAlignedWords"))
        maxNumOfUnAlignedWords=atoi(args.value("maxNumOfUnAlignedWords").c_str());

    double start=1,stop=1E10;
    if(args.is_set("range"))
    {
        string range=args.value("range");
        start=atoi(range.substr(0,range.find("-")).c_str());
        stop=atof(range.substr(range.find("-")+1).c_str());
        cerr<<"start:"<<start<<",stop:"<<stop<<endl;
    }

    PhraseTable phraseTable;
    int count=0;
    // The alignment file drives the loop. Using the getline result (rather
    // than !eof()) ends the loop when the file could not be opened - eof()
    // would never become true in that case - and avoids one extra pass over
    // empty strings after the last line.
    string align;
    while(getline(falign,align))
    {
        string src="",tar="";
        double weight=1;
        getline(fsrc,src);
        getline(ftar,tar);
        if(fweight.good())fweight>>weight;
        //Alignment alignment(align,maxNumOfUnAlignedWords);
        count++;
        if(count<start)continue;
        if(count>stop)break;
        if(weight==0||weight<threshold)continue;
        if(moore)weight=1;

        vector<PhraseRuleEntry> phrases;
        phraseExtractor(src,tar,align,s2tLexDic,t2sLexDic,phrases,srcLengthLimit,tarLengthLimit,maxNumOfUnAlignedWords,weight);
        for(size_t i=0;i<phrases.size();i++)
        {
            PhraseRuleEntry& pre=phrases[i];
            if(reverse)
            {
                string tmp=pre.srcPhrase;
                pre.srcPhrase=pre.tarPhrase;
                pre.tarPhrase=tmp;
            }
            if(os.good())phraseTable.add(pre.srcPhrase,pre.tarPhrase,pre);
        }
    }

    if(args.is_set("printTopEntrys"))
    {
        int topN=atoi(args.value("printTopEntrys").c_str());
        phraseTable.printTopEntrys(topN,"topEntrys.txt");
    }
    if(normalize)
        phraseTable.normalize();
    bool onlyonefeat=false;
    if(args.is_set("onlyonefeat"))
        onlyonefeat=true;
    if(os.good())phraseTable.print(os,onlyonefeat);

    // Create (and immediately close) the signal file.
    string signal="mftsignal";
    if(args.is_set("signal"))signal=args.value("signal");
    ofstream o_sg(signal.c_str());
    o_sg.close();
}
示例11: beamSearch
// Beam search over the hypothesis heaps.
//
// pt        - phrase table queried for the rules of each candidate phrase.
// weight    - feature weights forwarded to Hypothesis::genFromChild().
// beamsize  - passed to sortAndPrune() on every heap before it is expanded.
// distLimit - distortion window: a span that does not start at the first
//             uncovered word may only extend while
//             stop < distLimit + firstUnCovered.
// tlimit    - at most this many rules are applied per candidate phrase.
// debug     - when true, prints the expanded and generated hypotheses and
//             the final heap to stdout.
//
// NOTE(review): _hypoHeaps is indexed up to heapIter+span length, so it is
// presumably sized _sentence.size()+1 by the caller - confirm.
void
SearchSpace::
beamSearch(PhraseTable& pt, Features& weight, int beamsize, int distLimit, int tlimit, bool debug)
{
// Seed heap 0 with an empty hypothesis: nothing covered, zero scores,
// no translation and no back-trace.
Hypothesis tmpHypo;
_hypoHeaps[0].push_back(tmpHypo);
Hypothesis& initHypo=_hypoHeaps[0][0];
initHypo.coveredWords.init(_sentence.size(),false);
initHypo.currentScore=0;
initHypo.estimatedScore=0;
initHypo.features.init();
initHypo.lastCoveredWord=-1;
initHypo.represent="";
initHypo.translation.clear();
initHypo.trace.p_prev=NULL;
initHypo.trace.p_rule=NULL;
// Expand the heaps in index order; each new hypothesis goes into the heap
// at index heapIter + (length of the span it adds).
for(size_t heapIter=0;heapIter<_sentence.size();heapIter++)
{
HypothesisHeap& hHeap=_hypoHeaps[heapIter];
hHeap.sortAndPrune(beamsize);
for(int hypoIter=0;hypoIter<(int)hHeap.size();hypoIter++)
{
Hypothesis& curHypo=hHeap[hypoIter];
if(debug)
{
cout<<"CurHypo ::"<<endl;
curHypo.print(cout);
}
// Work on a copy of the coverage vector of the current hypothesis.
BITVECTOR bvec=curHypo.coveredWords;
int firstUnCovered=bvec.firstFalse();
// Try every span [start, stop] made only of uncovered positions.
for(int start=0;start<(int)bvec.size();start++)
{
if(bvec[start]==true)
continue;
// Extend stop while the position is uncovered and either the span starts
// at the first uncovered word or stop stays inside the distortion window.
for(int stop=start;
stop<(int)bvec.size()&&
bvec[stop]==false&&
(stop<distLimit+firstUnCovered||start==firstUnCovered);
stop++)
{
pair<int,int> phraseSpan=make_pair(start,stop);
string candiPhrase=_auxSpace.queryPhrase(phraseSpan);
vector<PhraseRuleEntry*>* p_rules=pt.queryRulesVec(candiPhrase);
// No rules for this phrase: try the next (longer) span.
if(p_rules==NULL)continue;
// Index of the heap that receives hypotheses built from this span.
int newLength=heapIter+stop-start+1;
BITVECTOR newBVec;
combine(bvec,phraseSpan,newBVec);
//cout<<"newBVec: "<<newBVec.represent()<<endl;
// Future-cost estimate looked up once per span and shared by all its rules.
double futureScore=_auxSpace.queryFutureCost(newBVec);
vector<PhraseRuleEntry*>& rules=*p_rules;
// Apply only the first tlimit rules of the vector.
for(size_t rIter=0;rIter<rules.size()&&(int)rIter<tlimit;rIter++)
{
PhraseRuleEntry& rule=*rules[rIter];
Hypothesis newHypo;
newHypo.genFromChild(curHypo,make_pair(start,stop),rule,weight,futureScore);
if(debug)
{
cout<<"newHypo :: "<<endl;
newHypo.print(cout);
}
_hypoHeaps[newLength].addHypothesis(newHypo);
}
}
}
}
}
// The last heap is never expanded by the loop above, so sort and prune it here.
_hypoHeaps.back().sortAndPrune(beamsize);
if(debug)
{
cout<<"final hHeap"<<endl;
for(size_t i=0;i<_hypoHeaps.back().size();i++){
Hypothesis& hypo=_hypoHeaps.back()[i];
hypo.print(cout);
}
}
}
示例12: main
int main(int argc, char* argv[]) {
cerr << "Score v2.5 written by Philipp Koehn" << endl
<< "Modified by Ventsislav Zhechev, Autodesk Development Sàrl" << endl
<< "scoring methods for extracted rules" << endl
;
if (argc < 4) {
cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--OnlyDirect] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--WordAlignment file]\n";
exit(1);
}
char* fileNameExtract = argv[1];
char* fileNameLex = argv[2];
char* fileNamePhraseTable = argv[3];
char* fileNameWordAlignment;
for(int i=4; i<argc; ++i) {
if (strcmp(argv[i],"inverse") == 0 || strcmp(argv[i],"--Inverse") == 0) {
inverseFlag = true;
cerr << "using inverse mode\n";
}
else if (strcmp(argv[i],"--Hierarchical") == 0) {
hierarchicalFlag = true;
cerr << "processing hierarchical rules\n";
}
else if (strcmp(argv[i],"--OnlyDirect") == 0) {
onlyDirectFlag = true;
cerr << "outputing in correct phrase table format (no merging with inverse)\n";
}
else if (strcmp(argv[i],"--WordAlignment") == 0) {
wordAlignmentFlag = true;
fileNameWordAlignment = argv[++i];
cerr << "outputing word alignment in file " << fileNameWordAlignment << endl;
}
else if (strcmp(argv[i],"--NoLex") == 0) {
lexFlag = false;
cerr << "not computing lexical translation score\n";
}
else if (strcmp(argv[i],"--GoodTuring") == 0) {
goodTuringFlag = true;
cerr << "using Good Turing discounting\n";
}
else if (strcmp(argv[i],"--LogProb") == 0) {
logProbFlag = true;
cerr << "using log-probabilities\n";
}
else if (strcmp(argv[i],"--NegLogProb") == 0) {
logProbFlag = true;
negLogProb = -1;
cerr << "using negative log-probabilities\n";
}
else {
cerr << "ERROR: unknown option " << argv[i] << endl;
exit(1);
}
}
// lexical translation table
if (lexFlag)
lexTable.load(fileNameLex);
// compute count of counts for Good Turing discounting
if (goodTuringFlag)
computeCountOfCounts(fileNameExtract);
// sorted phrase extraction file
Bz2LineReader extractFile(fileNameExtract);
// output file: phrase translation table
Bz2LineWriter phraseTableFile(fileNamePhraseTable);
// output word alignment file
if (!inverseFlag && wordAlignmentFlag) {
wordAlignmentFile.open(fileNameWordAlignment);
if (wordAlignmentFile.fail()) {
cerr << "ERROR: could not open word alignment file "
<< fileNameWordAlignment << endl;
exit(1);
}
}
// loop through all extracted phrase translations
int lastSource = -1;
vector< PhraseAlignment > phrasePairsWithSameF;
int i=0;
string lastLine = "";
PhraseAlignment *lastPhrasePair = NULL;
for (string line = extractFile.readLine(); !line.empty(); line = extractFile.readLine()) {
if (line.empty()) break;
if ((++i)%10000000 == 0) cerr << "[p. score:" << i << "]" << flush;
else if (i % 100000 == 0) cerr << "." << flush;
// identical to last line? just add count
if (lastSource >= 0 && line == lastLine) {
lastPhrasePair->addToCount(line);
continue;
}
lastLine = line;
// create new phrase pair
PhraseAlignment phrasePair;
//.........这里部分代码省略.........
示例13: create
// Parses one extract line of the form
//   foreign words ||| english words ||| f-e f-e ...
// into this phrase alignment.
//
// Words are stored in vcbF / vcbE. On the first in-range alignment point
// the per-word alignment vectors are created and the two phrases are
// stored in phraseTableF / phraseTableE. Alignment points outside either
// phrase are reported on stderr and ignored.
//
// line   - the raw line; it is split by tokenize().
// lineID - only used in the warning message.
// Returns true when the line contained at least two "|||" separators,
// i.e. it is a real phrase pair and not just a foreign phrase.
bool PhraseAlignment::create(const char line[], int lineID )
{
    vector< string > token = tokenize( line );
    int item = 1;
    PHRASE phraseF, phraseE;
    for (size_t j=0; j<token.size(); j++) {
        if (token[j] == "|||") {
            item++;
            continue;
        }
        if (item == 1)
            phraseF.push_back( vcbF.storeIfNew( token[j] ) );
        else if (item == 2)
            phraseE.push_back( vcbE.storeIfNew( token[j] ) );
        else if (item == 3) {
            // Start from -1 so that a token sscanf cannot (fully) parse is
            // rejected by the bounds check below instead of being read
            // uninitialized; (size_t)-1 is larger than any phrase length.
            int e = -1, f = -1;
            sscanf(token[j].c_str(), "%d-%d", &f, &e);
            if ((size_t)e >= phraseE.size() || (size_t)f >= phraseF.size()) {
                cerr << "WARNING: sentence " << lineID << " has alignment point (" << f << ", " << e << ") out of bounds (" << phraseF.size() << ", " << phraseE.size() << ")\n";
            } else {
                // First valid alignment point: set up one (empty) alignment
                // list per word and register both phrases.
                if (alignedToE.size() == 0) {
                    vector< size_t > dummy;
                    for(size_t i=0; i<phraseE.size(); i++)
                        alignedToE.push_back( dummy );
                    for(size_t i=0; i<phraseF.size(); i++)
                        alignedToF.push_back( dummy );
                    foreign = phraseTableF.storeIfNew( phraseF );
                    english = phraseTableE.storeIfNew( phraseE );
                }
                alignedToE[e].push_back( f );
                alignedToF[f].push_back( e );
            }
        }
    }
    return (item>2); // real phrase pair, not just foreign phrase
}
示例14: create
// read in a phrase pair and store it
void PhraseAlignment::create(const vector<string>& token, int lineID) {
int item = 1;
PHRASE phraseS, phraseT;
for (size_t j=0; j<token.size(); ++j) {
if (token[j] == "|||")
item++;
else if (item == 1) // source phrase
phraseS.push_back( vcbS.storeIfNew( token[j] ) );
else if (item == 2) // target phrase
phraseT.push_back( vcbT.storeIfNew( token[j] ) );
else if (item == 3) { // alignment
int s = strtol(token[j].substr(0, token[j].find("-")).c_str(), NULL, 10);
int t = strtol(token[j].substr(token[j].find("-") + 1).c_str(), NULL, 10);
if (t >= phraseT.size() || s >= phraseS.size()) {
cerr << "WARNING: phrase pair " << lineID
<< " has alignment point (" << s << ", " << t
<< ") out of bounds (" << phraseS.size() << ", " << phraseT.size() << ")\n";
} else {
// first alignment point? -> initialize
if (alignedToT.size() == 0) {
assert(alignedToS.size() == 0);
size_t numTgtSymbols = (hierarchicalFlag ? phraseT.size()-1 : phraseT.size());
alignedToT.resize(numTgtSymbols);
size_t numSrcSymbols = (hierarchicalFlag ? phraseS.size()-1 : phraseS.size());
alignedToS.resize(numSrcSymbols);
source = phraseTableS.storeIfNew( phraseS );
target = phraseTableT.storeIfNew( phraseT );
}
// add alignment point
alignedToT[t].insert( s );
alignedToS[s].insert( t );
}
} else if (item == 4) // count
count = strtof(token[j].c_str(), NULL);
}
if (item == 3)
count = 1.0;
if (item < 3 || item > 4) {
cerr << "ERROR: faulty line " << lineID << ": ";
for(vector<string>::const_iterator i = token.begin(); i != token.end(); cerr << *(i++) << " ");
cerr << endl;
}
}
示例15: equals
bool PhraseAlignment::equals( const PhraseAlignment& other ) {
if (this == &other) return true;
if (other.english != english) return false;
if (other.foreign != foreign) return false;
PHRASE phraseE = phraseTableE.getPhrase( english );
PHRASE phraseF = phraseTableF.getPhrase( foreign );
for(int i=0; i<phraseE.size(); i++) {
if (alignedToE[i].size() != other.alignedToE[i].size()) return false;
for(int j=0; j<alignedToE[i].size(); j++) {
if (alignedToE[i][j] != other.alignedToE[i][j]) return false;
}
}
for(int i=0; i<phraseF.size(); i++) {
if (alignedToF[i].size() != other.alignedToF[i].size()) return false;
for(int j=0; j<alignedToF[i].size(); j++) {
if (alignedToF[i][j] != other.alignedToF[i][j]) return false;
}
}
return true;
}