本文整理汇总了 C++ 中 PDFDoc::readMetadata 方法的典型用法代码示例。如果您正苦于以下问题:C++ PDFDoc::readMetadata 方法的具体用法是什么?PDFDoc::readMetadata 怎么用?有哪些使用 PDFDoc::readMetadata 的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 PDFDoc 的用法示例。
在下文中一共展示了 PDFDoc::readMetadata 方法的 1 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 C++ 代码示例。
示例1: metaData
// Builds the UnparsedDocument for the PDF named by _documentPath.
//
// Returns 0 when _documentPath is empty. Otherwise it opens the PDF and, if the
// document is usable and copyable:
//   1. reads the PDF metadata, parses it as XML and passes the root node to
//      appendPdfMetaData() (presumably the place where _title/_author get
//      filled in -- confirm);
//   2. writes a small <head> block built from _title/_author into
//      _documentTextBuffer when either one is non-empty;
//   3. asks the PDFDoc to display every page to a TextOutputDev that was given
//      buffer_write and &_documentTextBuffer (so page text presumably ends up
//      in that buffer -- buffer_write is not visible here).
// Afterwards the document path is appended to the buffer, the "path" and
// "docno" metadata pairs are attached, _documentPath is cleared, and a pointer
// to the member _unparsedDocument is returned.
//
// NOTE(review): the closing brace of this function is not visible in this
// excerpt; the trailing marker says part of the code was omitted.
indri::parse::UnparsedDocument* indri::parse::PDFDocumentExtractor::nextDocument() {
// No path set: nothing to hand out.
if( !_documentPath.length() )
return 0;
PDFDoc* doc = 0;
TextOutputDev* textOut = 0;
// NOTE(review): gfilename is never deleted here; presumably PDFDoc takes
// ownership of the GString it is constructed with -- confirm.
GString* gfilename = new GString(_documentPath.c_str());
doc = new PDFDoc( gfilename );
// If the doc is not ok, or not ok to copy, no text is extracted below and
// the result is a document of length 0 (assuming the buffer is empty on entry).
if( doc->isOk() && doc->okToCopy() ) {
// TextOutputDev receives the buffer as an opaque stream pointer together
// with the buffer_write callback.
void* stream = &_documentTextBuffer;
textOut = new TextOutputDev( buffer_write, stream, gFalse, gFalse);
if ( textOut->isOk() ) {
// Arguments for displayPages(): all pages, 72 DPI in both directions,
// no rotation.
int firstPage = 1;
int lastPage = doc->getNumPages();
double hDPI=72.0;
double vDPI=72.0;
int rotate=0;
GBool useMediaBox=gFalse;
GBool crop=gTrue;
GBool printing=gFalse;
// NOTE(review): readMetadata() is called twice (here and just below) and
// neither result is freed. If it returns a heap-allocated GString* owned
// by the caller (as in xpdf, to be confirmed), both allocations leak.
if(doc->readMetadata()!=NULL)
{
// Copy-initialises a GString from whatever readMetadata() returns.
GString rawMetaData = doc->readMetadata();
GString preparedMetaData="";
// Search for <rdf:RDF and end at </rdf:RDF>!!
// First pass: copy the metadata while dropping every '?' and ':'.
for(int x=0; x<rawMetaData.getLength(); x++) {
if(rawMetaData.getChar(x)!='?' && rawMetaData.getChar(x)!=':') {
// Skip characters which the XMLReader doesn't understand.
preparedMetaData.append(rawMetaData.getChar(x));
}
}
std::string metaData(preparedMetaData.getCString());
// Keep only the span from the first "<rdf" through the first ">" found at
// or after the last "</rdf". Since ':' was stripped above, a tag such as
// <rdf:RDF appears here as <rdfRDF.
// NOTE(review): the find() results are stored in int and used unchecked.
// If "<rdf" is absent, startbegin is presumably -1 (npos narrowed to int)
// and substr() would throw std::out_of_range; this happens outside the
// try block below, so it is not wrapped by LEMUR_RETHROW.
int startbegin = metaData.find("<rdf");
int stopend = metaData.find(">", metaData.rfind("</rdf") );
metaData = metaData.substr(startbegin, (stopend-startbegin)+1 );
indri::xml::XMLReader reader;
try {
// auto_ptr releases the parsed tree when this scope ends;
// appendPdfMetaData() only gets a borrowed pointer.
std::auto_ptr<indri::xml::XMLNode> result( reader.read( metaData.c_str() ) );
appendPdfMetaData( result.get() );
} catch( lemur::api::Exception& e ) {
// NOTE(review): LEMUR_RETHROW presumably rethrows with the added message;
// if so, the deletes of textOut and doc further down are skipped.
LEMUR_RETHROW( e, "Had trouble reading PDF metadata" );
}
// Emit a <head> block only when there is a title or an author to report.
if( _author.length()>0 || _title.length()>0 )
{
std::string createdPdfHeader;
createdPdfHeader="<head>\n";
if(_title.length()>0) {
createdPdfHeader+="<title>";
createdPdfHeader+=_title;
createdPdfHeader+="</title>\n";
}
if(_author.length()>0) {
createdPdfHeader+="<author>";
createdPdfHeader+=_author;
createdPdfHeader+="</author>\n";
}
createdPdfHeader+="</head>\n";
// Reserve length+1 bytes so strcpy's terminating NUL fits as well.
char *metastream = _documentTextBuffer.write( createdPdfHeader.length()+1 );
strcpy(metastream, createdPdfHeader.c_str());
}
}
// Display all pages to textOut; this runs after the optional header has
// been written to the buffer.
doc->displayPages(textOut, firstPage, lastPage, hDPI, vDPI, rotate, useMediaBox, crop, printing);
}
}
// textOut is still 0 if the document was rejected above; deleting a null
// pointer is a no-op.
delete textOut;
delete doc;
// Lengths are taken before the path is appended, so they cover only what was
// written to the buffer up to this point.
_unparsedDocument.textLength = _documentTextBuffer.position();
_unparsedDocument.contentLength = _unparsedDocument.textLength ? _documentTextBuffer.position() - 1 : 0 ; // excludes the final byte (the NUL, per the original note); 0 if text is empty.
// Append a NUL-terminated copy of the path after the text; the "path" metadata
// value points at this copy.
char* docnoPoint = _documentTextBuffer.write( _documentPath.length()+1 );
strcpy( docnoPoint, _documentPath.c_str() );
// text and content both start at the front of the buffer.
_unparsedDocument.text = _documentTextBuffer.front();
_unparsedDocument.content = _documentTextBuffer.front();
_unparsedDocument.metadata.clear();
// The same pair object is reused for both entries; valueLength counts the
// terminating NUL in each case.
indri::parse::MetadataPair pair;
pair.key = "path";
pair.value = docnoPoint;
pair.valueLength = _documentPath.length()+1;
_unparsedDocument.metadata.push_back( pair );
// The docno starts out as the path and is then passed through cleanDocno()
// (not visible here; presumably it rewrites _docnostring in place).
_docnostring.assign(_documentPath.c_str() );
cleanDocno();
pair.value = _docnostring.c_str();
pair.valueLength = _docnostring.length()+1;
pair.key = "docno";
_unparsedDocument.metadata.push_back( pair );
// Clearing the path makes the next call return 0 unless a new path is set.
_documentPath = "";
return &_unparsedDocument;
//......... part of the code is omitted here .........