本文整理汇总了 Java 中 org.galagosearch.core.parse.Document 类的典型用法代码示例。如果您正苦于以下问题：Java Document 类的具体用法？Java Document 怎么用？Java Document 使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
Document 类属于 org.galagosearch.core.parse 包，在下文中一共展示了 Document 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Java 代码示例。
示例1: handleDumpCorpus
import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Dumps every document of a document index to standard output.
 *
 * <p>Each entry is written as an {@code #IDENTIFIER: } line carrying the iterator key,
 * a {@code #METADATA} section with one {@code key,value} line per metadata entry, and a
 * {@code #TEXT} section holding the document text.
 *
 * @param args command arguments; {@code args[1]} is handed to the {@code DocumentIndexReader}
 *             constructor. With fewer than two arguments, help for {@code args[0]} is shown
 *             and nothing is dumped.
 * @throws IOException declared for the index reader and iterator calls
 */
private static void handleDumpCorpus(String[] args) throws IOException {
    if (args.length < 2) {
        commandHelp(args[0]);
        return;
    }

    DocumentIndexReader.Iterator cursor = new DocumentIndexReader(args[1]).getIterator();
    // The cursor is advanced in the loop header once the current entry has been printed.
    for (; !cursor.isDone(); cursor.nextDocument()) {
        System.out.println("#IDENTIFIER: " + cursor.getKey());
        Document current = cursor.getDocument();

        System.out.println("#METADATA");
        for (Entry<String, String> pair : current.metadata.entrySet()) {
            System.out.println(pair.getKey() + "," + pair.getValue());
        }

        System.out.println("#TEXT");
        System.out.println(current.text);
    }
}
示例2: get
import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Loads the document stored under the given numeric ID.
 *
 * <p>The text comes from the first column of the {@code selectDocumentByID} result;
 * the metadata is then filled in by {@link #addMetadata(long, Document)}.
 *
 * @param documentID the ID bound to the first parameter of {@code selectDocumentByID}
 * @return the populated document, or {@code null} when the query yields no row
 * @throws SQLException if the query, reading the row, or closing the result set fails
 */
public synchronized Document get(long documentID) throws SQLException {
    selectDocumentByID.setLong(1, documentID);
    ResultSet rows = selectDocumentByID.executeQuery();

    Document result;
    try {
        if (!rows.next()) {
            return null;
        }
        // Only allocate the document once a matching row is known to exist.
        result = new Document();
        result.text = rows.getString(1);
    } finally {
        // Release the result set even when next()/getString() throws.
        rows.close();
    }

    addMetadata(documentID, result);
    return result;
}
示例3: main
import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Manual smoke test for {@code SQLDocumentStore}.
 *
 * <p>Drops and recreates the database at the hard-coded JDBC URL, stores a single
 * document, attaches extra metadata, performs a metadata lookup, and prints the text of
 * every document the store iterates over.
 *
 * @param args ignored
 * @throws SQLException declared for the store operations
 * @throws ClassNotFoundException declared for the store operations (NOTE(review):
 *         presumably raised when the JDBC driver class cannot be loaded — confirm)
 * @throws IOException declared for the store operations
 */
public static void main(String[] args) throws SQLException, ClassNotFoundException, IOException {
    String driver = "com.mysql.jdbc.Driver";
    String url = "jdbc:mysql:///document_store?user=root";

    SQLDocumentStore.dropDatabase(driver, url);
    SQLDocumentStore.createDatabase(driver, url);

    SQLDocumentStore s = new SQLDocumentStore(driver, url);
    try {
        Document document = new Document();
        document.identifier = "WTX000-000-00";
        document.metadata.put("identifier", document.identifier);
        document.metadata.put("hi", "mom");
        document.text = "hello! hello!";

        s.add(document);
        s.addMetadata("hi", "mom", "initial", "test");

        // The lookup is still performed, but its result is not needed here.
        s.get("hi", "mom");

        for (Document e : s) {
            System.out.println(e.text);
        }
    } finally {
        // Close the store even if one of the operations above fails.
        s.close();
    }
}
示例4: buildIndex
import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Writes a one-document index into a temporary file and checks that the result is
 * recognised as an index file.
 *
 * <p>Side effects: the {@code document} field is set to the document that was written and
 * {@code temporaryName} to the absolute path of the temporary file.
 *
 * @throws FileNotFoundException declared for the file and writer operations
 * @throws IOException declared for the file and writer operations
 */
public void buildIndex() throws FileNotFoundException, IOException {
    File scratch = Utility.createTemporary();
    String scratchPath = scratch.getAbsolutePath();

    // The document that will be encoded into the index.
    document = new Document();
    document.text = "This is the text part.";
    document.identifier = "doc-identifier";
    document.metadata.put("Key", "Value");
    document.metadata.put("Something", "Else");

    Parameters settings = new Parameters();
    settings.add("filename", scratchPath);

    DocumentIndexWriter indexWriter = new DocumentIndexWriter(new FakeParameters(settings));
    indexWriter.process(document);
    indexWriter.close();

    // Recorded only after the writer has finished, as before.
    temporaryName = scratchPath;
    assertTrue(IndexReader.isIndexFile(temporaryName));
}
示例5: makeTermFreq
import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Counts how often each term occurs in the tokenized form of {@code docStr}.
 *
 * <p>Terms that are keys of {@code stopwordList} or that are shorter than
 * {@code MINiNUM_TERM_LENGTH} characters are not counted.
 *
 * @param docStr the raw text handed to the tokenizer {@code tn}
 * @return a map from each counted term to its number of occurrences
 * @throws IOException declared for the tokenizer call
 */
public HashMap<String, Integer> makeTermFreq(String docStr) throws IOException {
    HashMap<String, Integer> counts = new HashMap<String, Integer>();

    for (String token : tn.tokenize(docStr).terms) {
        // Stop words are skipped first, then terms that are too short.
        if (stopwordList.containsKey(token)) {
            continue;
        }
        if (token.length() < MINiNUM_TERM_LENGTH) {
            continue;
        }

        Integer seen = counts.get(token);
        counts.put(token, seen == null ? 1 : seen + 1);
    }

    return counts;
}
示例6: makeTermFreq
import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Builds a term-frequency table for the tokenized form of {@code docStr}.
 *
 * <p>A term is left out when it is a key of {@code stopwordList}, a key of
 * {@code stopWordByTopTerms}, or shorter than {@code MINiNUM_TERM_LENGTH} characters.
 *
 * @param docStr the raw text handed to the tokenizer {@code tn}
 * @return a map from each retained term to the number of times it occurs
 * @throws IOException declared for the tokenizer call
 */
public HashMap<String, Integer> makeTermFreq(String docStr) throws IOException {
    Document tokens = tn.tokenize(docStr);
    HashMap<String, Integer> frequencies = new HashMap<String, Integer>();

    for (int i = 0; i < tokens.terms.size(); i++) {
        String word = tokens.terms.get(i);

        // The checks run in the same order as before: both stop lists, then length.
        boolean excluded = stopwordList.containsKey(word)
                || stopWordByTopTerms.containsKey(word)
                || word.length() < MINiNUM_TERM_LENGTH;
        if (excluded) {
            continue;
        }

        if (frequencies.containsKey(word)) {
            frequencies.put(word, frequencies.get(word) + 1);
        } else {
            frequencies.put(word, 1);
        }
    }

    return frequencies;
}
示例7: handleDumpCorpus
import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Writes every document of a document index to {@code output}.
 *
 * <p>For each entry the iterator key follows {@code #IDENTIFIER: }, the metadata is listed
 * as {@code key,value} lines under {@code #METADATA}, and the document text follows
 * {@code #TEXT}.
 *
 * @param args command arguments; {@code args[1]} is handed to the {@code DocumentIndexReader}
 *             constructor. With fewer than two arguments, help for {@code args[0]} is shown
 *             and nothing is dumped.
 * @throws IOException declared for the index reader and iterator calls
 */
private void handleDumpCorpus(String[] args) throws IOException {
    boolean missingIndexPath = args.length < 2;
    if (missingIndexPath) {
        commandHelp(args[0]);
        return;
    }

    DocumentIndexReader corpus = new DocumentIndexReader(args[1]);
    DocumentIndexReader.Iterator position = corpus.getIterator();

    while (!position.isDone()) {
        output.println("#IDENTIFIER: " + position.getKey());

        Document doc = position.getDocument();
        output.println("#METADATA");
        for (Entry<String, String> field : doc.metadata.entrySet()) {
            output.println(field.getKey() + "," + field.getValue());
        }
        output.println("#TEXT");
        output.println(doc.text);

        position.nextDocument();
    }
}
示例8: instance
import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Builds the list of document transformations for the given parameters.
 *
 * <p>The names {@code "stopper"} and {@code "stemmer"} are resolved, in that order, through
 * {@code instance(parameters, name)}; names that resolve to {@code null} are left out.
 *
 * @param parameters the parameters forwarded to each per-name lookup
 * @return the non-null transformations in lookup order; possibly empty, never {@code null}
 */
public static ArrayList<Processor<Document>> instance(Parameters parameters) {
    ArrayList<Processor<Document>> chain = new ArrayList<Processor<Document>>();

    for (String stageName : new String[] {"stopper", "stemmer"}) {
        Processor<Document> stage = instance(parameters, stageName);
        if (stage == null) {
            continue;
        }
        chain.add(stage);
    }

    return chain;
}
示例9: getSummary
import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Produces a query-highlighted summary for a document.
 *
 * <p>When the document has a {@code "description"} metadata value longer than 10 characters,
 * that description is highlighted; otherwise a snippet is generated from the document text.
 *
 * @param document the document to summarise
 * @param query the query terms passed to the generator
 * @return the highlighted description or the generated snippet, as returned by {@code generator}
 * @throws IOException declared for the generator calls
 */
public String getSummary(Document document, Set<String> query) throws IOException {
    // A single lookup covers both "key absent" and "key mapped to null"; the latter
    // previously caused a NullPointerException on description.length().
    String description = document.metadata.get("description");
    if (description != null && description.length() > 10) {
        return generator.highlight(description, query);
    }
    return generator.getSnippet(document.text, query);
}
示例10: handleDoc
import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Prints the text of one document from a document index to standard output.
 *
 * @param args command arguments; {@code args[1]} is the index path and {@code args[2]} the
 *             document identifier. With fewer than three arguments, help for {@code args[0]}
 *             is shown instead.
 * @throws IOException declared for the index reader calls
 */
private static void handleDoc(String[] args) throws IOException {
    if (args.length <= 2) {
        commandHelp(args[0]);
        return;
    }

    String indexPath = args[1];
    String identifier = args[2];

    DocumentIndexReader reader = new DocumentIndexReader(indexPath);
    Document document = reader.getDocument(identifier);
    if (document == null) {
        // NOTE(review): guards against the reader returning null for an unknown
        // identifier — confirm against DocumentIndexReader.getDocument.
        System.err.println("No document found for identifier: " + identifier);
        return;
    }
    System.out.println(document.text);
}
示例11: handleDocument
import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Serves the escaped text of the document named by the {@code identifier} request parameter.
 *
 * <p>Responds with status 404 and no body when the lookup returns {@code null}; otherwise
 * writes the escaped document text as {@code text/html; charset=UTF-8}.
 *
 * @param request the incoming request carrying the {@code identifier} parameter
 * @param response the response the document text is written to
 * @throws IOException if obtaining or writing to the response writer fails
 */
public void handleDocument(HttpServletRequest request, HttpServletResponse response) throws IOException {
    // NOTE(review): the returned map is unused; the call is kept in case it is relied on
    // to force parameter parsing — confirm and remove if not needed.
    request.getParameterMap();

    String identifier = request.getParameter("identifier");
    Document document = search.getDocument(identifier);
    if (document == null) {
        // Avoid dereferencing a missing document; answer "not found" instead.
        response.setStatus(HttpServletResponse.SC_NOT_FOUND);
        return;
    }

    response.setContentType("text/html; charset=UTF-8");
    PrintWriter writer = response.getWriter();
    try {
        writer.write(getEscapedString(document.text));
    } finally {
        writer.close();
    }
}
示例12: handleSnippet
import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Serves an XML fragment describing one document: snippet, identifier, title and URL.
 *
 * <p>The document is looked up by the {@code identifier} request parameter and the snippet
 * is generated for the values of the {@code term} parameter. Responds with status 404 and
 * no body when the lookup returns {@code null}.
 *
 * @param request the incoming request carrying {@code identifier} and zero or more {@code term}
 *                parameters
 * @param response the response the XML is written to
 * @throws IOException if generating the summary or writing the response fails
 */
public void handleSnippet(HttpServletRequest request, HttpServletResponse response) throws IOException {
    String identifier = request.getParameter("identifier");

    // getParameterValues returns null when no "term" parameter was sent; treat that as
    // an empty query instead of failing inside Arrays.asList.
    String[] terms = request.getParameterValues("term");
    Set<String> queryTerms = new HashSet<String>();
    if (terms != null) {
        queryTerms.addAll(Arrays.asList(terms));
    }

    Document document = search.getDocument(identifier);
    if (document == null) {
        response.setStatus(HttpServletResponse.SC_NOT_FOUND);
        return;
    }

    // The content type is set once, before the writer is obtained.
    response.setContentType("text/xml");
    PrintWriter writer = response.getWriter();

    String snippet = search.getSummary(document, queryTerms);
    if (snippet == null) {
        snippet = "";
    }
    String title = document.metadata.get("title");
    String url = document.metadata.get("url");

    writer.append("<response>\n");
    writer.append(String.format("<snippet>%s</snippet>\n", snippet));
    // NOTE(review): identifier is request-supplied and written without scrub(), unlike
    // title and url — confirm whether it should be escaped as well.
    writer.append(String.format("<identifier>%s</identifier>\n", identifier));
    writer.append(String.format("<title>%s</title>\n", scrub(title)));
    writer.append(String.format("<url>%s</url>\n", scrub(url)));
    writer.append("</response>");
    writer.close();
}
示例13: add
import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Stores a document's text and then one metadata row per metadata entry.
 *
 * @param document the document whose {@code text} and {@code metadata} are inserted
 * @return {@code true} when the document and its metadata were inserted; {@code false} when
 *         the insert produced no generated key (no metadata rows are written in that case)
 * @throws RuntimeException wrapping the {@link SQLException} if any statement fails
 */
public synchronized boolean add(Document document) {
    try {
        insertDocument.setString(1, document.text);
        insertDocument.executeUpdate();

        // Fetch the key generated for the row that was just inserted.
        long documentID;
        ResultSet keySet = insertDocument.getGeneratedKeys();
        try {
            if (!keySet.next()) {
                return false;
            }
            // Read the key as a long so it is not truncated to int range.
            documentID = keySet.getLong(1);
        } finally {
            // Closed on every path, including the early return above.
            keySet.close();
        }

        // Now add one row per metadata entry.
        for (Entry<String, String> entry : document.metadata.entrySet()) {
            insertMetadata.setLong(1, documentID);
            insertMetadata.setString(2, entry.getKey());
            insertMetadata.setString(3, entry.getValue());
            insertMetadata.executeUpdate();
            insertMetadata.clearParameters();
        }
    } catch (SQLException e) {
        throw new RuntimeException("Failed to add a document to the DocumentStore", e);
    }
    return true;
}
示例14: addMetadata
import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Replaces the metadata of {@code document} with the rows stored for {@code documentID}.
 *
 * <p>The document's {@code metadata} field is reset to a fresh, empty map before the query
 * runs; each result row then contributes its first column as key and second as value.
 *
 * @param documentID the ID bound to the first parameter of {@code selectMetadata}
 * @param document the document whose {@code metadata} map is replaced and filled
 * @throws SQLException if the query, reading a row, or closing the result set fails
 */
public synchronized void addMetadata(long documentID, Document document) throws SQLException {
    selectMetadata.setLong(1, documentID);
    document.metadata = new HashMap<String, String>();

    ResultSet metadata = selectMetadata.executeQuery();
    try {
        while (metadata.next()) {
            String key = metadata.getString(1);
            String value = metadata.getString(2);
            document.metadata.put(key, value);
        }
    } finally {
        // Release the result set even when reading a row fails.
        metadata.close();
    }
}
示例15: toArray
import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Copies every document produced by iterating over this object into an array.
 *
 * <p>The documents are first collected into a list and then converted with
 * {@link ArrayList#toArray(Object[])}, so that method's rules for reusing or replacing
 * {@code example} apply.
 *
 * @param example the array supplying the runtime element type (and storage, if large enough)
 * @param <T> the element type of the returned array
 * @return an array holding the documents in iteration order
 * @throws ArrayStoreException if {@code T} is not a supertype of {@code Document}
 */
public <T> T[] toArray(T[] example) {
    // Parameterized rather than raw, so the element type is checked at compile time.
    ArrayList<Document> list = new ArrayList<Document>();
    for (Document d : this) {
        list.add(d);
    }
    return list.toArray(example);
}