当前位置: 首页>>代码示例>>Java>>正文


Java Document类代码示例

本文整理汇总了Java中org.galagosearch.core.parse.Document的典型用法代码示例。如果您正苦于以下问题:Java Document类的具体用法?Java Document怎么用?Java Document使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。


Document类属于org.galagosearch.core.parse包,在下文中一共展示了Document类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: handleDumpCorpus

import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Prints every document of a corpus index to standard output.
 *
 * <p>For each document the output is an {@code #IDENTIFIER:} line with the iterator key,
 * a {@code #METADATA} section with one {@code key,value} line per metadata entry, and a
 * {@code #TEXT} section with the document text. When fewer than two arguments are given,
 * the help for the command named by {@code args[0]} is shown instead.
 *
 * @param args the command name followed by the argument passed to {@code DocumentIndexReader}
 * @throws IOException propagated from the index reader calls
 */
private static void handleDumpCorpus(String[] args) throws IOException {
    boolean hasIndexArgument = args.length > 1;
    if (!hasIndexArgument) {
        commandHelp(args[0]);
        return;
    }

    DocumentIndexReader corpus = new DocumentIndexReader(args[1]);

    // The iterator is advanced in the loop header, after each document has been printed.
    for (DocumentIndexReader.Iterator cursor = corpus.getIterator();
            !cursor.isDone();
            cursor.nextDocument()) {
        System.out.println("#IDENTIFIER: " + cursor.getKey());
        Document current = cursor.getDocument();

        System.out.println("#METADATA");
        for (Entry<String, String> pair : current.metadata.entrySet()) {
            System.out.println(pair.getKey() + "," + pair.getValue());
        }

        System.out.println("#TEXT");
        System.out.println(current.text);
    }
}
 
开发者ID:jjfiv,项目名称:galagosearch,代码行数:21,代码来源:App.java

示例2: get

import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Loads the document stored under the given ID, including its metadata.
 *
 * <p>The text is read from the first column of the {@code selectDocumentByID} query;
 * metadata is then filled in by {@link #addMetadata(long, Document)}.
 *
 * @param documentID value bound to the first parameter of the lookup statement
 * @return the populated document, or {@code null} when the query returns no row
 * @throws SQLException if executing the query or reading its result fails
 */
public synchronized Document get(long documentID) throws SQLException {
    selectDocumentByID.setLong(1, documentID);
    ResultSet rows = selectDocumentByID.executeQuery();
    String text;

    // Close the result set on every path, including when next()/getString() throws.
    try {
        if (!rows.next()) {
            return null;
        }
        text = rows.getString(1);
    } finally {
        rows.close();
    }

    // Only allocate the Document once we know a row exists.
    Document result = new Document();
    result.text = text;
    addMetadata(documentID, result);
    return result;
}
 
开发者ID:jjfiv,项目名称:galagosearch,代码行数:20,代码来源:SQLDocumentStore.java

示例3: main

import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Small manual smoke test for {@code SQLDocumentStore}.
 *
 * <p>Drops and recreates the database, stores one document, attaches extra metadata,
 * performs a metadata lookup, and prints the text of every document in the store.
 *
 * @param args unused
 * @throws SQLException propagated from the store operations
 * @throws ClassNotFoundException propagated from the store setup calls
 * @throws IOException propagated from the store operations
 */
public static void main(String[] args) throws SQLException, ClassNotFoundException, IOException {
    String driver = "com.mysql.jdbc.Driver";
    String url = "jdbc:mysql:///document_store?user=root";

    SQLDocumentStore.dropDatabase(driver, url);
    SQLDocumentStore.createDatabase(driver, url);

    SQLDocumentStore s = new SQLDocumentStore(driver, url);

    // Release the store even if one of the operations below throws.
    try {
        Document document = new Document();
        document.identifier = "WTX000-000-00";
        document.metadata.put("identifier", document.identifier);
        document.metadata.put("hi", "mom");
        document.text = "hello!  hello!";
        s.add(document);

        s.addMetadata("hi", "mom", "initial", "test");

        // The lookup is still exercised; its result was never used, so it is not stored.
        s.get("hi", "mom");

        for (Document e : s) {
            System.out.println(e.text);
        }
    } finally {
        s.close();
    }
}
 
开发者ID:jjfiv,项目名称:galagosearch,代码行数:26,代码来源:SQLDocumentStore.java

示例4: buildIndex

import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Writes a one-document index into a temporary file for use by the tests.
 *
 * <p>Stores the document in the {@code document} field and the index path in
 * {@code temporaryName}, then asserts that the written file is recognised as an index file.
 *
 * @throws FileNotFoundException propagated from the temporary-file and writer calls
 * @throws IOException propagated from the temporary-file and writer calls
 */
public void buildIndex() throws FileNotFoundException, IOException {
    File temporary = Utility.createTemporary();
    String indexPath = temporary.getAbsolutePath();

    // The sample document that gets encoded into the index.
    document = new Document();
    document.metadata.put("Key", "Value");
    document.metadata.put("Something", "Else");
    document.text = "This is the text part.";
    document.identifier = "doc-identifier";

    Parameters settings = new Parameters();
    settings.add("filename", indexPath);

    DocumentIndexWriter indexWriter = new DocumentIndexWriter(new FakeParameters(settings));
    indexWriter.process(document);
    indexWriter.close();

    // Record the path only after the index has been written and closed.
    temporaryName = indexPath;
    assertTrue(IndexReader.isIndexFile(temporaryName));
}
 
开发者ID:jjfiv,项目名称:galagosearch,代码行数:20,代码来源:IndexReaderSplitParserTest.java

示例5: makeTermFreq

import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Counts how often each term occurs in the tokenized form of {@code docStr}.
 *
 * <p>Terms that are keys of {@code stopwordList} or shorter than
 * {@code MINiNUM_TERM_LENGTH} are not counted.
 *
 * @param docStr raw text handed to the tokenizer {@code tn}
 * @return a map from each counted term to its number of occurrences
 * @throws IOException propagated from the tokenizer
 */
public HashMap<String, Integer> makeTermFreq(String docStr) throws IOException{
    HashMap<String, Integer> counts = new HashMap<String, Integer>();
    Document parsed = tn.tokenize(docStr);

    for (String word : parsed.terms) {
        // Stopword check first, then the length check, as two separate filters.
        if (stopwordList.containsKey(word)) {
            continue;
        }
        if (word.length() < MINiNUM_TERM_LENGTH) {
            continue;
        }

        Integer seen = counts.get(word);
        counts.put(word, seen == null ? 1 : seen + 1);
    }

    return counts;
}
 
开发者ID:youngilcho,项目名称:internet-application-2014,代码行数:17,代码来源:TermAssoDemo.java

示例6: makeTermFreq

import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Builds a term-frequency table for the tokenized form of {@code docStr}.
 *
 * <p>A term is left out when it is a key of {@code stopwordList}, a key of
 * {@code stopWordByTopTerms}, or shorter than {@code MINiNUM_TERM_LENGTH}.
 *
 * @param docStr raw text handed to the tokenizer {@code tn}
 * @return a map from each retained term to its number of occurrences
 * @throws IOException propagated from the tokenizer
 */
public HashMap<String, Integer> makeTermFreq(String docStr) throws IOException {
    HashMap<String, Integer> frequencies = new HashMap<String, Integer>();
    Document tokenized = tn.tokenize(docStr);

    for (String token : tokenized.terms) {
        // Short-circuit evaluation keeps the checks in their original order.
        boolean excluded = stopwordList.containsKey(token)
                || stopWordByTopTerms.containsKey(token)
                || token.length() < MINiNUM_TERM_LENGTH;

        if (!excluded) {
            Integer previous = frequencies.get(token);
            frequencies.put(token, previous == null ? 1 : previous + 1);
        }
    }

    return frequencies;
}
 
开发者ID:youngilcho,项目名称:internet-application-2014,代码行数:18,代码来源:TermAssociationManager.java

示例7: handleDumpCorpus

import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Writes every document of a corpus index to {@code output}.
 *
 * <p>Each document is emitted as an {@code #IDENTIFIER:} line with the iterator key,
 * a {@code #METADATA} section of {@code key,value} lines, and a {@code #TEXT} section
 * with the document text. With fewer than two arguments, the help for the command
 * named by {@code args[0]} is shown instead.
 *
 * @param args the command name followed by the argument passed to {@code DocumentIndexReader}
 * @throws IOException propagated from the index reader calls
 */
private void handleDumpCorpus(String[] args) throws IOException {
    if (args.length > 1) {
        DocumentIndexReader corpus = new DocumentIndexReader(args[1]);
        DocumentIndexReader.Iterator position = corpus.getIterator();

        for (; !position.isDone(); position.nextDocument()) {
            output.println("#IDENTIFIER: " + position.getKey());
            Document record = position.getDocument();

            output.println("#METADATA");
            for (Entry<String, String> field : record.metadata.entrySet()) {
                output.println(field.getKey() + "," + field.getValue());
            }

            output.println("#TEXT");
            output.println(record.text);
        }
    } else {
        // Not enough arguments: fall back to the usage text for this command.
        commandHelp(args[0]);
    }
}
 
开发者ID:youngilcho,项目名称:internet-application-2014,代码行数:21,代码来源:App.java

示例8: instance

import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Builds the list of document transformations for the given parameters.
 *
 * <p>Asks {@code instance(parameters, name)} for the "stopper" and then the "stemmer"
 * transformation and keeps, in that order, every one that is not {@code null}.
 *
 * @param parameters passed through unchanged to the per-name factory method
 * @return the non-null transformations; empty when neither was produced
 */
public static ArrayList<Processor<Document>> instance(Parameters parameters) {
    ArrayList<Processor<Document>> pipeline = new ArrayList<Processor<Document>>();

    for (String stage : new String[] {"stopper", "stemmer"}) {
        Processor<Document> step = instance(parameters, stage);
        if (step == null) {
            continue;
        }
        pipeline.add(step);
    }

    return pipeline;
}
 
开发者ID:jjfiv,项目名称:galagosearch,代码行数:15,代码来源:DocumentTransformationFactory.java

示例9: getSummary

import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Produces a summary of a document for the given query terms.
 *
 * <p>When the document metadata has a "description" value longer than 10 characters,
 * that description is highlighted and returned; otherwise a snippet is generated from
 * the document text.
 *
 * @param document the document to summarise; its {@code metadata} and {@code text} are read
 * @param query the query terms passed to the generator
 * @return the highlighted description or the generated snippet
 * @throws IOException propagated from the generator
 */
public String getSummary(Document document, Set<String> query) throws IOException {
    // A single lookup with a null check also covers a "description" key mapped to null,
    // which previously caused a NullPointerException on description.length().
    String description = document.metadata.get("description");

    if (description != null && description.length() > 10) {
        return generator.highlight(description, query);
    }

    return generator.getSnippet(document.text, query);
}
 
开发者ID:jjfiv,项目名称:galagosearch,代码行数:12,代码来源:Search.java

示例10: handleDoc

import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Prints the text of a single document from a corpus index to standard output.
 *
 * <p>With fewer than three arguments, the help for the command named by
 * {@code args[0]} is shown instead.
 *
 * @param args the command name, the index path, and the document identifier
 * @throws IOException propagated from the index reader calls
 */
private static void handleDoc(String[] args) throws IOException {
    if (args.length <= 2) {
        commandHelp(args[0]);
        return;
    }

    String indexPath = args[1];
    String identifier = args[2];
    DocumentIndexReader reader = new DocumentIndexReader(indexPath);
    Document document = reader.getDocument(identifier);

    // Guard against a missing document instead of failing with a NullPointerException.
    // NOTE(review): assumes getDocument signals "not found" with null - confirm.
    if (document == null) {
        System.err.println("Document not found: " + identifier);
        return;
    }

    System.out.println(document.text);
}
 
开发者ID:jjfiv,项目名称:galagosearch,代码行数:13,代码来源:App.java

示例11: handleDocument

import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Serves the escaped text of the document named by the "identifier" request parameter.
 *
 * <p>Responds with status 404 when the search backend returns no document; otherwise
 * writes the escaped text as {@code text/html; charset=UTF-8}.
 *
 * @param request the request carrying the "identifier" parameter
 * @param response the response to write the status or the document text to
 * @throws IOException if obtaining the document or the response writer fails
 */
public void handleDocument(HttpServletRequest request, HttpServletResponse response) throws IOException {
    // Result unused; kept from the original in case callers rely on it to force
    // parameter parsing - TODO confirm whether it can be dropped.
    request.getParameterMap();
    String identifier = request.getParameter("identifier");
    Document document = search.getDocument(identifier);

    // A missing document used to cause a NullPointerException on document.text.
    if (document == null) {
        response.setStatus(HttpServletResponse.SC_NOT_FOUND);
        return;
    }

    // Build the body before touching the writer so a failure here leaves the
    // response uncommitted.
    String body = getEscapedString(document.text);

    response.setContentType("text/html; charset=UTF-8");
    PrintWriter writer = response.getWriter();
    writer.write(body);
    writer.close();
}
 
开发者ID:jjfiv,项目名称:galagosearch,代码行数:11,代码来源:SearchWebHandler.java

示例12: handleSnippet

import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Serves an XML fragment with the snippet, identifier, title and url of one document.
 *
 * <p>The document is named by the "identifier" request parameter and the query terms
 * come from the repeated "term" parameter. Responds with status 404 when the search
 * backend returns no document.
 *
 * @param request the request carrying the "identifier" and "term" parameters
 * @param response the response to write the status or the XML to
 * @throws IOException if obtaining the document, the summary, or the response writer fails
 */
public void handleSnippet(HttpServletRequest request, HttpServletResponse response) throws IOException {
    String identifier = request.getParameter("identifier");
    String[] terms = request.getParameterValues("term");

    // getParameterValues returns null when no "term" parameter is present;
    // treat that as an empty query instead of failing in Arrays.asList.
    Set<String> queryTerms = new HashSet<String>();
    if (terms != null) {
        queryTerms.addAll(Arrays.asList(terms));
    }

    Document document = search.getDocument(identifier);

    if (document == null) {
        response.setStatus(HttpServletResponse.SC_NOT_FOUND);
        return;
    }

    // Gather all values before opening the writer so a failure here leaves the
    // response uncommitted.
    String snippet = search.getSummary(document, queryTerms);
    String title = document.metadata.get("title");
    String url = document.metadata.get("url");

    if (snippet == null) {
        snippet = "";
    }

    response.setContentType("text/xml");
    PrintWriter writer = response.getWriter();
    writer.append("<response>\n");
    writer.append(String.format("<snippet>%s</snippet>\n", snippet));
    // The identifier is client-supplied, so it is scrubbed like title and url.
    // NOTE(review): assumes scrub escapes XML-significant characters - confirm.
    writer.append(String.format("<identifier>%s</identifier>\n", scrub(identifier)));
    writer.append(String.format("<title>%s</title>\n", scrub(title)));
    writer.append(String.format("<url>%s</url>\n", scrub(url)));
    writer.append("</response>");
    writer.close();
}
 
开发者ID:jjfiv,项目名称:galagosearch,代码行数:29,代码来源:SearchWebHandler.java

示例13: add

import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Stores a document's text and metadata in the database.
 *
 * <p>Inserts the text first, reads the generated key for the new row, and then inserts
 * one metadata row per entry of {@code document.metadata} under that key.
 *
 * @param document the document whose {@code text} and {@code metadata} are stored
 * @return {@code true} when the document and its metadata were inserted; {@code false}
 *         when the insert produced no generated key
 * @throws RuntimeException wrapping any {@link SQLException} raised while inserting
 */
public synchronized boolean add(Document document) {
    try {
        insertDocument.setString(1, document.text);
        insertDocument.executeUpdate();

        // Read the generated key, closing the key set on every path.
        long documentID;
        ResultSet keySet = insertDocument.getGeneratedKeys();
        try {
            if (!keySet.next()) {
                return false;
            }
            // getLong avoids truncating keys that do not fit in an int.
            documentID = keySet.getLong(1);
        } finally {
            keySet.close();
        }

        // now, add in rows for the metadata
        for (Entry<String, String> entry : document.metadata.entrySet()) {
            insertMetadata.setLong(1, documentID);
            insertMetadata.setString(2, entry.getKey());
            insertMetadata.setString(3, entry.getValue());
            insertMetadata.executeUpdate();
            insertMetadata.clearParameters();
        }
    } catch (SQLException e) {
        throw new RuntimeException("Failed to add a document to the DocumentStore", e);
    }

    return true;
}
 
开发者ID:jjfiv,项目名称:galagosearch,代码行数:31,代码来源:SQLDocumentStore.java

示例14: addMetadata

import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Replaces a document's metadata with the rows stored for the given document ID.
 *
 * <p>The existing {@code document.metadata} map is discarded and a fresh map is filled
 * from the {@code selectMetadata} query, using column 1 as the key and column 2 as the value.
 *
 * @param documentID value bound to the first parameter of the metadata query
 * @param document the document whose {@code metadata} field is replaced
 * @throws SQLException if executing the query or reading its result fails
 */
public synchronized void addMetadata(long documentID, Document document) throws SQLException {
    selectMetadata.setLong(1, documentID);
    document.metadata = new HashMap<String, String>();
    ResultSet metadata = selectMetadata.executeQuery();

    // Close the result set even if reading a row throws.
    try {
        while (metadata.next()) {
            String key = metadata.getString(1);
            String value = metadata.getString(2);
            document.metadata.put(key, value);
        }
    } finally {
        metadata.close();
    }
}
 
开发者ID:jjfiv,项目名称:galagosearch,代码行数:14,代码来源:SQLDocumentStore.java

示例15: toArray

import org.galagosearch.core.parse.Document; //导入依赖的package包/类
/**
 * Copies every document of this store into an array.
 *
 * <p>The documents are collected by iterating over this object and then handed to
 * {@link ArrayList#toArray(Object[])}, which decides whether {@code example} itself is
 * filled or a new array of the same runtime type is allocated.
 *
 * @param example array whose runtime type determines the type of the result
 * @return an array containing the documents in iteration order
 */
public <T> T[] toArray(T[] example) {
    // Explicit type argument instead of the raw ArrayList, removing the unchecked conversion.
    ArrayList<Document> list = new ArrayList<Document>();

    for (Document d : this) {
        list.add(d);
    }

    return list.toArray(example);
}
 
开发者ID:jjfiv,项目名称:galagosearch,代码行数:10,代码来源:SQLDocumentStore.java


注:本文中的org.galagosearch.core.parse.Document类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。