Python document.Document类代码示例

本文整理汇总了Python中document.Document类的典型用法代码示例。如果您正苦于以下问题：Python Document类的具体用法？Python Document怎么用？Python Document使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了Document类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: generate_document_data

def generate_document_data(chapter_paths, word_count):
    """
    Build the visualization data for a collection of chapters.

    One Document is constructed from all of the chapter paths; every chapter it
    yields is then handed to generate_chapter_data together with that same
    Document, so per-chapter figures can be related to the text as a whole.

    chapter_paths - A list of paths to chapters
    word_count - The number of most frequent words to grab for each chapter

    Returns a list with one entry per chapter, looking like this:
    [
        [
            {
                "word": "wart",
                "freq": .7,
                "uniqueness": .5,
                "pos": .1
            }
        ],
    ]

    Each chapter is a list of word dictionaries. A word dictionary carries the
    word itself, how frequent the word is in that chapter, how unique the word
    is overall, and the first position at which the word is observed. The
    three numeric values are scaled from 0-1 with respect to the chapter (the
    most frequent word receives a 1, for instance).
    """
    document = Document(chapter_paths)
    chapters_data = []
    for chapter_words in document.get_chapters():
        chapters_data.append(
            generate_chapter_data(chapter_words, word_count, document))
    return chapters_data

开发者ID:Dylnuge，项目名称:once-and-future-vis，代码行数:32，代码来源:parser.py

示例2: Probability

    def Probability(self, doc, dclass = ""):
        """Return the probability of class ``dclass`` for the document ``doc``.

        With a non-empty ``dclass`` the result is a single number: the
        reciprocal of a sum, over every known class, of per-word frequency
        ratios weighted by the classes' document counts. If that sum is 0 the
        method returns -1 instead.

        With an empty ``dclass`` (the default) every known class is scored via
        a recursive call and a list of ``[class, probability]`` pairs is
        returned, sorted by probability in descending order.
        """
        if not dclass:
            # No class requested: score each known class, best match first.
            ranking = [[name, self.Probability(doc, name)]
                       for name in self.__document_classes]
            ranking.sort(key = lambda pair: pair[1], reverse = True)
            return ranking

        total_target = self.sum_words_in_class(dclass)

        parsed = Document(self.__vocabulary)
        parsed.read_document(doc)

        classes = self.__document_classes
        denominator = 0
        for other in classes:
            total_other = self.sum_words_in_class(other)
            ratio_product = 1
            for word in parsed.Words():
                # The "1 +" keeps both frequencies non-zero for unseen words.
                freq_target = 1 + classes[dclass].WordFreq(word)
                freq_other = 1 + classes[other].WordFreq(word)
                ratio_product *= freq_other * total_target / (freq_target * total_other)
            denominator += ratio_product * classes[other].NumberOfDocuments() / classes[dclass].NumberOfDocuments()

        if denominator == 0:
            # Sentinel for "cannot be inverted".
            return -1
        return 1 / denominator

开发者ID:TechBK，项目名称:NLP，代码行数:29，代码来源:pool.py

示例3: test_array_delete

    def test_array_delete(self):
        """Apply a sequence of 'ad' ops and check the documents after each one.

        'ad' is presumably "array delete" (inferred from the test name). The
        fixtures self.doc1 and self.doc2 are prepared outside this method, and
        the steps below are cumulative on them.
        """
        blank_doc = Document()
        blank_doc.snapshot = []
        first_doc = self.doc1
        second_doc = self.doc2

        # deleting zero elements from an empty list is allowed and is a no-op
        blank_doc.apply_op(Op('ad', [], offset=0, val=0))
        self.assertEqual(blank_doc.snapshot, [])

        # remove a single element from the top-level list
        second_doc.apply_op(Op('ad', [], offset=1, val=1))
        self.assertEqual(second_doc.get_value([1]), 'normal, ol string')

        # remove from a nested list
        second_doc.apply_op(Op('ad', [2], offset=1, val=1))
        self.assertEqual(second_doc.get_value([2]), [['multi'],['array']])

        # remove several elements at once
        second_doc.apply_op(Op('ad', [], offset=0, val=4))
        self.assertEqual(second_doc.snapshot, [None, 42])

        # remove the last element of the list
        second_doc.apply_op(Op('ad', [], offset=1, val=1))
        self.assertEqual(second_doc.snapshot, [None])

        # remove from a list reached through a dict key
        first_doc.apply_op(Op('ad', ['fifth'], offset=2, val=2))
        self.assertEqual(first_doc.get_value(['fifth']), [55,66])

开发者ID:citizencurator，项目名称:majormajor，代码行数:35，代码来源:document_tests.py

示例4: test_call_pod_renderer_with_document_file_and_context_and_result_file_in_render_then_call_run

 def test_call_pod_renderer_with_document_file_and_context_and_result_file_in_render_then_call_run(self, renderer):
     """render() must build the renderer from (document file, context, result file) and run it once.

     ``renderer`` is presumably a mock injected by a patch decorator that is
     not visible here; confirm against the enclosing test class.
     """
     pod_document = Document(context="context")
     pod_document.document_file = "document"
     pod_document.result_file = "result"

     pod_document.render()

     expected_args = ("document", "context", "result")
     renderer.assert_called_once_with(*expected_args)
     # The object returned by the renderer call must have been run with no arguments.
     renderer.return_value.run.assert_called_once_with()

开发者ID:stclair，项目名称:pyowa-pod，代码行数:7，代码来源:tests.py

示例5: test_array_insert

    def test_array_insert(self):
        """Apply a sequence of 'ai' ops and check the documents after each one.

        'ai' is presumably "array insert" (inferred from the test name). The
        fixtures self.doc1 and self.doc2 are prepared outside this method.
        """
        scratch = Document()
        scratch.snapshot = []
        first_doc = self.doc1
        second_doc = self.doc2

        # the whole document is an empty array; give it its first element
        scratch.apply_op(Op('ai', [], val='c', offset=0))
        self.assertEqual(scratch.snapshot, ['c'])
        # insert at the start
        scratch.apply_op(Op('ai', [], val='a', offset=0))
        self.assertEqual(scratch.snapshot, ['a', 'c'])
        # insert at the end
        scratch.apply_op(Op('ai', [], val='d', offset=2))
        self.assertEqual(scratch.snapshot, ['a','c','d'])
        # insert in the middle
        scratch.apply_op(Op('ai', [], val='b', offset=1))
        self.assertEqual(scratch.snapshot, ['a','b','c','d'])

        # insert into an array nested deep inside the document
        second_doc.apply_op(Op('ai', [3,1], val='a', offset=1))
        self.assertEqual(second_doc.get_value([3,1]), ['dimen', 'a'])

        # and once more, this time through a dict key
        first_doc.apply_op(Op('ai', ['fifth'], val='a', offset=1))
        expected_fifth = [55,'a',66,{'sixth': 'deep string'}, 'rw']
        self.assertEqual(first_doc.get_value(['fifth']), expected_fifth)

开发者ID:citizencurator，项目名称:majormajor，代码行数:33，代码来源:document_tests.py

示例6: categorize_document

def categorize_document(unknown_document, k):
    """Assign a category to ``unknown_document`` by voting among its neighbours.

    Every document in the module-level ``footballDocuments`` and
    ``pythonDocuments`` collections is compared with ``unknown_document``
    using ``Document.calculate_tanimoto_distance``; each distance is printed.
    The first ``k`` results are stored directly; once ``k`` entries exist,
    further candidates are passed to ``update_neighbors`` (defined elsewhere;
    presumably it keeps only the closest entries - confirm).

    The category with the most entries among the kept neighbours is written
    to ``unknown_document.category``; ties go to ``Category.Football``.
    Nothing is returned.

    Note: neighbours are stored in a dict keyed by distance, so two candidates
    with exactly the same distance share one slot (the later one overwrites
    the earlier one while the dict is still being filled).
    """
    nearest_neighbors = {}

    def consider(candidates):
        # Shared scan for both labelled collections.
        for candidate in candidates:
            distance = Document.calculate_tanimoto_distance(unknown_document, candidate)
            print(distance)
            if len(nearest_neighbors) < k:
                nearest_neighbors[distance] = candidate.category
            else:
                update_neighbors(nearest_neighbors, candidate.category, distance)

    consider(footballDocuments)
    print("\n")
    consider(pythonDocuments)

    football_documents_count = 0
    python_documents_count = 0
    for category in nearest_neighbors.values():
        if category == Category.Football:
            football_documents_count += 1
        elif category == Category.Python:
            python_documents_count += 1

    # Write the verdict to the document that was passed in. The original code
    # assigned to an undefined name ``document`` here, which either raised
    # NameError or silently modified an unrelated module-level object.
    if football_documents_count >= python_documents_count:
        unknown_document.category = Category.Football
    else:
        unknown_document.category = Category.Python

开发者ID:pkt-fit-knu，项目名称:I21-07，代码行数:34，代码来源:program.py

示例7: run

    def run(self, index_file):
        """
        Generate the features using Top N algorithm

        ``index_file`` is a text file with one document name per line. Each
        named file is read from ``../data/scoped/`` and its ``content_lower``
        is registered with ``self.table``. The table's top 10 words per
        document are then requested, and for every document a file of the
        same name is written to ``../data/features/`` that contains each
        selected word on its own line, repeated ``document.count(word)``
        times.

        This is Python 2 code (it relies on ``dict.iteritems`` and ``xrange``).
        """
        with open(index_file) as f:
            for line in f:
                # Strip only the line terminator. Slicing off the last
                # character (``line[:-1]``) would truncate the final name
                # when the index file does not end with a newline.
                name = line.rstrip("\r\n")
                if not name:
                    # A blank line names no document.
                    continue
                with open("../data/scoped/%s" % name, 'r') as d:
                    document = Document(d.read())
                self.table.add_document(name, document.content_lower)

        new_data_set = self.table.top_n_words(10)
        for document_name, words in new_data_set.iteritems():
            # Re-read the source text so occurrences of each word can be counted.
            with open("../data/scoped/%s" % document_name, 'r') as d:
                document = Document(d.read())

            path_name = "../data/features/%s" % document_name

            with open(path_name, 'w') as f:
                for word in words:
                    for _ in xrange(document.count(word)):
                        f.write(word)
                        f.write("\n")

开发者ID:film42，项目名称:lda-topic-modeling，代码行数:25，代码来源:feature_selector.py

示例8: init

 def __init__(self, json_str):
     """Build the document from a JSON string.

     The decoded JSON is kept on ``self.json_object``. An optional
     "document_width" entry sets ``self.document_width``; every entry of the
     required "fields" list is turned into a Field (text, x, y, length) and
     added to the document.
     """
     Document.__init__(self)
     parsed = json.loads(json_str)
     self.json_object = parsed
     if "document_width" in parsed:
         self.document_width = parsed["document_width"]
     for spec in parsed["fields"]:
         text = unicode(spec["text"])
         self.add_field(Field(text, spec["x"], spec["y"], spec["length"]))

开发者ID:AlejoAsd，项目名称:lx300printerHelper，代码行数:7，代码来源:json_document.py

示例9: TestDocument

class TestDocument(unittest.TestCase):
    """Tests for Document's cursor handling, editing, and string property."""

    def setUp(self):
        """Give every test a fresh document into which "a" has been inserted."""
        doc = Document()
        doc.insert("a")
        self.d = doc

    def test_cursor(self):
        """The cursor is at 1 after the insert and at 0 after back() + delete()."""
        doc = self.d
        self.assertEqual(doc.cursor.position, 1)
        doc.save("tst")
        # Clean up the saved file; a failure to remove it is ignored.
        try:
            remove("tst")
        except OSError:
            pass
        doc.cursor.back()
        doc.delete()
        self.assertEqual(doc.cursor.position, 0)

    def test_multiple_chars_and_escape(self):
        """Characters inserted one at a time, newline included, form the string."""
        doc = self.d
        doc.cursor.home()
        doc.delete()
        expected = "hello\nworld!"
        for char in expected:
            doc.insert(char)
        self.assertEqual(doc.string, expected)

    def test_string_property(self):
        """The string property reflects the single character from setUp."""
        self.assertEqual(self.d.string, "a")

开发者ID:jainarchita，项目名称:60-days-of-python，代码行数:26，代码来源:test_document.py

示例10: test_word_tokenizing

 def test_word_tokenizing(self):
     """Preprocessing a one-sentence file must report six tokens.

     The sentence is written to ../process/tmp_test_file.txt, and a Document
     is then created from the bare file name "tmp_test_file.txt" (presumably
     Document resolves it relative to that directory - confirm).
     """
     sample_sentence = "This is a test sentence."
     with open("../process/tmp_test_file.txt", "w") as handle:
         handle.write(sample_sentence)

     document = Document("tmp_test_file.txt", "testuser")
     document.preprocess_text()

     token_total = document.preprocessed['tokens']
     self.assertEqual(token_total, 6, "word tokenizing failed, incorrect number of tokens")

开发者ID:abeautifulman，项目名称:DoubleCheck，代码行数:7，代码来源:test_suite.py

示例11: test_textWithWeirdFormatting

	def test_textWithWeirdFormatting(self):
		"""The processed '11_weird_formatting.docx' sample must match the stored expected outcome 'test_11'.

		Both the content and the formatting of the processed document are
		compared. Paths are built from the current working directory, so the
		test has to be run from the directory that contains 'samples'.
		"""
		sampleDocxFile = os.path.join(os.getcwd(), 'samples', 'docx', '11_weird_formatting.docx')
		document = DocxProcessor(sampleDocxFile).document()
		expectedDocument = Document().initWithFile(os.path.join(os.getcwd(), 'samples', 'expected outcome', 'docx', 'test_11'))

		# assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
		self.assertEqual(expectedDocument.content(), document.content())
		self.assertEqual(expectedDocument.formatting(), document.formatting())

开发者ID:gcsolaroli，项目名称:metadata-processor，代码行数:8，代码来源:test_docx.py

示例12: test_parseSimplePdf

	def test_parseSimplePdf(self):
		"""The processed '01_simple_text.pdf' sample must match the stored expected outcome 'test_01'.

		Both the content and the formatting of the processed document are
		compared. Paths are built from the current working directory, so the
		test has to be run from the directory that contains 'samples'.
		"""
		samplePdfFile = os.path.join(os.getcwd(), 'samples', 'pdf', '01_simple_text.pdf')
		document = PdfProcessor(samplePdfFile).document()
		# NOTE(review): the expected outcome is loaded from the 'docx' folder even
		# though this is a PDF test - confirm that this is intentional.
		expectedDocument = Document().initWithFile(os.path.join(os.getcwd(), 'samples', 'expected outcome', 'docx', 'test_01'))

		# assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
		self.assertEqual(expectedDocument.content(), document.content())
		self.assertEqual(expectedDocument.formatting(), document.formatting())

开发者ID:gcsolaroli，项目名称:metadata-processor，代码行数:8，代码来源:_test_pdf.py

示例13: test_textWithLineBlocks

	def test_textWithLineBlocks(self):
		"""The processed '08_line_blocks.docx' sample must match the stored expected outcome 'test_08'.

		Both the content and the formatting of the processed document are
		compared. Paths are built from the current working directory, so the
		test has to be run from the directory that contains 'samples'.
		"""
		sampleDocxFile = os.path.join(os.getcwd(), 'samples', 'docx', '08_line_blocks.docx')
		document = DocxProcessor(sampleDocxFile).document()
		expectedDocument = Document().initWithFile(os.path.join(os.getcwd(), 'samples', 'expected outcome', 'docx', 'test_08'))

		# assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
		self.assertEqual(expectedDocument.content(), document.content())
		self.assertEqual(expectedDocument.formatting(), document.formatting())