本文整理汇总了 Python 中 document.Document 类的典型用法代码示例。如果您正苦于以下问题:Python Document 类的具体用法?Python Document 怎么用?Python Document 使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 Document 类的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: generate_document_data
def generate_document_data(chapter_paths, word_count):
    """
    Build the per-chapter word data used by the visualization.

    One Document is created from all chapter paths. Each chapter word list
    it returns from get_chapters() is passed, together with word_count and
    the Document itself, to generate_chapter_data.

    chapter_paths - A list of paths to chapters
    word_count - The number of most frequent words to grab for each chapter

    Returns a list with one entry per chapter, in the order get_chapters()
    yields them. The intended shape is:

        [
            [
                {
                    "word": wart
                    "freq": .7
                    "uniqueness": .5
                    "pos": .1
                }
            ],
        ]

    Each chapter is a list of word dictionaries holding the word, its
    frequency within the chapter, its overall uniqueness, and the first
    position at which it is observed. The three numeric values are scaled
    to 0-1 relative to the chapter (the most frequent word gets a 1, for
    instance).
    """
    document = Document(chapter_paths)
    chapters = []
    for word_list in document.get_chapters():
        chapters.append(generate_chapter_data(word_list, word_count, document))
    return chapters
示例2: Probability
def Probability(self, doc, dclass = ""):
    """Return the probability of class dclass for the document doc.

    With a non-empty dclass a single number is returned (or -1 when the
    accumulated sum is zero). With an empty dclass the probability is
    computed for every known class and a list of [class, probability]
    pairs is returned, sorted with the highest probability first.
    """
    if not dclass:
        # No class requested: score every known class and rank them.
        ranking = [[name, self.Probability(doc, name)]
                   for name in self.__document_classes]
        ranking.sort(key = lambda pair: pair[1], reverse = True)
        return ranking

    words_in_target = self.sum_words_in_class(dclass)
    total = 0
    document = Document(self.__vocabulary)
    document.read_document(doc)
    for other in self.__document_classes:
        words_in_other = self.sum_words_in_class(other)
        product = 1
        for word in document.Words():
            # Every word frequency is incremented by one before it is used.
            freq_target = 1 + self.__document_classes[dclass].WordFreq(word)
            freq_other = 1 + self.__document_classes[other].WordFreq(word)
            product *= freq_other * words_in_target / (freq_target * words_in_other)
        # Weight the per-word product by the ratio of document counts.
        total += product * self.__document_classes[other].NumberOfDocuments() / self.__document_classes[dclass].NumberOfDocuments()

    # The probability is the reciprocal of the sum; -1 flags a zero sum.
    if total == 0:
        return -1
    return 1 / total
示例3: test_array_delete
def test_array_delete(self):
    """Apply 'ad' ops to top-level, nested and dict-held lists and check the results."""
    empty_doc = Document()
    empty_doc.snapshot = []
    dict_doc = self.doc1
    list_doc = self.doc2

    # Deleting zero elements from an empty list leaves it empty.
    empty_doc.apply_op(Op('ad', [], offset=0, val=0))
    self.assertEqual(empty_doc.snapshot, [])

    # Remove a single element from the top-level list.
    list_doc.apply_op(Op('ad', [], offset=1, val=1))
    self.assertEqual(list_doc.get_value([1]), 'normal, ol string')

    # Remove from a nested list.
    list_doc.apply_op(Op('ad', [2], offset=1, val=1))
    self.assertEqual(list_doc.get_value([2]), [['multi'],['array']])

    # Remove several elements at once.
    list_doc.apply_op(Op('ad', [], offset=0, val=4))
    self.assertEqual(list_doc.snapshot, [None, 42])

    # Remove the last element of the list.
    list_doc.apply_op(Op('ad', [], offset=1, val=1))
    self.assertEqual(list_doc.snapshot, [None])

    # Remove from a list stored under a dict key.
    dict_doc.apply_op(Op('ad', ['fifth'], offset=2, val=2))
    self.assertEqual(dict_doc.get_value(['fifth']), [55,66])
示例4: test_call_pod_renderer_with_document_file_and_context_and_result_file_in_render_then_call_run
def test_call_pod_renderer_with_document_file_and_context_and_result_file_in_render_then_call_run(self, renderer):
    """render() must call the renderer with (document_file, context, result_file) and then run it."""
    document = Document(context="context")
    document.document_file = "document"
    document.result_file = "result"

    document.render()

    # The renderer is called exactly once with the three values ...
    renderer.assert_called_once_with("document", "context", "result")
    # ... and run() is called once, without arguments, on what it returned.
    renderer.return_value.run.assert_called_once_with()
示例5: test_array_insert
def test_array_insert(self):
    """Apply 'ai' ops at the start, end, middle and deep inside documents and check the results."""
    empty_doc = Document()
    empty_doc.snapshot = []
    dict_doc = self.doc1
    list_doc = self.doc2

    # The whole document is an empty array; insert its first element.
    empty_doc.apply_op(Op('ai', [], val='c', offset=0))
    self.assertEqual(empty_doc.snapshot, ['c'])

    # Insert at the start.
    empty_doc.apply_op(Op('ai', [], val='a', offset=0))
    self.assertEqual(empty_doc.snapshot, ['a', 'c'])

    # Insert at the end.
    empty_doc.apply_op(Op('ai', [], val='d', offset=2))
    self.assertEqual(empty_doc.snapshot, ['a','c','d'])

    # Insert in the middle.
    empty_doc.apply_op(Op('ai', [], val='b', offset=1))
    self.assertEqual(empty_doc.snapshot, ['a','b','c','d'])

    # Insert into an array nested deep in the document.
    list_doc.apply_op(Op('ai', [3,1], val='a', offset=1))
    self.assertEqual(list_doc.get_value([3,1]), ['dimen', 'a'])

    # Same again, for an array stored under a dict key.
    dict_doc.apply_op(Op('ai', ['fifth'], val='a', offset=1))
    self.assertEqual(dict_doc.get_value(['fifth']),
                     [55,'a',66,{'sixth': 'deep string'}, 'rw'])
示例6: categorize_document
def _record_neighbors(nearest_neighbors, unknown_document, known_documents, k):
    """Print the distance to each known document and fold it into nearest_neighbors."""
    for known_document in known_documents:
        distance = Document.calculate_tanimoto_distance(unknown_document, known_document)
        print(distance)
        if len(nearest_neighbors) < k:
            nearest_neighbors[distance] = known_document.category
        else:
            update_neighbors(nearest_neighbors, known_document.category, distance)


def categorize_document(unknown_document, k):
    """Assign a category to unknown_document by a vote among its k nearest neighbors.

    Distances from unknown_document to every document in footballDocuments
    and then pythonDocuments are computed with
    Document.calculate_tanimoto_distance and printed. The first k distances
    are stored directly; later ones are handed to update_neighbors.

    unknown_document - the document to classify; its ``category`` attribute
                       is set in place
    k - the number of neighbors stored before update_neighbors takes over

    Returns None. A tie in the vote goes to Category.Football.
    """
    # NOTE(review): neighbors are keyed by distance, so two documents at
    # exactly the same distance share one slot - confirm this is intended.
    nearest_neighbors = dict()
    _record_neighbors(nearest_neighbors, unknown_document, footballDocuments, k)
    print("\n")
    _record_neighbors(nearest_neighbors, unknown_document, pythonDocuments, k)

    votes = list(nearest_neighbors.values())
    football_documents_count = votes.count(Category.Football)
    python_documents_count = votes.count(Category.Python)

    # Store the verdict on the document that was passed in, not on an
    # unrelated free name called ``document``.
    if football_documents_count >= python_documents_count:
        unknown_document.category = Category.Football
    else:
        unknown_document.category = Category.Python
示例7: run
def run(self, index_file):
    """
    Generate the features using Top N algorithm

    index_file - path of a text file listing one document name per line;
                 each name is resolved below ../data/scoped/

    Every listed document is read and its ``content_lower`` is added to
    self.table. For each (document name, words) pair that
    self.table.top_n_words(10) returns, a file ../data/features/<name> is
    written with each word repeated once per line for every occurrence
    reported by the document's count().
    """
    with open(index_file) as index:
        # Strip only the line terminator: slicing off the last character
        # would truncate a final name that has no trailing newline.
        names = [line.rstrip("\n") for line in index]

    for name in names:
        if not name:
            # A blank line would otherwise resolve to the directory itself.
            continue
        with open("../data/scoped/%s" % name, 'r') as scoped:
            document = Document(scoped.read())
        self.table.add_document(name, document.content_lower)

    new_data_set = self.table.top_n_words(10)
    # items() and string repetition work on both Python 2 and Python 3.
    for document_name, words in new_data_set.items():
        with open("../data/scoped/%s" % document_name, 'r') as scoped:
            document = Document(scoped.read())
        with open("../data/features/%s" % document_name, 'w') as features:
            for word in words:
                # NOTE(review): one "word\n" line per occurrence - the
                # source's indentation was lost, confirm the newline
                # belongs to each occurrence.
                features.write((word + "\n") * document.count(word))
示例8: __init__
def __init__(self, json_str):
    """Initialise the document from a JSON string.

    The parsed JSON is kept on ``self.json_object``. An optional
    "document_width" entry is copied to ``self.document_width``, and one
    Field is added per entry of the required "fields" list, using that
    entry's "text", "x", "y" and "length" values.
    """
    Document.__init__(self)
    self.json_object = json.loads(json_str)
    description = self.json_object

    if "document_width" in description:
        self.document_width = description["document_width"]

    for entry in description["fields"]:
        text = unicode(entry["text"])
        x, y, length = entry["x"], entry["y"], entry["length"]
        self.add_field(Field(text, x, y, length))
示例9: TestDocument
class TestDocument(unittest.TestCase):
    """Tests for Document's cursor position, insert/delete and string property."""

    def setUp(self):
        # Every test starts with a document into which "a" has been inserted.
        self.d = Document()
        self.d.insert("a")

    def test_cursor(self):
        document = self.d
        self.assertEqual(document.cursor.position, 1)
        document.save("tst")
        # Best-effort cleanup; presumably save() wrote a file named "tst".
        try:
            remove("tst")
        except OSError:
            pass
        document.cursor.back()
        document.delete()
        self.assertEqual(document.cursor.position, 0)

    def test_multiple_chars_and_escape(self):
        self.d.cursor.home()
        self.d.delete()
        # Insert one character at a time, including the newline.
        for character in "hello\nworld!":
            self.d.insert(character)
        self.assertEqual(self.d.string, "hello\nworld!")

    def test_string_property(self):
        self.assertEqual(self.d.string, "a")
示例10: test_word_tokenizing
def test_word_tokenizing(self):
    """Write a one-sentence file, preprocess it and check the 'tokens' entry equals 6."""
    sentence = "This is a test sentence."
    with open("../process/tmp_test_file.txt", "w") as handle:
        handle.write(sentence)

    # NOTE(review): the file is written under ../process/ but only the
    # bare file name is passed on - presumably Document resolves the
    # directory itself; confirm.
    document = Document("tmp_test_file.txt", "testuser")
    document.preprocess_text()

    tokens = document.preprocessed['tokens']
    self.assertEqual(tokens, 6, "word tokenizing failed, incorrect number of tokens")
示例11: test_textWithWeirdFormatting
def test_textWithWeirdFormatting(self):
    """Process the weird-formatting DOCX sample and compare it with the stored expected outcome.

    Both content() and formatting() of the processed document must equal
    those of the Document loaded from the expected-outcome file. Sample
    paths are resolved relative to the current working directory.
    """
    samplesDir = os.path.join(os.getcwd(), 'samples')
    sampleDocxFile = os.path.join(samplesDir, 'docx', '11_weird_formatting.docx')
    document = DocxProcessor(sampleDocxFile).document()
    expectedDocument = Document().initWithFile(
        os.path.join(samplesDir, 'expected outcome', 'docx', 'test_11'))
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed in Python 3.12.
    self.assertEqual(expectedDocument.content(), document.content())
    self.assertEqual(expectedDocument.formatting(), document.formatting())
示例12: test_parseSimplePdf
def test_parseSimplePdf(self):
    """Process the simple-text PDF sample and compare it with the stored expected outcome.

    Both content() and formatting() of the processed document must equal
    those of the Document loaded from the expected-outcome file. Sample
    paths are resolved relative to the current working directory.
    """
    samplesDir = os.path.join(os.getcwd(), 'samples')
    samplePdfFile = os.path.join(samplesDir, 'pdf', '01_simple_text.pdf')
    document = PdfProcessor(samplePdfFile).document()
    # NOTE(review): the expected outcome is read from the 'docx' folder
    # although this is a PDF test - confirm the fixtures are shared.
    expectedDocument = Document().initWithFile(
        os.path.join(samplesDir, 'expected outcome', 'docx', 'test_01'))
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed in Python 3.12.
    self.assertEqual(expectedDocument.content(), document.content())
    self.assertEqual(expectedDocument.formatting(), document.formatting())
示例13: test_textWithLineBlocks
def test_textWithLineBlocks(self):
    """Process the line-blocks DOCX sample and compare it with the stored expected outcome.

    Both content() and formatting() of the processed document must equal
    those of the Document loaded from the expected-outcome file. Sample
    paths are resolved relative to the current working directory.
    """
    samplesDir = os.path.join(os.getcwd(), 'samples')
    sampleDocxFile = os.path.join(samplesDir, 'docx', '08_line_blocks.docx')
    document = DocxProcessor(sampleDocxFile).document()
    expectedDocument = Document().initWithFile(
        os.path.join(samplesDir, 'expected outcome', 'docx', 'test_08'))
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed in Python 3.12.
    self.assertEqual(expectedDocument.content(), document.content())
    self.assertEqual(expectedDocument.formatting(), document.formatting())
示例14: testDeselectsWhenClickingElsewhere
def testDeselectsWhenClickingElsewhere(self):
    """Clicking on a point and then far away from it must leave no point selected."""
    document = Document()
    document.new_shape()
    document.current_shape.append_point((0, 0))
    tool = SelectTool(document)
    _perform_click(tool, 0, 0)
    _perform_click(tool, 1000, 0)  # Click far away
    # assertIsNone reports the actual value on failure, unlike
    # assertTrue(... is None).
    self.assertIsNone(document.selected_point_index)
示例15: testSelectsWhenMouseClicked
def testSelectsWhenMouseClicked(self):
    """Clicking on the only point of the shape must select it (index 0)."""
    document = Document()
    document.new_shape()
    document.current_shape.append_point((0, 0))
    tool = SelectTool(document)
    # Nothing is selected before the click.
    self.assertIsNone(document.selected_point_index)
    _perform_click(tool, 0, 0)
    # assertEqual reports both values on failure, unlike assertTrue(... == 0).
    self.assertEqual(document.selected_point_index, 0)