本文整理汇总了Python中LTTL.Segmenter类的典型用法代码示例。如果您正苦于以下问题:Python Segmenter类的具体用法?Python Segmenter怎么用?Python Segmenter使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Segmenter类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_recode_overlapping_segmentation
def test_recode_overlapping_segmentation(self):
    """Check that recode raises ValueError for the overlapping fixture."""
    failure_note = "recode doesn't raise exception for overlapping segmentation!"
    # The call stays inside the context manager so that only an exception
    # raised by recode itself satisfies the assertion.
    with self.assertRaises(ValueError, msg=failure_note):
        Segmenter.recode(self.overlapping_seg)
示例2: test_import_xml_exception_missing_opening
def test_import_xml_exception_missing_opening(self):
    """Check that import_xml raises ValueError on the malformed XML fixture."""
    failure_note = "import_xml doesn't detect missing opening tag!"
    with self.assertRaises(ValueError, msg=failure_note):
        # Element 'a' is the one being extracted from the fixture.
        Segmenter.import_xml(self.wrong_xml_seg2, element='a')
示例3: test_tokenize_exception_mode
def test_tokenize_exception_mode(self):
    """Check that tokenize raises ValueError for an unrecognized mode."""
    failure_note = "tokenize doesn't raise exception for unknown mode!"
    with self.assertRaises(ValueError, msg=failure_note):
        # A single (pattern, mode) rule whose mode string is deliberately
        # not a valid one.
        bad_rule = (re.compile(r'\W+'), 'unknown_mode')
        Segmenter.tokenize(self.entire_text_seg, [bad_rule])
示例4: test_auto_number_autonumber
def test_auto_number_autonumber(self):
    """Check that _auto_number stores 1, 2, 3 under 'num' in place."""
    Segmenter._auto_number(self.third_letter_seg, annotation_key='num')
    # Read the annotations back from the same fixture object: the numbering
    # is expected to have been written onto its segments directly.
    observed = [
        segment.annotations['num'] for segment in self.third_letter_seg
    ]
    self.assertEqual(
        observed,
        [1, 2, 3],
        msg="_auto_number doesn't autonumber in place!",
    )
示例5: test_sample_exception_mode
def test_sample_exception_mode(self):
    """Check that sample raises ValueError for an unrecognized mode."""
    failure_note = "sample doesn't raise exception for unknown mode!"
    with self.assertRaises(ValueError, msg=failure_note):
        Segmenter.sample(
            self.entire_text_seg, sample_size=3, mode='unknown_mode'
        )
示例6: test_parse_xml_tag_is_opening
def test_parse_xml_tag_is_opening(self):
    """Check the 'is_opening' flag _parse_xml_tag reports for three tags."""
    markup_samples = ('<a>', '<a attr="1"/>', '</a>')
    # Parse every sample first, then read the flag, so a parsing failure is
    # reported before any key lookup happens.
    parsed = [Segmenter._parse_xml_tag(markup) for markup in markup_samples]
    opening_flags = [info['is_opening'] for info in parsed]
    self.assertEqual(
        opening_flags,
        [True, True, False],
        msg="_parse_xml_tag doesn't recognize opening tags!",
    )
示例7: test_parse_xml_tag_element_name
def test_parse_xml_tag_element_name(self):
    """Check that _parse_xml_tag reports element 'a' for every tag form."""
    markup_samples = ('<a>', '<a attr="1">', '</a>', '<a/>')
    parsed = [Segmenter._parse_xml_tag(markup) for markup in markup_samples]
    element_names = [info['element'] for info in parsed]
    self.assertEqual(
        element_names,
        ['a', 'a', 'a', 'a'],
        msg="_parse_xml_tag doesn't parse element name!",
    )
示例8: test_parse_xml_tag_is_empty
def test_parse_xml_tag_is_empty(self):
    """Check the 'is_empty' flag _parse_xml_tag reports for four tags."""
    markup_samples = ('<a>', '</a>', '<a/>', '<a attr="1"/>')
    parsed = [Segmenter._parse_xml_tag(markup) for markup in markup_samples]
    empty_flags = [info['is_empty'] for info in parsed]
    # Only the two self-closing forms are expected to be flagged as empty.
    self.assertEqual(
        empty_flags,
        [False, False, True, True],
        msg="_parse_xml_tag doesn't recognize empty elements!",
    )
示例9: test_concatenate_progress
def test_concatenate_progress(self):
    """Check that concatenate fires the callback once per input segment.

    NOTE(review): relies on ``self.count`` already existing on the test
    case (presumably set in fixture setup outside this view) -- confirm.
    """
    def bump_counter():
        """Record one progress notification."""
        self.count += 1

    Segmenter.concatenate(
        [self.letter_seg1], progress_callback=bump_counter
    )
    expected_ticks = len(self.letter_seg1)
    self.assertEqual(
        self.count,
        expected_ticks,
        msg="concatenate doesn't track progress!",
    )
示例10: test_bypass_deepcopy
def test_bypass_deepcopy(self):
    """Check that bypass output does not compare equal to its input."""
    bypassed = Segmenter.bypass(self.letter_seg)
    self.assertNotEqual(
        bypassed,
        self.letter_seg,
        msg="bypass doesn't deep copy input segments!",
    )
示例11: test_bypass_copy_annotations
def test_bypass_copy_annotations(self):
    """Check that bypass keeps each segment's 'a' annotation value."""
    bypassed = Segmenter.bypass(self.other_letter_seg)
    copied_values = [segment.annotations['a'] for segment in bypassed]
    source_values = [
        segment.annotations['a'] for segment in self.other_letter_seg
    ]
    self.assertEqual(
        copied_values,
        source_values,
        msg="bypass doesn't copy annotations!",
    )
示例12: test_bypass_copy_segments
def test_bypass_copy_segments(self):
    """Check that bypass output has the same segment contents as its input."""
    bypassed = Segmenter.bypass(self.letter_seg)
    copied_contents = [segment.get_content() for segment in bypassed]
    source_contents = [segment.get_content() for segment in self.letter_seg]
    self.assertEqual(
        copied_contents,
        source_contents,
        msg="bypass doesn't copy input segments!",
    )
示例13: test_import_xml_progress
def test_import_xml_progress(self):
    """Check that import_xml fires the callback once per input segment.

    NOTE(review): relies on ``self.count`` already existing on the test
    case (presumably set in fixture setup outside this view) -- confirm.
    """
    def bump_counter():
        """Record one progress notification."""
        self.count += 1

    Segmenter.import_xml(
        self.broken_xml_seg, element='a', progress_callback=bump_counter
    )
    expected_ticks = len(self.broken_xml_seg)
    self.assertEqual(
        self.count,
        expected_ticks,
        msg="import_xml doesn't track progress!",
    )
示例14: test_parse_xml_tag_attributes
def test_parse_xml_tag_attributes(self):
    """Check that _parse_xml_tag returns the tag's attributes as a dict."""
    parsed = Segmenter._parse_xml_tag('<a attr1="2" attr3="4">')
    expected_attributes = {'attr1': '2', 'attr3': '4'}
    self.assertEqual(
        parsed['attributes'],
        expected_attributes,
        msg="_parse_xml_tag doesn't parse attributes!",
    )
示例15: test_intersect_progress
def test_intersect_progress(self):
    """Check that intersect fires the callback once per source segment.

    NOTE(review): relies on ``self.count`` already existing on the test
    case (presumably set in fixture setup outside this view) -- confirm.
    """
    def bump_counter():
        """Record one progress notification."""
        self.count += 1

    Segmenter.intersect(
        source=self.letter_seg,
        filtering=self.third_letter_seg,
        progress_callback=bump_counter,
    )
    # Progress is measured against the source segmentation, not the filter.
    expected_ticks = len(self.letter_seg)
    self.assertEqual(
        self.count,
        expected_ticks,
        msg="intersect doesn't track progress!",
    )