当前位置: 首页>>代码示例>>Python>>正文


Python KeyTerms.get_keyterm方法代码示例

本文整理汇总了Python中regparser.layer.key_terms.KeyTerms.get_keyterm方法的典型用法代码示例。如果您正苦于以下问题：Python KeyTerms.get_keyterm方法的具体用法？Python KeyTerms.get_keyterm怎么用？Python KeyTerms.get_keyterm使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类regparser.layer.key_terms.KeyTerms的用法示例。


在下文中一共展示了KeyTerms.get_keyterm方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: collapsed_markers_matches

# 需要导入模块: from regparser.layer.key_terms import KeyTerms [as 别名]
# 或者: from regparser.layer.key_terms.KeyTerms import get_keyterm [as 别名]
def collapsed_markers_matches(node_text, tagged_text):
    """Return the regex matches in ``node_text`` that look like collapsed
    markers, i.e. tree node paragraphs that begin within a single XML node.

    Citations and other false positives are discarded. The search runs on
    the plain text but uses ``tagged_text`` for hints; this is admittedly
    hacky. @todo: streamline logic

    :param node_text: plain text of the paragraph being inspected
    :param tagged_text: the same paragraph with its XML tags kept
    :returns: list of match objects, grouped in ``_first_markers`` order
    """
    # A keyterm is also an acceptable prefix for a collapsed marker, so any
    # keyterm is overwritten with periods to make it satisfy the marker
    # regexes
    keyterm_node = Node(node_text, node_type=Node.INTERP,
                        label=[get_first_interp_marker(node_text)])
    keyterm_node.tagged_text = tagged_text
    keyterm = KeyTerms.get_keyterm(keyterm_node)
    if keyterm:
        node_text = node_text.replace(keyterm, '.' * len(keyterm))

    # A candidate immediately followed by any of these is thrown away
    disqualifying_followers = ("e.", ")", u"”", '"', "'")

    found = []
    for marker_regex in _first_markers:
        # Matches at position 0 are skipped: start() must be positive
        candidates = remove_citation_overlaps(
            node_text,
            ((match, match.start(), match.end())
             for match in marker_regex.finditer(node_text)
             if match.start() > 0))
        survivors = [
            match for match, _, end in candidates
            if not node_text[end:].startswith(disqualifying_followers)]
        # Every "1." collapsed marker must be emphasized, so without an
        # emphasized "1" in the tagged text those matches are false
        # positives
        if '<E T="03">1' not in tagged_text:
            survivors = [match for match in survivors
                         if match.group(1) != '1']
        found.extend(survivors)
    return found
开发者ID:cmc333333,项目名称:regulations-parser,代码行数:32,代码来源:interpretations.py

示例2: process

# 需要导入模块: from regparser.layer.key_terms import KeyTerms [as 别名]
# 或者: from regparser.layer.key_terms.KeyTerms import get_keyterm [as 别名]
    def process(self, node):
        """Build the model-form text layer entry for ``node``.

        Only nodes whose label id maps to a truthy entry in
        ``self.model_forms_nodes`` are handled. The node's text, minus a
        leading keyterm when one is present, is split on single spaces; the
        first and last words of that split become the start and end words.

        :param node: object exposing ``label_id()`` and ``text``
        :returns: a one-element list holding a dict with the keys
            ``start_word``, ``start_locations``, ``end_word`` and
            ``end_locations``; ``None`` when the node is not listed in
            ``self.model_forms_nodes`` or either word is empty.
        """
        label = node.label_id()
        if (label not in self.model_forms_nodes
                or not self.model_forms_nodes[label]):
            return None

        closing_tag = '</E>'
        keyterm = KeyTerms.get_keyterm(node)
        # str.find returns -1 when the tag is missing. Slicing from
        # ``-1 + len(closing_tag)`` would silently drop the first three
        # characters of the text, so only slice when the tag was found.
        tag_start = node.text.find(closing_tag) if keyterm else -1
        if tag_start >= 0:
            # Keep only what follows the keyterm's closing tag
            remainder = node.text[tag_start + len(closing_tag):]
        else:
            # No keyterm, or no closing tag to cut at: use the same
            # handling as the no-keyterm case.
            # NOTE(review): confirm this fallback is the desired one for a
            # keyterm whose closing tag is absent from node.text.
            remainder = KeyTerms.process_node_text(node)
        words = remainder.split(' ')

        start_of_model_form = words[0]
        end_of_model_form = words[-1]
        if not (start_of_model_form and end_of_model_form):
            return None

        # Zero-based position of the final occurrence of the end word among
        # all of its occurrences
        location_end = words.count(end_of_model_form) - 1
        return [{
            'start_word': start_of_model_form,
            'start_locations': [0],
            'end_word': end_of_model_form,
            'end_locations': [location_end]
        }]
开发者ID:EricSchles,项目名称:regulations-parser,代码行数:28,代码来源:model_forms_text.py

示例3: paragraph_with_marker

# 需要导入模块: from regparser.layer.key_terms import KeyTerms [as 别名]
# 或者: from regparser.layer.key_terms.KeyTerms import get_keyterm [as 别名]
    def paragraph_with_marker(self, text, tagged_text):
        """Handle a paragraph that has a marker, like (a) or a. etc.

        The text is passed through ``split_paragraph_text`` and one
        APPENDIX node per resulting piece is appended to ``self.nodes``.
        Each node is labelled with the first element of
        ``initial_marker(piece)`` and carries the full ``tagged_text``.
        """
        # Look for a keyterm using a throwaway node built from the
        # unsplit text
        probe = Node(text, node_type=Node.APPENDIX)
        probe.tagged_text = tagged_text
        probe.label = [initial_marker(text)[0]]
        keyterm = KeyTerms.get_keyterm(probe)

        # To aid in determining collapsed paragraphs, any keyterm is
        # masked with an equally long run of semicolons before splitting
        if keyterm:
            mask = ';' * len(keyterm)
            masked_text = text.replace(keyterm, mask)
        else:
            mask = None
            masked_text = text

        for piece in split_paragraph_text(masked_text):
            if keyterm:
                # Undo the masking: the node needs the original wording
                piece = piece.replace(mask, keyterm)
            piece_node = Node(piece, node_type=Node.APPENDIX,
                              label=[initial_marker(piece)[0]])
            piece_node.tagged_text = tagged_text
            self.nodes.append(piece_node)
开发者ID:phildini,项目名称:regulations-parser,代码行数:33,代码来源:appendices.py

示例4: replace_markerless

# 需要导入模块: from regparser.layer.key_terms import KeyTerms [as 别名]
# 或者: from regparser.layer.key_terms.KeyTerms import get_keyterm [as 别名]
 def replace_markerless(self, stack, node, depth):
     """Give a MARKERLESS paragraph a unique ``p<N>`` label, in place.

     Nodes whose last label part is not ``mtypes.MARKERLESS`` are left
     untouched. When the node has a keyterm, the number comes from
     ``keyterm_to_int``; otherwise it is one more than the count of
     qualifying nodes in ``stack.peek_level(depth)``.
     """
     if node.label[-1] != mtypes.MARKERLESS:
         return

     keyterm = KeyTerms.get_keyterm(node, ignore_definitions=False)
     if keyterm:
         paragraph_index = keyterm_to_int(keyterm)
     else:
         level_nodes = stack.peek_level(depth)
         # The length check (< 6) filters out keyterm nodes
         already_numbered = sum(
             sibling.is_markerless() and len(sibling.label[-1]) < 6
             for sibling in level_nodes)
         paragraph_index = already_numbered + 1
     node.label[-1] = 'p{}'.format(paragraph_index)
开发者ID:vrajmohan,项目名称:regulations-parser,代码行数:13,代码来源:paragraph_processor.py

示例5: paragraph_with_marker

# 需要导入模块: from regparser.layer.key_terms import KeyTerms [as 别名]
# 或者: from regparser.layer.key_terms.KeyTerms import get_keyterm [as 别名]
    def paragraph_with_marker(self, text, tagged_text):
        """Handle a paragraph that has a marker, like (a) or a. etc.

        The text is passed through ``split_paragraph_text`` and one
        APPENDIX node per resulting piece is appended to ``self.nodes``,
        labelled with the first element of ``initial_marker(piece)``.
        ``tagged_text`` is only used for the keyterm lookup.
        """
        # Look for a keyterm using a throwaway node built from the
        # unsplit text
        keyterm_probe = Node(text, node_type=Node.APPENDIX)
        keyterm_probe.tagged_text = tagged_text
        keyterm_probe.label = [initial_marker(text)[0]]
        keyterm = KeyTerms.get_keyterm(keyterm_probe)

        # To aid in determining collapsed paragraphs, any keyterm is
        # masked with an equally long run of periods before splitting
        if keyterm:
            placeholder = '.' * len(keyterm)
            splittable = text.replace(keyterm, placeholder)
        else:
            placeholder = None
            splittable = text

        for chunk in split_paragraph_text(splittable):
            if keyterm:
                # Undo the masking: the node needs the original wording
                chunk = chunk.replace(placeholder, keyterm)
            self.nodes.append(Node(chunk, node_type=Node.APPENDIX,
                                   label=[initial_marker(chunk)[0]]))
开发者ID:EricSchles,项目名称:regulations-parser,代码行数:21,代码来源:appendices.py


注:本文中的regparser.layer.key_terms.KeyTerms.get_keyterm方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。