當前位置: 首頁>>代碼示例>>Python>>正文


Python xmldocs.XMLCorpusView類代碼示例

本文整理匯總了Python中nltk.corpus.reader.xmldocs.XMLCorpusView的典型用法代碼示例。如果您正苦於以下問題:Python XMLCorpusView類的具體用法?Python XMLCorpusView怎麼用?Python XMLCorpusView使用的例子?那麼, 這裏精選的類代碼示例或許可以為您提供幫助。


在下文中一共展示了XMLCorpusView類的10個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: __init__

 def __init__(self, filename, **kwargs):
     """
     Set up a view over the preprocessed ``ann_morphosyntax.xml`` file.

     :param filename: Handed to ``XML_Tool`` together with the fixed
         name ``'ann_morphosyntax.xml'``.
     :param kwargs: Only ``tags`` is read (default ``None``); its value
         is stored on ``self.tags``.
     """
     self.tagspec = '.*/seg/fs'
     self.tags = kwargs.pop('tags', None)
     self.xml_tool = XML_Tool(filename, 'ann_morphosyntax.xml')
     # The base view is initialised on the file produced by the XML tool.
     preprocessed = self.xml_tool.build_preprocessed_file()
     XMLCorpusView.__init__(self, preprocessed, self.tagspec)
開發者ID:prz3m,項目名稱:kind2anki,代碼行數:7,代碼來源:nkjp.py

示例2: __init__

    def __init__(self, fileid, sent, tag, strip_space, stem):
        """
        Initialise the view and read the document header once.

        :param fileid: The name of the underlying file.
        :param sent: If true, include sentence bracketing.
        :param tag: The name of the tagset to use, or None for no tags.
        :param strip_space: If true, strip spaces from word tokens.
        :param stem: If true, then substitute stems for words.
        """
        if sent:
            tagspec = '.*/s'
        else:
            tagspec = '.*/s/(.*/)?(c|w)'
        self._sent = sent
        self._tag = tag
        self._strip_space = strip_space
        self._stem = stem

        self.title = None  #: Title of the document.
        self.author = None  #: Author of the document.
        self.editor = None  #: Editor
        self.resps = None  #: Statement of responsibility

        XMLCorpusView.__init__(self, fileid, tagspec)

        # Read in the header. The stream is closed in a ``finally`` so it
        # is not left open if parsing or ``handle_header`` raises.
        self._open()
        try:
            self.read_block(
                self._stream, '.*/teiHeader$', self.handle_header
            )
        finally:
            self.close()

        # Reset tag context.
        self._tag_context = {0: ()}
開發者ID:Journo-App,項目名稱:flask-by-example,代碼行數:31,代碼來源:bnc.py

示例3: __init__

 def __init__(self, filename, **kwargs):
     """
     Set up a view over the preprocessed ``text.xml`` file.

     :param filename: Handed to ``XML_Tool`` together with the fixed
         name ``'text.xml'``.
     :param kwargs: Only ``mode`` is read (default ``0``); its value is
         stored on ``self.mode``.
     """
     self.mode = kwargs.pop('mode', 0)
     self.tagspec = '.*/div/ab'
     self.segm_dict = {}
     # XML preprocessing
     self.xml_tool = XML_Tool(filename, 'text.xml')
     preprocessed = self.xml_tool.build_preprocessed_file()
     # Base class initialisation on the preprocessed file
     XMLCorpusView.__init__(self, preprocessed, self.tagspec)
開發者ID:esabelhaus,項目名稱:secret-octo-dubstep,代碼行數:8,代碼來源:nkjp.py

示例4: read_block

 def read_block(self, stream, tagspec=None, elt_handler=None):
     """
     Read one block through ``XMLCorpusView.read_block`` and return its
     items as a list with every ``None`` entry removed.
     """
     elts = XMLCorpusView.read_block(self, stream, tagspec, elt_handler)
     return [elt for elt in elts if elt is not None]
開發者ID:prz3m,項目名稱:kind2anki,代碼行數:7,代碼來源:mte.py

示例5: __init__

    def __init__(self, fileid, unit, bracket_sent, pos_tag, sem_tag):
        """
        Store the view options and initialise the underlying XML view.

        :param fileid: The name of the underlying file.
        :param unit: One of `'token'`, `'word'`, or `'chunk'`.
        :param bracket_sent: If true, include sentence bracketing.
        :param pos_tag: Whether to include part-of-speech tags.
        :param sem_tag: Whether to include semantic tags, namely WordNet lemma
            and OOV named entity status.
        """
        self._unit = unit
        self._sent = bracket_sent
        self._pos_tag = pos_tag
        self._sem_tag = sem_tag

        # Whole sentences when bracketing, otherwise the individual
        # punc/wf elements inside each sentence.
        tagspec = '.*/s' if bracket_sent else '.*/s/(punc|wf)'
        XMLCorpusView.__init__(self, fileid, tagspec)
開發者ID:brymaven,項目名稱:nltk,代碼行數:18,代碼來源:semcor.py

示例6: handle_query

 def handle_query(self):
     """
     Read the whole underlying stream and return all items as one list.

     Blocks are read with ``XMLCorpusView.read_block`` until an empty
     block is returned. The stream is closed even if reading raises.

     :return: A list with the items of every block, in reading order.
     """
     header = []
     self._open()
     try:
         while True:
             segm = XMLCorpusView.read_block(self, self._stream)
             if not segm:
                 # An empty block marks the end of the stream.
                 break
             header.extend(segm)
     finally:
         self.close()
     return header
開發者ID:esabelhaus,項目名稱:secret-octo-dubstep,代碼行數:10,代碼來源:nkjp.py

示例7: _detect_encoding

    def _detect_encoding(self, fileid):
        """
        Return the encoding named on the first line of ``fileid``.

        The first line is searched for ``encoding="..."`` and then
        ``encoding='...'``. If neither is present, the decision is
        delegated to ``XMLCorpusView._detect_encoding``.

        :param fileid: A ``PathPointer`` or a path accepted by ``open``.
        :return: The declared encoding name, or the base class's result.
        """
        if isinstance(fileid, PathPointer):
            stream = fileid.open()
        else:
            stream = open(fileid, 'rb')
        try:
            first_line = stream.readline()
        finally:
            # Close the handle; only the first line is needed.
            stream.close()

        # A binary read yields bytes, which cannot be searched with a
        # text pattern. latin-1 maps every byte to a character, so the
        # decode cannot fail and ASCII declarations match unchanged.
        if isinstance(first_line, bytes):
            first_line = first_line.decode('latin-1')

        for pattern in (r'encoding="([^"]+)"', r"encoding='([^']+)'"):
            m = re.search(pattern, first_line)
            if m:
                return m.group(1)

        return XMLCorpusView._detect_encoding(self, fileid)
開發者ID:IMAmuseum,項目名稱:getty-vocab-reconciliation,代碼行數:12,代碼來源:getty.py

示例8: read_block

    def read_block(self, stream, tagspec=None, elt_handler=None):
        """
        Read every remaining block from ``stream`` and return a
        one-element list holding all items joined by single spaces.

        ``tagspec`` and ``elt_handler`` are accepted but not forwarded to
        ``XMLCorpusView.read_block``.
        """
        parts = []
        segm = XMLCorpusView.read_block(self, stream)
        # Keep reading until the base reader returns an empty block.
        while len(segm) != 0:
            parts.extend(segm)
            segm = XMLCorpusView.read_block(self, stream)

        return [' '.join(parts)]
開發者ID:esabelhaus,項目名稱:secret-octo-dubstep,代碼行數:13,代碼來源:nkjp.py

示例9: __init__

 def __init__(self, fileid, tagspec, elt_handler=None):
     """
     Forward all arguments unchanged to ``XMLCorpusView.__init__``.
     """
     XMLCorpusView.__init__(
         self,
         fileid,
         tagspec,
         elt_handler,
     )
開發者ID:Copper-Head,項目名稱:nltk,代碼行數:2,代碼來源:mte.py

示例10: zip

    np.savetxt("lsa_model.csv", matrix, delimiter="\t")  # raw output

    doc_2d = []
    for doc, file in zip(matrix, filenames):  # reduce the data to 2 dimensions
        # print(file, "\n", doc, "\n\n")    # debug msg
        doc_2d.append(TSNE().fit_transform(doc).tolist()[0])

    matrix = np.asarray(doc_2d)  # update matrix array

    # raw output
    np.savetxt("lsa_reduced.csv", matrix, delimiter="\t")  # raw output

    # build list of tags from the metadata
    metadata = pd.DataFrame(index=filenames, columns=["Tags"])

    view = XMLCorpusView("txt/export-abstracts.xml", ".*/article")
    iter = view.iterate_from(0)
    for entry in iter:
        metadata.loc[entry.attrib["{http://www.w3.org/XML/1998/namespace}id"] + ".txt", "Tags"] = entry.attrib["type"]

    metadata.to_csv("lsa_metadata.csv")

    ##############################################################################
    # CLUSTERING

    print("clustering ...\n")

    # af = AffinityPropagation(damping=0.9, affinity="euclidean", preference=-50).fit(matrix)
    af = AffinityPropagation().fit(matrix)  # default

    cluster_centers_indices = af.cluster_centers_indices_
開發者ID:stefanpernes,項目名稱:word-embedding,代碼行數:31,代碼來源:lsa.py


注:本文中的nltk.corpus.reader.xmldocs.XMLCorpusView類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。