当前位置: 首页>>代码示例>>Python>>正文


Python lancaster.LancasterStemmer方法代码示例

本文整理汇总了Python中nltk.stem.lancaster.LancasterStemmer方法的典型用法代码示例。如果您正苦于以下问题:Python lancaster.LancasterStemmer方法的具体用法?Python lancaster.LancasterStemmer怎么用?Python lancaster.LancasterStemmer使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在nltk.stem.lancaster的用法示例。


在下文中一共展示了lancaster.LancasterStemmer方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from nltk.stem import lancaster [as 别名]
# 或者: from nltk.stem.lancaster import LancasterStemmer [as 别名]
def __init__(self):

        ###############################################################
        #
        # Prepares the global objects the NLU engine relies on:
        #
        # - Helpers: shared utility functions
        # - _confs:  configuration loaded via Helpers
        # - LogFile: log destination under the JumpWay directory
        # - LancasterStemmer: NLTK word stemmer
        #
        ###############################################################

        self.Helpers = Helpers()
        self._confs = self.Helpers.loadConfigs()
        self.LogFile = self.Helpers.setLogFile(self._confs["aiCore"]["Logs"]+"JumpWay/")

        # Punctuation tokens excluded when extracting training words
        self.ignore = [",", ".", "!", "?"]

        self.LancasterStemmer = LancasterStemmer()
开发者ID:GeniSysAI,项目名称:NLU,代码行数:22,代码来源:Data.py

示例2: __init__

# 需要导入模块: from nltk.stem import lancaster [as 别名]
# 或者: from nltk.stem.lancaster import LancasterStemmer [as 别名]
def __init__(self, Logging, LogFile):

        # NLTK word stemmer used when extracting tokens
        self.LancasterStemmer = LancasterStemmer()

        # Logger instance and target log file supplied by the caller
        self.Logging = Logging
        self.LogFile = LogFile

        # Punctuation tokens that extraction may filter out
        self.ignore = ["?", "!"]

        self.Logging.logMessage(
            self.LogFile, "Data", "INFO", "Data Helper Ready")
开发者ID:GeniSysAI,项目名称:NLU,代码行数:19,代码来源:Users.py

示例3: getTokens

# 需要导入模块: from nltk.stem import lancaster [as 别名]
# 或者: from nltk.stem.lancaster import LancasterStemmer [as 别名]
def getTokens(self, removeStopwords=True):
        """Tokenize ``self.text`` into lowercase Lancaster-stemmed words.

        Records the character spans of the kept tokens in ``self.spans``
        and the end offset of the last token in ``self.length``.

        Args:
            removeStopwords: When True (default), drop English stopwords
                (matched against the *stemmed* token, as before).

        Returns:
            List of stemmed tokens, aligned index-for-index with ``self.spans``.
        """
        # Raw string: "\w" in a plain string is an invalid escape and warns
        # (SyntaxWarning on Python 3.12+). Pattern is unchanged.
        tokenizer = nltk.RegexpTokenizer(r"[a-zA-Z]\w+'?\w*")
        spans = list(tokenizer.span_tokenize(self.text))
        # End offset of the final token; 0 for text with no tokens
        # (the original indexed spans[-1] and crashed on empty input).
        self.length = spans[-1][-1] if spans else 0
        stemmer = LancasterStemmer()
        tokens = [stemmer.stem(token.lower()) for token in tokenizer.tokenize(self.text)]
        if not removeStopwords:
            self.spans = spans
            return tokens
        # A set gives O(1) membership tests instead of O(n) list scans per token.
        stopwords = set(nltk.corpus.stopwords.words('english'))
        tokenSpans = [(tok, span) for tok, span in zip(tokens, spans) if tok not in stopwords]
        self.spans = [span for _, span in tokenSpans]
        return [tok for tok, _ in tokenSpans]
开发者ID:JonathanReeve,项目名称:text-matcher,代码行数:20,代码来源:matcher.py

示例4: extract

# 需要导入模块: from nltk.stem import lancaster [as 别名]
# 或者: from nltk.stem.lancaster import LancasterStemmer [as 别名]
def extract(self, data=None, splitIt=False):

        ###############################################################
        #
        # Extracts stemmed words from a sentence, stripping out the
        # tokens listed in self.ignore.
        #
        # - data:    a string (when splitIt is True) or an iterable of
        #            words; None returns [] (the original crashed with
        #            AttributeError on the None default)
        # - splitIt: when True, whitespace-split data first
        #
        # https://www.nltk.org/_modules/nltk/stem/lancaster.html
        # http://insightsbot.com/blog/R8fu5/bag-of-words-algorithm-in-python-introduction
        #
        ###############################################################

        if data is None:
            return []
        words = data.split() if splitIt else data
        return [self.LancasterStemmer.stem(word) for word in words if word not in self.ignore]
开发者ID:GeniSysAI,项目名称:NLU,代码行数:15,代码来源:Data.py

示例5: extract

# 需要导入模块: from nltk.stem import lancaster [as 别名]
# 或者: from nltk.stem.lancaster import LancasterStemmer [as 别名]
def extract(self, data=None, lowerIt=True, splitIt=False, ignoreWords=False):

        ###############################################################
        #
        # Extracts stemmed words from a sentence.
        #
        # - data:        a string (when splitIt is True) or an iterable
        #                of words; None returns [] (the original crashed
        #                with AttributeError on the None default)
        # - lowerIt:     lowercase each word before stemming (default)
        # - splitIt:     when True, whitespace-split data first
        # - ignoreWords: when True, drop words found in self.ignore
        #                (matched against the raw word, as before)
        #
        ###############################################################

        if data is None:
            return []
        words = data.split() if splitIt else data
        if ignoreWords:
            words = [word for word in words if word not in self.ignore]
        return [self.LancasterStemmer.stem(word.lower() if lowerIt else word) for word in words]
开发者ID:GeniSysAI,项目名称:NLU,代码行数:8,代码来源:Users.py

示例6: __init__

# 需要导入模块: from nltk.stem import lancaster [as 别名]
# 或者: from nltk.stem.lancaster import LancasterStemmer [as 别名]
def __init__(self):

        ###############################################################
        #
        # Prepares the default requirements for the Mitie helper:
        #
        # - Helpers: shared utility functions
        # - _confs:  configuration loaded via Helpers
        # - stemmer: NLTK Lancaster word stemmer
        #
        ###############################################################

        self.Helpers = Helpers()
        self._confs = self.Helpers.loadConfigs()

        self.stemmer = LancasterStemmer()
开发者ID:GeniSysAI,项目名称:NLU,代码行数:17,代码来源:Mitie.py

示例7: get_vocabularies

# 需要导入模块: from nltk.stem import lancaster [as 别名]
# 或者: from nltk.stem.lancaster import LancasterStemmer [as 别名]
def get_vocabularies(dataset, vocab_file, nearby_file):
  """Create a map from example ID to (basic_words, nearby_words)."""
  with open(vocab_file) as fh:
    basic_vocab = [line.strip() for line in fh]
  with open(nearby_file) as fh:
    nearby_words = json.load(fh)
  stemmer = LancasterStemmer()
  vocabs = {}
  for article in dataset['data']:
    for paragraph in article['paragraphs']:
      for qa in paragraph['qas']:
        q_words = [w.lower() for w in word_tokenize(qa['question'])]
        if OPTS.mode == 'basic':
          vocabs[qa['id']] = (basic_vocab, [])
        elif OPTS.mode == 'add-question-words':
          vocabs[qa['id']] = (basic_vocab, q_words)
        elif OPTS.mode.endswith('-nearby'):
          q_stems = [stemmer.stem(w) for w in q_words]
          # Keep only vocab entries whose surface form is not a question stem.
          cur_vocab = [w for w in basic_vocab if w not in q_stems]
          cur_nearby = []
          for q_word, q_stem in zip(q_words, q_stems):
            if q_word not in nearby_words:
              continue
            picked = []
            for candidate in nearby_words[q_word]:
              # Take at most OPTS.num_nearby non-punctuation neighbors
              # whose stem differs from the question word's stem.
              if len(picked) == OPTS.num_nearby:
                break
              if candidate['word'] in PUNCTUATION:
                continue
              if stemmer.stem(candidate['word']) != q_stem:
                picked.append(candidate['word'])
            cur_nearby.extend(picked)
          vocabs[qa['id']] = (cur_vocab, cur_nearby)
  return vocabs
开发者ID:robinjia,项目名称:adversarial-squad,代码行数:34,代码来源:adversarial_squad.py


注:本文中的nltk.stem.lancaster.LancasterStemmer方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。