当前位置: 首页>>代码示例>>Python>>正文


Python wordnet_ic.ic函数代码示例

本文整理汇总了Python中nltk.corpus.wordnet_ic.ic函数的典型用法代码示例。如果您正苦于以下问题:Python ic函数的具体用法?Python ic怎么用?Python ic使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了ic函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: similarity_by_infocontent

def similarity_by_infocontent(sense1, sense2, option):
    """Return an information-content based similarity score for two senses.

    Args:
        sense1: first WordNet synset.
        sense2: second WordNet synset.
        option: metric name; one of 'res'/'resnik', 'jcn'/'jiang-conrath'
            or 'lin'.

    Returns:
        0 when the senses have different parts of speech, otherwise the
        result of the matching ``wn.*_similarity`` call. ``None`` when
        ``option`` is not a recognised metric name.
    """
    def _pos(sense):
        # ``pos`` is a plain attribute on older NLTK synsets and a method on
        # newer ones. Comparing two bound methods is almost never equal, so
        # resolve the actual tag before comparing.
        pos = sense.pos
        return pos() if callable(pos) else pos

    if _pos(sense1) != _pos(sense2):  # infocontent sim can't do diff POS.
        return 0

    if option in ('res', 'resnik'):
        return wn.res_similarity(sense1, sense2,
                                 wnic.ic('ic-bnc-resnik-add1.dat'))
    if option in ('jcn', 'jiang-conrath'):
        return wn.jcn_similarity(sense1, sense2, wnic.ic('ic-bnc-add1.dat'))
    if option in ('lin',):
        return wn.lin_similarity(sense1, sense2, wnic.ic('ic-bnc-add1.dat'))
    # Unknown metric names fall through, as before.
    return None
开发者ID:ChenglongChen,项目名称:pywsd,代码行数:32,代码来源:similarity.py

示例2: similarity

def similarity(word1, word2, tag):
    """Return the Resnik similarity between the first senses of two words.

    Args:
        word1: lemma of the first word.
        word2: lemma of the second word.
        tag: WordNet part-of-speech letter used to build the synset names
            ``<word>.<tag>.01``.

    Returns:
        The value of ``res_similarity`` computed with the Brown
        information-content file.
    """
    obj1 = wn.synset(word1 + "." + tag + ".01")
    obj2 = wn.synset(word2 + "." + tag + ".01")
    # Only the Brown information content is needed; the previous second load
    # of the same file under the name ``semcor_ic`` was never used.
    brown_ic = wordnet_ic.ic('ic-brown.dat')
    return obj1.res_similarity(obj2, brown_ic)
开发者ID:Lightyagami1,项目名称:exploratoryProject,代码行数:8,代码来源:project.py

示例3: test_wordnet_similarities

 def test_wordnet_similarities(self):
     """Check path-based and information-content similarities for dog/cat."""
     dog, cat = S('dog.n.01'), S('cat.n.01')

     # Path based similarities.
     self.assertAlmostEqual(cat.path_similarity(S('cat.n.01')), 1.0)
     self.assertAlmostEqual(dog.path_similarity(cat), 0.2)
     for metric, expected in (('lch_similarity', 2.028),
                              ('wup_similarity', 0.8571)):
         self.assertAlmostEqual(getattr(dog, metric)(cat), expected, places=3)

     # Information Content similarities: each metric is checked against its
     # own information-content file, loaded right before the assertion.
     for metric, ic_file, expected in (
             ('jcn_similarity', 'ic-brown.dat', 0.4497),
             ('lin_similarity', 'ic-semcor.dat', 0.8863)):
         info_content = wnic.ic(ic_file)
         self.assertAlmostEqual(getattr(dog, metric)(cat, info_content),
                                expected, places=3)
开发者ID:alpaco42,项目名称:ML_Spring_2018,代码行数:11,代码来源:test_wordnet.py

示例4: _other_recognition

    def _other_recognition(self, tagged_sentences, all_entities, question):
        """Select nouns whose first noun synset is similar to the question head.

        Args:
            tagged_sentences: iterable of sentences, each a sequence of
                (token, tag) pairs.
            all_entities: iterable of entity strings; every whitespace
                separated word in them is excluded from the candidates.
            question: object whose ``text`` is passed to
                ``QuestionClassifier.get_features``.

        Returns:
            A list of nouns. When no head word is found, or neither the head
            nor the fallback noun has a noun synset, all candidate nouns are
            returned unfiltered.
        """
        # Nouns retrieval: keep the tokens tagged "NN".
        tagged_nouns = [pair
                        for sentence in tagged_sentences
                        for pair in sentence
                        if pair[1] == "NN"]
        nouns = [word for (word, _tag) in tagged_nouns]

        # Nouns filtering: drop every noun that is a word of some entity.
        entity_words = set(
            itertools.chain.from_iterable(str.split(entity)
                                          for entity in all_entities))
        nouns = [word for word in nouns if word not in entity_words]

        features = QuestionClassifier.get_features(question.text, "hn")
        head = features["head"]
        if head == "":
            return nouns

        # Similarity threshold, with a hard-coded fallback when the setting
        # cannot be read from the configuration.
        try:
            threshold = float(MyConfig.get("answer_extraction", "other_threshold"))
        except MyConfigException as e:
            logging.getLogger("qa_logger").warning(str(e))
            threshold = 0.6

        # Information-content file, with the BNC file as fallback.
        try:
            ic = wordnet_ic.ic(MyConfig.get("answer_extraction", "ic"))
        except MyConfigException as e:
            logging.getLogger("qa_logger").warning(str(e))
            ic = wordnet_ic.ic("ic-bnc.dat")

        # Reference synset: first noun sense of the head, else of the
        # "noun" feature.
        reference_synsets = wn.synsets(head, pos=wn.NOUN)
        if not reference_synsets:
            reference_synsets = wn.synsets(features["noun"], pos=wn.NOUN)
            if not reference_synsets:
                return nouns
        head_synset = reference_synsets[0]

        result = []
        for noun in nouns:
            try:
                score = wn.synsets(noun, pos=wn.NOUN)[0].lin_similarity(
                    head_synset, ic)
            except IndexError:
                # No noun synset for this candidate.
                continue
            if threshold < score < 0.9:
                result.append(noun)

        return result
开发者ID:danigarabato,项目名称:qa,代码行数:53,代码来源:answer.py

示例5: test

def test():
    """Print the bidirectional similarity of two sample sentences.

    Builds a ``SimilarityCalculator`` over the Brown corpus with the Brown
    information-content file and prints the score for a fixed sentence pair.
    """
    col = nltk.TextCollection(nltk.corpus.brown)
    brown_ic = wordnet_ic.ic('ic-brown.dat')
    sc = SimilarityCalculator(col, 'bp', brown_ic)
    sentence1 = preprocess("The jurors were taken into the courtroom in groups of 40 and asked to fill out a questionnaire.")
    sentence2 = preprocess("About 120 potential jurors were being asked to complete a lengthy questionnaire.")
    # Single-argument call form prints identically on Python 2 and Python 3.
    print(sc.similarity_bidirectional(sentence1, sentence2))
开发者ID:varzan,项目名称:semantic-longestpoem,代码行数:7,代码来源:semsim.py

示例6: __init__

 def __init__ (self, sim_threshold = 0.1, sim_weight = 1, **kwds):
     """Store the similarity settings and make sure the Brown IC is loaded.

     Args:
         sim_threshold: value kept in the private threshold attribute.
         sim_weight: value kept in the private weight attribute.
         **kwds: forwarded unchanged to the parent initialiser.
     """
     global brown_ic
     super().__init__(**kwds)
     # The information content is shared at module level and loaded only
     # while the global is still falsy.
     brown_ic = brown_ic or wordnet_ic.ic('ic-brown.dat')
     self.__threshold, self.__weight = sim_threshold, sim_weight
开发者ID:agarsev,项目名称:grafeno,代码行数:7,代码来源:sim_link.py

示例7: get_similarity

 def get_similarity(self, synsets1, synsets2):
     """Return the highest Resnik similarity over all synset pairs.

     Every synset of ``synsets1`` is compared with every synset of
     ``synsets2`` using the Brown information content. The result is never
     below 0, which is also the value for empty inputs.
     """
     info_content = wordnet_ic.ic("ic-brown.dat")
     pair_scores = [wn.res_similarity(left, right, info_content)
                    for left in synsets1
                    for right in synsets2]
     # Seeding with 0 keeps 0 as the floor, matching a running maximum that
     # starts at 0 and is only replaced by strictly larger values.
     return max([0] + pair_scores)
开发者ID:prasnko,项目名称:nlp-event-coreference,代码行数:9,代码来源:LexicalFeatureExtractor.py

示例8: sensesim

def sensesim(ss1,ss2,metric):
    """Return the similarity of two synsets under the named metric.

    Args:
        ss1: synset whose ``<metric>_similarity`` method is called.
        ss2: synset passed as the other operand.
        metric: 'path', 'lch' or 'wup' (structure based), or 'lin', 'jcn'
            or 'res' (these additionally receive the Brown information
            content).

    Returns:
        The value returned by the synset method, or 0 after printing a
        message when ``metric`` is not recognised.
    """
    if metric in ('path', 'lch', 'wup'):
        return getattr(ss1, metric + '_similarity')(ss2)
    if metric in ('lin', 'jcn', 'res'):
        # The information-content file is only loaded for metrics that use it.
        return getattr(ss1, metric + '_similarity')(ss2, wn_ic.ic('ic-brown.dat'))
    # Single-argument call form prints the same text on Python 2 and 3.
    print("Unknown metric %s" % (metric,))
    return 0
开发者ID:julieweeds,项目名称:Compounds,代码行数:18,代码来源:compare.py

示例9: __init__

    def __init__(self,parameters):
        """Keep the parameter mapping and resolve the analyser settings.

        Each setting is read from ``parameters`` and falls back to the
        attribute of the same purpose on ``Analyser``. The SemCor
        information content is loaded and the candidate table starts empty.
        """
        self.parameters=parameters
        lookup=self.parameters.get

        self.wn_sim=lookup("wn_sim",Analyser.simmetric)
        self.ic=wn_ic.ic('ic-semcor.dat')
        self.candidates={}

        # (attribute name, parameter key); the default is the class
        # attribute with the same name as the instance attribute.
        for attribute,key in (("synsetthresh","synset_thresh"),
                              ("totalthresh","total_thresh"),
                              ("propthresh","prop_thresh"),
                              ("simthresh","sim_thresh")):
            setattr(self,attribute,lookup(key,getattr(Analyser,attribute)))
开发者ID:julieweeds,项目名称:SentenceCompletionChallenge,代码行数:10,代码来源:senses.py

示例10: get_lin_distance

    def get_lin_distance(self, word1, word2):
        """Return the Lin similarity between the first senses of two words.

        Despite the name, the value is the similarity score returned by
        ``lin_similarity`` with the Brown information content, not a distance.

        Returns:
            The similarity, or 0 when either word has no synsets or the
            similarity cannot be computed.
        """
        senses1 = wn.synsets(word1)
        if not senses1:
            return 0
        senses2 = wn.synsets(word2)
        if not senses2:
            return 0

        # Loaded only once both words are known to have senses.
        brown_ic = wordnet_ic.ic('ic-brown.dat')
        try:
            return senses1[0].lin_similarity(senses2[0], brown_ic)
        except Exception:
            # Best effort: any failure to compare the two senses counts as
            # "no similarity". Unlike a bare ``except``, this lets
            # KeyboardInterrupt and SystemExit propagate.
            return 0
开发者ID:kanghj,项目名称:wordnews_server,代码行数:13,代码来源:WordDistanceCalculator.py

示例11: similarity_by_path

def similarity_by_path(sense1, sense2, option="path"):
    """Return a path-based similarity between two senses.

    Args:
        sense1: first WordNet synset.
        sense2: second WordNet synset.
        option: metric name, case-insensitive: 'path'/'path_similarity',
            'wup'/'wupa'/'wu-palmer', or 'lch'/'leacock-chodorow' (the
            historical spelling 'leacock-chordorow' is still accepted).

    Returns:
        For 'path', the larger of the two directional path similarities
        (``None`` if neither direction yields a score). For 'wup' and 'lch',
        the corresponding ``wn`` similarity; 'lch' returns 0 when the senses
        differ in part of speech. ``None`` for an unrecognised option.
    """
    def _pos(sense):
        # ``pos`` is an attribute on older NLTK synsets and a method on
        # newer ones; resolve the tag so the comparison is meaningful.
        pos = sense.pos
        return pos() if callable(pos) else pos

    metric = option.lower()
    if metric in ("path", "path_similarity"):  # Path similarity
        # Evaluate both directions; a missing score (None) is ignored so the
        # comparison also works on Python 3.
        scores = [score for score in (wn.path_similarity(sense1, sense2),
                                      wn.path_similarity(sense2, sense1))
                  if score is not None]
        return max(scores) if scores else None
    if metric in ("wup", "wupa", "wu-palmer"):  # Wu-Palmer
        return wn.wup_similarity(sense1, sense2)
    if metric in ("lch", "leacock-chordorow", "leacock-chodorow"):  # Leacock-Chodorow
        if _pos(sense1) != _pos(sense2):  # lch can't do diff POS
            return 0
        return wn.lch_similarity(sense1, sense2)
    return None
开发者ID:alee101,项目名称:wsd,代码行数:13,代码来源:semanticsim.py

示例12: single_jiang_conrath

def single_jiang_conrath(cast_no1, cast_no2, syn_dict):
    """Average the Leacock-Chodorow similarity between two casts' synsets.

    Despite the function name, the metric used is ``lch_similarity``. Only
    pairs of noun-noun or verb-verb synsets are compared; ``None`` entries
    and pairs without a score are skipped.

    Args:
        cast_no1: key of the first synset list in ``syn_dict``.
        cast_no2: key of the second synset list in ``syn_dict``.
        syn_dict: mapping from cast key to a list of synsets (entries may be
            ``None``).

    Returns:
        The mean similarity over all compared pairs, or 0.0 when no pair
        could be compared.
    """
    synsets1 = syn_dict[cast_no1]
    synsets2 = syn_dict[cast_no2]
    total_sim = 0.0
    no_of_comparisons = 0
    for syn1 in synsets1:
        if syn1 is None:
            continue
        pos = syn1.pos()
        if pos not in ("n", "v"):
            continue
        for syn2 in synsets2:
            if syn2 is None or syn2.pos() != pos:
                continue
            # lch_similarity is structure based and takes no information
            # content argument.
            sim = syn1.lch_similarity(syn2)
            if sim is None:
                continue
            total_sim += sim
            no_of_comparisons += 1
    if not no_of_comparisons:
        return 0.0
    return total_sim / no_of_comparisons
开发者ID:wingy-wing,项目名称:dissertation,代码行数:15,代码来源:lch_wordnet_adapted_lesk_for_two_casts.py

示例13: main

def main():
    """Compare Lin, Resnik and vector similarities against human scores.

    Reads the human judgements from ``input.txt``, computes the three
    automatic scores for every pair, prints one table row per pair and then
    the mean squared error of each method.
    """
    def _score_or_zero(table, key):
        # A pair missing from a method's results counts as similarity 0.
        try:
            return table[key]
        except (KeyError, IndexError, TypeError):
            return 0

    brown_ic = wordnet_ic.ic('ic-brown.dat')

    human_sims = parseFile("input.txt")

    lin_sims = linSimilarities(human_sims.keys(), brown_ic)
    res_sims = resSimilarities(human_sims.keys(), brown_ic)
    model = gensim.models.Word2Vec()
    model = model.load_word2vec_format(RESOURCES+'glove_model.txt', binary=False)
    vec_sims = vecSimilarities(human_sims.keys(), model)

    lin_score = 0
    res_score = 0
    vec_score = 0

    row_format = '{0:15} {1:15} {2:10} {3:20} {4:20} {5:20}'
    print(row_format.format('word1', 'word2', 'human', 'Lin', 'Resnik',
                            'Word2Vec'))
    for key, human in human_sims.items():
        lin = _score_or_zero(lin_sims, key)
        res = _score_or_zero(res_sims, key)
        vec = _score_or_zero(vec_sims, key)
        lin_score += (lin - human) ** 2
        res_score += (res - human) ** 2
        vec_score += (vec - human) ** 2

        # Keys look like "(first, second)"; pull the two words back out.
        firstword = key.partition('(')[-1].rpartition(',')[0]
        secondword = key.partition(',')[-1].rpartition(')')[0].strip()
        print(row_format.format(firstword, secondword, human, lin, res, vec))

    if not human_sims:
        # Nothing to average; avoid dividing by zero.
        return

    # float() keeps true division on Python 2 as well.
    num_examples = float(len(human_sims))
    print("\nMean Squared Errors")
    print("Lin method error: %0.2f" % (lin_score/num_examples))
    print("Resnick method error: %0.2f" % (res_score/num_examples))
    print("Vector-based method error: %0.2f" % (vec_score/num_examples))
开发者ID:niha-p,项目名称:Natural-Language-Processing,代码行数:48,代码来源:B.py

示例14: lexical_compare

def lexical_compare(lemma_text, lemma_hypothesis):
    """Score how closely a text lemma matches a hypothesis lemma.

    Args:
        lemma_text: lemma taken from the text.
        lemma_hypothesis: lemma (or string) taken from the hypothesis.

    Returns:
        50 when ``lemma_text`` occurs literally (case-insensitively) inside
        ``lemma_hypothesis``; 0 when ``get_index`` reports -1 or either
        word has no synsets; otherwise the sum of the path, Wu-Palmer, Lin
        and Resnik similarities between the selected hypothesis synset and
        the first text synset (scores that are ``None`` are ignored).
    """
    # The lemma is matched as literal text, so characters such as "+" or "("
    # cannot be misread as regular-expression syntax.
    if re.search(re.escape(lemma_text), lemma_hypothesis, re.M | re.I):
        return 50

    hypo_synset = wn.synsets(lemma_hypothesis)
    text_synset = wn.synsets(lemma_text)
    synset_index = get_index(hypo_synset, text_synset)
    if synset_index == -1:
        return 0
    if not hypo_synset or not text_synset:
        return 0

    # Loaded only when a WordNet comparison is actually needed.
    brown_ic = wordnet_ic.ic('ic-brown.dat')
    hypo = hypo_synset[synset_index]
    text = text_synset[0]
    scores = (
        # Structure-based metrics take no information content argument.
        hypo.path_similarity(text),
        hypo.wup_similarity(text),
        hypo.lin_similarity(text, brown_ic),
        hypo.res_similarity(text, brown_ic),
    )
    return sum(score for score in scores if score is not None)
开发者ID:racoder,项目名称:question-answering-nlp,代码行数:16,代码来源:BOW_1.py

示例15: extract_word_clusters

def extract_word_clusters(commentList, commentCount):
    """Cluster synsets by pairwise similarity and count clusters per comment.

    Builds a synset-by-synset similarity table (Lin similarity with the
    Brown information content for equal parts of speech, otherwise the
    larger directional path similarity), clusters the rows with KMeans,
    pretty-prints the clusters, and prints a matrix holding, for each
    comment of the corpus, how many of its items fall in each cluster.

    Args:
        commentList: passed to ``extract_global_bag_of_words``.
        commentCount: not used by the code shown here.
    """
    brown_ic = wordnet_ic.ic('ic-brown.dat')
    _, corpus, global_synsets = extract_global_bag_of_words(commentList, True)
    similarity_dict = {}
    processed = 0
    total_pairs = len(global_synsets)**2

    for syn_out in global_synsets:
        row = similarity_dict[syn_out] = {}
        for syn_in in global_synsets:
            if syn_in.pos() == syn_out.pos():
                score = syn_out.lin_similarity(syn_in, brown_ic)
            else:
                # A missing path score (None) counts as 0 so the row stays
                # numeric and the comparison works on Python 3.
                candidates = [s for s in (wn.path_similarity(syn_out, syn_in),
                                          wn.path_similarity(syn_in, syn_out))
                              if s is not None]
                score = max(candidates) if candidates else 0.0
            row[syn_in] = score

            if processed % 10000 == 0:
                print("{0} synsets processed out of {1} ( {2} %)".format(
                    processed, total_pairs, 100.0 * processed / total_pairs))
            processed += 1

    # Fix one column order so every row vector lists its scores in the same
    # order, independent of dict-view behaviour.
    synsets = list(similarity_dict)
    vectors = [np.array([similarity_dict[syn][other] for other in synsets])
               for syn in synsets]

    # Rule of thumb; the cluster count has to be an integer of at least 1.
    n = max(1, int(sqrt(len(global_synsets) // 2)))
    print("Number of clusters {0}".format(n))
    km_model = KMeans(n_clusters=n)
    km_model.fit(vectors)

    clustering = collections.defaultdict(list)
    for idx, label in enumerate(km_model.labels_):
        clustering[label].append(synsets[idx])

    pprint.pprint(dict(clustering), width=1)

    feature_vector = np.zeros([len(corpus), n])

    for i, comment in enumerate(corpus):
        for w in comment:
            for key, clust in clustering.items():
                if w in clust:
                    feature_vector[i][key] += 1
        if i % 1000 == 0:
            print("{0} comments processed".format(i))

    print(feature_vector)
    '''
开发者ID:DirkBrand,项目名称:Comment-Classification,代码行数:47,代码来源:mainExtractor.py


注:本文中的nltk.corpus.wordnet_ic.ic函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。