

Python distance.levenshtein Method Code Examples

This article collects typical usage examples of the Python distance.levenshtein method. If you are wondering what distance.levenshtein does, how to call it, or how it is used in practice, the curated code examples below should help. You can also explore other usage examples from the distance module.


Below are 6 code examples of the distance.levenshtein method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
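Before the examples, a note on the method itself: distance.levenshtein comes from the third-party distance package (pip install distance) and accepts any pair of sequences, strings as well as lists of tokens. A minimal sketch of its basic behavior:

import distance

# Character-level edit distance between two strings
print(distance.levenshtein("kitten", "sitting"))   # -> 3

# Token-level edit distance between two lists of words
ref = ["the", "cat", "sat"]
hyp = ["the", "cat", "sit"]
print(distance.levenshtein(ref, hyp))              # -> 1 (one substitution)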

Example 1: edit_distance

# Module required: import distance [as alias]
# Alternatively: from distance import levenshtein [as alias]
def edit_distance(references, hypotheses):
    """Computes a Levenshtein-based similarity over a corpus.

    Args:
        references: list of lists of tokens (one reference per example)
        hypotheses: list of lists of tokens (one hypothesis per example)

    Returns:
        1 - normalized Levenshtein distance (higher is better, 1 is perfect)

    """
    d_leven, len_tot = 0, 0
    for ref, hypo in zip(references, hypotheses):
        d_leven += distance.levenshtein(ref, hypo)
        len_tot += float(max(len(ref), len(hypo)))

    return 1. - d_leven / len_tot 
Author: luopeixiang | Project: im2latex | Source: score.py
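A quick sanity check of this scorer on toy token lists (invented here for illustration): one substitution in a five-token hypothesis gives 1 - 1/5 = 0.8.

refs = [["x", "=", "y", "+", "1"]]
hyps = [["x", "=", "y", "-", "1"]]
print(edit_distance(refs, hyps))  # 1 edit over max length 5 -> 0.8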

Example 2: __init__

# Module required: import distance [as alias]
# Alternatively: from distance import levenshtein [as alias]
def __init__(self, col_name, distance_fcn=distance.levenshtein,
             threshold=None):
    """
    Constructs column information.

    :param col_name: name of the column
    :param distance_fcn: distance function
    :param threshold: threshold for clustering
    """
    self.col_name = col_name
    self.distance_fcn = distance_fcn

    if threshold is not None:
        self.threshold = threshold
    elif distance_fcn == distance.levenshtein:
        self.threshold = 3
    else:
        raise ValueError("Requires a threshold if not using levenshtein "
                         "distance")
Author: HoloClean | Project: HoloClean-Legacy-deprecated | Source: col_norm_info.py
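To see the threshold defaulting in action, here is a minimal sketch with a hypothetical host class (the name ColNormInfo and everything outside __init__ are assumptions; only the __init__ above comes from the project):

import distance

class ColNormInfo:  # hypothetical host class for the __init__ above
    def __init__(self, col_name, distance_fcn=distance.levenshtein,
                 threshold=None):
        self.col_name = col_name
        self.distance_fcn = distance_fcn
        if threshold is not None:
            self.threshold = threshold
        elif distance_fcn == distance.levenshtein:
            self.threshold = 3
        else:
            raise ValueError("Requires a threshold if not using levenshtein "
                             "distance")

print(ColNormInfo("city").threshold)          # -> 3 (default for Levenshtein)
print(ColNormInfo("city", distance.hamming,
                  threshold=1).threshold)     # -> 1 (explicit threshold required)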

Example 3: cluster_similar_responses

# Module required: import distance [as alias]
# Alternatively: from distance import levenshtein [as alias]
def cluster_similar_responses(output_path):
    max_count = get_max_socket_message_count(output_path)
    listing = glob.glob(output_path + '*-%s.log' % max_count)

    messages = [open(filename).read() for filename in listing]
    messages = [extract_description_from_message(m) for m in messages]
    messages = np.asarray(messages)

    print()
    print('Clustering %s responses...(this might take a while)' % len(messages))
    print()

    lev_similarity = -1 * np.array([[distance.levenshtein(m1, m2) for m1 in messages] for m2 in messages])

    affprop = sklearn.cluster.AffinityPropagation(affinity='precomputed',
                                                  damping=0.5)
    affprop.fit(lev_similarity)

    print('Generated clusters:')
    print()

    for cluster_id in np.unique(affprop.labels_):
        exemplar = messages[affprop.cluster_centers_indices_[cluster_id]]
        cluster = np.unique(messages[np.nonzero(affprop.labels_ == cluster_id)])
        cluster_str = ', '.join(cluster)
        print('-' * 80)
        print(' - *%s:* %s' % (exemplar, cluster_str))
        print('-' * 80)
        print() 
Author: andresriancho | Project: websocket-fuzzer | Source: analyze-output.py
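AffinityPropagation expects similarities (higher means more alike), which is why the pairwise distances are negated. A self-contained sketch of the same idea on a few short strings (all inputs invented for illustration; recent scikit-learn versions may also want a random_state argument):

import numpy as np
import sklearn.cluster
import distance

words = np.asarray(["error 404", "error 403", "welcome", "welcom3"])
lev_similarity = -1 * np.array(
    [[distance.levenshtein(a, b) for a in words] for b in words])

affprop = sklearn.cluster.AffinityPropagation(affinity='precomputed',
                                              damping=0.5)
affprop.fit(lev_similarity)

for cluster_id in np.unique(affprop.labels_):
    exemplar = words[affprop.cluster_centers_indices_[cluster_id]]
    members = words[affprop.labels_ == cluster_id]
    print(exemplar, '->', ', '.join(members))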

Example 4: eval

# Module required: import distance [as alias]
# Alternatively: from distance import levenshtein [as alias]
def eval(): 
    # Load graph
    g = Graph(mode="test")
    print("Graph loaded")

    # Load batch
    _Y = load_data(mode="test")

    X = np.zeros((len(_Y), hp.maxlen))
    Y = np.zeros((len(_Y), hp.maxlen))
    for i, y in enumerate(_Y):
        y = np.fromstring(y, np.int32)
        Y[i][:len(y)] = y
        np.random.shuffle(y)
        X[i][:len(y)] = y

    word2idx, idx2word = g.word2idx, g.idx2word
     
    # Start session         
    with g.graph.as_default():    
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))

            # Get model
            mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]  # model name

            # inference
            if not os.path.exists('results'): os.mkdir('results')
            with codecs.open("results/" + mname, "w", "utf-8") as fout:
                num_words, total_edit_distance = 0, 0
                for i in range(0, len(Y), hp.batch_size):
                    ### Get mini-batches
                    x = X[i:i+hp.batch_size]
                    y = Y[i:i+hp.batch_size]

                    ### Autoregressive inference
                    preds = np.zeros((hp.batch_size, hp.maxlen), np.int32)
                    for j in range(hp.maxlen):
                        _preds = sess.run(g.preds, {g.x: x, g.y: preds})
                        preds[:, j] = _preds[:, j]

                    for xx, yy, pred in zip(x, y, preds):  # sentence-wise
                        inputs = " ".join(idx2word[idx] for idx in xx).replace("_", "").strip()
                        expected = " ".join(idx2word[idx] for idx in yy).replace("_", "").strip()
                        got = " ".join(idx2word[idx] for idx in pred[:len(inputs.split())])

                        edit_distance = distance.levenshtein(expected.split(), got.split())
                        total_edit_distance += edit_distance
                        num_words += len(expected.split())

                        fout.write(u"Inputs  : {}\n".format(inputs))
                        fout.write(u"Expected: {}\n".format(expected))
                        fout.write(u"Got     : {}\n".format(got))
                        fout.write(u"WER     : {}\n\n".format(edit_distance))
                fout.write(u"Total WER: {}/{}={}\n".format(total_edit_distance,
                                                           num_words,
                                                        round(float(total_edit_distance) / num_words, 2))) 
Author: Kyubyong | Project: word_ordering | Source: eval.py
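The WER bookkeeping above is just word-level Levenshtein edits divided by the reference word count. A standalone sketch of that metric, independent of the TensorFlow graph (the function name and sentences are made up):

import distance

def wer(expected, got):
    # Word error rate: word-level edit distance / number of reference words
    ref, hyp = expected.split(), got.split()
    return distance.levenshtein(ref, hyp) / float(len(ref))

print(wer("the cat sat on the mat", "the cat sit on mat"))  # 2 edits / 6 words ~ 0.33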

Example 5: img_edit_distance

# Module required: import distance [as alias]
# Alternatively: from distance import levenshtein [as alias]
def img_edit_distance(img1, img2):
    """Computes a Levenshtein distance between two images.
    (From Harvard's NLP github)

    Slices the images into columns and considers each column as a character.

    Args:
        img1, img2: np arrays of shape (H, W, 1)

    Returns:
        column-wise Levenshtein distance
        max length of the two column sequences

    """
    # load the image (H, W)
    img1, img2 = img1[:, :, 0], img2[:, :, 0]

    # transpose and convert to 0 or 1
    img1 = np.transpose(img1)
    h1 = img1.shape[1]
    w1 = img1.shape[0]
    img1 = (img1 <= 128).astype(np.uint8)

    img2 = np.transpose(img2)
    h2 = img2.shape[1]
    w2 = img2.shape[0]
    img2 = (img2 <= 128).astype(np.uint8)

    # create binaries for each column
    if h1 == h2:
        seq1 = [''.join([str(i) for i in item]) for item in img1]
        seq2 = [''.join([str(i) for i in item]) for item in img2]
    elif h1 > h2:
        seq1 = [''.join([str(i) for i in item]) for item in img1]
        seq2 = [''.join([str(i) for i in item])+''.join(['0']*(h1-h2)) for
                item in img2]
    else:
        seq1 = [''.join([str(i) for i in item])+''.join(['0']*(h2-h1)) for
                item in img1]
        seq2 = [''.join([str(i) for i in item]) for item in img2]

    # convert each column binary into int
    seq1_int = [int(item, 2) for item in seq1]
    seq2_int = [int(item, 2) for item in seq2]

    # distance
    l_dist = distance.levenshtein(seq1_int, seq2_int)
    length = float(max(len(seq1_int), len(seq2_int)))

    return l_dist, length 
Author: guillaumegenthial | Project: im2latex | Source: image.py
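A toy check with two tiny synthetic images (shapes and pixel values invented for illustration): two all-white images differ in exactly one column once we blacken it, so the column-wise distance is 1 over 5 columns.

import numpy as np

img_a = np.full((8, 5, 1), 255, dtype=np.uint8)   # all-white 8x5 image
img_b = np.full((8, 5, 1), 255, dtype=np.uint8)
img_b[:, 2, :] = 0                                # blacken the middle column

l_dist, length = img_edit_distance(img_a, img_b)
print(l_dist, length)  # -> 1 5.0 (one differing column out of five)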

Example 6: val

# Module required: import distance [as alias]
# Alternatively: from distance import levenshtein [as alias]
def val(net, test_dataset, criterion, max_iter=2):
    print('Start val')

    for p in net.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(
        test_dataset,  batch_size=opt.batchSize, num_workers=int(opt.workers),
        sampler=dataset.randomSequentialSampler(test_dataset, opt.batchSize),
        collate_fn=dataset.alignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio=opt.keep_ratio))
    val_iter = iter(data_loader)

    n_correct = 0
    loss_avg = utils.averager()
    test_distance = 0
    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = next(val_iter)
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        if ifUnicode:
            cpu_texts = [clean_txt(tx.decode('utf-8')) for tx in cpu_texts]
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        preds = net(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        _, preds = preds.max(2)
       # preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred.strip() == target.strip():
                n_correct += 1
            # print(distance.levenshtein(pred.strip(), target.strip()))
            test_distance += distance.nlevenshtein(pred.strip(), target.strip(), method=2)
    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):

        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
    accuracy = n_correct / float(max_iter * opt.batchSize)
    test_distance = test_distance / float(max_iter * opt.batchSize)
    testLoss = loss_avg.val()
    # print('Test loss: %f, accuracy: %f' % (testLoss, accuracy))
    return testLoss, accuracy, test_distance
Author: phybrain | Project: efficientdensenet_crnn | Source: train.py
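Note the switch from distance.levenshtein (a raw edit count, as in the commented-out print) to distance.nlevenshtein, which returns a distance normalized to [0, 1]. A minimal sketch; per the distance package docs, method=1 normalizes by the length of the longer sequence and method=2 by the length of the longest alignment, but verify the exact semantics against your installed version:

import distance

print(distance.levenshtein("abcd", "abce"))              # -> 1 raw edit
print(distance.nlevenshtein("abcd", "abce", method=1))   # -> 0.25
print(distance.nlevenshtein("abcd", "abce", method=2))   # -> 0.25 here; the two
                                                         #    methods differ when
                                                         #    several alignments exist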


Note: The distance.levenshtein examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their respective developers, and the copyright of the source code belongs to the original authors. Please consult each project's license before using or redistributing the code; do not reproduce this article without permission.