This page collects typical usage examples of the Note.read method from the Python module note. If you are wondering how to use Python's Note.read method, or want concrete examples of Note.read in action, the curated code samples below may help. You can also explore further usage examples of the containing class, note.Note.
The following shows 6 code examples of the Note.read method, sorted by popularity by default.
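The Note class itself is not shown on this page. As orientation for the examples below, here is a minimal, hypothetical sketch of the interface they rely on (read, write, sid_list, text_list, label_list); the tab-separated file layout and the internal data field are assumptions, not the project's actual implementation. (Example 5 uses a variant of the class with getIDs() and getTweets() accessors.)

# Hypothetical sketch of the Note interface -- the real note.py may differ.
# Assumes one labeled example per line in a TSV file: sid <tab> label <tab> text.
class Note:

    def __init__(self):
        self.data = []  # list of (sid, label, text) tuples

    def read(self, path):
        # Parse one labeled example per line from a tab-separated file
        with open(path) as f:
            for line in f:
                sid, label, text = line.rstrip('\n').split('\t', 2)
                self.data.append((sid, label, text))

    def sid_list(self):
        return [sid for sid, _, _ in self.data]

    def label_list(self):
        return [label for _, label, _ in self.data]

    def text_list(self):
        return [text for _, _, text in self.data]

    def write(self, path, labels):
        # Write predicted labels back out in the same TSV layout
        with open(path, 'w') as f:
            for (sid, _, text), label in zip(self.data, labels):
                f.write('%s\t%s\t%s\n' % (sid, label, text))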
Example 1: main

# Required import: from note import Note [as alias]
# Or: from note.Note import read [as alias]
import argparse, glob, os, pickle, sys

# predict() is a project helper; a sketch follows this example.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i",
                        dest="txt",
                        help="The files to be predicted on (e.g. data/demo.tsv)")
    parser.add_argument("-m",
                        dest="model",
                        help="The file to store the pickled model (e.g. models/demo.model)")
    parser.add_argument("-o",
                        dest="out",
                        help="The directory to output predicted files (e.g. data/predictions)")

    # Parse the command line arguments
    args = parser.parse_args()
    if (not args.txt) or (not args.model) or (not args.out):
        parser.print_help()
        sys.exit(1)

    # Decode arguments
    txt_files = glob.glob(args.txt)
    model_path = args.model
    out_dir = args.out

    # Available data
    if not txt_files:
        print('no predicting files :(')
        sys.exit(1)

    # Load model
    with open(model_path + '.model', 'rb') as fid:
        clf = pickle.load(fid)
    with open(model_path + '.dict', 'rb') as fid:
        vec = pickle.load(fid)

    # Predict labels for each file
    for pfile in txt_files:
        note = Note()
        note.read(pfile)

        XNotNormalized = list(zip(note.sid_list(), note.text_list()))
        X = XNotNormalized
        #X = normalize_data_matrix(XNotNormalized)

        # Predict
        labels = predict(X, clf, vec)

        # output predictions
        outfile = os.path.join(out_dir, os.path.basename(pfile))
        note.write(outfile, labels)
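predict() above is a project helper that is not defined on this page. Given that the pickled pair is loaded as a classifier plus a vectorizer, a minimal sketch under those assumptions (scikit-learn estimators and a toy bag-of-words featurizer, both hypothetical) could be:

def features_from_tweet(sid, text):
    # Hypothetical toy featurizer; the real project presumably uses a
    # richer feature set (lexicons, n-grams, etc.).
    return {'tok=%s' % tok.lower(): 1 for tok in text.split()}

def predict(X, clf, vec):
    # X is a list of (sid, text) pairs; vec is assumed to be a fitted
    # DictVectorizer and clf a fitted scikit-learn classifier, as
    # unpickled above.
    feats = [features_from_tweet(sid, text) for sid, text in X]
    return clf.predict(vec.transform(feats))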
Example 2: main

# Required import: from note import Note [as alias]
# Or: from note.Note import read [as alias]
import argparse, glob, os, sys

# train() and BASE_DIR are project-level; a sketch of train() follows
# this example.
def main():
    """
    main()

    Purpose: This program builds an SVM model for Twitter classification
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-t",
                        dest="txt",
                        help="The files that contain the training examples",
                        default=os.path.join(BASE_DIR, 'data/twitter-train-cleansed-B.tsv'))
    parser.add_argument("-m",
                        dest="model",
                        help="The file to store the pickled model",
                        default=os.path.join(BASE_DIR, 'models/awesome'))
    parser.add_argument("-g",
                        dest="grid",
                        help="Perform Grid Search",
                        action='store_true',
                        default=False)

    # Parse the command line arguments
    args = parser.parse_args()
    grid = args.grid

    # Decode arguments
    txt_files = glob.glob(args.txt)
    model_path = args.model
    if not txt_files:
        print('no training files :(')
        sys.exit(1)

    # Read the data into a Note object
    notes = []
    for txt in txt_files:
        note_tmp = Note()
        note_tmp.read(txt)
        notes.append(note_tmp)

    # Get data from notes
    X = []
    Y = []
    for n in notes:
        X += zip(n.sid_list(), n.text_list())
        Y += n.label_list()

    # Build model
    train(X, Y, model_path, grid)
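train() is likewise a project function that is not shown here. A minimal sketch consistent with its call signature, and with the '.model' / '.dict' pickle pair that Example 1 loads, might look like the following; the LinearSVC model, the grid over C, and the toy featurizer are all assumptions rather than the project's actual choices:

import pickle
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC

def train(X, Y, model_path, grid=False):
    # X is a list of (sid, text) pairs, Y the matching label list.
    # Same toy bag-of-words featurizer as in the predict() sketch above.
    feats = [{'tok=%s' % tok.lower(): 1 for tok in text.split()}
             for sid, text in X]
    vec = DictVectorizer()
    X_mat = vec.fit_transform(feats)

    clf = LinearSVC()
    if grid:
        # Small grid search over the regularization strength
        clf = GridSearchCV(LinearSVC(), {'C': [0.01, 0.1, 1, 10]}, cv=5)
    clf.fit(X_mat, Y)

    # Persist the classifier and vectorizer side by side, matching the
    # '.model' / '.dict' naming the prediction script in Example 1 expects.
    with open(model_path + '.model', 'wb') as f:
        pickle.dump(clf, f)
    with open(model_path + '.dict', 'wb') as f:
        pickle.dump(vec, f)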
Example 3: main

# Required import: from note import Note [as alias]
# Or: from note.Note import read [as alias]
import argparse, glob, sys

# train() is a project helper; see the sketch after Example 2.
def main():
    """
    main()

    Purpose: This program builds an SVM model for Twitter classification
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-t",
                        dest="txt",
                        help="Files that contain the training examples (e.g. data/demo.tsv)")
    parser.add_argument("-m",
                        dest="model",
                        help="The file to store the pickled model (e.g. models/demo.model)")

    # Parse the command line arguments
    args = parser.parse_args()
    if (not args.txt) or (not args.model):
        parser.print_help()
        sys.exit(1)

    # Decode arguments
    txt_files = glob.glob(args.txt)
    model_path = args.model
    if not txt_files:
        print('no training files :(')
        sys.exit(1)

    # Read the data into a Note object
    notes = []
    for txt in txt_files:
        note_tmp = Note()
        note_tmp.read(txt)
        notes.append(note_tmp)

    # Get data from notes
    X = []
    Y = []
    for n in notes:
        X += zip(n.sid_list(), n.text_list())
        Y += n.label_list()

    # Build model
    train(X, Y, model_path)
Example 4: main

# Required import: from note import Note [as alias]
# Or: from note.Note import read [as alias]
import argparse, glob, os, sys

# helper, evaluate(), error_analysis(), and BASE_DIR are project-level;
# a sketch of evaluate() follows this example.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-t",
                        help="Files containing predictions",
                        dest="txt",
                        default=os.path.join(BASE_DIR, 'data/predictions/*'))
    parser.add_argument("-r",
                        help="The directory that contains the reference gold-standard concept files",
                        dest="ref",
                        default=os.path.join(BASE_DIR, 'data'))
    parser.add_argument("-o",
                        help="Write the evaluation to a file rather than STDOUT",
                        dest="output",
                        default=None)
    parser.add_argument("-e",
                        help="Do error analysis",
                        dest="error",
                        action='store_true')

    # Parse command line arguments
    args = parser.parse_args()

    # Is an output destination specified?
    if args.output:
        args.output = open(args.output, "w")
    else:
        args.output = sys.stdout

    txt_files = glob.glob(args.txt)
    txt_files_map = helper.map_files(txt_files)

    ref_directory = args.ref
    ref_files = os.listdir(ref_directory)
    ref_files = [os.path.join(args.ref, f) for f in ref_files]
    ref_files_map = helper.map_files(ref_files)

    # Pair each prediction file with its gold-standard counterpart
    files = []
    for k in txt_files_map:
        if k in ref_files_map:
            files.append((txt_files_map[k], ref_files_map[k]))
    print(files)

    # Useful for error analysis
    text = []

    # One list of all labels
    pred_labels = []
    gold_labels = []

    # txt <- predicted labels
    # ref <- actual labels
    for txt, ref in files:
        # A note that represents the model's predictions
        pnote = Note()
        pnote.read(txt)

        # A note that holds the actual concept labels
        gnote = Note()
        gnote.read(ref)

        # Accumulate all predictions
        pred_labels += pnote.label_list()
        gold_labels += gnote.label_list()

        # Collect text for error analysis
        text += pnote.text_list()

    # Compute results
    evaluate(pred_labels, gold_labels, out=args.output)

    # Error analysis
    if args.error:
        print('\n\n\n')
        error_analysis(text, pred_labels, gold_labels)
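evaluate() and error_analysis() are project helpers that this page does not define. As a rough, hypothetical sketch of what evaluate() could do given its call signature (two parallel label lists plus an output stream), here is a per-label precision/recall/F1 report:

import sys

def evaluate(pred_labels, gold_labels, out=sys.stdout):
    # Per-label precision / recall / F1 over two parallel label lists.
    labels = sorted(set(gold_labels) | set(pred_labels))
    pairs = list(zip(pred_labels, gold_labels))
    for label in labels:
        tp = sum(1 for p, g in pairs if p == g == label)
        fp = sum(1 for p, g in pairs if p == label != g)
        fn = sum(1 for p, g in pairs if g == label != p)
        precision = tp / (tp + fp) if tp + fp else 0.0
        recall = tp / (tp + fn) if tp + fn else 0.0
        f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
        out.write('%-10s  P=%.3f  R=%.3f  F1=%.3f\n' % (label, precision, recall, f1))
    accuracy = sum(p == g for p, g in pairs) / len(pairs)
    out.write('accuracy    %.3f\n' % accuracy)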
Example 5: main

# Required import: from note import Note [as alias]
# Or: from note.Note import read [as alias]
import argparse, glob, os, sys

# predict_using_model() and BASE_DIR are project-level; a sketch of
# predict_using_model() follows this example.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i",
                        dest="txt",
                        help="The files to be predicted on",
                        default=os.path.join(BASE_DIR, 'data/test-gold-A.txt')
                        #default=os.path.join(BASE_DIR, 'data/sms-test-gold-A.tsv')
                        )
    parser.add_argument("-m",
                        dest="model",
                        help="The file to store the pickled model",
                        default=os.path.join(BASE_DIR, 'models/awesome'))
    parser.add_argument("-o",
                        dest="out",
                        help="The directory to output predicted files",
                        default=os.path.join(BASE_DIR, 'data/predictions'))

    # Parse the command line arguments
    args = parser.parse_args()

    # Decode arguments
    txt_files = glob.glob(args.txt)
    model_path = args.model
    out_dir = args.out

    # Available data
    if not txt_files:
        print('no predicting files :(')
        sys.exit(1)

    # Predict
    for txt_file in txt_files:
        note = Note()
        note.read(txt_file)

        X = list(zip(note.getIDs(), note.getTweets()))
        labels, confidences = predict_using_model(X, model_path, out_dir)

        '''
        # Confident predictions: keep a prediction only when its average
        # score margin over the competing classes exceeds 1
        labels_map = {'positive': 0, 'negative': 1, 'neutral': 2}
        proxy = []
        for t, l, c in zip(note.getTweets(), labels, confidences):
            conf = []
            for i in range(len(labels_map)):
                if i == labels_map[l]:
                    continue
                conf.append(c[labels_map[l]] - c[i])
            avg = sum(conf) / len(conf)
            start, end, tweet = t
            if avg > 1:
                #print(tweet[start:end+1])
                #print(l)
                #print(c)
                #print()
                #proxy.append(l)
                proxy.append('poop')
            else:
                print('not conf')
                print(tweet[start:end+1])
                print(l)
                print(c)
                print()
                proxy.append(l)
                #proxy.append('poop')
        '''

        # output predictions
        outfile = os.path.join(out_dir, os.path.basename(txt_file))
        note.write(outfile, labels)
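predict_using_model() is another project helper that is not shown. Judging from the commented-out block, it returns one label and one vector of per-class scores per tweet, and getTweets() yields (start, end, text) triples; under those assumptions, a minimal sketch might be the following (the featurizer is the same toy stand-in used earlier, and out_dir is accepted only to match the call site):

import pickle

def predict_using_model(X, model_path, out_dir):
    # X is a list of (id, (start, end, text)) pairs; out_dir is unused in
    # this sketch and kept only for signature compatibility.
    with open(model_path + '.model', 'rb') as f:
        clf = pickle.load(f)
    with open(model_path + '.dict', 'rb') as f:
        vec = pickle.load(f)

    # Featurize the annotated span of each tweet (toy bag of words again)
    feats = []
    for _, (start, end, text) in X:
        span = text[start:end + 1]
        feats.append({'tok=%s' % tok.lower(): 1 for tok in span.split()})
    mat = vec.transform(feats)

    labels = clf.predict(mat)
    # decision_function yields one score per class, which is what the
    # commented-out margin computation above indexes into.
    confidences = clf.decision_function(mat)
    return labels, confidences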
Example 6: main

# Required import: from note import Note [as alias]
# Or: from note.Note import read [as alias]
import argparse, glob, os, sys

# train, predict, evaluate, FeaturesWrapper, labels_map, cv_partitions,
# add_matrix, and BASE_DIR are project-level; sketches of cv_partitions()
# and add_matrix() follow this example.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-t",
                        dest="txt",
                        help="The files that contain the training examples",
                        default=os.path.join(BASE_DIR, 'data/annotated.txt'))
    parser.add_argument("-n",
                        dest="length",
                        help="Number of data points to use (-1 for all)",
                        default=-1)
    parser.add_argument("-f",
                        dest="folds",
                        help="Number of folds to partition data into",
                        default=10)
    parser.add_argument("-r",
                        dest="random",
                        help="Random shuffling of input data.",
                        action='store_true',
                        default=False)

    # Parse the command line arguments
    args = parser.parse_args()

    # Decode arguments
    txt_files = glob.glob(args.txt)
    length = int(args.length)
    num_folds = int(args.folds)

    # Get data from files
    if not txt_files:
        print('no training files :(')
        sys.exit(1)

    notes = []
    for txt in txt_files:
        note_tmp = Note()
        note_tmp.read(txt)
        notes.append(note_tmp)

    # List of all data points and their labels
    X = []
    Y = []
    for n in notes:
        X += list(zip(n.sid_list(), n.text_list()))
        Y += n.label_list()

    # Limit length (-1 means use all of the data; slicing with -1 directly
    # would silently drop the last data point)
    if length > 0:
        X = X[:length]
        Y = Y[:length]

    # Build confusion matrix
    confusion = [[0 for i in labels_map] for j in labels_map]

    # Instantiate the feature object once (rebuilding it on every fold
    # would really slow down cross-validation)
    feat_obj = FeaturesWrapper()

    # Extract features once
    feats = train.extract_features(X, feat_obj)
    data = list(zip(feats, Y))

    # For each held-out test set
    i = 1
    for training, testing in cv_partitions(data, num_folds=num_folds, shuffle=args.random):

        # Users like to see progress
        print('Fold: %d of %d' % (i, num_folds))
        i += 1

        # Train on the non-held-out data
        X_train = [d[0] for d in training]
        Y_train = [d[1] for d in training]
        vec, clf = train.train_vectorized(X_train, Y_train, model_path=None, grid=False)

        # Predict on the held-out fold
        X_test = [d[0] for d in testing]
        Y_test = [d[1] for d in testing]
        labels = predict.predict_vectorized(X_test, clf, vec)

        # Accumulate the confusion matrix for the held-out data
        testing_confusion = evaluate.create_confusion(labels, Y_test)
        confusion = add_matrix(confusion, testing_confusion)
#......... (remaining code omitted) .........
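cv_partitions() and add_matrix() are project helpers as well. Minimal sketches consistent with their call sites above, a generator yielding (training, testing) splits and an element-wise sum of two confusion matrices, might be:

import random

def cv_partitions(data, num_folds=10, shuffle=False):
    # Yield (training, testing) pairs, holding out a different fold each time.
    data = list(data)
    if shuffle:
        random.shuffle(data)
    for i in range(num_folds):
        testing = data[i::num_folds]
        training = [d for j, d in enumerate(data) if j % num_folds != i]
        yield training, testing

def add_matrix(a, b):
    # Element-wise sum of two equal-shaped confusion matrices (lists of lists)
    return [[x + y for x, y in zip(ra, rb)] for ra, rb in zip(a, b)]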