This page collects typical usage examples of the Note.read method from the Python module note. If you are wondering how to use Python's Note.read method, or want concrete examples of Note.read in action, the curated code samples below may help. You can also explore further usage examples of the containing class, note.Note.
The following shows 6 code examples of the Note.read method, sorted by popularity by default.
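The Note class itself is not shown on this page. As orientation for the examples below, here is a minimal, hypothetical sketch of the interface they rely on (read, write, sid_list, text_list, label_list); the tab-separated file layout and the internal data field are assumptions, not the project's actual implementation. (Example 5 uses a variant of the class with getIDs() and getTweets() accessors.)

# Hypothetical sketch of the Note interface -- the real note.py may differ.
# Assumes one labeled example per line in a TSV file: sid <tab> label <tab> text.
class Note:

    def __init__(self):
        self.data = []  # list of (sid, label, text) tuples

    def read(self, path):
        # Parse one labeled example per line from a tab-separated file
        with open(path) as f:
            for line in f:
                sid, label, text = line.rstrip('\n').split('\t', 2)
                self.data.append((sid, label, text))

    def sid_list(self):
        return [sid for sid, _, _ in self.data]

    def label_list(self):
        return [label for _, label, _ in self.data]

    def text_list(self):
        return [text for _, _, text in self.data]

    def write(self, path, labels):
        # Write predicted labels back out in the same TSV layout
        with open(path, 'w') as f:
            for (sid, _, text), label in zip(self.data, labels):
                f.write('%s\t%s\t%s\n' % (sid, label, text))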
Example 1: main

# Required import: from note import Note [as alias]
# Or: from note.Note import read [as alias]
import argparse, glob, os, pickle, sys

# predict() is a project helper; a sketch follows this example.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i",
                        dest="txt",
                        help="The files to be predicted on (e.g. data/demo.tsv)")
    parser.add_argument("-m",
                        dest="model",
                        help="The file to store the pickled model (e.g. models/demo.model)")
    parser.add_argument("-o",
                        dest="out",
                        help="The directory to output predicted files (e.g. data/predictions)")

    # Parse the command line arguments
    args = parser.parse_args()
    if (not args.txt) or (not args.model) or (not args.out):
        parser.print_help()
        sys.exit(1)

    # Decode arguments
    txt_files = glob.glob(args.txt)
    model_path = args.model
    out_dir = args.out

    # Available data
    if not txt_files:
        print('no predicting files :(')
        sys.exit(1)

    # Load model
    with open(model_path + '.model', 'rb') as fid:
        clf = pickle.load(fid)
    with open(model_path + '.dict', 'rb') as fid:
        vec = pickle.load(fid)

    # Predict labels for each file
    for pfile in txt_files:
        note = Note()
        note.read(pfile)

        XNotNormalized = list(zip(note.sid_list(), note.text_list()))
        X = XNotNormalized
        #X = normalize_data_matrix(XNotNormalized)

        # Predict
        labels = predict(X, clf, vec)

        # output predictions
        outfile = os.path.join(out_dir, os.path.basename(pfile))
        note.write(outfile, labels)
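predict() above is a project helper that is not defined on this page. Given that the pickled pair is loaded as a classifier plus a vectorizer, a minimal sketch under those assumptions (scikit-learn estimators and a toy bag-of-words featurizer, both hypothetical) could be:

def features_from_tweet(sid, text):
    # Hypothetical toy featurizer; the real project presumably uses a
    # richer feature set (lexicons, n-grams, etc.).
    return {'tok=%s' % tok.lower(): 1 for tok in text.split()}

def predict(X, clf, vec):
    # X is a list of (sid, text) pairs; vec is assumed to be a fitted
    # DictVectorizer and clf a fitted scikit-learn classifier, as
    # unpickled above.
    feats = [features_from_tweet(sid, text) for sid, text in X]
    return clf.predict(vec.transform(feats))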
Example 2: main

# Required import: from note import Note [as alias]
# Or: from note.Note import read [as alias]
import argparse, glob, os, sys

# train() and BASE_DIR are project-level; a sketch of train() follows
# this example.
def main():
    """
    main()

    Purpose: This program builds an SVM model for Twitter classification
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-t",
                        dest="txt",
                        help="The files that contain the training examples",
                        default=os.path.join(BASE_DIR, 'data/twitter-train-cleansed-B.tsv'))
    parser.add_argument("-m",
                        dest="model",
                        help="The file to store the pickled model",
                        default=os.path.join(BASE_DIR, 'models/awesome'))
    parser.add_argument("-g",
                        dest="grid",
                        help="Perform Grid Search",
                        action='store_true',
                        default=False)

    # Parse the command line arguments
    args = parser.parse_args()
    grid = args.grid

    # Decode arguments
    txt_files = glob.glob(args.txt)
    model_path = args.model
    if not txt_files:
        print('no training files :(')
        sys.exit(1)

    # Read the data into a Note object
    notes = []
    for txt in txt_files:
        note_tmp = Note()
        note_tmp.read(txt)
        notes.append(note_tmp)

    # Get data from notes
    X = []
    Y = []
    for n in notes:
        X += zip(n.sid_list(), n.text_list())
        Y += n.label_list()

    # Build model
    train(X, Y, model_path, grid)
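train() is likewise a project function that is not shown here. A minimal sketch consistent with its call signature, and with the '.model' / '.dict' pickle pair that Example 1 loads, might look like the following; the LinearSVC model, the grid over C, and the toy featurizer are all assumptions rather than the project's actual choices:

import pickle
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC

def train(X, Y, model_path, grid=False):
    # X is a list of (sid, text) pairs, Y the matching label list.
    # Same toy bag-of-words featurizer as in the predict() sketch above.
    feats = [{'tok=%s' % tok.lower(): 1 for tok in text.split()}
             for sid, text in X]
    vec = DictVectorizer()
    X_mat = vec.fit_transform(feats)

    clf = LinearSVC()
    if grid:
        # Small grid search over the regularization strength
        clf = GridSearchCV(LinearSVC(), {'C': [0.01, 0.1, 1, 10]}, cv=5)
    clf.fit(X_mat, Y)

    # Persist the classifier and vectorizer side by side, matching the
    # '.model' / '.dict' naming the prediction script in Example 1 expects.
    with open(model_path + '.model', 'wb') as f:
        pickle.dump(clf, f)
    with open(model_path + '.dict', 'wb') as f:
        pickle.dump(vec, f)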
Example 3: main

# Required import: from note import Note [as alias]
# Or: from note.Note import read [as alias]
import argparse, glob, sys

# train() is a project helper; see the sketch after Example 2.
def main():
    """
    main()

    Purpose: This program builds an SVM model for Twitter classification
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-t",
                        dest="txt",
                        help="Files that contain the training examples (e.g. data/demo.tsv)")
    parser.add_argument("-m",
                        dest="model",
                        help="The file to store the pickled model (e.g. models/demo.model)")

    # Parse the command line arguments
    args = parser.parse_args()
    if (not args.txt) or (not args.model):
        parser.print_help()
        sys.exit(1)

    # Decode arguments
    txt_files = glob.glob(args.txt)
    model_path = args.model
    if not txt_files:
        print('no training files :(')
        sys.exit(1)

    # Read the data into a Note object
    notes = []
    for txt in txt_files:
        note_tmp = Note()
        note_tmp.read(txt)
        notes.append(note_tmp)

    # Get data from notes
    X = []
    Y = []
    for n in notes:
        X += zip(n.sid_list(), n.text_list())
        Y += n.label_list()

    # Build model
    train(X, Y, model_path)
Example 4: main

# Required import: from note import Note [as alias]
# Or: from note.Note import read [as alias]
import argparse, glob, os, sys

# helper, evaluate(), error_analysis(), and BASE_DIR are project-level;
# a sketch of evaluate() follows this example.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-t",
                        help="Files containing predictions",
                        dest="txt",
                        default=os.path.join(BASE_DIR, 'data/predictions/*'))
    parser.add_argument("-r",
                        help="The directory that contains the reference gold-standard concept files",
                        dest="ref",
                        default=os.path.join(BASE_DIR, 'data'))
    parser.add_argument("-o",
                        help="Write the evaluation to a file rather than STDOUT",
                        dest="output",
                        default=None)
    parser.add_argument("-e",
                        help="Do error analysis",
                        dest="error",
                        action='store_true')

    # Parse command line arguments
    args = parser.parse_args()

    # Is an output destination specified?
    if args.output:
        args.output = open(args.output, "w")
    else:
        args.output = sys.stdout

    txt_files = glob.glob(args.txt)
    txt_files_map = helper.map_files(txt_files)

    ref_directory = args.ref
    ref_files = os.listdir(ref_directory)
    ref_files = [os.path.join(args.ref, f) for f in ref_files]
    ref_files_map = helper.map_files(ref_files)

    # Pair each prediction file with its gold-standard counterpart
    files = []
    for k in txt_files_map:
        if k in ref_files_map:
            files.append((txt_files_map[k], ref_files_map[k]))
    print(files)

    # Useful for error analysis
    text = []

    # One list of all labels
    pred_labels = []
    gold_labels = []

    # txt <- predicted labels
    # ref <- actual labels
    for txt, ref in files:
        # A note that represents the model's predictions
        pnote = Note()
        pnote.read(txt)

        # A note that holds the actual concept labels
        gnote = Note()
        gnote.read(ref)

        # Accumulate all predictions
        pred_labels += pnote.label_list()
        gold_labels += gnote.label_list()

        # Collect text for error analysis
        text += pnote.text_list()

    # Compute results
    evaluate(pred_labels, gold_labels, out=args.output)

    # Error analysis
    if args.error:
        print('\n\n\n')
        error_analysis(text, pred_labels, gold_labels)
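evaluate() and error_analysis() are project helpers that this page does not define. As a rough, hypothetical sketch of what evaluate() could do given its call signature (two parallel label lists plus an output stream), here is a per-label precision/recall/F1 report:

import sys

def evaluate(pred_labels, gold_labels, out=sys.stdout):
    # Per-label precision / recall / F1 over two parallel label lists.
    labels = sorted(set(gold_labels) | set(pred_labels))
    pairs = list(zip(pred_labels, gold_labels))
    for label in labels:
        tp = sum(1 for p, g in pairs if p == g == label)
        fp = sum(1 for p, g in pairs if p == label != g)
        fn = sum(1 for p, g in pairs if g == label != p)
        precision = tp / (tp + fp) if tp + fp else 0.0
        recall = tp / (tp + fn) if tp + fn else 0.0
        f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
        out.write('%-10s  P=%.3f  R=%.3f  F1=%.3f\n' % (label, precision, recall, f1))
    accuracy = sum(p == g for p, g in pairs) / len(pairs)
    out.write('accuracy    %.3f\n' % accuracy)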
Example 5: main

# Required import: from note import Note [as alias]
# Or: from note.Note import read [as alias]
import argparse, glob, os, sys

# predict_using_model() and BASE_DIR are project-level; a sketch of
# predict_using_model() follows this example.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i",
                        dest="txt",
                        help="The files to be predicted on",
                        default=os.path.join(BASE_DIR, 'data/test-gold-A.txt')
                        #default=os.path.join(BASE_DIR, 'data/sms-test-gold-A.tsv')
                        )
    parser.add_argument("-m",
                        dest="model",
                        help="The file to store the pickled model",
                        default=os.path.join(BASE_DIR, 'models/awesome'))
    parser.add_argument("-o",
                        dest="out",
                        help="The directory to output predicted files",
                        default=os.path.join(BASE_DIR, 'data/predictions'))

    # Parse the command line arguments
    args = parser.parse_args()

    # Decode arguments
    txt_files = glob.glob(args.txt)
    model_path = args.model
    out_dir = args.out

    # Available data
    if not txt_files:
        print('no predicting files :(')
        sys.exit(1)

    # Predict
    for txt_file in txt_files:
        note = Note()
        note.read(txt_file)

        X = list(zip(note.getIDs(), note.getTweets()))
        labels, confidences = predict_using_model(X, model_path, out_dir)

        '''
        # Confident predictions: keep a prediction only when its average
        # score margin over the competing classes exceeds 1
        labels_map = {'positive': 0, 'negative': 1, 'neutral': 2}
        proxy = []
        for t, l, c in zip(note.getTweets(), labels, confidences):
            conf = []
            for i in range(len(labels_map)):
                if i == labels_map[l]:
                    continue
                conf.append(c[labels_map[l]] - c[i])
            avg = sum(conf) / len(conf)
            start, end, tweet = t
            if avg > 1:
                #print(tweet[start:end+1])
                #print(l)
                #print(c)
                #print()
                #proxy.append(l)
                proxy.append('poop')
            else:
                print('not conf')
                print(tweet[start:end+1])
                print(l)
                print(c)
                print()
                proxy.append(l)
                #proxy.append('poop')
        '''

        # output predictions
        outfile = os.path.join(out_dir, os.path.basename(txt_file))
        note.write(outfile, labels)
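predict_using_model() is another project helper that is not shown. Judging from the commented-out block, it returns one label and one vector of per-class scores per tweet, and getTweets() yields (start, end, text) triples; under those assumptions, a minimal sketch might be the following (the featurizer is the same toy stand-in used earlier, and out_dir is accepted only to match the call site):

import pickle

def predict_using_model(X, model_path, out_dir):
    # X is a list of (id, (start, end, text)) pairs; out_dir is unused in
    # this sketch and kept only for signature compatibility.
    with open(model_path + '.model', 'rb') as f:
        clf = pickle.load(f)
    with open(model_path + '.dict', 'rb') as f:
        vec = pickle.load(f)

    # Featurize the annotated span of each tweet (toy bag of words again)
    feats = []
    for _, (start, end, text) in X:
        span = text[start:end + 1]
        feats.append({'tok=%s' % tok.lower(): 1 for tok in span.split()})
    mat = vec.transform(feats)

    labels = clf.predict(mat)
    # decision_function yields one score per class, which is what the
    # commented-out margin computation above indexes into.
    confidences = clf.decision_function(mat)
    return labels, confidences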
Example 6: main

# Required import: from note import Note [as alias]
# Or: from note.Note import read [as alias]
import argparse, glob, os, sys

# train, predict, evaluate, FeaturesWrapper, labels_map, cv_partitions,
# add_matrix, and BASE_DIR are project-level; sketches of cv_partitions()
# and add_matrix() follow this example.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-t",
                        dest="txt",
                        help="The files that contain the training examples",
                        default=os.path.join(BASE_DIR, 'data/annotated.txt'))
    parser.add_argument("-n",
                        dest="length",
                        help="Number of data points to use (-1 for all)",
                        default=-1)
    parser.add_argument("-f",
                        dest="folds",
                        help="Number of folds to partition data into",
                        default=10)
    parser.add_argument("-r",
                        dest="random",
                        help="Random shuffling of input data.",
                        action='store_true',
                        default=False)

    # Parse the command line arguments
    args = parser.parse_args()

    # Decode arguments
    txt_files = glob.glob(args.txt)
    length = int(args.length)
    num_folds = int(args.folds)

    # Get data from files
    if not txt_files:
        print('no training files :(')
        sys.exit(1)

    notes = []
    for txt in txt_files:
        note_tmp = Note()
        note_tmp.read(txt)
        notes.append(note_tmp)

    # List of all data points and their labels
    X = []
    Y = []
    for n in notes:
        X += list(zip(n.sid_list(), n.text_list()))
        Y += n.label_list()

    # Limit length (-1 means use all of the data; slicing with -1 directly
    # would silently drop the last data point)
    if length > 0:
        X = X[:length]
        Y = Y[:length]

    # Build confusion matrix
    confusion = [[0 for i in labels_map] for j in labels_map]

    # Instantiate the feature object once (rebuilding it on every fold
    # would really slow down cross-validation)
    feat_obj = FeaturesWrapper()

    # Extract features once
    feats = train.extract_features(X, feat_obj)
    data = list(zip(feats, Y))

    # For each held-out test set
    i = 1
    for training, testing in cv_partitions(data, num_folds=num_folds, shuffle=args.random):

        # Users like to see progress
        print('Fold: %d of %d' % (i, num_folds))
        i += 1

        # Train on the non-held-out data
        X_train = [d[0] for d in training]
        Y_train = [d[1] for d in training]
        vec, clf = train.train_vectorized(X_train, Y_train, model_path=None, grid=False)

        # Predict on the held-out fold
        X_test = [d[0] for d in testing]
        Y_test = [d[1] for d in testing]
        labels = predict.predict_vectorized(X_test, clf, vec)

        # Accumulate the confusion matrix for the held-out data
        testing_confusion = evaluate.create_confusion(labels, Y_test)
        confusion = add_matrix(confusion, testing_confusion)
#......... (remaining code omitted) .........
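cv_partitions() and add_matrix() are project helpers as well. Minimal sketches consistent with their call sites above, a generator yielding (training, testing) splits and an element-wise sum of two confusion matrices, might be:

import random

def cv_partitions(data, num_folds=10, shuffle=False):
    # Yield (training, testing) pairs, holding out a different fold each time.
    data = list(data)
    if shuffle:
        random.shuffle(data)
    for i in range(num_folds):
        testing = data[i::num_folds]
        training = [d for j, d in enumerate(data) if j % num_folds != i]
        yield training, testing

def add_matrix(a, b):
    # Element-wise sum of two equal-shaped confusion matrices (lists of lists)
    return [[x + y for x, y in zip(ra, rb)] for ra, rb in zip(a, b)]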