本文整理汇总了Python中SonicScrewdriver.pairtreefile方法的典型用法代码示例。如果您正苦于以下问题:Python SonicScrewdriver.pairtreefile方法的具体用法?Python SonicScrewdriver.pairtreefile怎么用?Python SonicScrewdriver.pairtreefile使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类SonicScrewdriver的用法示例。
在下文中一共展示了SonicScrewdriver.pairtreefile方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: open
# Required module import: import SonicScrewdriver [as alias]
# Or: from SonicScrewdriver import pairtreefile [as alias]
# Generate Cotraining Set
# This script uses a set of volumes already classified and sorted by a model
# in order to generate additional training data for a new model.
#
# NOTE(review): indentation was lost in this excerpt. The statements after
# each `for ...:` / `with ...:` header are presumably the indented body of
# that header; restore the nesting before running.
import SonicScrewdriver as utils
from shutil import copyfile
# readtsv returns three values; only `indices` is used in the visible code.
indices, columns, metadata = utils.readtsv("/Volumes/TARDIS/work/cotrain/sortedcotrain.tsv")
# Keep the last 200 entries of the index list (all of them if it is shorter).
toget = indices[-200:]
# Each identifier is passed through pairtreefile before being used in file
# names below (presumably a filename-safe form of the id -- confirm in
# SonicScrewdriver).
toget = [utils.pairtreefile(x) for x in toget]
# NOTE(review): genredir and featuredir are assigned but not referenced in the
# visible lines; the destination paths below repeat the same literals.
genredir = "/Volumes/TARDIS/work/cotrain/top200/genremaps/"
featuredir = "/Volumes/TARDIS/work/cotrain/top200/pagefeatures/"
for htid in toget:
# Copy the page-feature file for this volume into the top200 folder.
featuresource = "/Volumes/TARDIS/work/cotrain/pagefeatures/" + htid + ".pg.tsv"
featuredestination = "/Volumes/TARDIS/work/cotrain/top200/pagefeatures/" + htid + ".pg.tsv"
copyfile(featuresource, featuredestination)
# The prediction file is read in full and re-emitted under a ".map" name.
genresource = "/Volumes/TARDIS/work/cotrain/predictions/" + htid + ".predict"
genredestination = "/Volumes/TARDIS/work/cotrain/top200/genremaps/" + htid + ".map"
with open(genresource, mode="r", encoding = "utf-8") as f:
filelines = f.readlines()
with open(genredestination, mode="w", encoding = "utf-8") as f:
for line in filelines:
# Strip trailing whitespace (including the newline) from the line.
line = line.rstrip()
# NOTE(review): the excerpt ends here; nothing is written to the ".map" file
# in the visible lines, so the rest of this loop body is not shown.
示例2: open
# Required module import: import SonicScrewdriver [as alias]
# Or: from SonicScrewdriver import pairtreefile [as alias]
#
# Build the "learn1" listing for active learning and copy the matching texts.
#
# Steps:
#   1. Read one volume identifier per line from sourcefile.txt.
#   2. Write learn1.arff: a fixed ARFF-like header followed by one
#      placeholder row per volume.
#   3. Copy each volume's ".norm.txt" text into the activelearn folder as
#      "<id>.txt".
#
# The block structure below was reconstructed from an excerpt whose
# indentation had been stripped; the file handle `f` must still be open for
# the row writes, which fixes the nesting of the final loop.
import sys
import os
import SonicScrewdriver as utils
from shutil import copyfile

with open("/Users/tunder/Dropbox/pagedata/activelearn/sourcefile.txt", mode="r", encoding="utf-8") as f:
    # Trailing whitespace (including the newline) is stripped from every line;
    # blank lines are kept as empty strings, exactly as in the original loop.
    files = [line.rstrip() for line in f]

# Header text is emitted verbatim; writelines() adds no separators, so each
# entry carries its own newline(s).
arff_header = (
    "% List of files in associated folder.\n",
    "% Does not really use arff format.\n",
    "\n",
    "@RELATION learn1\n\n",
    "@ATTRIBUTE htid string\n",
    "@ATTRIBUTE endpg numeric\n",
    "@ATTRIBUTE startpgpart numeric\n",
    "@ATTRIBUTE endpgpart numeric\n",
    "@ATTRIBUTE probability numeric\n",
    "\n",
)

with open("/Users/tunder/Dropbox/pagedata/activelearn/learn1.arff", mode="w", encoding="utf-8") as f:
    f.writelines(arff_header)

    for afile in files:
        # Convert the identifier once and reuse it for the row and both paths
        # (the original called pairtreefile three times per file).
        cleanid = utils.pairtreefile(afile)

        # NOTE(review): each row has six fields (id plus five zeros) while the
        # header declares five attributes; left unchanged because the header
        # itself says the file "does not really use arff format".
        f.write(cleanid + ",0,0,0,0,0\n")

        sourcepath = "/Volumes/TARDIS/work/cotrain/texts/" + cleanid + ".norm.txt"
        destination = "/Users/tunder/Dropbox/pagedata/activelearn/" + cleanid + ".txt"
        copyfile(sourcepath, destination)
示例3: dict
# Required module import: import SonicScrewdriver [as alias]
# Or: from SonicScrewdriver import pairtreefile [as alias]
#
# NOTE(review): this excerpt is a fragment with its indentation stripped. The
# lines down to `return outrow` are presumably the tail of make_row(), whose
# `def` line is not visible (it is called below as
# make_row(cleanid, row, columns, table)). Names such as utils, os,
# columns_to_exclude, genrenames and rootpath are defined outside the excerpt.
#
# I'm not repeating these columns, because the first is not useful and the second
# is not reliable.
# The output row starts with the cleaned id ...
outrow = [htid]
# ... followed by the value of every column after the first that is not
# excluded, looked up under the original (uncleaned) id.
for column in columns[1:]:
if column not in columns_to_exclude:
outrow.append(table[column][dirtyhtid])
return outrow
metadata_path = '/Volumes/TARDIS/work/metadata/MergedMonographs.tsv'
rows, columns, table = utils.readtsv(metadata_path)
# Map each cleaned id to its output row.
indextorows = dict()
for row in rows:
# The cleaned id is the key; the raw row id is still needed for table lookups.
cleanid = utils.pairtreefile(row)
newrow = make_row(cleanid, row, columns, table)
indextorows[cleanid] = newrow
for genreabbrev, genre in genrenames.items():
print(genre)
genrepath = os.path.join(rootpath, genre)
volsinsubset = list()
# Because there are some volumes in the metadata that weren't
# included in the 95-percent subset. Those won't be present
# as files, and shouldn't be carried forward to the next stage.
metadataforgenre = dict()
# NOTE(review): the excerpt ends here; the rest of the per-genre loop is not shown.
示例4: count_words
# Required module import: import SonicScrewdriver [as alias]
# Or: from SonicScrewdriver import pairtreefile [as alias]
#
# NOTE(review): this excerpt has lost its indentation and begins with a
# fragment that repeats the tail of the loop further down; the enclosing
# context of the first nine code lines is not visible. count_words, outtable,
# counter, os, sys and utils are defined outside the excerpt.
if os.path.isfile(filepath):
tokencount, wordcount = count_words(filepath)
else:
print("Missing file: " + filepath)
# NOTE(review): exits with status 0 although a file is missing; a nonzero
# status may have been intended -- confirm.
sys.exit(0)
newrow = [idcode, date, tokencount, wordcount, author, title]
outtable.append(newrow)
print(counter)
counter += 1
rows, columns, table = utils.readtsv('/Users/tunder/Dropbox/GenreProject/metadata/topicmodelingsample.tsv')
sourcedir = "/Volumes/TARDIS/work/moneytexts/"
for row in rows:
# The text for each row is expected at <sourcedir>/<cleaned id>.fic.txt.
filename = utils.pairtreefile(row) + ".fic.txt"
filepath = os.path.join(sourcedir, filename)
if os.path.isfile(filepath):
tokencount, wordcount = count_words(filepath)
else:
# A missing text aborts the whole run rather than skipping the row.
print("Missing file: " + filepath)
# NOTE(review): exit status is 0 on this error path -- confirm intent.
sys.exit(0)
# Metadata lookups use the raw row id, not the cleaned file name.
idcode = table["HTid"][row]
date = str(utils.simple_date(row, table))
author = table["author"][row]
title = table["title"][row]
newrow = [idcode, date, tokencount, wordcount, author, title]
outtable.append(newrow)
# Progress indicator: prints the running count before incrementing it.
print(counter)
counter += 1
示例5: set
# Required module import: import SonicScrewdriver [as alias]
# Or: from SonicScrewdriver import pairtreefile [as alias]
#
# Report which volume ids listed in the slice files also have a page-feature
# file in the thirdfeatures folder.
#
# The block structure below was reconstructed from an excerpt whose
# indentation had been stripped. Where the nesting was ambiguous, the reading
# chosen is noted inline.
import os
import SonicScrewdriver as utils

folder = "/Users/tunder/Dropbox/pagedata/thirdfeatures/pagefeatures/"
files = os.listdir(folder)

# Hidden files and names of seven characters or fewer are ignored; the last
# seven characters of each remaining name are dropped (presumably a suffix
# such as ".pg.tsv" -- confirm against the folder contents).
# NOTE(review): the original `validfiles.add(filename)` is read here as part
# of the `if` body, so skipped names are not added.
validfiles = {
    filename[:-7]
    for filename in files
    if not filename.startswith(".") and len(filename) > 7
}

otherfolder = "/Volumes/TARDIS/output/slices/"
slices = os.listdir(otherfolder)

slicefiles = set()
for aslice in slices:
    # Skip hidden files such as OS metadata entries.
    if aslice.startswith("."):
        continue

    # otherfolder already ends with "/", so plain concatenation forms the path.
    with open(otherfolder + aslice, encoding="utf-8") as f:
        # Each line is stripped of trailing whitespace and passed through
        # pairtreefile so it can be compared with the feature file names.
        slicefiles.update(utils.pairtreefile(line.rstrip()) for line in f)

print(slicefiles.intersection(validfiles))