当前位置: 首页>>代码示例>>Python>>正文


Python SonicScrewdriver.pairtreefile方法代码示例

本文整理汇总了Python中SonicScrewdriver.pairtreefile方法的典型用法代码示例。如果您正苦于以下问题:Python SonicScrewdriver.pairtreefile方法的具体用法?Python SonicScrewdriver.pairtreefile怎么用?Python SonicScrewdriver.pairtreefile使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块SonicScrewdriver的用法示例。


在下文中一共展示了SonicScrewdriver.pairtreefile方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: open

# 需要导入模块: import SonicScrewdriver [as 别名]
# 或者: from SonicScrewdriver import pairtreefile [as 别名]
# Generate Cotraining Set

# This script uses a set of volumes already classified and sorted by a model
# in order to generate additional training data for a new model.

import SonicScrewdriver as utils
from shutil import copyfile

# Load the listing of volumes that were already classified and sorted.
# readtsv returns three values; only `indices` is used in this excerpt.
indices, columns, metadata = utils.readtsv("/Volumes/TARDIS/work/cotrain/sortedcotrain.tsv")

# Keep the last 200 entries of the listing.
# NOTE(review): presumably the file is sorted so that these are the
# highest-ranked volumes (the output folder is named "top200") -- confirm.
toget = indices[-200:]

# Pass each id through pairtreefile; the results are spliced into file
# names below.
# NOTE(review): assumes pairtreefile returns a filename-safe string -- confirm.
toget = [utils.pairtreefile(x) for x in toget]

# NOTE(review): genredir and featuredir are not referenced in the visible
# code; the loop below repeats the same paths as string literals.
genredir = "/Volumes/TARDIS/work/cotrain/top200/genremaps/"
featuredir = "/Volumes/TARDIS/work/cotrain/top200/pagefeatures/"

for htid in toget:

	# Copy this volume's page-feature file into the top200 folder.
	featuresource = "/Volumes/TARDIS/work/cotrain/pagefeatures/" + htid + ".pg.tsv"
	featuredestination = "/Volumes/TARDIS/work/cotrain/top200/pagefeatures/" + htid + ".pg.tsv"
	copyfile(featuresource, featuredestination)

	# Read every line of the volume's .predict file into memory.
	genresource = "/Volumes/TARDIS/work/cotrain/predictions/" + htid + ".predict"
	genredestination = "/Volumes/TARDIS/work/cotrain/top200/genremaps/" + htid + ".map"
	with open(genresource, mode="r", encoding = "utf-8") as f:
		filelines = f.readlines()

	# Opening in "w" mode creates (or truncates) the .map file.
	# NOTE(review): the excerpt ends inside this loop -- the visible body only
	# strips trailing whitespace from each line and never writes anything;
	# whatever followed in the original script is not shown here.
	with open(genredestination, mode="w", encoding = "utf-8") as f:
		for line in filelines:
			line = line.rstrip()
开发者ID:tedunderwood,项目名称:HathiGenreTrainingset,代码行数:33,代码来源:GenerateCotrainingSet.py

示例2: open

# 需要导入模块: import SonicScrewdriver [as 别名]
# 或者: from SonicScrewdriver import pairtreefile [as 别名]
import sys, os
import SonicScrewdriver as utils
from shutil import copyfile

# Build the "learn1" bundle: a listing file (learn1.arff) plus one text file
# per volume id named in sourcefile.txt.

# Read the volume ids, one per line. Blank lines are skipped: an empty id
# would otherwise produce a bogus row and a copy from a path that names no
# volume.
with open("/Users/tunder/Dropbox/pagedata/activelearn/sourcefile.txt", mode="r", encoding="utf-8") as f:
    files = [line.rstrip() for line in f if line.strip()]

# Header written at the top of the listing. The text itself says it is not
# real ARFF, so it is reproduced exactly as-is.
# NOTE(review): five attributes are declared here, but every row written
# below has six comma-separated fields; left unchanged because the consumer
# of this file is not visible here -- confirm which side is intended.
header = (
    "% List of files in associated folder.\n"
    "% Does not really use arff format.\n"
    "\n"
    "@RELATION learn1\n\n"
    "@ATTRIBUTE htid string\n"
    "@ATTRIBUTE endpg numeric\n"
    "@ATTRIBUTE startpgpart numeric\n"
    "@ATTRIBUTE endpgpart numeric\n"
    "@ATTRIBUTE probability numeric\n"
    "\n"
)

with open("/Users/tunder/Dropbox/pagedata/activelearn/learn1.arff", mode="w", encoding="utf-8") as f:
    f.write(header)

    for afile in files:
        # Convert the id once and reuse it for the row and for both paths.
        cleanid = utils.pairtreefile(afile)
        f.write(cleanid + ",0,0,0,0,0\n")

        # Place the volume's normalized text next to the listing.
        sourcepath = "/Volumes/TARDIS/work/cotrain/texts/" + cleanid + ".norm.txt"
        destination = "/Users/tunder/Dropbox/pagedata/activelearn/" + cleanid + ".txt"
        copyfile(sourcepath, destination)
		
开发者ID:tedunderwood,项目名称:HathiGenreTrainingset,代码行数:31,代码来源:arffmaker.py

示例3: dict

# 需要导入模块: import SonicScrewdriver [as 别名]
# 或者: from SonicScrewdriver import pairtreefile [as 别名]
    # I'm not repeating these columns, because the first is not useful and the second
    # is not reliable.

    outrow = [htid]
    for column in columns[1:]:
        if column not in columns_to_exclude:
            outrow.append(table[column][dirtyhtid])

    return outrow

# Load the merged monograph metadata. readtsv returns three values, unpacked
# here as rows, columns and table.
# NOTE(review): presumably rows holds the raw volume ids and table is indexed
# as table[column][id] -- confirm against readtsv.
metadata_path = '/Volumes/TARDIS/work/metadata/MergedMonographs.tsv'
rows, columns, table = utils.readtsv(metadata_path)

# Map each cleaned id (the output of pairtreefile) to the row that make_row
# builds for it. If two raw ids clean to the same string, the later one
# overwrites the earlier entry.
indextorows = dict()
for row in rows:
    cleanid = utils.pairtreefile(row)
    newrow = make_row(cleanid, row, columns, table)
    indextorows[cleanid] = newrow

# Handle one genre per iteration; each genre gets a path under rootpath.
# NOTE(review): genrenames and rootpath are defined outside this excerpt;
# judging by the loop variables, genrenames maps abbreviation -> genre name.
for genreabbrev, genre in genrenames.items():

    print(genre)

    genrepath = os.path.join(rootpath, genre)

    volsinsubset = list()
    # Because there are some volumes in the metadata that weren't
    # included in the 95-percent subset. Those won't be present
    # as files, and shouldn't be carried forward to the next stage.
    metadataforgenre = dict()
    # NOTE(review): the excerpt stops here; how volsinsubset and
    # metadataforgenre are filled is not visible.
开发者ID:tedunderwood,项目名称:HathiGenreTrainingset,代码行数:32,代码来源:enrichmetadata.py

示例4: count_words

# 需要导入模块: import SonicScrewdriver [as 别名]
# 或者: from SonicScrewdriver import pairtreefile [as 别名]
        if os.path.isfile(filepath):
            tokencount, wordcount = count_words(filepath)
        else:
            print("Missing file: " + filepath)
            sys.exit(0)
        newrow = [idcode, date, tokencount, wordcount, author, title]
        outtable.append(newrow)
        print(counter)
        counter += 1

# Gather word counts and basic metadata for every volume in the
# topic-modeling sample, appending one row per volume to outtable.
# NOTE(review): outtable, counter and count_words are defined outside this
# excerpt.
rows, columns, table = utils.readtsv('/Users/tunder/Dropbox/GenreProject/metadata/topicmodelingsample.tsv')

sourcedir = "/Volumes/TARDIS/work/moneytexts/"

for row in rows:
    filename = utils.pairtreefile(row) + ".fic.txt"
    filepath = os.path.join(sourcedir, filename)

    if not os.path.isfile(filepath):
        # A missing text aborts the run. Exit with a non-zero status so a
        # calling shell or pipeline can tell the run did not complete
        # (the previous status of 0 signalled success).
        print("Missing file: " + filepath)
        sys.exit(1)

    tokencount, wordcount = count_words(filepath)

    # Pull the descriptive fields for this volume from the metadata table.
    idcode = table["HTid"][row]
    date = str(utils.simple_date(row, table))
    author = table["author"][row]
    title = table["title"][row]

    newrow = [idcode, date, tokencount, wordcount, author, title]
    outtable.append(newrow)

    # Progress indicator: running count of volumes handled so far.
    print(counter)
    counter += 1
开发者ID:tedunderwood,项目名称:GenreProject,代码行数:33,代码来源:better_metadata_maker.py

示例5: set

# 需要导入模块: import SonicScrewdriver [as 别名]
# 或者: from SonicScrewdriver import pairtreefile [as 别名]
import os
import SonicScrewdriver as utils

# Print the ids that occur both as page-feature files and in the slice lists.
folder = "/Users/tunder/Dropbox/pagedata/thirdfeatures/pagefeatures/"
files = os.listdir(folder)

# Ignore hidden entries and names of seven characters or fewer, then drop
# the last seven characters of each remaining name.
# NOTE(review): seven is presumably the length of the files' suffix -- confirm.
validfiles = {
    entry[:-7]
    for entry in files
    if len(entry) > 7 and not entry.startswith(".")
}

otherfolder = "/Volumes/TARDIS/output/slices/"

slices = os.listdir(otherfolder)
slicefiles = set()

for slicename in slices:
    if not slicename.startswith("."):
        with open(otherfolder + slicename, encoding="utf-8") as handle:
            entries = handle.readlines()
        # Each line, stripped of trailing whitespace, goes through
        # pairtreefile before being recorded.
        slicefiles.update(utils.pairtreefile(entry.rstrip()) for entry in entries)

print(slicefiles.intersection(validfiles))


开发者ID:tedunderwood,项目名称:HathiGenreTrainingset,代码行数:30,代码来源:findmatches.py


注:本文中的SonicScrewdriver.pairtreefile方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。