This page collects typical usage examples of the Table class from the Python module retriever.lib.models. If you are unsure what Table does or how to use it, the curated examples below may help.
The following 15 code examples of the Table class are shown, ordered by popularity.
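All of the examples follow the same basic pattern, so here is a minimal sketch of it for orientation. It is an illustration only: the table name, columns, and data file are invented, and the engine argument stands in for an already-configured retriever engine (such as self.engine inside a Script.download() method).
from retriever.lib.models import Table

def load_example(engine):
    # Describe the target table: one (name, (type,)) tuple per column.
    table = Table("example_table", delimiter=",")
    table.columns = [("record_id", ("pk-int",)),   # integer primary key
                     ("name", ("char",)),          # text column
                     ("value", ("double",))]       # floating-point column
    # Attach the table to the engine, create it, and bulk-load the CSV.
    engine.table = table
    engine.create_table()
    engine.insert_data_from_file(engine.format_filename("example.csv"))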
Example 1: download
def download(self, engine=None, debug=False):
Script.download(self, engine, debug)
engine = self.engine
filename = 'Aquatic_animal_excretion_data.csv'
tablename = 'aquatic_animals'
table = Table(str(tablename), delimiter=',')
table.columns = [
("index", ("pk-int",)),
("sourcenumber", ("int",)),
("sourcename", ("char",)),
("speciesname", ("char",)),
("speciescode", ("char",)),
("invert/vert", ("char",)),
("phylum", ("char",)),
("class", ("char",)),
("order", ("char",)),
("family", ("char",)),
("trophicgild", ("char",)),
("drymass", ("double",)),
("logdrymass", ("double",)),
("ecosystemtype", ("char",)),
("energysource", ("char",)),
("habitat", ("char",)),
("residentecosystem", ("char",)),
("temperature", ("double",)),
("nexcretionrate", ("double",)),
("pexcretionrate", ("double",)),
("lognexcretionrate", ("double",)),
("logpexcretionrate", ("double",)),
("incubationtime", ("double",)),
("nform", ("char",)),
("pform", ("char",)),
("bodyc", ("double",)),
("bodyn", ("double",)),
("bodyp", ("double",)),
("bodyc:n", ("double",)),
("bodyc:p", ("double",)),
("bodyn:p", ("double",)),
("bodydatasource", ("char",)),
("datasource", ("char",)),
("dataproviders", ("char",))]
engine.table = table
if not os.path.isfile(engine.format_filename(filename)):
engine.download_files_from_archive(self.urls[tablename], [filename], filetype="zip")
engine.create_table()
engine.insert_data_from_file(engine.format_filename(str(filename)))
Example 2: download
def download(self, engine=None, debug=False):
Script.download(self, engine, debug)
engine = self.engine
taxa = ('Plant', 'Animal')
for tax in taxa:
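# header_rows=3 skips three header lines; contains_pk=True uses the record_id values from the file as the primary key instead of auto-generating them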
table = Table(tax.lower() + 's', delimiter=',', header_rows=3, pk='record_id', contains_pk=True)
columns = [("record_id" , ("pk-int",) ),
("station_id" , ("int",) ),
("obs_date" , ("char",) ),
("ind_id" , ("int",) ),
("sci_name" , ("char",) ),
("com_name" , ("char",) ),
("kingdom" , ("char",) ),
("pheno_cat" , ("char",) ),
("pheno_name" , ("char",) ),
("pheno_status" , ("char",) ),
("lat" , ("double",) ),
("lon" , ("double",) ),
("elevation" , ("int",) ),
("network_name" , ("char",) )]
table.columns = columns
engine.table = table
engine.create_table()
base_url = 'http://www.usanpn.org/getObs/observations/'
years = range(2009, 2013)
for year in years:
if year == 2009 and tax == 'Animal': continue
url = base_url + 'get%s%sDataNoDefinitions' % (year, tax)
filename = '%s_%s.csv' % (tax, year)
engine.download_file(url, filename)
engine.insert_data_from_file(engine.find_file(filename))
return engine
Example 3: download
def download(self, engine=None, debug=False):
Script.download(self, engine, debug)
engine = self.engine
files = ["Macroplot_data_Rev.txt", "Microplot_data.txt", "Site_variables.txt", "Species_list.txt"]
engine.download_files_from_archive(self.urls["data"], files, filetype="zip")
# Create table species
engine.auto_create_table(Table('species', cleanup=self.cleanup_func_table),
filename="Species_list.txt")
engine.insert_data_from_file(engine.format_filename("Species_list.txt"))
# Create table sites
engine.auto_create_table(Table('sites', cleanup=self.cleanup_func_table),
filename="Site_variables.txt")
engine.insert_data_from_file(engine.format_filename("Site_variables.txt"))
# Create table microplots
table = Table('microplots')
table.columns = [('record_id', ('pk-auto',)), ('SpCode', ('char', '30')), ('Count', ('ct-int',))]
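# The ct-* settings define a crosstab: each BSP* column in the source file becomes its own row, with the source column name stored in PlotID (ct_column) and its value in Count (ct-int)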
table.ct_names = ['BSP1', 'BSP2', 'BSP3', 'BSP4', 'BSP5', 'BSP6', 'BSP7', 'BSP8', 'BSP9',
'BSP10', 'BSP11', 'BSP12', 'BSP13', 'BSP14', 'BSP15', 'BSP16', 'BSP17',
'BSP18', 'BSP20', 'BSP21', 'BSP22', 'BSP23', 'BSP24', 'BSP25', 'BSP26',
'BSP27', 'BSP28', 'BSP29', 'BSP30', 'BSP31', 'BSP33', 'BSP34', 'BSP35',
'BSP36', 'BSP37', 'BSP41', 'BSP42', 'BSP43', 'BSP44', 'BSP45', 'BSP46',
'BSP47', 'BSP48', 'BSP49', 'BSP50', 'BSP51', 'BSP52', 'BSP53', 'BSP54',
'BSP55', 'BSP56', 'BSP57', 'BSP58', 'BSP59', 'BSP60', 'BSP61', 'BSP62',
'BSP63', 'BSP64', 'BSP65', 'BSP66', 'BSP67', 'BSP68', 'BSP69', 'BSP70',
'BSP71', 'BSP72', 'BSP73', 'BSP74', 'BSP75', 'BSP76', 'BSP78', 'BSP79',
'BSP80', 'BSP82', 'BSP83', 'BSP84', 'BSP85', 'BSP86', 'BSP87', 'BSP88',
'BSP89', 'BSP90', 'BSP91', 'BSP92', 'BSP93', 'BSP94', 'BSP95', 'BSP96',
'BSP97', 'BSP98', 'BSP99', 'BSP100', 'BSP101', 'BSP102', 'BSP104']
table.ct_column = 'PlotID'
engine.auto_create_table(table, filename="Microplot_data.txt")
engine.insert_data_from_file(engine.format_filename("Microplot_data.txt"))
# Create table microplots
table = Table('macroplots')
table.ct_names = ['TreeGirth1', 'TreeGirth2', 'TreeGirth3', 'TreeGirth4', 'TreeGirth5']
table.ct_column = 'Tree'
table.columns = [('record_id', ('pk-auto',)), ('PlotID', ('char', '20')), ('SpCode', ('char', '30')),
('Girth', ('ct-int',))]
engine.auto_create_table(table, filename="Macroplot_data_Rev.txt")
engine.insert_data_from_file(engine.format_filename("Macroplot_data_Rev.txt"))
Example 4: download
def download(self, engine=None, debug=False):
try:
Script.download(self, engine, debug)
engine = self.engine
# Species table
table = Table("species", cleanup=Cleanup(), contains_pk=True,
header_rows=6)
table.columns=[("species_id", ("pk-int",) ),
("AOU", ("int",) ),
("english_common_name", ("char",50) ),
("french_common_name", ("char",50) ),
("spanish_common_name", ("char",50) ),
("sporder", ("char",30) ),
("family", ("char",30) ),
("genus", ("char",30) ),
("species", ("char",50) ),
]
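# Character widths of each field in the fixed-width species file, one per column above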
table.fixed_width = [7,6,51,51,51,51,51,51,50]
engine.table = table
engine.create_table()
engine.insert_data_from_url(self.urls["species"])
# Routes table
if not os.path.isfile(engine.format_filename("routes_new.csv")):
engine.download_files_from_archive(self.urls["routes"],
["routes.csv"])
read = open(engine.format_filename("routes.csv"), "rb")
write = open(engine.format_filename("routes_new.csv"), "wb")
print "Cleaning routes data..."
write.write(read.readline())
for line in read:
values = line.split(',')
v = Decimal(values[5])
if v > 0:
values[5] = str(v * Decimal("-1"))
write.write(','.join(str(value) for value in values))
write.close()
read.close()
engine.auto_create_table(Table("routes", cleanup=Cleanup()),
filename="routes_new.csv")
engine.insert_data_from_file(engine.format_filename("routes_new.csv"))
# Weather table
if not os.path.isfile(engine.format_filename("weather_new.csv")):
engine.download_files_from_archive(self.urls["weather"],
["weather.csv"])
read = open(engine.format_filename("weather.csv"), "rb")
write = open(engine.format_filename("weather_new.csv"), "wb")
print "Cleaning weather data..."
for line in read:
values = line.split(',')
newvalues = []
for value in values:
if ':' in value:
newvalues.append(value.replace(':', ''))
elif value == "N":
newvalues.append(None)
else:
newvalues.append(value)
write.write(','.join(str(value) for value in newvalues))
write.close()
read.close()
engine.auto_create_table(Table("weather", pk="RouteDataId", cleanup=Cleanup()),
filename="weather_new.csv")
engine.insert_data_from_file(engine.format_filename("weather_new.csv"))
# Region_codes table
table = Table("region_codes", pk=False, header_rows=11,
fixed_width=[11, 11, 30])
def regioncodes_cleanup(value, engine):
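# Map accented characters (á, é, í, ó) to their plain ASCII equivalents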
replace = {chr(225):"a", chr(233):"e", chr(237):"i", chr(243):"o"}
newvalue = str(value)
for key in replace.keys():
if key in newvalue:
newvalue = newvalue.replace(key, replace[key])
return newvalue
table.cleanup = Cleanup(regioncodes_cleanup)
table.columns=[("countrynum" , ("int",) ),
("regioncode" , ("int",) ),
("regionname" , ("char",30) )]
engine.table = table
engine.create_table()
engine.insert_data_from_url(self.urls["region_codes"])
# Counts table
table = Table("counts", pk=False, delimiter=',')
table.columns=[("RouteDataID" , ("int",) ),
#......... part of the code omitted here .........
Example 5: download
def download(self, engine=None, debug=False):
Script.download(self, engine, debug)
engine = self.engine
filename = 'vertnet_latest_mammals.csv'
tablename = 'mammals'
table = Table(str(tablename), delimiter=',')
table.columns = [
("record_id", ("pk-auto",)),
("beginrecord", ("char",)),
("icode", ("char",)),
("title", ("char",)),
("citation", ("char",)),
("contact", ("char",)),
("email", ("char",)),
("emlrights", ("char",)),
("gbifdatasetid", ("char",)),
("gbifpublisherid", ("char",)),
("doi", ("char",)),
("migrator", ("char",)),
("networks", ("char",)),
("orgcountry", ("char",)),
("orgname", ("char",)),
("orgstateprovince", ("char",)),
("pubdate", ("char",)),
("source_url", ("char",)),
("iptrecordid", ("char",)),
("associatedmedia", ("char",)),
("associatedoccurrences", ("char",)),
("associatedorganisms", ("char",)),
("associatedreferences", ("char",)),
("associatedsequences", ("char",)),
("associatedtaxa", ("char",)),
("bed", ("char",)),
("behavior", ("char",)),
("catalognumber", ("char",)),
("continent", ("char",)),
("coordinateprecision", ("char",)),
("coordinateuncertaintyinmeters", ("char",)),
("country", ("char",)),
("countrycode", ("char",)),
("county", ("char",)),
("dateidentified", ("char",)),
("day", ("char",)),
("decimallatitude", ("char",)),
("decimallongitude", ("char",)),
("disposition", ("char",)),
("earliestageorloweststage", ("char",)),
("earliesteonorlowesteonothem", ("char",)),
("earliestepochorlowestseries", ("char",)),
("earliesteraorlowesterathem", ("char",)),
("earliestperiodorlowestsystem", ("char",)),
("enddayofyear", ("char",)),
("establishmentmeans", ("char",)),
("eventdate", ("char",)),
("eventid", ("char",)),
("eventremarks", ("char",)),
("eventtime", ("char",)),
("fieldnotes", ("char",)),
("fieldnumber", ("char",)),
("footprintspatialfit", ("char",)),
("footprintsrs", ("char",)),
("footprintwkt", ("char",)),
("formation", ("char",)),
("geodeticdatum", ("char",)),
("geologicalcontextid", ("char",)),
("georeferencedby", ("char",)),
("georeferenceddate", ("char",)),
("georeferenceprotocol", ("char",)),
("georeferenceremarks", ("char",)),
("georeferencesources", ("char",)),
("georeferenceverificationstatus", ("char",)),
("group", ("char",)),
("habitat", ("char",)),
("highergeography", ("char",)),
("highergeographyid", ("char",)),
("highestbiostratigraphiczone", ("char",)),
("identificationid", ("char",)),
("identificationqualifier", ("char",)),
("identificationreferences", ("char",)),
("identificationremarks", ("char",)),
("identificationverificationstatus", ("char",)),
("identifiedby", ("char",)),
("individualcount", ("char",)),
("island", ("char",)),
("islandgroup", ("char",)),
("latestageorhigheststage", ("char",)),
("latesteonorhighesteonothem", ("char",)),
("latestepochorhighestseries", ("char",)),
("latesteraorhighesterathem", ("char",)),
("latestperiodorhighestsystem", ("char",)),
("lifestage", ("char",)),
("lithostratigraphicterms", ("char",)),
("locality", ("char",)),
("locationaccordingto", ("char",)),
("locationid", ("char",)),
("locationremarks", ("char",)),
("lowestbiostratigraphiczone", ("char",)),
("materialsampleid", ("char",)),
#......... part of the code omitted here .........
Example 6: download
def download(self, engine=None, debug=False):
Script.download(self, engine, debug)
engine = self.engine
csv_files = []
request_src = "http://www.data-retriever.org/"
base_url = "http://www.usanpn.org/npn_portal/observations/getObservations.xml?start_date={startYear}&end_date={endYear_date}&request_src={request_src}"
header_values = ["observation_id",
"update_datetime",
"site_id",
"latitude",
"longitude",
"elevation_in_meters",
"state",
"species_id",
"genus",
"species",
"common_name",
"kingdom",
"individual_id",
"phenophase_id",
"phenophase_description",
"observation_date",
"day_of_year",
"phenophase_status",
"intensity_category_id",
"intensity_value",
"abundance_value"
]
columns = [("record_id", ("pk-auto",)),
("observation_id", ("int",)), # subsequently refered to as "status record"
("update_datetime", ("char",)),
("site_id", ("int",)),
("latitude", ("double",)),
("longitude", ("double",)),
("elevation_in_meters", ("char",)),
("state", ("char",)),
("species_id", ("int",)),
("genus", ("char",)),
("species", ("char",)),
("common_name", ("char",)),
("kingdom", ("char",)), # skip kingdom
("individual_id", ("char",)),
("phenophase_id", ("int",)),
("phenophase_description", ("char",)),
("observation_date", ("char",)),
("day_of_year", ("char",)),
("phenophase_status", ("char",)),
("intensity_category_id", ("char",)),
("intensity_value", ("char",)),
("abundance_value", ("char",))
]
start_date = datetime.date(2009, 1, 1)
end_date = datetime.date.today()
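# Query the NPN observations service in 90-day windows; requesting the full date range at once would presumably return too much data for a single response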
while start_date < end_date:
to_date = start_date + datetime.timedelta(90)
if to_date >= end_date:
data_url = base_url.format(startYear=str(start_date), endYear_date=str(end_date),
request_src=request_src)
else:
data_url = base_url.format(startYear=str(start_date), endYear_date=str(to_date),
request_src=request_src)
xml_file_name = '{}'.format(start_date) + ".xml"
engine.download_file(data_url, xml_file_name)
# Create csv files for 3 months
csv_observation = '{}'.format(start_date) + ".csv"
csv_files.append(csv_observation)
csv_buff = open_fw(engine.format_filename(csv_observation))
csv_writer = open_csvw(csv_buff)
csv_writer.writerow(header_values)
# Parse xml to read data
file_read = ""
fname = DATA_WRITE_PATH.strip('{dataset}') + 'NPN/' + xml_file_name
with open(fname, 'r') as fp1:
file_read = fp1.read()
root = ET.fromstring(file_read)
# header_values is constant, so build the column-order lookup once per file
index_map = {val: i for i, val in enumerate(header_values)}
for elements in root:
diction = sorted(elements.attrib.items(), key=lambda pair: index_map[pair[0]])
csv_writer.writerow([x[1] for x in diction])
csv_buff.close()
start_date = to_date + datetime.timedelta(1)
# Create table
table = Table('observations', delimiter=',', pk='record_id', contains_pk=True)
table.columns = columns
engine.table = table
engine.create_table()
for data_file in csv_files:
engine.insert_data_from_file(engine.find_file(data_file))
#......... part of the code omitted here .........
Example 7: download
def download(self, engine=None, debug=False):
Script.download(self, engine, debug)
reload(sys)
if hasattr(sys, 'setdefaultencoding'):
sys.setdefaultencoding("utf-8")
self.engine.download_file(self.urls["GWDD"], "GlobalWoodDensityDatabase.xls")
filename = os.path.basename("GlobalWoodDensityDatabase.xls")
book = xlrd.open_workbook(self.engine.format_filename(filename))
sh = book.sheet_by_index(1)
rows = sh.nrows
# Creating data files
file_path = self.engine.format_filename("gwdd_data.csv")
gwdd_data = open_fw(file_path)
csv_writer = open_csvw(gwdd_data)
csv_writer.writerow(["Number", "Family", "Binomial", "Wood_Density", "Region", "Reference_Number"])
for index in range(1, rows):
row = sh.row(index)
# get each row and format the cell value.
row_as_list = [to_str(column_value.value) for column_value in row]
csv_writer.writerow(row_as_list)
gwdd_data.close()
table = Table("data", delimiter=",")
table.columns = [("Number", ("pk-int",)),
("Family", ("char",)),
("Binomial", ("char",)),
("Wood_Density", ("double",)),
("Region", ("char",)),
("Reference_Number", ("int",))]
table.pk = 'Number'
table.contains_pk = True
self.engine.table = table
self.engine.create_table()
self.engine.insert_data_from_file(file_path)
# Creating reference table file
file_path = self.engine.format_filename("gwdd_ref.csv")
ref_file = open_fw(file_path)
csv_writerd = open_csvw(ref_file)
csv_writerd.writerow(["Reference_Number", "Reference"])
sh = book.sheet_by_index(2)
rows = sh.nrows
for index in range(1, rows):
row = sh.row(index)
# get each row and format the cell value.
row_as_list = [to_str(column_value.value, object_encoding=sys.stdout) for column_value in row]
csv_writerd.writerow(row_as_list)
ref_file.close()
table = Table("reference", delimiter=",")
table.columns = [("Reference_Number", ("pk-int",)), ("Reference", ("char",))]
table.pk = 'Reference_Number'
table.contains_pk = True
self.engine.table = table
self.engine.create_table()
self.engine.insert_data_from_file(file_path)
return self.engine
Example 8: download
def download(self, engine=None, debug=False):
Script.download(self, engine, debug)
self.engine.auto_create_table(Table("sites"), url=self.urls["sites"])
self.engine.insert_data_from_url(self.urls["sites"])
self.engine.download_file(self.urls["stems"], "all_Excel.zip")
local_zip = zipfile.ZipFile(self.engine.format_filename("all_Excel.zip"))
filelist = local_zip.namelist()
local_zip.close()
self.engine.download_files_from_archive(self.urls["stems"], filelist)
filelist = [os.path.basename(filename) for filename in filelist]
lines = []
tax = []
for filename in filelist:
print "Extracting data from " + filename + "..."
book = xlrd.open_workbook(self.engine.format_filename(filename))
sh = book.sheet_by_index(0)
rows = sh.nrows
cn = {'stems': []}
n = 0
for c in sh.row(0):
if not Excel.empty_cell(c):
cid = Excel.cell_value(c).lower()
# line number column is sometimes named differently
if cid in ["sub", "number"]:
cid = "line"
# the "number of individuals" column is named in various
# different ways; they always at least contain "nd"
if "nd" in cid:
cid = "count"
# if column is a stem, add it to the list of stems;
# otherwise, make note of the column name/number
if "stem" in cid:
cn["stems"].append(n)
else:
cn[cid] = n
n += 1
# sometimes, a data file does not contain a liana or count column
if not "liana" in cn.keys():
cn["liana"] = -1
if not "count" in cn.keys():
cn["count"] = -1
for i in range(1, rows):
row = sh.row(i)
cellcount = len(row)
# make sure the row is real, not just empty cells
if cellcount > 4 and not Excel.empty_cell(row[0]):
try:
this_line = {}
def format_value(s):
s = Excel.cell_value(s)
return str(s).title().replace("\\", "/").replace('"', '')
# get the following information from the appropriate columns
for i in ["line", "family", "genus", "species",
"liana", "count"]:
if cn[i] > -1:
this_line[i] = format_value(row[cn[i]])
if this_line[i] == '`':
this_line[i] = 1
this_line["stems"] = [Excel.cell_value(row[c])
for c in cn["stems"]
if not Excel.empty_cell(row[c])]
this_line["site"] = filename[0:-4]
lines.append(this_line)
# Check how far the species is identified
full_id = 0
if len(this_line["species"]) < 3:
if len(this_line["genus"]) < 3:
id_level = "family"
else:
id_level = "genus"
else:
id_level = "species"
full_id = 1
tax.append((this_line["family"],
this_line["genus"],
this_line["species"].lower().replace('\\', '').replace('"', ''),
id_level,
str(full_id)))
except:
# re-raise so malformed rows are not silently skipped
raise
tax = sorted(tax, key=lambda group: group[0] + " " + group[1] + " " + group[2])
unique_tax = []
tax_dict = dict()
tax_count = 0
# Get all unique families/genera/species
for group in tax:
if not (group in unique_tax):
unique_tax.append(group)
#......... part of the code omitted here .........
Example 9: download
def download(self, engine=None, debug=False):
Script.download(self, engine, debug)
engine = self.engine
filenames = ['Aquatic_animal_excretion_data.csv',
'Aquatic_animal_excretion_variable_descriptions.csv']
for file_paths in filenames:
if not os.path.isfile(engine.format_filename(file_paths)):
url = self.urls["aquatic_animals"]
engine.download_files_from_archive(url, filenames, "zip")
# processing Aquatic_animal_excretion_data.csv
filename = 'Aquatic_animal_excretion_data.csv'
tablename = 'aquatic_animals'
table = Table(str(tablename), delimiter=',')
table.columns = [
("index", ("pk-int",)),
("sourcenumber", ("int",)),
("sourcename", ("char",)),
("speciesname", ("char",)),
("speciescode", ("char",)),
("invert/vert", ("char",)),
("phylum", ("char",)),
("class", ("char",)),
("order", ("char",)),
("family", ("char",)),
("trophicgild", ("char",)),
("drymass", ("double",)),
("logdrymass", ("double",)),
("ecosystemtype", ("char",)),
("energysource", ("char",)),
("habitat", ("char",)),
("residentecosystem", ("char",)),
("temperature", ("double",)),
("nexcretionrate", ("double",)),
("pexcretionrate", ("double",)),
("lognexcretionrate", ("double",)),
("logpexcretionrate", ("double",)),
("incubationtime", ("double",)),
("nform", ("char",)),
("pform", ("char",)),
("bodyc", ("double",)),
("bodyn", ("double",)),
("bodyp", ("double",)),
("bodyc:n", ("double",)),
("bodyc:p", ("double",)),
("bodyn:p", ("double",)),
("bodydatasource", ("char",)),
("datasource", ("char",)),
("dataproviders", ("char",))]
engine.table = table
engine.create_table()
engine.insert_data_from_file(engine.format_filename(str(filename)))
# processing Aquatic_animal_excretion_variable_descriptions.csv
filename = 'Aquatic_animal_excretion_variable_descriptions.csv'
tablename = 'variable_descriptions'
table = Table(str(tablename), delimiter=',')
table.columns = [
("Column", ("char",)),
("Variable", ("char",)),
("Description", ("char",)),
("Data Class", ("char",)),
("Units", ("char",)),
("Minimum_value", ("char",)),
("Maximum_value", ("char",)),
("Possible_values", ("char",)),
("Missing_data_symbol", ("char",)),
("Notes", ("char",))]
engine.table = table
engine.create_table()
engine.insert_data_from_file(engine.format_filename(str(filename)))
Example 10: download
def download(self, engine=None, debug=False):
Script.download(self, engine, debug)
self.engine.auto_create_table(Table("sites"), url=self.urls["sites"], filename='gentry_sites.csv')
self.engine.insert_data_from_url(self.urls["sites"])
self.engine.download_file(self.urls["stems"], "all_Excel.zip")
local_zip = zipfile.ZipFile(self.engine.format_filename("all_Excel.zip"))
filelist = local_zip.namelist()
local_zip.close()
self.engine.download_files_from_archive(self.urls["stems"], filelist)
filelist = [os.path.basename(filename) for filename in filelist]
# Currently all_Excel.zip is missing CURUYUQU.xls
# Download it separately and add it to the file list
if not self.engine.find_file('CURUYUQU.xls'):
self.engine.download_file("http://www.mobot.org/mobot/gentry/123/samerica/CURUYUQU.xls", "CURUYUQU.xls")
filelist.append('CURUYUQU.xls')
lines = []
tax = []
for filename in filelist:
print("Extracting data from " + filename + "...")
book = xlrd.open_workbook(self.engine.format_filename(filename))
sh = book.sheet_by_index(0)
rows = sh.nrows
cn = {'stems': []}
n = 0
for colnum, c in enumerate(sh.row(0)):
if not Excel.empty_cell(c):
cid = c.value.lower().strip()
# line number column is sometimes named differently
if cid in ["sub", "number"]:
cid = "line"
# the "number of individuals" column is named in various
# different ways; they always at least contain "nd"
if "nd" in cid:
cid = "count"
# in QUIAPACA.xls the "number of individuals" column is
# misnamed "STEMDBH" just like the stems columns, so weep
# for the state of scientific data and then fix manually
if filename == "QUIAPACA.xls" and colnum == 13:
cid = "count"
# if column is a stem, add it to the list of stems;
# otherwise, make note of the column name/number
if "stem" in cid or "dbh" in cid:
cn["stems"].append(n)
else:
cn[cid] = n
n += 1
# sometimes, a data file does not contain a liana or count column
if not "liana" in list(cn.keys()):
cn["liana"] = -1
if not "count" in list(cn.keys()):
cn["count"] = -1
for i in range(1, rows):
row = sh.row(i)
cellcount = len(row)
# make sure the row is real, not just empty cells
if not all(Excel.empty_cell(cell) for cell in row):
try:
this_line = {}
# get the following information from the appropriate columns
for i in ["line", "family", "genus", "species",
"liana", "count"]:
if cn[i] > -1:
if row[cn[i]].ctype != 2:
# if the cell type(ctype) is not a number
this_line[i] = row[cn[i]].value.lower().strip().replace("\\", "/").replace('"', '')
else:
this_line[i] = row[cn[i]].value
if this_line[i] == '`':
this_line[i] = 1
this_line["stems"] = [row[c]
for c in cn["stems"]
if not Excel.empty_cell(row[c])]
this_line["site"] = filename[0:-4]
# Manually correct CEDRAL data, which has a single line
# that is shifted by one to the left starting at Liana
if this_line["site"] == "CEDRAL" and type(this_line["liana"]) == float:
this_line["liana"] = ""
this_line["count"] = 3
this_line["stems"] = [2.5, 2.5, 30, 18, 25]
lines.append(this_line)
# Check how far the species is identified
full_id = 0
if len(this_line["species"]) < 3:
if len(this_line["genus"]) < 3:
id_level = "family"
else:
id_level = "genus"
else:
id_level = "species"
full_id = 1
#......... part of the code omitted here .........
Example 11: download
def download(self, engine=None, debug=False):
Script.download(self, engine, debug)
engine = self.engine
filename = "database.csv"
tablename = "predicts_main"
table = Table(str(tablename), delimiter=',')
table.columns = [("Source_ID", ("char",)),
("Reference", ("char",)),
("Study_number", ("int",)),
("Study_name", ("char",)),
("SS", ("char",)),
("Diversity_metric", ("char",)),
("Diversity_metric_unit", ("char",)),
("Diversity_metric_type", ("char",)),
("Diversity_metric_is_effort_sensitive", ("char",)),
("Diversity_metric_is_suitable_for_Chao", ("char",)),
("Sampling_method", ("char",)),
("Sampling_effort_unit", ("char",)),
("Study_common_taxon", ("char",)),
("Rank_of_study_common_taxon", ("char",)),
("Site_number", ("int",)),
("Site_name", ("char",)),
("Block", ("char",)),
("SSS", ("char",)),
("SSB", ("char",)),
("SSBS", ("char",)),
("Sample_start_earliest", ("char",)),
("Sample_end_latest", ("char",)),
("Sample_midpoint", ("char",)),
("Sample_date_resolution", ("char",)),
("Max_linear_extent_metres", ("double",)),
("Habitat_patch_area_square_metres", ("double",)),
("Sampling_effort", ("double",)),
("Rescaled_sampling_effort", ("double",)),
("Habitat_as_described", ("char",)),
("Predominant_land_use", ("char",)),
("Source_for_predominant_land_use", ("char",)),
("Use_intensity", ("char",)),
("Km_to_nearest_edge_of_habitat", ("double",)),
("Years_since_fragmentation_or_conversion", ("double",)),
("Transect_details", ("char",)),
("Coordinates_method", ("char",)),
("Longitude", ("double",)),
("Latitude", ("double",)),
("Country_distance_metres", ("double",)),
("Country", ("char")),
("UN_subregion", ("char",)),
("UN_region", ("char",)),
("Ecoregion_distance_metres", ("double",)),
("Ecoregion", ("char",)),
("Biome", ("char",)),
("Realm", ("char",)),
("Hotspot", ("char",)),
("Wilderness_area", ("char",)),
("N_samples", ("double",)),
("Taxon_number", ("double",)),
("Taxon_name_entered", ("char",)),
("Indication", ("char",)),
("Parsed_name", ("char",)),
("Taxon", ("char",)),
("COL_ID", ("double",)),
("Name_status", ("char",)),
("Rank", ("char",)),
("Kingdom", ("char",)),
("Phylum", ("char",)),
("Class", ("char",)),
("Order", ("char",)),
("Family", ("char",)),
("Genus", ("char",)),
("Species", ("char",)),
("Best_guess_binomial", ("char",)),
("Higher_taxa", ("char",)),
("Higher_taxon", ("char",)),
("Measurement", ("double",)),
("Effort_corrected_measurement", ("double",))]
engine.table = table
if not os.path.isfile(engine.format_filename(filename)):
engine.download_files_from_archive(self.urls["PREDICTS"],
[filename],
"zip",
False,
"download.zip")
engine.create_table()
engine.insert_data_from_file(engine.format_filename(str(filename)))
Example 12: download
def download(self, engine=None, debug=False):
Script.download(self, engine, debug)
self.engine.download_file(self.urls["GWDD"], "GlobalWoodDensityDatabase.xls")
filename = os.path.basename("GlobalWoodDensityDatabase.xls")
book = xlrd.open_workbook(self.engine.format_filename(filename))
sh = book.sheet_by_index(1)
rows = sh.nrows
#Creating data table
lines = []
for i in range(1, rows):
row = sh.row(i)
if not all(Excel.empty_cell(cell) for cell in row):
this_line = {}
def format_value(s):
s = Excel.cell_value(s)
return str(s).title().replace("\\", "/").replace('"', '')
for num, label in enumerate(["Number", "Family", "Binomial", "Wood_Density",
"Region", "Reference_Number"]):
this_line[label] = format_value(row[num])
lines.append(this_line)
table = Table("data", delimiter="\t")
table.columns=[("Number" , ("pk-int",) ),
("Family" , ("char",) ),
("Binomial" , ("char",) ),
("Wood_Density" , ("double",) ),
("Region" , ("char",) ),
("Reference_Number" , ("int",) )]
table.pk = 'Number'
table.contains_pk = True
gwdd = []
for line in lines:
gwdd_data = [line["Number"],
line["Family"],
line["Binomial"],
line["Wood_Density"],
line["Region"],
line["Reference_Number"]]
gwdd.append(gwdd_data)
data = ['\t'.join(gwdd_line) for gwdd_line in gwdd]
self.engine.table = table
self.engine.create_table()
self.engine.add_to_table(data)
#Creating reference table
lines = []
sh = book.sheet_by_index(2)
rows = sh.nrows
for i in range(1, rows):
row = sh.row(i)
if not all(Excel.empty_cell(cell) for cell in row):
this_line = {}
def format_value(s):
s = Excel.cell_value(s)
return str(s).title().replace("\\", "/").replace('"', '')
for num, label in enumerate(["Reference_Number", "Reference"]):
this_line[label] = format_value(row[num])
lines.append(this_line)
table = Table("reference", delimiter="\t")
table.columns=[("Reference_Number" , ("pk-int",) ),
("Reference" , ("char",) )]
table.pk = 'Reference_Number'
table.contains_pk = True
gwdd = []
for line in lines:
gwdd_ref = [line["Reference_Number"],
line["Reference"]]
gwdd.append(gwdd_ref)
data = ['\t'.join(gwdd_line) for gwdd_line in gwdd]
self.engine.table = table
self.engine.create_table()
self.engine.add_to_table(data)
return self.engine
Example 13: download
def download(self, engine=None, debug=False):
Script.download(self, engine, debug)
engine = self.engine
engine.download_files_from_archive(self.urls["capture"], archive_type="zip")
# Convert xlsx to csv.
xlsx_file = self.engine.format_filename("DSD_FI_CAPTURE.xlsx")
file_path = self.engine.format_filename("DSD_CAPTURE.csv")
book = xlrd.open_workbook(xlsx_file)
sh = book.sheet_by_index(0)
rows = sh.nrows
# Creating data files
new_data = open_fw(file_path)
csv_writer = open_csvw(new_data)
csv_writer.writerow(["Order", "Concept_id",
"Role_Type", "Codelist_id",
"Codelist_Code_id", "Description"])
for index in range(2, rows):
row = sh.row(index)
# Get each row and format the cell value.
# Data starts at index 2
row_as_list = [to_str(column_value.value) for column_value in row]
csv_writer.writerow(row_as_list)
new_data.close()
file_names = [
('CL_FI_UNIT.csv', 'unit_data'),
('CL_FI_WATERAREA_GROUPS.csv', 'waterarea_groups'),
('DSD_CAPTURE.csv', 'dsd_capture_data'),
('CL_FI_SPECIES_GROUPS.csv', 'species_group')
]
for (filename, tablename) in file_names:
data_path = self.engine.format_filename(filename)
table = Table(tablename, delimiter=',', cleanup=self.cleanup_func_table)
self.engine.auto_create_table(table, filename=filename)
self.engine.insert_data_from_file(data_path)
# CL_FI_COUNTRY_GROUPS.csv mixes encodings, so its columns are declared explicitly
file_names_encoded = [
('CL_FI_COUNTRY_GROUPS.csv', 'country_groups'),
]
for (filename, tablename) in file_names_encoded:
data_path = self.engine.format_filename(filename)
table = Table(tablename, delimiter=',', cleanup=self.cleanup_func_table)
table.columns = [('UN_Code', ('int', )),
('Identifier', ('int', )),
('ISO2_Code', ('char', '5')),
('ISO3_Code', ('char', '5')),
('Name_En', ('char', '50')),
('Name_Fr', ('char', '50')),
('Name_Es', ('char', '50')),
('Name_Ar', ('char', '120')),
('Name_Cn', ('char', '90')),
('Name_Ru', ('char', '150')),
('Official_Name_En', ('char', '70')),
('Official_Name_Fr', ('char', '70')),
('Official_Name_Es', ('char', '70')),
('Official_Name_Ar', ('char', '1100')),
('Official_Name_Cn', ('char', '70')),
('Official_Name_Ru', ('char', '130')),
('Continent_Group', ('char', '15')),
('EcoClass_Group', ('char', '50')),
('GeoRegion_Group', ('char', '30'))]
self.engine.auto_create_table(table, filename=filename)
self.engine.insert_data_from_file(data_path)
# TS_FI_CAPTURE.csv likewise gets explicit column definitions
file_names_encoded = [
('TS_FI_CAPTURE.csv', 'ts_capture_data',)
]
for (filename, tablename) in file_names_encoded:
data_path = self.engine.format_filename(filename)
table = Table(tablename, delimiter=',', cleanup=self.cleanup_func_table)
table.columns = [('COUNTRY', ('int', )),
('FISHING_AREA', ('int', )),
('SPECIES', ('char', '10')),
('YEAR', ('int', )),
('UNIT', ('char', '5')),
('QUANTITY', ('double', )),
('SYMBOL', ('char', '4'))]
self.engine.auto_create_table(table, filename=filename)
self.engine.insert_data_from_file(data_path)
Example 14: download
def download(self, engine=None, debug=False):
Script.download(self, engine, debug)
engine = self.engine
engine.download_files_from_archive(self.urls["data"], ["Data_Files/Amniote_Database_Aug_2015.csv",
"Data_Files/Amniote_Database_References_Aug_2015.csv",
"Data_Files/Amniote_Range_Count_Aug_2015.csv"],
filetype="zip")
ct_column = 'trait' # all tables use the same ct_column name
# Create tables from Amniote_Database_Aug.csv and Amniote_Database_References_Aug_2015.csv
# Both reference and main have the same headers
ct_names = ['female_maturity_d', 'litter_or_clutch_size_n', 'litters_or_clutches_per_y', 'adult_body_mass_g',
'maximum_longevity_y', 'gestation_d', 'weaning_d', 'birth_or_hatching_weight_g', 'weaning_weight_g',
'egg_mass_g', 'incubation_d', 'fledging_age_d', 'longevity_y', 'male_maturity_d',
'inter_litter_or_interbirth_interval_y', 'female_body_mass_g', 'male_body_mass_g',
'no_sex_body_mass_g', 'egg_width_mm', 'egg_length_mm', 'fledging_mass_g', 'adult_svl_cm',
'male_svl_cm', 'female_svl_cm', 'birth_or_hatching_svl_cm', 'female_svl_at_maturity_cm',
'female_body_mass_at_maturity_g', 'no_sex_svl_cm', 'no_sex_maturity_d']
# Create table main from Amniote_Database_Aug_2015.csv
columns = [
('record_id', ('pk-auto',)), ('class', ('char', '20')), ('order', ('char', '20')),
('family', ('char', '20')), ('genus', ('char', '20')), ('species', ('char', '50')),
('subspecies', ('char', '20')), ('common_name', ('char', '400')), ('trait_value', ('ct-double',))]
table_main = Table('main', delimiter=',', cleanup=self.cleanup_func_table)
table_main.ct_column = ct_column
table_main.ct_names = ct_names
table_main.columns = columns
engine.auto_create_table(table_main,
filename="Amniote_Database_Aug_2015.csv")
engine.insert_data_from_file(engine.format_filename("Amniote_Database_Aug_2015.csv"))
# Create table reference from Amniote_Database_References_Aug_2015.csv
reference_columns = [
('record_id', ('pk-auto',)), ('class', ('char', '20')), ('order', ('char', '20')),
('family', ('char', '20')), ('genus', ('char', '20')), ('species', ('char', '50')),
('subspecies', ('char', '20')), ('common_name', ('char', '400')), ('reference', ('ct-char',))]
table_references = Table('references', delimiter=',', cleanup=self.cleanup_func_table)
table_references.ct_column = ct_column
table_references.ct_names = ct_names
table_references.columns = reference_columns
engine.auto_create_table(table_references,
filename="Amniote_Database_References_Aug_2015.csv")
engine.insert_data_from_file(engine.format_filename("Amniote_Database_References_Aug_2015.csv"))
# Create table Range
# This table has different values for headers from the above tables.
range_ct_names = ["min_female_maturity", "max_female_maturity", "count_female_maturity", "min_litter_clutch_size",
"max_litter_clutch_size", "count_litter_clutch_size", "min_litters_clutches",
"max_litters_clutches", "count_litters_clutches", "min_adult_body_mass", "max_adult_body_mass",
"count_adult_body_mass", "min_maximum_longevity", "max_maximum_longevity",
"count_maximum_longevity", "min_gestation", "max_gestation", "count_gestation", "min_weaning",
"max_weaning", "count_weaning", "min_birth_hatching_weight", "max_birth_hatching_weight",
"count_birth_hatching_weight", "min_weaning_weight", "max_weaning_weight", "count_weaning_weight",
"min_egg_mass", "max_egg_mass", "count_egg_mass", "min_incubation", "max_incubation",
"count_incubation", "min_fledging_age", "max_fledging_age", "count_fledging_age",
"min_male_maturity", "max_male_maturity", "count_male_maturity",
"min_inter_litter_interbirth_interval", "max_inter_litter_interbirth_interval",
"count_inter_litter_interbirth_interval", "min_female_body_mass", "max_female_body_mass",
"count_female_body_mass", "min_male_body_mass", "max_male_body_mass", "count_male_body_mass",
"min_no_sex_body_mass", "max_no_sex_body_mass", "count_no_sex_body_mass", "min_egg_width",
"max_egg_width", "count_egg_width", "min_egg_length", "max_egg_length", "count_egg_length",
"min_fledging_mass", "max_fledging_mass", "count_fledging_mass", "min_adult_svl", "max_adult_svl",
"count_adult_svl", "min_male_svl", "max_male_svl", "count_male_svl", "min_female_svl",
"max_female_svl", "count_female_svl", "min_hatching_svl", "max_hatching_svl", "count_hatching_svl",
"min_female_svl_at_maturity", "max_female_svl_at_maturity", "count_female_svl_at_maturity",
"min_female_body_mass_at_maturity", "max_female_body_mass_at_maturity",
"count_female_body_mass_at_maturity", "min_no_sex_svl", "max_no_sex_svl", "count_no_sex_svl",
"min_no_sex_maturity", "max_no_sex_maturity", "count_no_sex_maturity"]
range_columns = [
('record_id', ('pk-auto',)), ('classx', ('char', '20')), ('orderx', ('char', '20')),
('familyx', ('char', '20')), ('genus', ('char', '20')), ('species', ('char', '50')),
('subspecies', ('char', '20')), ('common_name', ('char', '400')), ('trait_value', ('ct-double',))]
table_range = Table('range', delimiter=',', cleanup=self.cleanup_func_table)
table_range.ct_column = ct_column
table_range.ct_names = range_ct_names
table_range.columns = range_columns
engine.auto_create_table(table_range,
filename="Amniote_Range_Count_Aug_2015.csv")
engine.insert_data_from_file(engine.format_filename("Amniote_Range_Count_Aug_2015.csv"))
Example 15: download
def download(self, engine=None, debug=False):
try:
Script.download(self, engine, debug)
engine = self.engine
# Routes table
if not os.path.isfile(engine.format_filename("routes_new.csv")):
engine.download_files_from_archive(self.urls["routes"],
["routes.csv"])
read = open(engine.format_filename("routes.csv"), "rb")
write = open(engine.format_filename("routes_new.csv"), "wb")
print "Cleaning routes data..."
write.write(read.readline())
for line in read:
values = line.split(',')
v = Decimal(values[5])
if v > 0:
values[5] = str(v * Decimal("-1"))
write.write(','.join(str(value) for value in values))
write.close()
read.close()
engine.auto_create_table(Table("routes", cleanup=Cleanup()),
filename="routes_new.csv")
engine.insert_data_from_file(engine.format_filename("routes_new.csv"))
# Weather table
if not os.path.isfile(engine.format_filename("weather_new.csv")):
engine.download_files_from_archive(self.urls["weather"],
["weather.csv"])
read = open(engine.format_filename("weather.csv"), "rb")
write = open(engine.format_filename("weather_new.csv"), "wb")
print "Cleaning weather data..."
for line in read:
values = line.split(',')
newvalues = []
for value in values:
if ':' in value:
newvalues.append(value.replace(':', ''))
elif value == "N":
newvalues.append(None)
else:
newvalues.append(value)
write.write(','.join(str(value) for value in newvalues))
write.close()
read.close()
engine.auto_create_table(Table("weather", pk="RouteDataId", cleanup=Cleanup()),
filename="weather_new.csv")
engine.insert_data_from_file(engine.format_filename("weather_new.csv"))
# Species table
table = Table("species", pk=False, delimiter=',')
table.columns=[("species_id" , ("pk-auto",) ),
("AOU" , ("int",) ),
("genus" , ("char",30) ),
("species" , ("char",50) ),
("subspecies" , ("char",30) ),
("id_to_species" , ("bool",) )]
engine.table = table
engine.create_table()
engine.download_file(self.urls["species"], "SpeciesList.txt")
species_list = open(engine.format_filename("SpeciesList.txt"), "rb")
for n in range(8):
species_list.readline()
rows = []
for line in species_list:
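# The species list is fixed-width; the scientific (Latin) name starts at character 273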
if line and len(line) > 273:
latin_name = line[273:].split()
if len(latin_name) < 2:
# If there's no species given, add "None" value
latin_name.append("None")
subspecies = ' '.join(latin_name[2:]) if len(latin_name) > 2 else "None"
id_to_species = "1" if latin_name[1] != "None" else "0"
if latin_name[1] == "sp.":
latin_name[1] = "None"
id_to_species = "0"
if ("x" in latin_name or "/" in latin_name
or "/" in subspecies or "or" in latin_name):
# Hybrid species or only identified to a group of species
latin_name[1] = ' '.join(latin_name[1:])
subspecies = "None"
id_to_species = "0"
rows.append(','.join([
line.split()[1],
latin_name[0],
latin_name[1],
subspecies,
id_to_species
]))
#......... part of the code omitted here .........