本文整理汇总了 Python 中 Orange.data.StringVariable.make 方法的典型用法代码示例。如果您正苦于以下问题：Python StringVariable.make 方法的具体用法？Python StringVariable.make 怎么用？Python StringVariable.make 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 Orange.data.StringVariable 的用法示例。
在下文中一共展示了StringVariable.make方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: concatenate_data
# 需要导入模块: from Orange.data import StringVariable [as 别名]
# 或者: from Orange.data.StringVariable import make [as 别名]
def concatenate_data(tables, filenames, label):
    """Stack several spectral tables into one table on a shared domain.

    Each entry of ``tables`` is either a ``Table`` or an indexable object
    whose item 2 is the ``Table``; for such entries item 0 is looked up among
    the union x positions and item 1 is written into the matching columns of
    ``X``.  Two string metas, "Filename" and "Label", are appended: the first
    is filled per table from ``filenames``, the second with ``label``.

    Returns the concatenated table.
    """
    def as_table(entry):
        # Entries that are not tables carry their table at index 2.
        return entry if isinstance(entry, Table) else entry[2]

    domain, xs = domain_union_for_spectra(tables)
    ntables = [as_table(entry).transform(domain) for entry in tables]
    data = type(ntables[0]).concatenate(ntables, axis=0)

    source_var = StringVariable.make("Filename")
    label_var = StringVariable.make("Label")

    # Prepend one continuous attribute per x position and add the two metas.
    xs_atts = tuple(ContinuousVariable.make("%f" % x) for x in xs)
    domain = Domain(
        xs_atts + domain.attributes,
        domain.class_vars,
        domain.metas + (source_var, label_var),
    )
    data = data.transform(domain)

    # Fill in the spectral values for the non-table entries; searching in the
    # sorted x positions and mapping back through the sort order gives the
    # column index of each x value.
    order = np.argsort(xs)
    sorted_xs = xs[order]
    row = 0
    for entry in tables:
        n_rows = len(as_table(entry))
        if not isinstance(entry, Table):
            columns = order[np.searchsorted(sorted_xs, entry[0])]
            data.X[row:row + n_rows, columns] = entry[1]
        row += n_rows

    # One file name / label per row, repeated for every row of each table.
    sources = [fn
               for fn, tbl in zip(filenames, ntables)
               for _ in range(len(tbl))]
    labels = [label
              for _fn, tbl in zip(filenames, ntables)
              for _ in range(len(tbl))]
    data[:, source_var] = np.array(sources).reshape(-1, 1)
    data[:, label_var] = np.array(labels).reshape(-1, 1)
    return data
示例2: test_nyt_corpus_domain_generation
# 需要导入模块: from Orange.data import StringVariable [as 别名]
# 或者: from Orange.data.StringVariable import make [as 别名]
def test_nyt_corpus_domain_generation(self):
    """The 'slovenia' query yields the expected number of metas and 10 class values."""
    corpus = self.nyt.run_query('slovenia')

    # One string meta per NYT text field, plus the two fixed ones.
    expected_metas = [StringVariable.make(name) for name in NYT_TEXT_FIELDS]
    expected_metas.append(StringVariable.make('pub_date'))
    expected_metas.append(StringVariable.make('country'))

    self.assertEqual(len(expected_metas), len(corpus.domain.metas))
    self.assertEqual(len(corpus.Y), 10)
示例3: parse_record_json
# 需要导入模块: from Orange.data import StringVariable [as 别名]
# 或者: from Orange.data.StringVariable import make [as 别名]
def parse_record_json(record, includes_metadata):
    """
    Parses the JSON representation of the record returned by the New York Times Article API.

    :param record: The JSON representation of the query's results; the
        entries of ``record["response"]["docs"]`` are parsed.
    :param includes_metadata: The flags that determine which fields to
        include, one per text field in the order headline, lead_paragraph,
        snippet, abstract, keywords.
    :return: A tuple ``(documents, metadata, meta_vars, class_values)``:
        the concatenated text of each article, an object array with one row
        per article and one column per meta variable, the meta variables
        themselves, and each article's ``section_name`` (``""`` if absent).
    """
    text_fields = ["headline", "lead_paragraph", "snippet", "abstract", "keywords"]
    selected_fields = [field for field, flag in zip(text_fields, includes_metadata) if flag]

    meta_vars = [StringVariable.make(field) for field in selected_fields]
    # Also add pub_date and glocation.
    meta_vars += [StringVariable.make("pub_date"), StringVariable.make("country")]

    documents = []
    class_values = []
    rows = []
    for doc in record["response"]["docs"]:
        metas_row = []
        for field in selected_fields:
            raw = doc.get(field)
            if isinstance(raw, dict):
                field_value = " ".join([val for val in raw.values() if val])
            elif isinstance(raw, list):
                field_value = " ".join([kw["value"] for kw in raw if kw])
            else:
                # A missing or empty field still occupies its column, so that
                # every row has exactly len(meta_vars) entries.
                field_value = raw or ""
            metas_row.append(field_value)
        string_document = "".join(metas_row)

        # Add the pub_date.
        metas_row.append(doc.get("pub_date") or "")
        # Add the glocation; tolerate articles without any keywords.
        metas_row.append(",".join(
            kw["value"] for kw in (doc.get("keywords") or [])
            if kw["name"] == "glocations"))

        documents.append(string_document)
        # Add the section_name.
        class_values.append(doc.get("section_name") or "")
        rows.append(metas_row)

    # Build the array once instead of re-stacking it for every article; the
    # reshape keeps the (0, n_metas) shape when there are no articles.
    metadata = np.array(rows, dtype=object).reshape(-1, len(meta_vars))
    return documents, metadata, meta_vars, class_values
示例4: generate_corpus
# 需要导入模块: from Orange.data import StringVariable [as 别名]
# 或者: from Orange.data.StringVariable import make [as 别名]
def generate_corpus(self, url_list):
    """
    Build a corpus holding the user-selected fields of every URL and send it
    on the "Corpus" output.

    :param url_list: URLs, each passed to ``_get_info``.
    :return: None. The corpus is stored in ``self.output_corpus``; if no
        field is selected a warning is shown and nothing is sent.
    """
    selected = [self.includes_article, self.includes_author, self.includes_date,
                self.includes_title, self.includes_web_url]
    if True not in selected:
        self.warning(1, "You must select at least one text field.")
        return

    # One string meta per selected field.
    field_names = [name for keep, name in zip(selected, ARTICLE_TEXT_FIELDS) if keep]
    meta_vars = [StringVariable.make(name) for name in field_names]

    rows = []
    for url in url_list:
        info, _ = _get_info(url)
        rows.append([value for keep, value in zip(selected, info) if keep])
    metadata = np.array(rows, dtype=object)

    domain = Domain([], class_vars=None, metas=meta_vars)
    self.output_corpus = Corpus(None, None, metadata, domain, meta_vars)
    self.send("Corpus", self.output_corpus)
示例5: _corpus_from_records
# 需要导入模块: from Orange.data import StringVariable [as 别名]
# 或者: from Orange.data.StringVariable import make [as 别名]
def _corpus_from_records(records, includes_metadata):
    """Turn PubMed records into a corpus.

    Args:
        records (list): A list of PubMed entries.
        includes_metadata (list): Pairs whose first item is the name of a
            text field to include.

    Returns:
        corpus: The output Corpus, with 'section_name' as the class variable.
    """
    meta_values, class_values = _records_to_corpus_entries(
        records, includes_metadata=includes_metadata)

    # 'pub_date' becomes a time variable, every other field a string one.
    meta_vars = [
        TimeVariable(name) if name == 'pub_date' else StringVariable.make(name)
        for name, _ in includes_metadata
    ]

    section_var = DiscreteVariable('section_name',
                                   values=list(set(class_values)))
    domain = Domain([], class_vars=[section_var], metas=meta_vars)

    codes = [section_var.to_val(value) for value in class_values]
    Y = np.array(codes)[:, None]
    return Corpus(domain=domain, Y=Y, metas=meta_values)
示例6: _create_corpus
# 需要导入模块: from Orange.data import StringVariable [as 别名]
# 或者: from Orange.data.StringVariable import make [as 别名]
def _create_corpus(self):
    """Build a corpus from ``self._text_data``.

    Each item contributes its name, path and content as string metas; the
    category is used as the class only when more than one distinct category
    is present.  Returns None when there are no items.
    """
    meta_names = ["name", "path", "content"]

    text_categories = list(set(item.category for item in self._text_data))
    category_var = DiscreteVariable.make(
        "category", values=list(set(text_categories)))

    rows = []
    category_codes = []
    for item in self._text_data:
        rows.append([item.name, item.path, item.content])
        category_codes.append(category_var.to_val(item.category))

    if len(text_categories) > 1:
        class_vars = category_var
        Y = np.array(category_codes)
    else:
        # A single category carries no information: no class column.
        class_vars = []
        Y = np.empty((len(rows), 0))

    domain = Domain(
        [], class_vars, [StringVariable.make(name) for name in meta_names])
    domain["name"].attributes["title"] = True

    metas = np.array(rows, dtype=object)
    if not len(metas):
        return None
    return Corpus(domain,
                  Y=Y,
                  metas=metas,
                  text_features=[domain.metas[2]])
示例7: _generate_corpus
# 需要导入模块: from Orange.data import StringVariable [as 别名]
# 或者: from Orange.data.StringVariable import make [as 别名]
def _generate_corpus(records, required_text_fields):
    """
    Build a corpus from the input NYT records.

    :param records: The input NYT records.
    :type records: list
    :param required_text_fields: A list of the available NYT text fields.
    :type required_text_fields: list
    :return: :class: `orangecontrib.text.corpus.Corpus`
    """
    metas, class_values = _parse_record_json(records, required_text_fields)

    # A document is the space-joined, stripped text of its non-None metas.
    documents = [
        " ".join([part for part in row if part is not None]).strip()
        for row in metas
    ]

    # Create domain.
    meta_vars = [StringVariable.make(name) for name in required_text_fields]
    meta_vars.append(StringVariable.make("pub_date"))
    meta_vars.append(StringVariable.make("country"))
    section_var = DiscreteVariable("section_name",
                                   values=list(set(class_values)))
    domain = Domain([], class_vars=[section_var], metas=meta_vars)

    codes = [section_var.to_val(value) for value in class_values]
    Y = np.array(codes)[:, None]
    return Corpus(documents, None, Y, metas, domain)
示例8: test_domaineditor_makes_variables
# 需要导入模块: from Orange.data import StringVariable [as 别名]
# 或者: from Orange.data.StringVariable import make [as 别名]
def test_domaineditor_makes_variables(self):
    # Variables created with domain editor should be interchangeable
    # with variables read from file.
    contents = """V0\tV1\nc\td\n\n1.0\t2"""
    expected_v0 = StringVariable.make("V0")
    expected_v1 = ContinuousVariable.make("V1")

    with named_file(contents, suffix=".tab") as filename:
        self.open_dataset(filename)

        # Set the type column (column 1) of both rows in the domain editor.
        editor_model = self.widget.domain_editor.model()
        for row, type_name in enumerate(("text", "numeric")):
            editor_model.setData(
                editor_model.createIndex(row, 1), type_name, Qt.EditRole)
        self.widget.apply_button.click()

        output = self.get_output(self.widget.Outputs.data)
        self.assertEqual(output.domain["V0"], expected_v0)
        self.assertEqual(output.domain["V1"], expected_v1)
示例9: transpose_table
# 需要导入模块: from Orange.data import StringVariable [as 别名]
# 或者: from Orange.data.StringVariable import make [as 别名]
def transpose_table(table):
    """
    Transpose the rows and columns of the table.

    The 'Gene' value of every row becomes the name of a continuous column.
    The attribute keys of the first variable of the input domain become the
    metas of the result ('Time' as a time variable, all others as string
    variables), filled from each original attribute's ``attributes`` dict.

    Args:
        table: Data in :obj:`Orange.data.Table`

    Returns:
        Transposed :obj:`Orange.data.Table`. (Genes as columns)
    """
    attrs = table.domain.attributes
    attr = [ContinuousVariable.make(ex['Gene'].value) for ex in table]
    # Set metas.  Compare by value: `is not` on a string literal tests object
    # identity, which only works by accident of string interning.
    new_metas = [TimeVariable.make(name) if name == 'Time' else StringVariable.make(name)
                 for name in sorted(table.domain.variables[0].attributes.keys())]
    domain = Domain(attr, metas=new_metas)
    meta_values = [[exp.attributes[var.name] for var in domain.metas] for exp in attrs]
    return Table(domain, table.X.transpose(), metas=meta_values)
示例10: _guess_variable
# 需要导入模块: from Orange.data import StringVariable [as 别名]
# 或者: from Orange.data.StringVariable import make [as 别名]
def _guess_variable(self, field_name, field_metadata, inspect_table):
    """Pick an Orange variable for a column from its type code.

    ``field_metadata[0]`` is the type code.  When ``inspect_table`` is
    truthy, integer and character columns are looked up with
    ``self.get_distinct_values`` and become discrete if that returns any
    values.  Columns of any other type fall back to a string variable.
    """
    type_code = field_metadata[0]

    FLOATISH_TYPES = (700, 701, 1700)  # real, float8, numeric
    INT_TYPES = (20, 21, 23)  # bigint, int, smallint
    CHAR_TYPES = (25, 1042, 1043,)  # text, char, varchar
    BOOLEAN_TYPES = (16,)  # bool
    DATE_TYPES = (1082, 1114, 1184, )  # date, timestamp, timestamptz
    # time, timestamp, timestamptz, timetz
    TIME_TYPES = (1083, 1114, 1184, 1266,)

    if type_code in FLOATISH_TYPES:
        return ContinuousVariable.make(field_name)

    is_date = type_code in DATE_TYPES
    is_time = type_code in TIME_TYPES
    if is_time or is_date:
        tv = TimeVariable.make(field_name)
        tv.have_date |= is_date
        tv.have_time |= is_time
        return tv

    if type_code in INT_TYPES:
        values = None
        if inspect_table:
            values = self.get_distinct_values(field_name, inspect_table)
        if values:
            return DiscreteVariable.make(field_name, values)
        return ContinuousVariable.make(field_name)

    if type_code in BOOLEAN_TYPES:
        return DiscreteVariable.make(field_name, ['false', 'true'])

    if type_code in CHAR_TYPES and inspect_table:
        distinct = self.get_distinct_values(field_name, inspect_table)
        # remove trailing spaces
        values = [v.rstrip() for v in distinct]
        if values:
            return DiscreteVariable.make(field_name, values)

    return StringVariable.make(field_name)
示例11: etc_to_table
# 需要导入模块: from Orange.data import StringVariable [as 别名]
# 或者: from Orange.data.StringVariable import make [as 别名]
def etc_to_table(self, etc_json, time_var=False, callback=lambda: None):
    """ Converts data from Json to :obj:`Orange.data.table`

    Args:
        etc_json (dict): Data in json like format; ``etc_json['etc']`` is
            read for its 'timePoints' and 'genes' entries.
        time_var (bool): Create column of time points. Default is set to False.
        callback: Progress callable handed to ``CallBack``.

    Returns:
        :obj:`Orange.data.Table`
    """
    cbc = CallBack(2, callback, callbacks=30)

    # One continuous column per time point, numbered from 1, remembering the
    # actual time in the variable's attributes.
    variables = []
    for time_point, time in enumerate(etc_json['etc']['timePoints'], start=1):
        var = ContinuousVariable('TP ' + str(time_point))
        var.attributes['Time'] = str(time)
        variables.append(var)

    meta_attr = StringVariable.make('Gene')
    domain = Domain(variables, metas=[meta_attr])
    cbc()

    # Each row holds the gene's expression values followed by the gene name.
    rows = []
    for gene in etc_json['etc']['genes']:
        rows.append(list(etc_json['etc']['genes'][gene]) + [gene])

    orange_table = Table(domain, rows)
    if time_var:
        orange_table = transpose_table(orange_table)

    cbc()
    cbc.end()
    return orange_table
示例12: _corpus_from_records
# 需要导入模块: from Orange.data import StringVariable [as 别名]
# 或者: from Orange.data.StringVariable import make [as 别名]
def _corpus_from_records(records, includes_metadata):
    """Turn PubMed records into a corpus.

    Args:
        records (list): A list of PubMed entries.
        includes_metadata (list): Pairs whose first item is the name of a
            text field to include.

    Returns:
        corpus: The output Corpus, with 'section' as the class variable.
    """
    meta_vars = []
    time_var = None
    for field_name, _ in includes_metadata:
        if field_name == PUBMED_FIELD_DATE:
            # The date field is the only time variable; keep a handle on it
            # for the record conversion below.
            time_var = TimeVariable(field_name)
            meta_vars.append(time_var)
            continue
        string_var = StringVariable.make(field_name)
        meta_vars.append(string_var)
        if field_name == PUBMED_FIELD_TITLE:
            string_var.attributes["title"] = True

    meta_values, class_values = _records_to_corpus_entries(
        records,
        includes_metadata=includes_metadata,
        time_var=time_var,
    )

    # Falsy class values are left out of the set of discrete values.
    section_names = list(map(str, set(filter(None, class_values))))
    section_var = DiscreteVariable('section', values=section_names)
    domain = Domain([], class_vars=[section_var], metas=meta_vars)

    codes = [section_var.to_val(value) for value in class_values]
    Y = np.array(codes)[:, None]
    return Corpus(domain=domain, Y=Y, metas=meta_values)
示例13: read
# 需要导入模块: from Orange.data import StringVariable [as 别名]
# 或者: from Orange.data.StringVariable import make [as 别名]
def read(self):
# Load spectral data from the file at self.filename using the opusFC package.
# NOTE(review): this listing has lost its indentation and the end of the
# function is omitted (see the marker on the last line), so how y_data,
# meta_data, attrs and metas are finally assembled is not visible here.
# opusFC is imported inside the function; if it cannot be imported a
# RuntimeError carrying self._OPUS_WARNING is raised instead.
try:
import opusFC
except ImportError:
raise RuntimeError(self._OPUS_WARNING)
# Use the selected sheet if there is one, otherwise the first available.
if self.sheet:
db = self.sheet
else:
db = self.sheets[0]
# The sheet name is split on single spaces into a tuple, which is what gets
# passed to opusFC.getOpusData; its second part is kept as `dim`
# (not used in the visible part of the function).
db = tuple(db.split(" "))
dim = db[1]
# Any exception raised while loading is reported as an IOError.
try:
data = opusFC.getOpusData(self.filename, db)
except Exception:
raise IOError("Couldn't load spectrum from " + self.filename)
attrs, clses, metas = [], [], []
# By default there is one continuous attribute per entry of data.x, named by
# the repr of that entry; the TRC branches below rebuild attrs from
# data.labels instead.
attrs = [ContinuousVariable.make(repr(data.x[i]))
for i in range(data.x.shape[0])]
y_data = None
meta_data = None
# The rest of the function branches on the exact type returned by opusFC.
if type(data) == opusFC.MultiRegionDataReturn:
# Several regions: metas are map_x, map_y, map_region and start_time; the
# spectra and the meta columns of all regions are stacked vertically.
y_data = []
meta_data = []
metas.extend([ContinuousVariable.make('map_x'),
ContinuousVariable.make('map_y'),
StringVariable.make('map_region'),
TimeVariable.make('start_time')])
for region in data.regions:
y_data.append(region.spectra)
mapX = region.mapX
mapY = region.mapY
# An object array shaped like mapX with every entry set to the region title.
map_region = np.full_like(mapX, region.title, dtype=object)
start_time = region.start_time
meta_region = np.column_stack((mapX, mapY,
map_region, start_time))
meta_data.append(meta_region.astype(object))
y_data = np.vstack(y_data)
meta_data = np.vstack(meta_data)
elif type(data) == opusFC.MultiRegionTRCDataReturn:
# Same as the previous branch but without start_time, and with the
# attributes named after data.labels.
y_data = []
meta_data = []
metas.extend([ContinuousVariable.make('map_x'),
ContinuousVariable.make('map_y'),
StringVariable.make('map_region')])
attrs = [ContinuousVariable.make(repr(data.labels[i]))
for i in range(len(data.labels))]
for region in data.regions:
y_data.append(region.spectra)
mapX = region.mapX
mapY = region.mapY
map_region = np.full_like(mapX, region.title, dtype=object)
meta_region = np.column_stack((mapX, mapY, map_region))
meta_data.append(meta_region.astype(object))
y_data = np.vstack(y_data)
meta_data = np.vstack(meta_data)
elif type(data) == opusFC.ImageDataReturn:
# Image data: data.spectra is walked along its first axis; each slice is
# stacked under the previous ones and paired with (map_x, map_y)
# coordinates built from data.mapX and the matching data.mapY entry.
metas.extend([ContinuousVariable.make('map_x'),
ContinuousVariable.make('map_y')])
data_3D = data.spectra
for i in np.ndindex(data_3D.shape[:1]):
map_y = np.full_like(data.mapX, data.mapY[i])
coord = np.column_stack((data.mapX, map_y))
if y_data is None:
y_data = data_3D[i]
meta_data = coord.astype(object)
else:
y_data = np.vstack((y_data, data_3D[i]))
meta_data = np.vstack((meta_data, coord))
elif type(data) == opusFC.ImageTRCDataReturn:
# As the image branch above, but reading data.traces and naming the
# attributes after data.labels.
metas.extend([ContinuousVariable.make('map_x'),
ContinuousVariable.make('map_y')])
attrs = [ContinuousVariable.make(repr(data.labels[i]))
for i in range(len(data.labels))]
data_3D = data.traces
for i in np.ndindex(data_3D.shape[:1]):
map_y = np.full_like(data.mapX, data.mapY[i])
coord = np.column_stack((data.mapX, map_y))
if y_data is None:
y_data = data_3D[i]
meta_data = coord.astype(object)
else:
y_data = np.vstack((y_data, data_3D[i]))
meta_data = np.vstack((meta_data, coord))
elif type(data) == opusFC.TimeResolvedTRCDataReturn:
y_data = data.traces
#.........part of the code is omitted here.........
示例14: read
# 需要导入模块: from Orange.data import StringVariable [as 别名]
# 或者: from Orange.data.StringVariable import make [as 别名]
def read(self):
    """Read the Matlab file at ``self.filename`` into an Orange table.

    Only numpy arrays found in the file are considered.  The numeric array
    with the most elements becomes ``X``.  An array whose shape matches the
    number of columns of ``X`` supplies the attribute names; otherwise the
    column indices are used.  Every remaining array with as many rows as
    ``X`` becomes a string meta column; without an ``X`` the most common
    array length is used as the row count.

    Returns:
        The table built with ``Orange.data.Table.from_numpy``.  When no
        meta array qualifies the table is built without metas.

    Raises:
        IOError: if ``matlab.whosmat`` reports nothing for the file.
    """
    who = matlab.whosmat(self.filename)
    if not who:
        raise IOError("Couldn't load matlab file " + self.filename)

    ml = matlab.loadmat(self.filename, chars_as_strings=True)
    ml = {a: b for a, b in ml.items() if isinstance(b, np.ndarray)}

    # X is the biggest numeric array
    numarrays = [(name, con.size) for name, con in ml.items()
                 if issubclass(con.dtype.type, numbers.Number)]
    X = None
    if numarrays:
        nameX = max(numarrays, key=lambda x: x[1])[0]
        X = ml.pop(nameX)

    # find an array with compatible shapes
    attributes = []
    if X is not None:
        nameattributes = None
        for name, con in ml.items():
            if con.shape in [(X.shape[1],), (1, X.shape[1])]:
                nameattributes = name
                break
        attributenames = ml.pop(nameattributes).ravel() if nameattributes else range(X.shape[1])
        attributenames = [str(a).strip() for a in attributenames]  # strip because of numpy char array
        attributes = [ContinuousVariable.make(a) for a in attributenames]

    # Number of rows the meta arrays must have.
    sizemetas = None
    if X is None:
        counts = defaultdict(list)
        for name, con in ml.items():
            counts[len(con)].append(name)
        if counts:
            sizemetas = max(counts.keys(), key=lambda x: len(counts[x]))
    else:
        sizemetas = len(X)

    metas = []
    if sizemetas:
        metas = [name for name, con in ml.items() if len(con) == sizemetas]

    metaattributes = []
    metadata = []
    for m in sorted(metas):
        f = ml[m]
        metaattributes.append(StringVariable.make(m))
        f.resize(sizemetas, 1)
        metadata.append(f)
    # np.hstack raises on an empty sequence, so pass no metas at all when no
    # array qualified.
    metadata = np.hstack(tuple(metadata)) if metadata else None

    domain = Domain(attributes, metas=metaattributes)
    if X is None:
        # Without any usable array sizemetas is None; build an empty table
        # rather than failing inside np.zeros.
        X = np.zeros((sizemetas or 0, 0))
    return Orange.data.Table.from_numpy(domain, X, Y=None, metas=metadata)