本文整理汇总了Python中lxml.etree.fromstring方法的典型用法代码示例。如果您正苦于以下问题:Python etree.fromstring方法的具体用法?Python etree.fromstring怎么用?Python etree.fromstring使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块lxml.etree的用法示例。
在下文中一共展示了etree.fromstring方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parseHTMLxpathSearch
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import fromstring [as 别名]
def parseHTMLxpathSearch(http_source, xpathString):
    """Run an XPath query against an HTML document and serialize the matches.

    Args:
        http_source: HTML source; coerced with ``str()`` and stripped of NUL
            characters before parsing.
        xpathString: XPath expression evaluated against the parsed document.

    Returns:
        A list holding one ``etree.tostring`` result per match that was
        processed before any failure. Errors while parsing or evaluating are
        swallowed on purpose (best effort), so the list may be empty or
        partial.
    """
    return_values = []
    http_source = str(http_source).replace('\x00', '')
    try:
        html = lxml.html.fromstring(http_source)
        for data in html.xpath(xpathString):
            # NOTE(review): lxml elements do not document a ``content``
            # attribute; if it is missing this raises AttributeError and the
            # function returns whatever was collected so far -- confirm
            # whether ``etree.tostring(data)`` was intended.
            return_values.append(etree.tostring(data.content))
            # Clear the matched node once it has been serialized.
            data.clear()
    except Exception:
        # Best effort by design: malformed HTML or a bad XPath yields the
        # partial result. ``Exception`` (rather than a bare ``except``) keeps
        # KeyboardInterrupt and SystemExit propagating.
        pass
    return return_values
#---------------------------------------------------------------------------------
# parse HTML and return value asked
开发者ID:kenb123,项目名称:Basic-Expression-Lexicon-Variation-Algorithms-BELVA,代码行数:25,代码来源:belvaParseXML.py
示例2: _parse_xml
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import fromstring [as 别名]
def _parse_xml(self, xml):
    """Extract the control endpoint from a MediaRenderer description XML.

    On success stores ``self.ip``, ``self.port`` (both strings) and
    ``self.control_url``.

    Args:
        xml: The description document, as accepted by ``etree.fromstring``.

    Returns:
        True if both the URL base and the control URL were found and the URL
        base carries a ``host:port`` network location, otherwise False.
    """
    # Parse MediaRenderer description XML
    xml_root = etree.fromstring(xml)
    namespaces = xml_root.nsmap
    # Drop the default-namespace entry (keyed by None) before handing the
    # prefix map to find().
    namespaces.pop(None, None)

    # Determine AVRC URL
    url_base = xml_root.find(self.MR_YAMAHA_URLBASE_PATH, namespaces)
    control_url = xml_root.find(self.MR_YAMAHA_CONTROLURL_PATH, namespaces)
    if url_base is None or control_url is None:
        return False

    # A netloc without an explicit port (or with extra colons) used to raise
    # ValueError on tuple unpacking; treat it as "not parseable" instead.
    netloc_parts = urlparse.urlparse(url_base.text or '').netloc.split(':')
    if len(netloc_parts) != 2:
        return False
    ip, port = netloc_parts
    if not ip or not port:
        return False

    self.ip = ip
    self.port = port
    self.control_url = control_url.text
    return True
示例3: mirrorRepomd
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import fromstring [as 别名]
def mirrorRepomd(cachedir, url):
    """Mirror a repository's primary metadata file into ``cachedir``.

    Reads ``<url>/repodata/repomd.xml`` to locate the primary ``.xml.gz``
    file and downloads it unless a file with the same basename is already
    cached. Older ``*.xml.gz`` files in ``cachedir`` are removed when a new
    one is fetched.

    Args:
        cachedir: Directory holding the cached primary file.
        url: Base URL of the repository.

    Returns:
        Path of the cached primary file inside ``cachedir``.

    Raises:
        Exception: If the primary location does not end in ``.xml.gz``.
        requests.HTTPError: If either HTTP request returns an error status.
    """
    # Use repomd.xml to get the location of primary.xml.gz
    repomd_response = requests.get('{}/repodata/repomd.xml'.format(url))
    repomd_response.raise_for_status()
    repoindex = ETL.fromstring(repomd_response.content)
    primarypath = repoindex.xpath("string(./repo:data[@type='primary']/repo:location/@href)",
                                  namespaces={'repo': 'http://linux.duke.edu/metadata/repo'})
    if not primarypath.endswith(".xml.gz"):
        raise Exception('unsupported primary format')

    primarydest = os.path.join(cachedir, os.path.basename(primarypath))
    if not os.path.exists(primarydest):
        # Download before touching the cache so a failed request neither
        # wipes the old files nor stores an error page as primary.xml.gz.
        primary_response = requests.get(url + '/' + primarypath)
        primary_response.raise_for_status()

        # Delete the old files first
        for oldfile in glob.glob(glob.escape(cachedir) + "/*.xml.gz"):
            os.unlink(oldfile)

        # Write to a temp file in the same directory and hard-link it into
        # place; the temp name is removed when the context exits.
        with tempfile.NamedTemporaryFile(dir=cachedir) as primarytemp:
            primarytemp.write(primary_response.content)
            primarytemp.flush()
            os.link(primarytemp.name, primarydest)
    return primarydest
示例4: create_package_container
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import fromstring [as 别名]
def create_package_container(self, project, package, meta=None, disable_build=False):
    """
    Create an empty package container in project/package.

    :param project: project to create the package in
    :param package: package name
    :param meta: package metadata XML; when falsy a minimal document with
                 empty title and description is generated for ``package``
    :param disable_build: when true, a ``<build><disable/></build>`` element
                          is appended to the metadata before upload
    """
    if meta:
        payload = meta
    else:
        payload = '<package name="{}"><title/><description/></package>'.format(package)

    if disable_build:
        package_root = ET.fromstring(payload)
        ET.SubElement(ET.SubElement(package_root, 'build'), 'disable')
        payload = ET.tostring(package_root)

    meta_url = self.makeurl(['source', project, package, '_meta'])
    http_PUT(meta_url, data=payload)
示例5: add_bugowner
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import fromstring [as 别名]
def add_bugowner(self, package, owner):
    """Add ``owner`` as bugowner to the package's ``_meta`` unless already set.

    :param package: package name inside ``self.project``
    :param owner: object exposing ``kind`` (``'person'`` selects the
                  ``userid`` attribute, anything else ``groupid``) and ``name``
    """
    meta_url = self.makeurl(['source', self.project, package, '_meta'])
    meta_root = ET.fromstring(self.cached_GET(meta_url))

    if owner.kind == 'person':
        id_attr = 'userid'
    else:
        id_attr = 'groupid'

    # XXX: can't use 'and' here to filter for bugowner too
    selector = './{}[@{}="{}"]'.format(owner.kind, id_attr, owner.name)
    already_bugowner = any(
        entry.get('role') == 'bugowner' for entry in meta_root.findall(selector)
    )
    if already_bugowner:
        logger.debug("%s/%s already has %s %s", self.project, package, owner.kind, owner.name)
        return

    new_entry = ET.SubElement(meta_root, owner.kind)
    new_entry.set(id_attr, owner.name)
    new_entry.set('role', 'bugowner')

    serialized = ET.tostring(meta_root)
    logger.debug(serialized)
    self.http_PUT(meta_url, data=serialized)
示例6: create_group
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import fromstring [as 别名]
def create_group(self, name, users=[]):
    """Create (or overwrite) a group via the API and remember its name.

    :param name: group name; also used as the ``<title>`` text
    :param users: user ids to list as group members; the default list is
                  only read, never mutated
    """
    meta = '\n<group>\n<title>{}</title>\n</group>\n'.format(name)

    if len(users) > 0:
        group_root = ET.fromstring(meta)
        # Members are nested as <person><person userid="..."/></person>.
        members = ET.SubElement(group_root, 'person')
        for userid in users:
            ET.SubElement(members, 'person', {'userid': userid})
        meta = ET.tostring(group_root)

    if name not in self.groups:
        self.groups.append(name)

    group_url = osc.core.makeurl(APIURL, ['group', name])
    osc.core.http_PUT(group_url, data=meta)
示例7: parse_xml
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import fromstring [as 别名]
def parse_xml(xml_string):
    """Convert an XML byte string into nested dicts and lists.

    The input is decoded as UTF-8 and passed through ``_xml_scheme.sub``
    (presumably strips the namespace declaration -- verify against its
    definition) before parsing.

    Conversion rules per node:
      * children whose first tag is ``item`` -> list of the children's values
      * any other children -> dict keyed by child tag
      * no children and tag ends with ``Set`` -> empty list
      * otherwise -> the node text, passed through ``ec2utils._try_convert``
        when it is non-empty

    Returns:
        A single-entry dict mapping the root tag to its converted value.
    """
    def convert_node(node):
        children = list(node)
        if children:
            if children[0].tag == 'item':
                return node.tag, [convert_node(child)[1] for child in children]
            return node.tag, dict(convert_node(child) for child in children)
        if node.tag.endswith('Set'):
            return node.tag, []
        text = node.text
        if text:
            # TODO(ft): do not use private function
            text = ec2utils._try_convert(text)
        return node.tag, text

    cleaned = _xml_scheme.sub('', xml_string.decode("utf-8"))
    root_tag, root_value = convert_node(etree.fromstring(cleaned))
    return {root_tag: root_value}
示例8: main
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import fromstring [as 别名]
def main() -> None:
    """Record the current MTGO version and ensure a project column exists for it.

    Downloads the MTGO application manifest, reads the ``version`` attribute
    of its ``assemblyIdentity`` element, writes it to ``mtgo_version.json``
    and creates a column named after the version in the verification project
    when no column with that name exists yet.
    """
    response = requests.get('http://mtgoclientdepot.onlinegaming.wizards.com/MTGO.application')
    root = etree.fromstring(response.content)
    version = root.find('{urn:schemas-microsoft-com:asm.v1}assemblyIdentity').attrib['version']
    print(f'Current MTGO Version is {version}')

    with open('mtgo_version.json', mode='w') as f:
        json.dump({'version': version}, f)

    project = repo.get_verification_project()
    column_names = [column.name for column in project.get_columns()]
    if version in column_names:
        return
    print(f'Creating column for {version}')
    project.create_column(version)
示例9: tryParseXML
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import fromstring [as 别名]
def tryParseXML(self, xml_data):
    """Parse an incoming XML packet, tolerating malformed input.

    Args:
        xml_data: Raw packet; coerced with ``str()`` before parsing.

    Returns:
        ``True`` when packet reception is disabled on ``self.penguin``;
        ``[t, body]`` (the packet's ``t`` attribute and the list of
        ``//body`` nodes) for a recognised packet type;
        ``None`` when the type is not in ``AVAILABLE_XML_PACKET_TYPES`` or
        anything fails while parsing.
    """
    try:
        if not self.penguin.ReceivePacketEnabled:
            return True

        XMLdata = parseXML(str(xml_data))
        t = XMLdata.get('t')
        if t not in AVAILABLE_XML_PACKET_TYPES:
            return None

        body = XMLdata.xpath('//body')
        for b in body:
            # Just to make sure `action` exists!
            # NOTE(review): on element objects ``get`` returns None for a
            # missing attribute instead of raising, so this may not reject
            # anything -- confirm the intended validation.
            b.get("action")

        return [t, body]
    except Exception:
        # Any parsing problem means "not a valid packet". ``Exception``
        # (rather than a bare ``except``) keeps KeyboardInterrupt and
        # SystemExit propagating.
        return None
示例10: country_population
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import fromstring [as 别名]
def country_population():
    """Yield country names and populations scraped from Wikipedia.

    Fetches the "List of countries and dependencies by population" page and
    walks every table whose ``class`` contains ``wikitable``.

    Yields:
        dict: ``{'name': <title of the first link in the second cell>,
        'population': <text of the third cell>}`` for each row with more
        than three ``td`` cells. Rows whose second cell holds no link are
        skipped.
    """
    from lxml import etree
    from urllib.request import urlopen

    # Close the HTTP response deterministically once the body is read.
    with urlopen('https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_population') as response:
        page = response.read()

    tree = etree.fromstring(page, parser=etree.HTMLParser())
    for table in tree.findall('.//table'):
        if 'wikitable' not in table.attrib.get('class', ''):
            continue
        for row in table.findall('.//tr'):
            cells = row.findall('td')
            if len(cells) <= 3:
                continue
            link = cells[1].find('.//a')
            if link is None:
                # No link in the name cell: previously an AttributeError
                # that aborted the whole generator.
                continue
            yield dict(
                name=link.attrib.get('title'),
                population=cells[2].text,
            )
示例11: _parse
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import fromstring [as 别名]
def _parse(data, limit):
    """Turn a feed document into a list of HTML list lines.

    Items are looked up as ``*/item`` first; when none is found the RSS 1.0
    namespace is applied to the item, title and link tags instead. Each
    item's title and link (falling back to ``guid`` when there is no link)
    are handed to ``_maybe_append``.

    Args:
        data: Feed XML, as accepted by ``etree.fromstring``.
        limit: Stop once this many entries were collected; a falsy value
            means no limit.

    Returns:
        ``["<ul>", ..., "</ul>"]`` around the collected entries, or ``None``
        when nothing was collected.
    """
    root = etree.fromstring(data)

    # RSS
    ns = ""
    item_path = "*/item"
    if root.find(item_path) is None:
        ns = "{http://purl.org/rss/1.0/}"
        item_path = ns + "item"

    entries = []
    for item in root.findall(item_path):
        title = item.find(ns + "title")
        link = item.find(ns + "link")
        if link is None:
            link = item.find("guid")
        _maybe_append(entries, title, link)
        if limit and len(entries) == limit:
            break

    if not entries:
        return None
    return ["<ul>"] + entries + ["</ul>"]
示例12: get_cat_image_url
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import fromstring [as 别名]
async def get_cat_image_url(timeout: float) -> str:
    """Return the URL of a random cat image that is reachable.

    Asks thecatapi.com for a random image (XML response), then requests the
    image URL itself and returns it once it answers with HTTP 200. Retries
    indefinitely on disconnects, connection errors, timeouts and non-200
    image responses.

    Declared ``async def``: the body awaits and uses ``async with``, which
    is a syntax error inside a plain ``def``.

    Args:
        timeout: Seconds allowed for the request that verifies the image URL.

    Returns:
        The verified image URL.

    Raises:
        APIServerError: If the API endpoint answers with a non-200 status.
    """
    api_url = 'http://thecatapi.com/api/images/get'
    async with aiohttp.ClientSession() as session:
        while True:
            try:
                async with session.get(
                    api_url, params={'format': 'xml', 'type': 'jpg,png'}
                ) as res:
                    if res.status != 200:
                        raise APIServerError
                    xml_result = await res.read()
                    tree = etree.fromstring(xml_result)
                    url = tree.find('data/images/image/url').text
            except aiohttp.client_exceptions.ServerDisconnectedError:
                # Brief pause before asking the API again.
                await asyncio.sleep(0.1)
                continue

            try:
                async with async_timeout.timeout(timeout=timeout):
                    # The request context manager already releases the
                    # response; no second ``async with res`` is needed.
                    async with session.get(url) as res:
                        if res.status == 200:
                            return url
            except (aiohttp.ClientConnectorError, asyncio.TimeoutError):
                continue
示例13: main
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import fromstring [as 别名]
def main(_):
    """Convert PASCAL VOC annotations into a TFRecord file.

    Reads the example list ``ImageSets/Main/aeroplane_<set>.txt`` for each
    selected year, parses every example's annotation XML and writes one
    serialized example per annotation to ``FLAGS.output_path``.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``FLAGS.set`` is not in ``SETS`` or ``FLAGS.year`` is
            not in ``YEARS``.
    """
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    if FLAGS.year not in YEARS:
        raise ValueError('year must be in : {}'.format(YEARS))

    data_dir = FLAGS.data_dir
    years = ['VOC2007', 'VOC2012']
    if FLAGS.year != 'merged':
        years = [FLAGS.year]

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    # Close the writer even when an annotation fails to load or parse.
    try:
        label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

        for year in years:
            logging.info('Reading from PASCAL %s dataset.', year)
            examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                         'aeroplane_' + FLAGS.set + '.txt')
            annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
            examples_list = dataset_util.read_examples_list(examples_path)
            for idx, example in enumerate(examples_list):
                if idx % 100 == 0:
                    logging.info('On image %d of %d', idx, len(examples_list))
                path = os.path.join(annotations_dir, example + '.xml')
                with tf.gfile.GFile(path, 'r') as fid:
                    xml_str = fid.read()
                xml = etree.fromstring(xml_str)
                data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

                tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                                FLAGS.ignore_difficult_instances)
                writer.write(tf_example.SerializeToString())
    finally:
        writer.close()
示例14: create_tf_record
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import fromstring [as 别名]
def create_tf_record(output_filename,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     examples):
    """Creates a TFRecord file from examples.

    Examples whose annotation file ``<annotations_dir>/xmls/<example>.xml``
    does not exist are skipped with a warning.

    Args:
      output_filename: Path to where output file is saved.
      label_map_dict: The label map dictionary.
      annotations_dir: Directory where annotation files are stored.
      image_dir: Directory where image files are stored.
      examples: Examples to parse and save to tf record.
    """
    writer = tf.python_io.TFRecordWriter(output_filename)
    # Close the writer even when an annotation fails to load or parse.
    try:
        for idx, example in enumerate(examples):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples))
            path = os.path.join(annotations_dir, 'xmls', example + '.xml')

            if not os.path.exists(path):
                logging.warning('Could not find %s, ignoring example.', path)
                continue
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(data, label_map_dict, image_dir)
            writer.write(tf_example.SerializeToString())
    finally:
        writer.close()
# TODO: Add test for pet/PASCAL main files.
示例15: create_pdf_from_bookmark
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import fromstring [as 别名]
def create_pdf_from_bookmark(bookmark):
    """Render a bookmark's article text to a PDF with ``wkhtmltopdf``.

    The article HTML gets a small stylesheet ``<head>`` prepended, is written
    to a temporary ``.html`` file and converted. The PDF is stored under
    ``PDF_DEST_FOLDER/<year>/<month>/<title>.pdf``, with year and month taken
    from ``bookmark.time``.

    Args:
        bookmark: Object providing ``title``, ``time`` (a timestamp) and
            ``get_text()``, which returns a mapping whose ``'data'`` entry is
            the UTF-8 encoded article HTML.

    Returns:
        The destination PDF path. A non-zero ``wkhtmltopdf`` exit status is
        logged as a warning and does not raise.
    """
    logging.info('Processing %s', bookmark.title)

    # add some introductory HTML to the page (title, etc.)
    stylesheet_html = ('<head><style>body {font-family: Verdana;'
                       'font-size: 11pt;}</style></head>')
    txt = bookmark.get_text()['data']
    txt = txt.decode('utf-8')
    parser = etree.HTMLParser()
    tree = etree.fromstring(txt, parser)
    tree.insert(0, etree.XML(stylesheet_html))
    new_html = etree.tostring(tree)

    # create/manage the directory structure for the article
    date = datetime.datetime.fromtimestamp(bookmark.time)
    year_dir = str(date.year)
    month_dir = str(date.month)
    dest_dir = os.path.join(PDF_DEST_FOLDER, year_dir, month_dir)
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
    pdf_filename = os.path.join(dest_dir, '%s.pdf' % bookmark.title)

    # Create the temp file with its .html suffix directly, rather than
    # renaming it afterwards.
    with tempfile.NamedTemporaryFile(suffix='.html', delete=False) as tmp_file:
        tmp_file.write(new_html)
        html_filename = tmp_file.name

    # generate the PDF and cleanup
    pdf_cmd = ['wkhtmltopdf', html_filename, pdf_filename]
    try:
        proc = subprocess.Popen(
            pdf_cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() returns (stdout, stderr); the exit status lives on
        # proc.returncode.
        _stdout, stderr_output = proc.communicate()
        if proc.returncode != 0:
            logging.warning('wkhtmltopdf exited with status %s for %s: %s',
                            proc.returncode, bookmark.title, stderr_output)
    finally:
        # Remove the temp HTML even when wkhtmltopdf cannot be started.
        os.unlink(html_filename)

    return pdf_filename