Quick start
Installation
Install from PyPI:
pip install pyuppsala
Or with uv:
uv add pyuppsala
Requirements: Python 3.10 or later. No C compiler needed – the package ships pre-built wheels compiled from Rust.
Parse an XML document
from pyuppsala import Document
doc = Document("<root><child>hello</child></root>")
el = doc.document_element
print(el.tag.local_name) # "root"
print(el.text_content) # "hello"
You can also parse from bytes (UTF-8 and UTF-16 are auto-detected):
from pyuppsala import parse_bytes
doc = parse_bytes(b"<root>ok</root>")
Quick text access with element_text
For simple elements like <name>value</name>, use element_text
instead of text_content – it returns the text of the first
Text/CDATA child without recursing:
from pyuppsala import Document
doc = Document("<person><name>Alice</name><age>30</age></person>")
root = doc.document_element
for child in root:
    print(f"{child.tag.local_name} = {child.element_text}")
# name = Alice
# age = 30
Source tracking
Every parsed node remembers its position in the original input. You can retrieve the original source text and byte ranges:
from pyuppsala import Document
xml = '<root><item id="1">hello</item><item id="2">world</item></root>'
doc = Document(xml)
# The full original input
print(doc.input_text == xml) # True
# Source text of a specific node
item = doc.document_element.children[0]
print(item.source) # '<item id="1">hello</item>'
# Byte range for slicing (byte offsets equal str indices here only because the input is pure ASCII)
start, end = item.source_range
print(xml[start:end]) # '<item id="1">hello</item>'
Query with XPath
from pyuppsala import Document, XPathEvaluator
doc = Document("""\
<bookstore>
<book category="fiction">
<title>The Great Gatsby</title>
</book>
<book category="non-fiction">
<title>A Brief History of Time</title>
</book>
</bookstore>
""")
doc.prepare_xpath()
xpath = XPathEvaluator()
# Select nodes
books = xpath.select(doc, "//book")
print(len(books)) # 2
# Evaluate to a string
title = xpath.evaluate(doc, "string(//book[@category='fiction']/title)")
print(title) # "The Great Gatsby"
# Evaluate to a number
count = xpath.evaluate(doc, "count(//book)")
print(count) # 2.0
# Evaluate to a boolean
has_fiction = xpath.evaluate(doc, "boolean(//book[@category='fiction'])")
print(has_fiction) # True
Namespace-aware XPath requires registering prefixes:
doc = Document('<root xmlns:ns="urn:test"><ns:item/></root>')
doc.prepare_xpath()
xpath = XPathEvaluator()
xpath.add_namespace("ns", "urn:test")
nodes = xpath.select(doc, "/root/ns:item")
Find child elements by namespace
For direct child lookups by namespace URI and local name, use
first_child_element_by_name_ns() and
child_elements_by_name_ns():
from pyuppsala import Document
xml = """\
<root xmlns:a="urn:example">
<a:item>first</a:item>
<a:other>skip</a:other>
<a:item>second</a:item>
</root>
"""
doc = Document(xml)
root = doc.document_element
# Get the first matching child
first = root.first_child_element_by_name_ns("urn:example", "item")
print(first.element_text) # "first"
# Get all matching children
items = root.child_elements_by_name_ns("urn:example", "item")
print(len(items)) # 2
Check element names with matches_name_ns
from pyuppsala import Document
xml = '<saml:Assertion xmlns:saml="urn:oasis:names:tc:SAML:2.0:assertion">ok</saml:Assertion>'
doc = Document(xml)
root = doc.document_element
if root.matches_name_ns("urn:oasis:names:tc:SAML:2.0:assertion", "Assertion"):
    print("This is a SAML Assertion")
Validate with XSD
from pyuppsala import XsdValidator
schema = """\
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="greeting" type="xs:string"/>
</xs:schema>
"""
validator = XsdValidator(schema)
# Quick boolean check
print(validator.is_valid_str("<greeting>Hello</greeting>")) # True
# Detailed error list
errors = validator.validate_str("<greeting><bad/></greeting>")
for err in errors:
    print(err) # prints line:column: message
Build XML without a DOM
from pyuppsala import XmlWriter
w = XmlWriter()
w.write_declaration()
w.start_element("catalog", [("xmlns", "urn:example")])
w.start_element("item", [("id", "1")])
w.text("Widget")
w.end_element("item")
w.end_element("catalog")
print(w.to_string())
Mutate the DOM
from pyuppsala import Document
doc = Document("<root><a/></root>")
root = doc.document_element
# Create and attach new nodes
b = doc.create_element("b")
doc.append_child(root, b)
text = doc.create_text("hello")
doc.append_child(b, text)
# Detach and reattach
doc.detach(b)
doc.insert_before(root, b, root.children[0])
print(doc.to_xml())
QName matching
from pyuppsala import QName
q = QName("Envelope", namespace_uri="http://schemas.xmlsoap.org/soap/envelope/", prefix="soap")
# Match by local name and namespace
print(q.matches("Envelope", namespace_uri="http://schemas.xmlsoap.org/soap/envelope/")) # True
print(q.matches("Envelope")) # False -- no namespace_uri given (None), which does not match the QName's namespace
print(q.matches("Body", namespace_uri="http://schemas.xmlsoap.org/soap/envelope/")) # False