Examples

Parse, query, modify

Parse an HTML-like structure, find elements by tag name, and modify them:

from pyuppsala import Document

xml = """\
<html>
  <body>
    <p class="intro">Hello</p>
    <p class="body">World</p>
  </body>
</html>
"""

doc = Document(xml)

# Find all <p> elements
paragraphs = doc.get_elements_by_tag_name("p")
for p in paragraphs:
    print(f"{p.get_attribute('class')}: {p.text_content}")

# Add a new paragraph
body = doc.get_elements_by_tag_name("body")[0]
new_p = doc.create_element("p")
new_p.set_attribute("class", "footer")
text = doc.create_text("Goodbye")
doc.append_child(new_p, text)
doc.append_child(body, new_p)

print(doc.to_xml_with_options(indent="  "))

Namespace-aware processing

from pyuppsala import Document, XPathEvaluator

xml = """\
<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
  <soap:Body>
    <m:GetPrice xmlns:m="urn:example">
      <m:Item>Widget</m:Item>
    </m:GetPrice>
  </soap:Body>
</soap:Envelope>
"""

doc = Document(xml)
doc.prepare_xpath()

xpath = XPathEvaluator()
xpath.add_namespace("soap", "http://schemas.xmlsoap.org/soap/envelope/")
xpath.add_namespace("m", "urn:example")

item = xpath.evaluate(doc, "string(//m:Item)")
print(item)  # "Widget"

body = xpath.select(doc, "/soap:Envelope/soap:Body")
print(body[0].tag.prefixed_name)  # "soap:Body"

Schema validation with detailed errors

from pyuppsala import XsdValidator

schema = """\
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:element name="person">
    <xs:complexType>
      <xs:sequence>
        <xs:element name="name" type="xs:string"/>
        <xs:element name="age" type="xs:positiveInteger"/>
      </xs:sequence>
      <xs:attribute name="id" type="xs:integer" use="required"/>
    </xs:complexType>
  </xs:element>
</xs:schema>
"""

validator = XsdValidator(schema)

# Valid document
errors = validator.validate_str(
    '<person id="1"><name>Alice</name><age>30</age></person>'
)
assert len(errors) == 0

# Invalid: missing required attribute, wrong element order
errors = validator.validate_str(
    "<person><age>-5</age><name>Bob</name></person>"
)
for err in errors:
    print(f"  Line {err.line}, Col {err.column}: {err.message}")

Building XML with XmlWriter

from pyuppsala import XmlWriter, parse

w = XmlWriter()
w.write_declaration()
w.start_element("feed", [("xmlns", "http://www.w3.org/2005/Atom")])

for i, title in enumerate(["First Post", "Second Post"]):
    w.start_element("entry")
    w.start_element("title")
    w.text(title)
    w.end_element("title")
    w.start_element("id")
    w.text(f"urn:uuid:{i}")
    w.end_element("id")
    w.end_element("entry")

w.end_element("feed")

# Verify the output parses correctly
doc = parse(w.to_string())
entries = doc.get_elements_by_tag_name("entry")
print(f"Generated {len(entries)} entries")  # 2

XSD regex patterns

from pyuppsala import XsdRegex

# US ZIP code
zip_re = XsdRegex(r"[0-9]{5}(-[0-9]{4})?")
print(zip_re.is_match("12345"))       # True
print(zip_re.is_match("12345-6789"))  # True
print(zip_re.is_match("abcde"))       # False

# Unicode letter categories
letters = XsdRegex(r"\p{L}+")
print(letters.is_match("Hello"))   # True
print(letters.is_match("12345"))   # False

# Character class subtraction (vowels removed)
consonants = XsdRegex(r"[a-z-[aeiou]]+")
print(consonants.is_match("bcdfg"))  # True
print(consonants.is_match("abcde"))  # False

Streaming serialization

from pyuppsala import Document

doc = Document("<root><a/><b/><c/></root>")

# Write to a file
doc.write_to_file("/tmp/output.xml")

# Pretty-print to a file
pretty = doc.to_xml_with_options(indent="    ")
with open("/tmp/pretty.xml", "w") as f:
    f.write(pretty)

# Serialize a subtree
a_node = doc.get_elements_by_tag_name("a")[0]
print(a_node.to_xml())  # "<a/>"

Fast text extraction with element_text

Use element_text for simple <tag>value</tag> patterns. It returns the text of the first Text or CDATA child without recursion:

from pyuppsala import Document

doc = Document("""\
<catalog>
  <product>
    <name>Widget</name>
    <price>9.99</price>
    <description>A <b>great</b> product</description>
  </product>
</catalog>
""")

product = doc.get_elements_by_tag_name("product")[0]
for child in product:
    if child.kind == "element":
        # element_text is fast: only looks at the first text child
        print(f"{child.tag.local_name}: {child.element_text}")
# name: Widget
# price: 9.99
# description: A   <-- only the first text child "A ", not "A great product"

# For recursive text content, use text_content instead
desc = doc.get_elements_by_tag_name("description")[0]
print(desc.text_content)  # "A great product"

Source tracking for error reporting

Use source, source_range, and Document.input_text for source-level error reporting:

from pyuppsala import Document

xml = """\
<config>
  <database host="localhost" port="5432"/>
  <cache host="redis.local" port="abc"/>
</config>"""

doc = Document(xml)

for db in doc.get_elements_by_tag_name("cache"):
    port = db.get_attribute("port")
    if not port.isdigit():
        print(f"Invalid port at line {db.line}, column {db.column}")
        print(f"Source: {db.source}")
        # Invalid port at line 3, column 3
        # Source: <cache host="redis.local" port="abc"/>

Byte ranges are useful for highlighting in editors:

xml = '<root><item id="1">hello</item><item id="2">world</item></root>'
doc = Document(xml)

for item in doc.get_elements_by_tag_name("item"):
    start, end = item.source_range
    print(f"Bytes {start}-{end}: {xml[start:end]}")
# Bytes 6-30: <item id="1">hello</item>
# Bytes 30-54: <item id="2">world</item>

Element name matching

Use matches_name_ns() and QName.matches() for namespace-aware dispatch:

from pyuppsala import Document

NS_A = "urn:app:config"
NS_B = "urn:app:runtime"

xml = f"""\
<root xmlns:cfg="{NS_A}" xmlns:rt="{NS_B}">
  <cfg:setting>value1</cfg:setting>
  <rt:setting>value2</rt:setting>
  <cfg:setting>value3</cfg:setting>
</root>
"""

doc = Document(xml)
root = doc.document_element

for child in root:
    if child.kind != "element":
        continue
    if child.matches_name_ns(NS_A, "setting"):
        print(f"Config: {child.element_text}")
    elif child.matches_name_ns(NS_B, "setting"):
        print(f"Runtime: {child.element_text}")
# Config: value1
# Runtime: value2
# Config: value3

QName.matches works on standalone QName objects too:

from pyuppsala import QName

# Build a dispatch table
handlers = {
    ("urn:app", "create"): lambda: "Creating...",
    ("urn:app", "delete"): lambda: "Deleting...",
}

q = QName("create", namespace_uri="urn:app", prefix="app")
for (ns, name), handler in handlers.items():
    if q.matches(name, namespace_uri=ns):
        print(handler())  # "Creating..."

Building a document from scratch

from pyuppsala import Document

doc = Document.empty()
root = doc.create_element("catalog")
doc.append_child(doc.root, root)

products = [
    {"id": "1", "name": "Widget", "price": "9.99"},
    {"id": "2", "name": "Gadget", "price": "19.99"},
    {"id": "3", "name": "Doohickey", "price": "4.99"},
]

for prod in products:
    elem = doc.create_element("product")
    elem.set_attribute("id", prod["id"])

    name = doc.create_element("name")
    doc.append_child(name, doc.create_text(prod["name"]))
    doc.append_child(elem, name)

    price = doc.create_element("price")
    doc.append_child(price, doc.create_text(prod["price"]))
    doc.append_child(elem, price)

    doc.append_child(root, elem)

print(doc.to_xml_with_options(indent="  "))
# <catalog>
#   <product id="1">
#     <name>Widget</name>
#     <price>9.99</price>
#   </product>
#   <product id="2">
#     <name>Gadget</name>
#     <price>19.99</price>
#   </product>
#   <product id="3">
#     <name>Doohickey</name>
#     <price>4.99</price>
#   </product>
# </catalog>

XPath context nodes

Use the context parameter to evaluate XPath relative to a specific node:

from pyuppsala import Document, XPathEvaluator

doc = Document("""\
<company>
  <dept name="Engineering">
    <employee>Alice</employee>
    <employee>Bob</employee>
  </dept>
  <dept name="Sales">
    <employee>Charlie</employee>
  </dept>
</company>
""")
doc.prepare_xpath()

xpath = XPathEvaluator()

# Count all employees
total = xpath.evaluate(doc, "count(//employee)")
print(f"Total employees: {total}")  # 3.0

# Count per department using context
for dept in xpath.select(doc, "//dept"):
    name = dept.get_attribute("name")
    count = xpath.evaluate(doc, "count(employee)", context=dept)
    print(f"  {name}: {count}")
# Engineering: 2.0
# Sales: 1.0

Combining XmlWriter with validation

from pyuppsala import XmlWriter, XsdValidator, parse

# Define a schema
schema = """\
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:element name="order">
    <xs:complexType>
      <xs:sequence>
        <xs:element name="item" maxOccurs="unbounded">
          <xs:complexType>
            <xs:simpleContent>
              <xs:extension base="xs:string">
                <xs:attribute name="qty" type="xs:positiveInteger" use="required"/>
              </xs:extension>
            </xs:simpleContent>
          </xs:complexType>
        </xs:element>
      </xs:sequence>
      <xs:attribute name="id" type="xs:positiveInteger" use="required"/>
    </xs:complexType>
  </xs:element>
</xs:schema>
"""
validator = XsdValidator(schema)

# Build XML with XmlWriter
w = XmlWriter()
w.start_element("order", [("id", "42")])
w.start_element("item", [("qty", "3")])
w.text("Widget")
w.end_element("item")
w.start_element("item", [("qty", "1")])
w.text("Gadget")
w.end_element("item")
w.end_element("order")

# Validate the generated XML
xml = w.to_string()
print(f"Valid: {validator.is_valid_str(xml)}")  # True

# Parse and inspect
doc = parse(xml)
for item in doc.get_elements_by_tag_name("item"):
    print(f"  {item.element_text} x{item.get_attribute('qty')}")
# Widget x3
# Gadget x1

Tree manipulation: reorder children

from pyuppsala import Document

doc = Document("<list><c>3</c><a>1</a><b>2</b></list>")
root = doc.document_element

# Collect children and sort by tag name
children = list(root.children)
children.sort(key=lambda n: n.tag.local_name)

# Detach all, then re-attach in order
for child in children:
    doc.detach(child)
for child in children:
    doc.append_child(root, child)

print(doc.to_xml())
# "<list><a>1</a><b>2</b><c>3</c></list>"

Node iteration patterns

from pyuppsala import Document

doc = Document("<root><a/><b/><c/><d/><e/></root>")
root = doc.document_element

# len() and indexing
print(len(root))  # 5
print(root[0].tag.local_name)   # "a"
print(root[-1].tag.local_name)  # "e"

# Iteration
names = [child.tag.local_name for child in root]
print(names)  # ['a', 'b', 'c', 'd', 'e']

# Truthiness -- nodes are always truthy
if root:
    print("Root exists")

# String conversion
print(str(root))   # "<root><a/><b/><c/><d/><e/></root>"
print(repr(root))  # "Node(<root>)"