Add missing xmlparser attributes: namespace_prefixes, ordered_attributes, specified_attributes, intern by Copilot · Pull Request #6494 · RustPython/RustPython · GitHub
[go: up one dir, main page]

Skip to content
Merged

Add missing xmlparser attributes: namespace_prefixes, ordered_attributes, specified_attributes, intern #6494

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
<div data-view-component="true" class="Popover position-fixed mt-2 pt-1 right-0 left-0">
Diff view
6 changes: 0 additions & 6 deletions Lib/test/test_pyexpat.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,28 +20,24 @@ class SetAttributeTest(unittest.TestCase):
def setUp(self):
self.parser = expat.ParserCreate(namespace_separator='!')

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_buffer_text(self):
self.assertIs(self.parser.buffer_text, False)
for x in 0, 1, 2, 0:
self.parser.buffer_text = x
self.assertIs(self.parser.buffer_text, bool(x))

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_namespace_prefixes(self):
self.assertIs(self.parser.namespace_prefixes, False)
for x in 0, 1, 2, 0:
self.parser.namespace_prefixes = x
self.assertIs(self.parser.namespace_prefixes, bool(x))

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_ordered_attributes(self):
self.assertIs(self.parser.ordered_attributes, False)
for x in 0, 1, 2, 0:
self.parser.ordered_attributes = x
self.assertIs(self.parser.ordered_attributes, bool(x))

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_specified_attributes(self):
self.assertIs(self.parser.specified_attributes, False)
for x in 0, 1, 2, 0:
Expand Down Expand Up @@ -244,7 +240,6 @@ def test_parse_bytes(self):
# Issue #6697.
self.assertRaises(AttributeError, getattr, parser, '\uD800')

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_parse_str(self):
out = self.Outputter()
parser = expat.ParserCreate(namespace_separator='!')
Expand All @@ -255,7 +250,6 @@ def test_parse_str(self):
operations = out.out
self._verify_parse_output(operations)

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_parse_file(self):
# Try parsing a file
out = self.Outputter()
Expand Down
183 changes: 182 additions & 1 deletion crates/stdlib/src/pyexpat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,26 @@ macro_rules! create_property {
};
}

/// Registers a read/write attribute named `$name` on `$class` whose stored
/// value is always normalised to a Python `bool`.
///
/// * Getter: returns a clone of the object currently held in the parser's
///   `$element` field.
/// * Setter: evaluates the truthiness of the assigned object and stores the
///   matching bool object, so assigning e.g. `2` reads back as `True`. If the
///   truthiness check fails, the error is propagated and the stored value is
///   left unchanged.
///
/// The resulting descriptor is inserted into `$attributes` under the interned
/// form of `$name`.
macro_rules! create_bool_property {
    ($ctx: expr, $attributes: expr, $name: expr, $class: expr, $element: ident) => {
        let getset = $ctx.new_static_getset(
            $name,
            $class,
            // Getter: hand back whatever object is currently stored.
            move |parser: &PyExpatLikeXmlParser| parser.$element.read().clone(),
            // Setter: coerce the assigned object to a bool before storing it.
            move |parser: &PyExpatLikeXmlParser,
                  assigned: PyObjectRef,
                  vm: &VirtualMachine|
                  -> PyResult<()> {
                // Truthiness is resolved before the write guard is taken.
                let truthy = assigned.is_true(vm)?;
                let normalized = vm.ctx.new_bool(truthy);
                *parser.$element.write() = normalized.into();
                Ok(())
            },
        );

        $attributes.insert($ctx.intern_str($name), getset.into());
    };
}

#[pymodule(name = "pyexpat")]
mod _pyexpat {
use crate::vm::{
Expand All @@ -51,6 +71,29 @@ mod _pyexpat {
character_data: MutableObject,
entity_decl: MutableObject,
buffer_text: MutableObject,
namespace_prefixes: MutableObject,
ordered_attributes: MutableObject,
specified_attributes: MutableObject,
intern: MutableObject,
// Additional handlers (stubs for compatibility)
processing_instruction: MutableObject,
unparsed_entity_decl: MutableObject,
notation_decl: MutableObject,
start_namespace_decl: MutableObject,
end_namespace_decl: MutableObject,
comment: MutableObject,
start_cdata_section: MutableObject,
end_cdata_section: MutableObject,
default: MutableObject,
default_expand: MutableObject,
not_standalone: MutableObject,
external_entity_ref: MutableObject,
start_doctype_decl: MutableObject,
end_doctype_decl: MutableObject,
xml_decl: MutableObject,
element_decl: MutableObject,
attlist_decl: MutableObject,
skipped_entity: MutableObject,
}
type PyExpatLikeXmlParserRef = PyRef<PyExpatLikeXmlParser>;

Expand All @@ -71,6 +114,31 @@ mod _pyexpat {
character_data: MutableObject::new(vm.ctx.none()),
entity_decl: MutableObject::new(vm.ctx.none()),
buffer_text: MutableObject::new(vm.ctx.new_bool(false).into()),
namespace_prefixes: MutableObject::new(vm.ctx.new_bool(false).into()),
ordered_attributes: MutableObject::new(vm.ctx.new_bool(false).into()),
specified_attributes: MutableObject::new(vm.ctx.new_bool(false).into()),
// String interning dictionary - used by the parser to intern element/attribute names
// for memory efficiency and faster comparisons. See CPython's pyexpat documentation.
intern: MutableObject::new(vm.ctx.new_dict().into()),
// Additional handlers (stubs for compatibility)
processing_instruction: MutableObject::new(vm.ctx.none()),
unparsed_entity_decl: MutableObject::new(vm.ctx.none()),
notation_decl: MutableObject::new(vm.ctx.none()),
start_namespace_decl: MutableObject::new(vm.ctx.none()),
end_namespace_decl: MutableObject::new(vm.ctx.none()),
comment: MutableObject::new(vm.ctx.none()),
start_cdata_section: MutableObject::new(vm.ctx.none()),
end_cdata_section: MutableObject::new(vm.ctx.none()),
default: MutableObject::new(vm.ctx.none()),
default_expand: MutableObject::new(vm.ctx.none()),
not_standalone: MutableObject::new(vm.ctx.none()),
external_entity_ref: MutableObject::new(vm.ctx.none()),
start_doctype_decl: MutableObject::new(vm.ctx.none()),
end_doctype_decl: MutableObject::new(vm.ctx.none()),
xml_decl: MutableObject::new(vm.ctx.none()),
element_decl: MutableObject::new(vm.ctx.none()),
attlist_decl: MutableObject::new(vm.ctx.none()),
skipped_entity: MutableObject::new(vm.ctx.none()),
}
.into_ref(&vm.ctx))
}
Expand All @@ -89,7 +157,120 @@ mod _pyexpat {
character_data
);
create_property!(ctx, attributes, "EntityDeclHandler", class, entity_decl);
create_property!(ctx, attributes, "buffer_text", class, buffer_text);
create_bool_property!(ctx, attributes, "buffer_text", class, buffer_text);
create_bool_property!(
ctx,
attributes,
"namespace_prefixes",
class,
namespace_prefixes
);
create_bool_property!(
ctx,
attributes,
"ordered_attributes",
class,
ordered_attributes
);
create_bool_property!(
ctx,
attributes,
"specified_attributes",
class,
specified_attributes
);
create_property!(ctx, attributes, "intern", class, intern);
Comment on lines +160 to +182
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🌐 Web query:

CPython xml.parsers.expat xmlparser attribute names buffer_text namespace_prefixes ordered_attributes specified_attributes

💡 Result:

Summary of xml.parsers.expat xmlparser attributes

  • buffer_text: when True, the parser buffers character data to avoid many small CharacterDataHandler calls (Expat normally splits text at line endings). Default False; can be changed at any time. [1]

  • buffer_size / buffer_used: buffer_size sets the buffer capacity used when buffer_text is True (changing it flushes the buffer). buffer_used reports how many bytes (UTF‑8) are currently stored. [1]

  • ordered_attributes: when nonzero, attributes passed to StartElementHandler are a list (name, value, name, value, ...) in the document order instead of a dict mapping names→values. Default False. [1]

  • specified_attributes: when nonzero, the parser reports only attributes that were explicitly specified in the instance document (not those defaulted from DTD attribute declarations). Use with care if you rely on attribute-declaration defaults. Default False. [1]

  • namespace handling / namespace_prefixes: Expat can do namespace processing when created via ParserCreate with a namespace separator (the Python wrapper exposes this via namespace_separator). With namespace processing enabled, Expat consumes xmlns/xmlns:... attributes (so they won’t appear as normal attributes) and expands element and attribute names to the namespace URI, the separator character, and the local name concatenated ("URI<separator>localname"). Namespace declaration events can be observed with StartNamespaceDeclHandler/EndNamespaceDeclHandler. (The behavior of attribute names and namespace-declaration attributes follows Expat’s namespace processing rules.) [2][1]

Sources

  • Python xml.parsers.expat (pyexpat) documentation. [1]
  • Expat reference: Namespace processing and attribute handling. [2]

🏁 Script executed:

# First, let me check the actual code to see the full context of what's being registered
head -200 crates/stdlib/src/pyexpat.rs | tail -50

Repository: RustPython/RustPython

Length of output: 1783


🌐 Web query:

CPython pyexpat intern attribute dictionary

💡 Result:

Short answer: pyexpat interns attribute names/strings when it builds the Python attributes dictionary so the attribute keys/other XML name strings become interned Python str objects (reducing allocations and speeding dict lookups). The C implementation calls its string-intern helper when converting Expat's XML_Char strings to Python strings before putting them into the attributes dict; the public API then hands you a normal dict (or ordered list if ordered_attributes is enabled). See the StartElementHandler docs and the pyexpat.c source where string_intern is used.

Sources: Python pyexpat documentation (StartElementHandler / attributes). [1] PyExpat C implementation showing string_intern usage when constructing handler arguments. [2]

References
[1] xml.parsers.expat — StartElementHandler / attributes (Python stdlib docs).
[2] Modules/pyexpat.c (source: string_intern usage when converting element/attribute names).


🌐 Web query:

CPython xml.parsers.expat ParserCreate namespace_prefixes attribute xmlparser

💡 Result:

Short answer: There is no documented xmlparser.namespace_prefixes attribute in CPython's xml.parsers.expat. To enable namespace processing you pass namespace_separator to ParserCreate(); namespace declarations are reported via StartNamespaceDeclHandler/EndNamespaceDeclHandler and element/attribute names are expanded using the separator. Expat itself can be instructed to return the (uri, localname, prefix) triplet (XML_SetReturnNSTriplet), but that API is part of libexpat and is not exposed as a simple xmlparser.namespace_prefixes attribute in the Python docs. [1][2]

References

  • xml.parsers.expat.ParserCreate / xmlparser handlers (namespace_separator, StartNamespaceDeclHandler). [1]
  • Expat namespace processing and XML_SetReturnNSTriplet (triplet return). [2]

Sources
[1] Python docs — xml.parsers.expat (ParserCreate, StartNamespaceDeclHandler).
[2] Expat reference — Namespace Processing / XML_SetReturnNSTriplet.


🌐 Web query:

CPython xml.parsers.expat xmlparser complete list of attributes properties

💡 Result:

Below is a concise, complete listing (as given by the CPython docs) of the xml.parsers.expat xmlparser type: its public methods, instance attributes (properties) and the handler attributes you can set. Source: Python standard-library documentation for xml.parsers.expat. [1]

Methods

  • Parse(data[, isfinal])
  • ParseFile(file)
  • SetBase(base)
  • GetBase()
  • GetInputContext()
  • ExternalEntityParserCreate(context[, encoding])
  • SetParamEntityParsing(flag)
  • UseForeignDTD([flag])
  • SetReparseDeferralEnabled(enabled)
  • GetReparseDeferralEnabled()
  • SetBillionLaughsAttackProtectionActivationThreshold(threshold)
  • SetBillionLaughsAttackProtectionMaximumAmplification(max_factor)
  • SetAllocTrackerActivationThreshold(threshold)
  • SetAllocTrackerMaximumAmplification(max_factor)

Instance attributes / properties

  • buffer_size
  • buffer_text
  • buffer_used
  • ordered_attributes
  • specified_attributes

Error-related attributes (valid after ExpatError)

  • ErrorByteIndex
  • ErrorCode
  • ErrorColumnNumber
  • ErrorLineNumber

Current parse-position attributes

  • CurrentByteIndex
  • CurrentColumnNumber
  • CurrentLineNumber

Handler attributes (assign callable functions: o.handlername = func)

  • XmlDeclHandler(version, encoding, standalone)
  • StartDoctypeDeclHandler(doctypeName, systemId, publicId, has_internal_subset)
  • EndDoctypeDeclHandler()
  • ElementDeclHandler(name, model)
  • AttlistDeclHandler(elname, attname, type, default, required)
  • StartElementHandler(name, attributes)
  • EndElementHandler(name)
  • ProcessingInstructionHandler(target, data)
  • CharacterDataHandler(data)
  • UnparsedEntityDeclHandler(entityName, base, systemId, publicId, notationName)
  • EntityDeclHandler(entityName, is_parameter_entity, value, base, systemId, publicId, notationName)
  • NotationDeclHandler(notationName, base, systemId, publicId)
  • StartNamespaceDeclHandler(prefix, uri)
  • EndNamespaceDeclHandler(prefix)
  • StartCdataSectionHandler()
  • EndCdataSectionHandler()
  • CommentHandler(data)
  • StartResumeHandler() / (see docs for Expat resume-related handlers)
  • ExternalEntityRefHandler(context, base, systemId, publicId)
  • SkippedEntityHandler(entityName)
  • DefaultHandler(data)
  • UnknownEncodingHandler(encoding)
  • StartDoctypeDeclHandler / EndDoctypeDeclHandler (listed above)
  • (Other Expat-specific handlers exposed in the docs — assignable by the same o.handlername = callable pattern)

Notes

  • Many handler names and a few newer methods/attributes (e.g., reparse-deferral and protection-related setters) were added in recent CPython/Expat versions; availability depends on your Python/Expat version. See the docs for the exact signatures and version notes.

Reference
[1] Python documentation — xml.parsers.expat (XMLParser / xmlparser) (CPython stdlib).


🏁 Script executed:

# Check the actual implementation in the file to see if there are comments explaining these attributes
grep -A 5 -B 5 "namespace_prefixes\|intern" crates/stdlib/src/pyexpat.rs | head -60

Repository: RustPython/RustPython

Length of output: 2765


Remove non-standard attributes namespace_prefixes and intern or add comments explaining why they diverge from CPython's xml.parsers.expat API.

According to the official CPython documentation, the xmlparser instance attributes are: buffer_size, buffer_text, buffer_used, ordered_attributes, and specified_attributes. The RustPython implementation registers buffer_text, ordered_attributes, and specified_attributes correctly, but also includes:

  • namespace_prefixes — not documented in CPython's public API
  • intern — not a public xmlparser attribute in CPython (though internal string interning occurs in the C implementation)

Either remove these or add inline documentation explaining them as RustPython extensions to match CPython's behavior and prevent compatibility issues.

🤖 Prompt for AI Agents
In crates/stdlib/src/pyexpat.rs around lines 160–182, the parser registers
non-standard attributes "namespace_prefixes" and "intern" that are not part of
CPython's xml.parsers.expat public API; either remove the create_bool_property!
for "namespace_prefixes" and the create_property! for "intern" to restore
CPython compatibility, or keep them but add concise inline comments immediately
above each registration stating they are RustPython extensions, citing that they
diverge from CPython's documented attributes and why they are present (e.g.,
implementation convenience or internal interning exposure), so callers and
future maintainers understand the intentional API difference.

// Additional handlers (stubs for compatibility)
create_property!(
ctx,
attributes,
"ProcessingInstructionHandler",
class,
processing_instruction
);
create_property!(
ctx,
attributes,
"UnparsedEntityDeclHandler",
class,
unparsed_entity_decl
);
create_property!(ctx, attributes, "NotationDeclHandler", class, notation_decl);
create_property!(
ctx,
attributes,
"StartNamespaceDeclHandler",
class,
start_namespace_decl
);
create_property!(
ctx,
attributes,
"EndNamespaceDeclHandler",
class,
end_namespace_decl
);
create_property!(ctx, attributes, "CommentHandler", class, comment);
create_property!(
ctx,
attributes,
"StartCdataSectionHandler",
class,
start_cdata_section
);
create_property!(
ctx,
attributes,
"EndCdataSectionHandler",
class,
end_cdata_section
);
create_property!(ctx, attributes, "DefaultHandler", class, default);
create_property!(
ctx,
attributes,
"DefaultHandlerExpand",
class,
default_expand
);
create_property!(
ctx,
attributes,
"NotStandaloneHandler",
class,
not_standalone
);
create_property!(
ctx,
attributes,
"ExternalEntityRefHandler",
class,
external_entity_ref
);
create_property!(
ctx,
attributes,
"StartDoctypeDeclHandler",
class,
start_doctype_decl
);
create_property!(
ctx,
attributes,
"EndDoctypeDeclHandler",
class,
end_doctype_decl
);
create_property!(ctx, attributes, "XmlDeclHandler", class, xml_decl);
create_property!(ctx, attributes, "ElementDeclHandler", class, element_decl);
create_property!(ctx, attributes, "AttlistDeclHandler", class, attlist_decl);
create_property!(
ctx,
attributes,
"SkippedEntityHandler",
class,
skipped_entity
);
}

fn create_config(&self) -> xml::ParserConfig {
Expand Down
Loading
0