From b97e454ceb2e1719a487bfebaae3da4a706a854b Mon Sep 17 00:00:00 2001
From: NAITOH Jun <naitoh@gmail.com>
Date: Sun, 16 Feb 2025 16:48:06 +0900
Subject: [PATCH 01/12] Bump version

---
 lib/rexml/rexml.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb
index a653f028..bf3c0d32 100644
--- a/lib/rexml/rexml.rb
+++ b/lib/rexml/rexml.rb
@@ -31,7 +31,7 @@
 module REXML
   COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
   DATE = "2008/019"
-  VERSION = "3.4.1"
+  VERSION = "3.4.2"
   REVISION = ""
 
   Copyright = COPYRIGHT

From 64a709e74551d5968f2241a772876f4b0c8dea22 Mon Sep 17 00:00:00 2001
From: NAITOH Jun <naitoh@gmail.com>
Date: Sun, 2 Mar 2025 11:38:54 +0900
Subject: [PATCH 02/12] Improve CDATA parse performance (#244)

## Why?

GitHub: fix #243

## Benchmark (Comparison with rexml 3.4.1)
```
$ benchmark-driver benchmark/parse_cdata.yaml
Calculating -------------------------------------
                     rexml 3.4.1      master  3.4.1(YJIT)  master(YJIT)
                 dom     648.361      1.178k      591.590        1.046k i/s -     100.000 times in 0.154235s 0.084913s 0.169036s 0.095627s
                 sax     699.061      1.378k      651.148        1.196k i/s -     100.000 times in 0.143049s 0.072549s 0.153575s 0.083611s
                pull     699.271      1.379k      660.275        1.210k i/s -     100.000 times in 0.143006s 0.072527s 0.151452s 0.082622s
              stream     701.725      1.383k      659.483        1.228k i/s -     100.000 times in 0.142506s 0.072307s 0.151634s 0.081455s

Comparison:
                              dom
              master:      1177.7 i/s
        master(YJIT):      1045.7 i/s - 1.13x  slower
         rexml 3.4.1:       648.4 i/s - 1.82x  slower
         3.4.1(YJIT):       591.6 i/s - 1.99x  slower

                              sax
              master:      1378.4 i/s
        master(YJIT):      1196.0 i/s - 1.15x  slower
         rexml 3.4.1:       699.1 i/s - 1.97x  slower
         3.4.1(YJIT):       651.1 i/s - 2.12x  slower

                             pull
              master:      1378.8 i/s
        master(YJIT):      1210.3 i/s - 1.14x  slower
         rexml 3.4.1:       699.3 i/s - 1.97x  slower
         3.4.1(YJIT):       660.3 i/s - 2.09x  slower

                           stream
              master:      1383.0 i/s
        master(YJIT):      1227.7 i/s - 1.13x  slower
         rexml 3.4.1:       701.7 i/s - 1.97x  slower
         3.4.1(YJIT):       659.5 i/s - 2.10x  slower
```
- YJIT=ON : 1.76x - 1.83x faster
- YJIT=OFF : 1.82x - 1.97x faster

Reported by Masamune. Thanks!!!

Co-authored-by: Sutou Kouhei <kou@clear-code.com>
---
 benchmark/parse_cdata.yaml      | 50 +++++++++++++++++++++++++++++++++
 lib/rexml/parsers/baseparser.rb | 10 +++++--
 lib/rexml/source.rb             |  2 +-
 test/parse/test_cdata.rb        | 20 ++++++++++++-
 4 files changed, 77 insertions(+), 5 deletions(-)
 create mode 100644 benchmark/parse_cdata.yaml

diff --git a/benchmark/parse_cdata.yaml b/benchmark/parse_cdata.yaml
new file mode 100644
index 00000000..cde04306
--- /dev/null
+++ b/benchmark/parse_cdata.yaml
@@ -0,0 +1,50 @@
+loop_count: 100
+contexts:
+  - gems:
+      rexml: 3.2.6
+    require: false
+    prelude: require 'rexml'
+  - name: master
+    prelude: |
+      $LOAD_PATH.unshift(File.expand_path("lib"))
+      require 'rexml'
+  - name: 3.2.6(YJIT)
+    gems:
+      rexml: 3.2.6
+    require: false
+    prelude: |
+      require 'rexml'
+      RubyVM::YJIT.enable
+  - name: master(YJIT)
+    prelude: |
+      $LOAD_PATH.unshift(File.expand_path("lib"))
+      require 'rexml'
+      RubyVM::YJIT.enable
+
+prelude: |
+  require 'rexml/document'
+  require 'rexml/parsers/sax2parser'
+  require 'rexml/parsers/pullparser'
+  require 'rexml/parsers/streamparser'
+  require 'rexml/streamlistener'
+
+  def build_xml(size)
+    xml = "<?xml version=\"1.0\"?>\n" +
+           "<root>Test</root>\n" +
+           "<![CDATA[" + "a" * size + "]]>\n"
+  end
+  xml = build_xml(100000)
+
+  class Listener
+    include REXML::StreamListener
+  end
+
+benchmark:
+  'dom'        : REXML::Document.new(xml)
+  'sax'        : REXML::Parsers::SAX2Parser.new(xml).parse
+  'pull'       : |
+    parser = REXML::Parsers::PullParser.new(xml)
+    while parser.has_next?
+      parser.pull
+    end
+  'stream'     : REXML::Parsers::StreamParser.new(xml, Listener.new).parse
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index 44aacfa2..e666c2af 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -471,9 +471,13 @@ def pull_event
                 end
 
                 return [ :comment, md[1] ]
-              else
-                md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
-                return [ :cdata, md[1] ] if md
+              elsif @source.match?("[CDATA[", true)
+                text = @source.read_until("]]>")
+                if text.chomp!("]]>")
+                  return [ :cdata, text ]
+                else
+                  raise REXML::ParseException.new("Malformed CDATA: Missing end ']]>'", @source)
+                end
               end
               raise REXML::ParseException.new( "Declarations can only occur "+
                 "in the doctype declaration.", @source)
diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb
index 5ba5ab12..3ec1141e 100644
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@@ -67,7 +67,7 @@ class Source
     module Private
       SCANNER_RESET_SIZE = 100000
       PRE_DEFINED_TERM_PATTERNS = {}
-      pre_defined_terms = ["'", '"', "<"]
+      pre_defined_terms = ["'", '"', "<", "]]>"]
       if StringScanner::Version < "3.1.1"
         pre_defined_terms.each do |term|
           PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
diff --git a/test/parse/test_cdata.rb b/test/parse/test_cdata.rb
index b5f1a3bc..c742d6a1 100644
--- a/test/parse/test_cdata.rb
+++ b/test/parse/test_cdata.rb
@@ -7,10 +7,28 @@ module REXMLTests
   class TestParseCData < Test::Unit::TestCase
     include Test::Unit::CoreAssertions
 
+    def parse(xml)
+      REXML::Document.new(xml)
+    end
+
     def test_linear_performance_gt
       seq = [10000, 50000, 100000, 150000, 200000]
       assert_linear_performance(seq, rehearsal: 10) do |n|
-        REXML::Document.new('<description><![CDATA[ ' + ">" * n + ' ]]></description>')
+        parse('<description><![CDATA[ ' + ">" * n + ' ]]></description>')
+      end
+    end
+
+    class TestInvalid < self
+      def test_unclosed_cdata
+        exception = assert_raise(REXML::ParseException) do
+          parse("<root><![CDATA[a]></root>")
+        end
+        assert_equal(<<~DETAIL, exception.to_s)
+          Malformed CDATA: Missing end ']]>'
+          Line: 1
+          Position: 25
+          Last 80 unconsumed characters:
+        DETAIL
       end
     end
   end

From 434909171ef3756c1ca2b84f5c90923a72c6a591 Mon Sep 17 00:00:00 2001
From: NAITOH Jun <naitoh@gmail.com>
Date: Mon, 3 Mar 2025 13:47:31 +0900
Subject: [PATCH 03/12] Improve comment parse performance (#245)

## Benchmark (Comparison with rexml 3.4.1)
```
$ benchmark-driver benchmark/parse_comment.yaml
Calculating -------------------------------------
                     rexml 3.4.1      master  3.4.1(YJIT)  master(YJIT)
           top_level     999.440      5.058k      922.416        3.340k i/s -     100.000 times in 0.100056s 0.019770s 0.108411s 0.029936s
          in_doctype      1.063k      4.890k      980.498        3.341k i/s -     100.000 times in 0.094116s 0.020449s 0.101989s 0.029927s
       after_doctype     638.321      1.304k      603.952        1.153k i/s -     100.000 times in 0.156661s 0.076710s 0.165576s 0.086748s

Comparison:
                        top_level
              master:      5058.2 i/s
        master(YJIT):      3340.5 i/s - 1.51x  slower
         rexml 3.4.1:       999.4 i/s - 5.06x  slower
         3.4.1(YJIT):       922.4 i/s - 5.48x  slower

                       in_doctype
              master:      4890.2 i/s
        master(YJIT):      3341.5 i/s - 1.46x  slower
         rexml 3.4.1:      1062.5 i/s - 4.60x  slower
         3.4.1(YJIT):       980.5 i/s - 4.99x  slower

                    after_doctype
              master:      1303.6 i/s
        master(YJIT):      1152.8 i/s - 1.13x  slower
         rexml 3.4.1:       638.3 i/s - 2.04x  slower
         3.4.1(YJIT):       604.0 i/s - 2.16x  slower
```

- YJIT=ON : 1.90x - 3.62x faster
- YJIT=OFF : 2.04x - 5.06x faster
---
 benchmark/parse_comment.yaml    | 36 ++++++++++++++++++++++++++++++
 lib/rexml/parsers/baseparser.rb | 39 ++++++++++++++-------------------
 test/parse/test_comment.rb      | 21 +++++++++++++-----
 3 files changed, 69 insertions(+), 27 deletions(-)
 create mode 100644 benchmark/parse_comment.yaml

diff --git a/benchmark/parse_comment.yaml b/benchmark/parse_comment.yaml
new file mode 100644
index 00000000..a0a3a771
--- /dev/null
+++ b/benchmark/parse_comment.yaml
@@ -0,0 +1,36 @@
+loop_count: 100
+contexts:
+  - gems:
+      rexml: 3.2.6
+    require: false
+    prelude: require 'rexml'
+  - name: master
+    prelude: |
+      $LOAD_PATH.unshift(File.expand_path("lib"))
+      require 'rexml'
+  - name: 3.2.6(YJIT)
+    gems:
+      rexml: 3.2.6
+    require: false
+    prelude: |
+      require 'rexml'
+      RubyVM::YJIT.enable
+  - name: master(YJIT)
+    prelude: |
+      $LOAD_PATH.unshift(File.expand_path("lib"))
+      require 'rexml'
+      RubyVM::YJIT.enable
+
+prelude: |
+  require 'rexml/document'
+
+  SIZE = 100000
+
+  top_level_xml     = "<!--" + "a" * SIZE + "-->\n"
+  in_doctype_xml    = "<!DOCTYPE foo [<!--" + "a" * SIZE + "-->]>"
+  after_doctype_xml = "<root/><!--" + "a" * SIZE + "-->"
+
+benchmark:
+  'top_level'      : REXML::Document.new(top_level_xml)
+  'in_doctype'     : REXML::Document.new(in_doctype_xml)
+  'after_doctype'  : REXML::Document.new(after_doctype_xml)
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index e666c2af..61d38ae2 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -277,14 +277,7 @@ def pull_event
             return process_instruction
           elsif @source.match?("<!", true)
             if @source.match?("--", true)
-              md = @source.match(/(.*?)-->/um, true)
-              if md.nil?
-                raise REXML::ParseException.new("Unclosed comment", @source)
-              end
-              if /--|-\z/.match?(md[1])
-                raise REXML::ParseException.new("Malformed comment", @source)
-              end
-              return [ :comment, md[1] ]
+              return [ :comment, process_comment ]
             elsif @source.match?("DOCTYPE", true)
               base_error_message = "Malformed DOCTYPE"
               unless @source.match?(/\s+/um, true)
@@ -417,12 +410,8 @@ def pull_event
                 raise REXML::ParseException.new(message, @source)
               end
               return [:notationdecl, name, *id]
-            elsif md = @source.match(/--(.*?)-->/um, true)
-              case md[1]
-              when /--/, /-\z/
-                raise REXML::ParseException.new("Malformed comment", @source)
-              end
-              return [ :comment, md[1] ] if md
+            elsif @source.match?("--", true)
+              return [ :comment, process_comment ]
             end
           elsif match = @source.match(/(%.*?;)\s*/um, true)
             return [ :externalentity, match[1] ]
@@ -463,14 +452,8 @@ def pull_event
               md = @source.match(/([^>]*>)/um)
               #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
               raise REXML::ParseException.new("Malformed node", @source) unless md
-              if md[0][0] == ?-
-                md = @source.match(/--(.*?)-->/um, true)
-
-                if md.nil? || /--|-\z/.match?(md[1])
-                  raise REXML::ParseException.new("Malformed comment", @source)
-                end
-
-                return [ :comment, md[1] ]
+              if @source.match?("--", true)
+                return [ :comment, process_comment ]
               elsif @source.match?("[CDATA[", true)
                 text = @source.read_until("]]>")
                 if text.chomp!("]]>")
@@ -738,6 +721,18 @@ def parse_id_invalid_details(accept_external_id:,
         end
       end
 
+      def process_comment
+        text = @source.read_until("-->")
+        unless text.chomp!("-->")
+          raise REXML::ParseException.new("Unclosed comment: Missing end '-->'", @source)
+        end
+
+        if text.include? "--" or text.end_with?("-")
+          raise REXML::ParseException.new("Malformed comment", @source)
+        end
+        text
+      end
+
       def process_instruction
         name = parse_name("Malformed XML: Invalid processing instruction node")
         if @source.match?(/\s+/um, true)
diff --git a/test/parse/test_comment.rb b/test/parse/test_comment.rb
index 4475dca7..c573e711 100644
--- a/test/parse/test_comment.rb
+++ b/test/parse/test_comment.rb
@@ -17,7 +17,7 @@ def test_toplevel_unclosed_comment
           parse("<!--")
         end
         assert_equal(<<~DETAIL, exception.to_s)
-          Unclosed comment
+          Unclosed comment: Missing end '-->'
           Line: 1
           Position: 4
           Last 80 unconsumed characters:
@@ -48,6 +48,18 @@ def test_toplevel_malformed_comment_end
         DETAIL
       end
 
+      def test_doctype_unclosed_comment
+        exception = assert_raise(REXML::ParseException) do
+          parse("<!DOCTYPE foo [<!--")
+        end
+        assert_equal(<<~DETAIL, exception.to_s)
+          Unclosed comment: Missing end '-->'
+          Line: 1
+          Position: 19
+          Last 80 unconsumed characters:
+        DETAIL
+      end
+
       def test_doctype_malformed_comment_inner
         exception = assert_raise(REXML::ParseException) do
           parse("<!DOCTYPE foo [<!-- -- -->")
@@ -72,16 +84,15 @@ def test_doctype_malformed_comment_end
         DETAIL
       end
 
-      def test_after_doctype_malformed_comment_short
+      def test_after_doctype_unclosed_comment
         exception = assert_raise(REXML::ParseException) do
           parse("<a><!-->")
         end
-        assert_equal(<<~DETAIL.chomp, exception.to_s)
-          Malformed comment
+        assert_equal(<<~DETAIL, exception.to_s)
+          Unclosed comment: Missing end '-->'
           Line: 1
           Position: 8
           Last 80 unconsumed characters:
-          -->
         DETAIL
       end
 

From a5f31c49be106011c4d96cb0e308ebbba118d192 Mon Sep 17 00:00:00 2001
From: NAITOH Jun <naitoh@gmail.com>
Date: Wed, 5 Mar 2025 06:20:42 +0900
Subject: [PATCH 04/12] Improve CDATA and comment parse performance (#246)

## Why?

Since `<a><!a` and `<a><!a>` are both malformed nodes, there is no need to
look ahead for a closing `>` before checking for comments and CDATA.

## Benchmark : comment (after_doctype)
```
$ benchmark-driver benchmark/parse_comment.yaml
Calculating -------------------------------------
                         before       after  before(YJIT)  after(YJIT)
       after_doctype     1.306k      5.586k        1.152k       3.569k i/s -     100.000 times in 0.076563s 0.017903s 0.086822s 0.028020s

Comparison:
                    after_doctype
               after:      5585.7 i/s
         after(YJIT):      3568.9 i/s - 1.57x  slower
              before:      1306.1 i/s - 4.28x  slower
        before(YJIT):      1151.8 i/s - 4.85x  slower
```
- YJIT=ON : 3.09x faster
- YJIT=OFF : 4.28x faster

## Benchmark : CDATA
```
$ benchmark-driver benchmark/parse_cdata.yaml
Calculating -------------------------------------
                         before       after  before(YJIT)  after(YJIT)
                 dom     1.269k      5.548k        1.053k       3.072k i/s -     100.000 times in 0.078808s 0.018026s 0.094976s 0.032553s
                 sax     1.399k      8.244k        1.220k       4.460k i/s -     100.000 times in 0.071458s 0.012130s 0.081958s 0.022422s
                pull     1.411k      8.319k        1.260k       4.806k i/s -     100.000 times in 0.070883s 0.012021s 0.079335s 0.020809s
              stream     1.420k      8.320k        1.254k       4.728k i/s -     100.000 times in 0.070406s 0.012019s 0.079738s 0.021149s

Comparison:
                              dom
               after:      5547.5 i/s
         after(YJIT):      3071.9 i/s - 1.81x  slower
              before:      1268.9 i/s - 4.37x  slower
        before(YJIT):      1052.9 i/s - 5.27x  slower

                              sax
               after:      8244.0 i/s
         after(YJIT):      4459.9 i/s - 1.85x  slower
              before:      1399.4 i/s - 5.89x  slower
        before(YJIT):      1220.1 i/s - 6.76x  slower

                             pull
               after:      8318.8 i/s
         after(YJIT):      4805.6 i/s - 1.73x  slower
              before:      1410.8 i/s - 5.90x  slower
        before(YJIT):      1260.5 i/s - 6.60x  slower

                           stream
               after:      8320.2 i/s
         after(YJIT):      4728.4 i/s - 1.76x  slower
              before:      1420.3 i/s - 5.86x  slower
        before(YJIT):      1254.1 i/s - 6.63x  slower
```
- YJIT=ON : 2.91x - 3.80x faster
- YJIT=OFF : 4.37x - 5.90x faster

Co-authored-by: Sutou Kouhei <kou@clear-code.com>
---
 lib/rexml/parsers/baseparser.rb |  6 ++----
 test/parse/test_comment.rb      | 13 +++++++++++++
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index 61d38ae2..de85aebd 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -449,9 +449,7 @@ def pull_event
               end
               return [ :end_element, last_tag ]
             elsif @source.match?("!", true)
-              md = @source.match(/([^>]*>)/um)
               #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
-              raise REXML::ParseException.new("Malformed node", @source) unless md
               if @source.match?("--", true)
                 return [ :comment, process_comment ]
               elsif @source.match?("[CDATA[", true)
@@ -461,9 +459,9 @@ def pull_event
                 else
                   raise REXML::ParseException.new("Malformed CDATA: Missing end ']]>'", @source)
                 end
+              else
+                raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor CDATA", @source)
               end
-              raise REXML::ParseException.new( "Declarations can only occur "+
-                "in the doctype declaration.", @source)
             elsif @source.match?("?", true)
               return process_instruction
             else
diff --git a/test/parse/test_comment.rb b/test/parse/test_comment.rb
index c573e711..5349c18e 100644
--- a/test/parse/test_comment.rb
+++ b/test/parse/test_comment.rb
@@ -84,6 +84,19 @@ def test_doctype_malformed_comment_end
         DETAIL
       end
 
+      def test_after_doctype_malformed_node
+        exception = assert_raise(REXML::ParseException) do
+          parse("<a><!a")
+        end
+        assert_equal(<<~DETAIL.chomp, exception.to_s)
+          Malformed node: Started with '<!' but not a comment nor CDATA
+          Line: 1
+          Position: 6
+          Last 80 unconsumed characters:
+          a
+        DETAIL
+      end
+
       def test_after_doctype_unclosed_comment
         exception = assert_raise(REXML::ParseException) do
           parse("<a><!-->")

From a85203e88c8f50f64140fb50492cf9dbe3d79301 Mon Sep 17 00:00:00 2001
From: NAITOH Jun <naitoh@gmail.com>
Date: Wed, 5 Mar 2025 09:45:19 +0900
Subject: [PATCH 05/12] Raise appropriate exception when failing to match start
 tag in DOCTYPE (#247)

## Why?
Added an explicit exception to make the parsing flow easier to understand.
---
 lib/rexml/parsers/baseparser.rb |  5 +++--
 test/parse/test_comment.rb      | 13 +++++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index de85aebd..750b1697 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -412,14 +412,15 @@ def pull_event
               return [:notationdecl, name, *id]
             elsif @source.match?("--", true)
               return [ :comment, process_comment ]
+            else
+              raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor ELEMENT,ENTITY,ATTLIST,NOTATION", @source)
             end
           elsif match = @source.match(/(%.*?;)\s*/um, true)
             return [ :externalentity, match[1] ]
           elsif @source.match?(/\]\s*>/um, true)
             @document_status = :after_doctype
             return [ :end_doctype ]
-          end
-          if @document_status == :in_doctype
+          else
             raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
           end
         end
diff --git a/test/parse/test_comment.rb b/test/parse/test_comment.rb
index 5349c18e..6339835d 100644
--- a/test/parse/test_comment.rb
+++ b/test/parse/test_comment.rb
@@ -48,6 +48,19 @@ def test_toplevel_malformed_comment_end
         DETAIL
       end
 
+      def test_doctype_malformed_node
+        exception = assert_raise(REXML::ParseException) do
+          parse("<!DOCTYPE foo [<!a")
+        end
+        assert_equal(<<~DETAIL.chomp, exception.to_s)
+          Malformed node: Started with '<!' but not a comment nor ELEMENT,ENTITY,ATTLIST,NOTATION
+          Line: 1
+          Position: 18
+          Last 80 unconsumed characters:
+          a
+        DETAIL
+      end
+
       def test_doctype_unclosed_comment
         exception = assert_raise(REXML::ParseException) do
           parse("<!DOCTYPE foo [<!--")

From 5d2606a99957f1623e464d70be065a31ed0ea58b Mon Sep 17 00:00:00 2001
From: James Coleman <jtc331@gmail.com>
Date: Thu, 3 Apr 2025 03:45:35 -0400
Subject: [PATCH 06/12] Fix docs typo in code example (#248)

---
 lib/rexml/document.rb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb
index d1747dd4..1960012c 100644
--- a/lib/rexml/document.rb
+++ b/lib/rexml/document.rb
@@ -309,8 +309,8 @@ def stand_alone?
     end
 
     # :call-seq:
-    #    doc.write(output=$stdout, indent=-1, transtive=false, ie_hack=false, encoding=nil)
-    #    doc.write(options={:output => $stdout, :indent => -1, :transtive => false, :ie_hack => false, :encoding => nil})
+    #    doc.write(output=$stdout, indent=-1, transitive=false, ie_hack=false, encoding=nil)
+    #    doc.write(options={:output => $stdout, :indent => -1, :transitive => false, :ie_hack => false, :encoding => nil})
     #
     # Write the XML tree out, optionally with indent.  This writes out the
     # entire XML document, including XML declarations, doctype declarations,

From d944fa478a972febe9c3ad2cf35232223d391597 Mon Sep 17 00:00:00 2001
From: NAITOH Jun <naitoh@gmail.com>
Date: Sat, 3 May 2025 09:03:12 +0900
Subject: [PATCH 07/12] NEWS.md : Fix the mentioned of the PR in
 CVE-2024-35176. (#253)

I think the CVE-2024-35176 entry in NEWS.md describes the referenced PR incorrectly.

```
- Improved parse performance when an attribute has many <s.
  - GH-126
```

#126 looks like it fixes the issue with attribute values that contain
multiple '>' characters.
---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 51a45cab..7f95d829 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -386,7 +386,7 @@
 
     * Patch by NAITOH Jun.
 
-  * Improved parse performance when an attribute has many `<`s.
+  * Improved parse performance when an attribute has many `>`s.
 
     * GH-126
 

From de6f40ed8749dd6ab4b7c4b80494a824f7f9027a Mon Sep 17 00:00:00 2001
From: tomoya ishida <tomoyapenguin@gmail.com>
Date: Sat, 3 May 2025 09:21:27 +0900
Subject: [PATCH 08/12] Fix reverse sort in xpath_parser (#251)

The code below was failing with `REXML::XPathParser#sort': undefined
method '-@' for an instance of Array`
```ruby
d = REXML::Document.new("<a><b><c/><d/><x/></b><b><e/><x/></b></a>")
matches = REXML::XPath.match(d, "a/b/x/preceding-sibling::node()")
# Before: error
# After: [<e/>, <d/>, <c/>]
```
This pull request will fix it.
---
 lib/rexml/xpath_parser.rb | 2 +-
 test/xpath/test_base.rb   | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb
index 5eb1e5a9..f86a87e6 100644
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@@ -671,7 +671,7 @@ def sort(array_of_nodes, order)
         if order == :forward
           index
         else
-          -index
+          index.map(&:-@)
         end
       end
       ordered.collect do |_index, node|
diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb
index 1dacd69d..53264a9e 100644
--- a/test/xpath/test_base.rb
+++ b/test/xpath/test_base.rb
@@ -416,6 +416,12 @@ def test_preceding
       assert_equal( 4, cs.length )
     end
 
+    def test_preceding_sibling
+      d = REXML::Document.new("<a><b><c/><d/><x/></b><b><e/><x/></b></a>")
+      matches = REXML::XPath.match(d, "a/b/x/preceding-sibling::node()")
+      assert_equal(["e", "d", "c"], matches.map(&:name))
+    end
+
     def test_following
       d = Document.new "<a><b id='0'/><b/><b><c id='1'/><c id='2'/></b><b id='1'/></a>"
       start = XPath.first( d, "/a/b[@id='0']" )

From 249d770b4ead129abf475708e84e3f1f7908962a Mon Sep 17 00:00:00 2001
From: NAITOH Jun <naitoh@gmail.com>
Date: Tue, 6 May 2025 21:33:00 +0900
Subject: [PATCH 09/12] Fix duplicate responses in XPath following,
 following-sibling, preceding, preceding-sibling (#255)

## Why?
See: https://github.com/ruby/rexml/pull/251#issuecomment-2845103143

## Expected values

- XPath : a/d/preceding::* => ["d", "c", "b"]
```xml
<a>
  <b/> <!-- a/d/preceding::b -->
  <c/> <!-- a/d/preceding::c -->
  <d/> <!-- a/d/preceding::d -->
  <d/> <!-- self -->
  <e/>
  <f/>
</a>
```

- XPath : a/d/following::* => ["d", "e", "f"]
```xml
<a>
  <b/>
  <c/>
  <d/> <!-- self -->
  <d/> <!-- a/d/following::d -->
  <e/> <!-- a/d/following::e -->
  <f/> <!-- a/d/following::f -->
</a>
```

- XPath : a/b/x/following-sibling::* => ["c", "d", "e"]
```xml
<a>
  <b>
    <x/> <!-- self -->
    <c/> <!-- a/b/x/following-sibling::c -->
    <d/> <!-- a/b/x/following-sibling::d -->
  </b>
  <b>
    <x/> <!-- self -->
    <e/> <!-- a/b/x/following-sibling::e -->
  </b>
</a>
```

- XPath : a/b/x/following-sibling::* => ["c", "d", "x", "e"]
```xml
<a>
  <b>
    <x/> <!-- self -->
    <c/> <!-- a/b/x/following-sibling::c -->
    <d/> <!-- a/b/x/following-sibling::d -->
    <x/> <!-- a/b/x/following-sibling::x -->
    <e/> <!-- a/b/x/following-sibling::e -->
  </b>
</a>
```

- XPath : a/b/x/preceding-sibling::* => ["e", "d", "c"]
```xml
<a>
  <b>
    <c/>  <!-- a/b/x/preceding-sibling::c -->
    <d/>  <!-- a/b/x/preceding-sibling::d -->
    <x/>  <!-- self -->
  </b>
  <b>
    <e/>  <!-- a/b/x/preceding-sibling::e -->
    <x/>  <!-- self -->
  </b>
</a>
```

- XPath : a/b/x/preceding-sibling::* => ["e", "x", "d", "c"]
```xml
<a>
  <b>
    <c/>  <!-- a/b/x/preceding-sibling::c -->
    <d/>  <!-- a/b/x/preceding-sibling::d -->
    <x/>  <!-- a/b/x/preceding-sibling::x -->
    <e/>  <!-- a/b/x/preceding-sibling::e -->
    <x/>  <!-- self -->
  </b>
</a>
```

- XPath : //a/following-sibling::*[1] => ["w", "x", "y", "z"]
```xml
<div>
  <div>
    <a/> <!-- self -->
    <w/> <!-- //a/following-sibling::*[1] -->
  </div>
  <a/> <!-- self -->
  <x/> <!-- //a/following-sibling::*[1] -->
  <a/> <!-- self -->
  <y/> <!-- //a/following-sibling::*[1] -->
  <a/> <!-- self -->
  <z/> <!-- //a/following-sibling::*[1] -->
</div>
```
---
 lib/rexml/xpath_parser.rb |  2 +-
 test/xpath/test_base.rb   | 97 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 95 insertions(+), 4 deletions(-)

diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb
index f86a87e6..cde2e5d5 100644
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@@ -144,7 +144,7 @@ def match(path_stack, nodeset)
       result = expr(path_stack, nodeset)
       case result
       when Array # nodeset
-        unnode(result)
+        unnode(result).uniq
       else
         [result]
       end
diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb
index 53264a9e..b923eed2 100644
--- a/test/xpath/test_base.rb
+++ b/test/xpath/test_base.rb
@@ -416,12 +416,103 @@ def test_preceding
       assert_equal( 4, cs.length )
     end
 
-    def test_preceding_sibling
-      d = REXML::Document.new("<a><b><c/><d/><x/></b><b><e/><x/></b></a>")
-      matches = REXML::XPath.match(d, "a/b/x/preceding-sibling::node()")
+    def test_preceding_multiple
+      source = <<-XML
+<a>
+  <b/><c/><d/><d/><e/><f/>
+</a>
+      XML
+      doc = REXML::Document.new(source)
+      matches = REXML::XPath.match(doc, "a/d/preceding::*")
+      assert_equal(["d", "c", "b"], matches.map(&:name))
+    end
+
+    def test_following_multiple
+      source = <<-XML
+<a>
+  <b/><c/><d/><d/><e/><f/>
+</a>
+      XML
+      doc = REXML::Document.new(source)
+      matches = REXML::XPath.match(doc, "a/d/following::*")
+      assert_equal(["d", "e", "f"], matches.map(&:name))
+    end
+
+    def test_following_sibling_across_multiple_nodes
+      source = <<-XML
+<a>
+  <b>
+    <x/><c/><d/>
+  </b>
+  <b>
+    <x/><e/>
+  </b>
+</a>
+      XML
+      doc = REXML::Document.new(source)
+      matches = REXML::XPath.match(doc, "a/b/x/following-sibling::*")
+      assert_equal(["c", "d", "e"], matches.map(&:name))
+    end
+
+    def test_following_sibling_within_single_node
+      source = <<-XML
+<a>
+  <b>
+    <x/><c/><d/><x/><e/>
+  </b>
+</a>
+      XML
+      doc = REXML::Document.new(source)
+      matches = REXML::XPath.match(doc, "a/b/x/following-sibling::*")
+      assert_equal(["c", "d", "x", "e"], matches.map(&:name))
+    end
+
+    def test_following_sibling_predicates
+      source = <<-XML
+<div>
+  <div>
+    <a/><w/>
+  </div>
+  <a/><x/>
+  <a/><y/>
+  <a/><z/>
+</div>
+      XML
+      doc = REXML::Document.new(source)
+      # Finds a node flowing <a/>
+      matches = REXML::XPath.match(doc, "//a/following-sibling::*[1]")
+      assert_equal(["w", "x", "y", "z"], matches.map(&:name))
+    end
+
+    def test_preceding_sibling_across_multiple_nodes
+      source = <<-XML
+<a>
+  <b>
+    <c/><d/><x/>
+  </b>
+  <b>
+    <e/><x/>
+  </b>
+</a>
+      XML
+      doc = REXML::Document.new(source)
+      matches = REXML::XPath.match(doc, "a/b/x/preceding-sibling::*")
       assert_equal(["e", "d", "c"], matches.map(&:name))
     end
 
+    def test_preceding_sibling_within_single_node
+      source = <<-XML
+<a>
+  <b>
+    <c/><d/><x/><e/><x/>
+  </b>
+</a>
+      XML
+      doc = REXML::Document.new(source)
+      matches = REXML::XPath.match(doc, "a/b/x/preceding-sibling::*")
+      assert_equal(["e", "x", "d", "c"], matches.map(&:name))
+    end
+
     def test_following
       d = Document.new "<a><b id='0'/><b/><b><c id='1'/><c id='2'/></b><b id='1'/></a>"
       start = XPath.first( d, "/a/b[@id='0']" )

From cd575a10cac58eb47f235ed186060ac65ffb5284 Mon Sep 17 00:00:00 2001
From: tomoya ishida <tomoyapenguin@gmail.com>
Date: Wed, 7 May 2025 21:02:31 +0900
Subject: [PATCH 10/12] Deprecate accepting array as an element in XPath.match,
 first and each (#252)

`XPath.match`, `XPath.first`, `XPath.each`, `XPathParser#parse` and
`XPathParser#match` accepted a nodeset as the element.
This pull request changes the first parameter of these methods to be an
element instead of a nodeset.
Passing a nodeset will be deprecated.

```ruby
# Documented usage. OK
REXML::XPath.match(element, xpath)

# Undocumented usage. Deprecate in this pull request
nodeset = [element]
REXML::XPath.match(nodeset, xpath)
```

### Background
#249 will introduce a temporary cache.
```ruby
def parse path, nodeset
  path_stack = @parser.parse( path )
  nodeset.first.document.send(:enable_cache) do
    match( path_stack, nodeset )
  end
end
```
But the signature `XPathParser#match(path, nodeset)` does not guarantee
that all nodes in the nodeset have the same root document.
So the cache does not work in the code below. It's still slow.
```ruby
REXML::XPath.match(2.times.map { REXML::Document.new('<a>'*400+'</a>'*400) }, 'a//a')
```

The interface is holding us back, so I propose to stop accepting an
array as the element.
This change is backward incompatible, but it only drops an
undocumented feature. I think only the test code was unintentionally
using this feature.

### XPath.match with array
XPath.match only traverses the first element of the array for some
selectors.
```ruby
nodeset = [REXML::Document.new("<a><b/></a>"), REXML::Document.new("<a><c/></a>")]

REXML::XPath.match(nodeset, "a/*")
#=> [<b/>, <c/>]

REXML::XPath.match(nodeset, "//a/*")
#=> [<b/>] # I expect [<b/>, <c/>] but the second document is ignored
```
It indicates that `XPath.match` is not designed to search inside
multiple nodes/documents.

---------

Co-authored-by: Sutou Kouhei <kou@cozmixng.org>
---
 lib/rexml/xpath.rb        |  3 ---
 lib/rexml/xpath_parser.rb | 22 ++++++++++++----------
 test/test_jaxen.rb        | 16 ++++++++++------
 test/xpath/test_base.rb   | 17 ++++++++++++++---
 4 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/lib/rexml/xpath.rb b/lib/rexml/xpath.rb
index a0921bd8..666d764f 100644
--- a/lib/rexml/xpath.rb
+++ b/lib/rexml/xpath.rb
@@ -35,7 +35,6 @@ def XPath::first(element, path=nil, namespaces=nil, variables={}, options={})
       parser.namespaces = namespaces
       parser.variables = variables
       path = "*" unless path
-      element = [element] unless element.kind_of? Array
       parser.parse(path, element).flatten[0]
     end
 
@@ -64,7 +63,6 @@ def XPath::each(element, path=nil, namespaces=nil, variables={}, options={}, &bl
       parser.namespaces = namespaces
       parser.variables = variables
       path = "*" unless path
-      element = [element] unless element.kind_of? Array
       parser.parse(path, element).each( &block )
     end
 
@@ -74,7 +72,6 @@ def XPath::match(element, path=nil, namespaces=nil, variables={}, options={})
       parser.namespaces = namespaces
       parser.variables = variables
       path = "*" unless path
-      element = [element] unless element.kind_of? Array
       parser.parse(path,element)
     end
   end
diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb
index cde2e5d5..8440015b 100644
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@@ -76,19 +76,19 @@ def variables=( vars={} )
       @variables = vars
     end
 
-    def parse path, nodeset
+    def parse path, node
       path_stack = @parser.parse( path )
-      match( path_stack, nodeset )
+      match( path_stack, node )
     end
 
-    def get_first path, nodeset
+    def get_first path, node
       path_stack = @parser.parse( path )
-      first( path_stack, nodeset )
+      first( path_stack, node )
     end
 
-    def predicate path, nodeset
+    def predicate path, node
       path_stack = @parser.parse( path )
-      match( path_stack, nodeset )
+      match( path_stack, node )
     end
 
     def []=( variable_name, value )
@@ -136,11 +136,13 @@ def first( path_stack, node )
     end
 
 
-    def match(path_stack, nodeset)
-      nodeset = nodeset.collect.with_index do |node, i|
-        position = i + 1
-        XPathNode.new(node, position: position)
+    def match(path_stack, node)
+      if node.is_a?(Array)
+        Kernel.warn("REXML::XPath.each, REXML::XPath.first, REXML::XPath.match dropped support for nodeset...", uplevel: 1)
+        return [] if node.empty?
+        node = node.first
       end
+      nodeset = [XPathNode.new(node, position: 1)]
       result = expr(path_stack, nodeset)
       case result
       when Array # nodeset
diff --git a/test/test_jaxen.rb b/test/test_jaxen.rb
index 6038e88e..548120d6 100644
--- a/test/test_jaxen.rb
+++ b/test/test_jaxen.rb
@@ -56,7 +56,9 @@ def process_test_case(name)
 
     # processes a tests/document/context node
     def process_context(doc, context)
-      test_context = XPath.match(doc, context.attributes["select"])
+      matched = XPath.match(doc, context.attributes["select"])
+      assert_equal(1, matched.size)
+      test_context = matched.first
       namespaces = context.namespaces
       namespaces.delete("var")
       namespaces = nil if namespaces.empty?
@@ -101,10 +103,14 @@ def process_nominal_test(context, variables, namespaces, test)
         assert_equal(Integer(expected, 10),
                      matched.size,
                      user_message(context, xpath, matched))
+      else
+        assert_operator(matched.size, :>, 0, user_message(context, xpath, matched))
       end
 
       XPath.each(test, "valueOf") do |value_of|
-        process_value_of(matched, variables, namespaces, value_of)
+        matched.each do |subcontext|
+          process_value_of(subcontext, variables, namespaces, value_of)
+        end
       end
     end
 
@@ -118,10 +124,8 @@ def process_exceptional_test(context, variables, namespaces, test)
 
     def user_message(context, xpath, matched)
       message = ""
-      context.each_with_index do |node, i|
-        message << "Node#{i}:\n"
-        message << "#{node}\n"
-      end
+      message << "Node:\n"
+      message << "#{context}\n"
       message << "XPath: <#{xpath}>\n"
       message << "Matched <#{matched}>"
       message
diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb
index b923eed2..ab22f6f9 100644
--- a/test/xpath/test_base.rb
+++ b/test/xpath/test_base.rb
@@ -411,9 +411,10 @@ def test_preceding
 
       s = "<a><b><c id='1'/></b><b><b><c id='2'/><c id='3'/></b><c id='4'/></b><c id='NOMATCH'><c id='5'/></c></a>"
       d = REXML::Document.new(s)
-      c = REXML::XPath.match( d, "//c[@id = '5']")
-      cs = REXML::XPath.match( c, "preceding::c" )
-      assert_equal( 4, cs.length )
+      c = REXML::XPath.match(d, "//c[@id = '5']")
+      assert_equal(1, c.length)
+      cs = REXML::XPath.match(c.first, "preceding::c")
+      assert_equal(4, cs.length)
     end
 
     def test_preceding_multiple
@@ -1255,5 +1256,15 @@ def test_or_and
       end
       assert_equal(["/"], hrefs, "Bug #3842 [ruby-core:32447]")
     end
+
+    def test_match_with_deprecated_usage
+      verbose, $VERBOSE = $VERBOSE, nil
+      doc = Document.new("<a><b/></a>")
+      assert_equal(['b'], XPath.match([doc, doc], '//b').map(&:name))
+      assert_equal(['b'], XPath.match([doc], '//b').map(&:name))
+      assert_equal([], XPath.match([], '//b').map(&:name))
+    ensure
+      $VERBOSE = verbose
+    end
   end
 end

From e80ffdd12713cd138dbe33f26968452dc33d20df Mon Sep 17 00:00:00 2001
From: NAITOH Jun <naitoh@gmail.com>
Date: Mon, 12 May 2025 10:22:11 +0900
Subject: [PATCH 11/12] Improve using `//` in XPath performance (#249)

When using `//` in XPath, the deeper the tag hierarchy, the slower it
becomes due to the namespace acquisition process.
Caching namespace information improves performance when using `//` with
XPath.

## Benchmark (Comparison with rexml 3.4.1)

```
$ benchmark-driver benchmark/xpath.yaml
Calculating -------------------------------------
                                                     rexml 3.4.1      master  3.4.1(YJIT)  master(YJIT)
REXML::XPath.match(REXML::Document.new(xml), 'a//a')      29.215     234.909      108.945       492.410 i/s -     100.000 times in 3.422925s 0.425697s 0.917898s 0.203083s

Comparison:
             REXML::XPath.match(REXML::Document.new(xml), 'a//a')
                                        master(YJIT):       492.4 i/s
                                              master:       234.9 i/s - 2.10x  slower
                                         3.4.1(YJIT):       108.9 i/s - 4.52x  slower
                                         rexml 3.4.1:        29.2 i/s - 16.85x  slower
```

- YJIT=ON : 4.52x faster
- YJIT=OFF : 8.04x faster

---------

Co-authored-by: tomoya ishida <tomoyapenguin@gmail.com>
Co-authored-by: Sutou Kouhei <kou@clear-code.com>
---
 benchmark/xpath.yaml      | 32 ++++++++++++++++++++++++++++++++
 lib/rexml/attribute.rb    |  4 ++++
 lib/rexml/document.rb     | 14 ++++++++++++++
 lib/rexml/element.rb      | 33 +++++++++++++++++----------------
 lib/rexml/xpath_parser.rb | 27 ++++++++++++---------------
 test/test_core.rb         | 23 +++++++++++++++++------
 test/xpath/test_base.rb   | 10 ++++++++++
 7 files changed, 106 insertions(+), 37 deletions(-)
 create mode 100644 benchmark/xpath.yaml

diff --git a/benchmark/xpath.yaml b/benchmark/xpath.yaml
new file mode 100644
index 00000000..d6e970eb
--- /dev/null
+++ b/benchmark/xpath.yaml
@@ -0,0 +1,32 @@
+loop_count: 100
+contexts:
+  - gems:
+      rexml: 3.2.6
+    require: false
+    prelude: require 'rexml'
+  - name: master
+    prelude: |
+      $LOAD_PATH.unshift(File.expand_path("lib"))
+      require 'rexml'
+  - name: 3.2.6(YJIT)
+    gems:
+      rexml: 3.2.6
+    require: false
+    prelude: |
+      require 'rexml'
+      RubyVM::YJIT.enable
+  - name: master(YJIT)
+    prelude: |
+      $LOAD_PATH.unshift(File.expand_path("lib"))
+      require 'rexml'
+      RubyVM::YJIT.enable
+
+prelude: |
+  require 'rexml/document'
+
+  DEPTH = 100
+  xml   = '<a>' * DEPTH + '</a>' * DEPTH
+  doc   = REXML::Document.new(xml)
+
+benchmark:
+  "REXML::XPath.match(REXML::Document.new(xml), 'a//a')" : REXML::XPath.match(doc, "a//a")
diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb
index fe48745c..7a190225 100644
--- a/lib/rexml/attribute.rb
+++ b/lib/rexml/attribute.rb
@@ -206,6 +206,10 @@ def xpath
       path += "/@#{self.expanded_name}"
       return path
     end
+
+    def document
+      @element&.document
+    end
   end
 end
 #vim:ts=2 sw=2 noexpandtab:
diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb
index 1960012c..1c678bef 100644
--- a/lib/rexml/document.rb
+++ b/lib/rexml/document.rb
@@ -448,6 +448,20 @@ def document
     end
 
     private
+
+    attr_accessor :namespaces_cache
+
+    # New document level cache is created and available in this block.
+    # This API is thread unsafe. Users can't change this document in this block.
+    def enable_cache
+      @namespaces_cache = {}
+      begin
+        yield
+      ensure
+        @namespaces_cache = nil
+      end
+    end
+
     def build( source )
       Parsers::TreeParser.new( source, self ).parse
     end
diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb
index 4e3a60b9..b62b6cc2 100644
--- a/lib/rexml/element.rb
+++ b/lib/rexml/element.rb
@@ -589,10 +589,12 @@ def prefixes
     #   d.elements['//c'].namespaces # => {"x"=>"1", "y"=>"2", "z"=>"3"}
     #
     def namespaces
-      namespaces = {}
-      namespaces = parent.namespaces if parent
-      namespaces = namespaces.merge( attributes.namespaces )
-      return namespaces
+      namespaces_cache = document&.__send__(:namespaces_cache)
+      if namespaces_cache
+        namespaces_cache[self] ||= calculate_namespaces
+      else
+        calculate_namespaces
+      end
     end
 
     # :call-seq:
@@ -619,17 +621,9 @@ def namespace(prefix=nil)
       if prefix.nil?
         prefix = prefix()
       end
-      if prefix == ''
-        prefix = "xmlns"
-      else
-        prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
-      end
-      ns = nil
-      target = self
-      while ns.nil? and target
-        ns = target.attributes[prefix]
-        target = target.parent
-      end
+      prefix = (prefix == '') ? 'xmlns' : prefix.delete_prefix("xmlns:")
+      ns = namespaces[prefix]
+
       ns = '' if ns.nil? and prefix == 'xmlns'
       return ns
     end
@@ -1516,8 +1510,15 @@ def write(output=$stdout, indent=-1, transitive=false, ie_hack=false)
       formatter.write( self, output )
     end
 
-
     private
+    def calculate_namespaces
+      if parent
+        parent.namespaces.merge(attributes.namespaces)
+      else
+        attributes.namespaces
+      end
+    end
+
     def __to_xpath_helper node
       rv = node.expanded_name.clone
       if node.parent
diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb
index 8440015b..70ae8919 100644
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@@ -78,7 +78,15 @@ def variables=( vars={} )
 
     def parse path, node
       path_stack = @parser.parse( path )
-      match( path_stack, node )
+      if node.is_a?(Array)
+        Kernel.warn("REXML::XPath.each, REXML::XPath.first, REXML::XPath.match dropped support for nodeset...", uplevel: 1)
+        return [] if node.empty?
+        node = node.first
+      end
+
+      node.document.__send__(:enable_cache) do
+        match( path_stack, node )
+      end
     end
 
     def get_first path, node
@@ -137,11 +145,6 @@ def first( path_stack, node )
 
 
     def match(path_stack, node)
-      if node.is_a?(Array)
-        Kernel.warn("REXML::XPath.each, REXML::XPath.first, REXML::XPath.match dropped support for nodeset...", uplevel: 1)
-        return [] if node.empty?
-        node = node.first
-      end
       nodeset = [XPathNode.new(node, position: 1)]
       result = expr(path_stack, nodeset)
       case result
@@ -494,14 +497,10 @@ def node_test(path_stack, nodesets, any_type: :element)
                 if strict?
                   raw_node.name == name and raw_node.namespace == ""
                 else
-                  # FIXME: This DOUBLES the time XPath searches take
-                  ns = get_namespace(raw_node, prefix)
-                  raw_node.name == name and raw_node.namespace == ns
+                  raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix)
                 end
               else
-                # FIXME: This DOUBLES the time XPath searches take
-                ns = get_namespace(raw_node, prefix)
-                raw_node.name == name and raw_node.namespace == ns
+                raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix)
               end
             when :attribute
               if prefix.nil?
@@ -509,9 +508,7 @@ def node_test(path_stack, nodesets, any_type: :element)
               elsif prefix.empty?
                 raw_node.name == name and raw_node.namespace == ""
               else
-                # FIXME: This DOUBLES the time XPath searches take
-                ns = get_namespace(raw_node.element, prefix)
-                raw_node.name == name and raw_node.namespace == ns
+                raw_node.name == name and raw_node.namespace == get_namespace(raw_node.element, prefix)
               end
             else
               false
diff --git a/test/test_core.rb b/test/test_core.rb
index 34fe9e07..651056f2 100644
--- a/test/test_core.rb
+++ b/test/test_core.rb
@@ -653,18 +653,23 @@ def test_namespace
       assert_equal "<sean:blah>Some text</sean:blah>", out
     end
 
-
     def test_add_namespace
       e = Element.new 'a'
+      assert_equal("", e.namespace)
+      assert_nil(e.namespace('foo'))
       e.add_namespace 'someuri'
       e.add_namespace 'foo', 'otheruri'
       e.add_namespace 'xmlns:bar', 'thirduri'
-      assert_equal 'someuri', e.attributes['xmlns']
-      assert_equal 'otheruri', e.attributes['xmlns:foo']
-      assert_equal 'thirduri', e.attributes['xmlns:bar']
+      assert_equal("someuri", e.namespace)
+      assert_equal("otheruri", e.namespace('foo'))
+      assert_equal("otheruri", e.namespace('xmlns:foo'))
+      assert_equal("thirduri", e.namespace('bar'))
+      assert_equal("thirduri", e.namespace('xmlns:bar'))
+      assert_equal('someuri', e.attributes['xmlns'])
+      assert_equal('otheruri', e.attributes['xmlns:foo'])
+      assert_equal('thirduri', e.attributes['xmlns:bar'])
     end
 
-
     def test_big_documentation
       d = File.open(fixture_path("documentation.xml")) {|f| Document.new f }
       assert_equal "Sean Russell", d.elements["documentation/head/author"].text.tr("\n\t", " ").squeeze(" ")
@@ -764,9 +769,15 @@ def test_attributes_each
 
     def test_delete_namespace
       doc = Document.new "<a xmlns='1' xmlns:x='2'/>"
+      assert_equal("1", doc.root.namespace)
+      assert_equal("2", doc.root.namespace('x'))
+      assert_equal("2", doc.root.namespace('xmlns:x'))
       doc.root.delete_namespace
       doc.root.delete_namespace 'x'
-      assert_equal "<a/>", doc.to_s
+      assert_equal("<a/>", doc.to_s)
+      assert_equal("", doc.root.namespace)
+      assert_nil(doc.root.namespace('x'))
+      assert_nil(doc.root.namespace('xmlns:x'))
     end
 
     def test_each_element_with_attribute
diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb
index ab22f6f9..764171ab 100644
--- a/test/xpath/test_base.rb
+++ b/test/xpath/test_base.rb
@@ -1193,6 +1193,16 @@ def test_namespaces_0
       assert_equal( 1,  XPath.match( d, "//x:*" ).size )
     end
 
+    def test_namespaces_cache
+      doc = Document.new("<a xmlns='1'><b/></a>")
+      assert_equal("<b/>", XPath.first(doc, "//b[namespace-uri()='1']").to_s)
+      assert_nil(XPath.first(doc, "//b[namespace-uri()='']"))
+
+      doc.root.delete_namespace
+      assert_nil(XPath.first(doc, "//b[namespace-uri()='1']"))
+      assert_equal("<b/>", XPath.first(doc, "//b[namespace-uri()='']").to_s)
+    end
+
     def test_ticket_71
       doc = Document.new(%Q{<root xmlns:ns1="xyz" xmlns:ns2="123"><element ns1:attrname="foo" ns2:attrname="bar"/></root>})
       el = doc.root.elements[1]

From 3dc9eca877f8444b7ac1d6008feb724cbfdc239a Mon Sep 17 00:00:00 2001
From: NAITOH Jun <naitoh@gmail.com>
Date: Thu, 29 May 2025 10:14:32 +0900
Subject: [PATCH 12/12] Improve `Text.check` performance (#256)

The doctype parameter of Text.check is not being used.
Changing the doctype parameter to an optional parameter improves the
parsing speed of the DOM.

## Benchmark

```
                         before       after  before(YJIT)  after(YJIT)
                 dom     19.854      23.805        33.969       37.712 i/s -     100.000 times in 5.036779s 4.200839s 2.943877s 2.651709s
                 sax     29.436      30.494        54.070       55.089 i/s -     100.000 times in 3.397155s 3.279348s 1.849463s 1.815255s
                pull     34.908      34.857        62.969       64.895 i/s -     100.000 times in 2.864651s 2.868842s 1.588082s 1.540939s
              stream     34.570      34.281        60.616       60.355 i/s -     100.000 times in 2.892656s 2.917080s 1.649737s 1.656866s

Comparison:
                              dom
         after(YJIT):        37.7 i/s
        before(YJIT):        34.0 i/s - 1.11x  slower
               after:        23.8 i/s - 1.58x  slower
              before:        19.9 i/s - 1.90x  slower

                              sax
         after(YJIT):        55.1 i/s
        before(YJIT):        54.1 i/s - 1.02x  slower
               after:        30.5 i/s - 1.81x  slower
              before:        29.4 i/s - 1.87x  slower

                             pull
         after(YJIT):        64.9 i/s
        before(YJIT):        63.0 i/s - 1.03x  slower
              before:        34.9 i/s - 1.86x  slower
               after:        34.9 i/s - 1.86x  slower

                           stream
        before(YJIT):        60.6 i/s
         after(YJIT):        60.4 i/s - 1.00x  slower
              before:        34.6 i/s - 1.75x  slower
               after:        34.3 i/s - 1.77x  slower
```

- YJIT=ON : 1.00x - 1.11x faster (dom: 1.11x faster)
- YJIT=OFF : 1.00x - 1.20x faster (dom: 1.20x faster)
---
 lib/rexml/attribute.rb  | 2 +-
 lib/rexml/text.rb       | 6 +++---
 test/test_text_check.rb | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb
index 7a190225..ba49207c 100644
--- a/lib/rexml/attribute.rb
+++ b/lib/rexml/attribute.rb
@@ -173,7 +173,7 @@ def element=( element )
       @element = element
 
       if @normalized
-        Text.check( @normalized, NEEDS_A_SECOND_CHECK, doctype )
+        Text.check( @normalized, NEEDS_A_SECOND_CHECK )
       end
 
       self
diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb
index 2bf480fb..6f821472 100644
--- a/lib/rexml/text.rb
+++ b/lib/rexml/text.rb
@@ -104,16 +104,16 @@ def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
       @entity_filter = entity_filter if entity_filter
       clear_cache
 
-      Text.check(@string, illegal, doctype) if @raw
+      Text.check(@string, illegal) if @raw
     end
 
     def parent= parent
       super(parent)
-      Text.check(@string, NEEDS_A_SECOND_CHECK, doctype) if @raw and @parent
+      Text.check(@string, NEEDS_A_SECOND_CHECK) if @raw and @parent
     end
 
     # check for illegal characters
-    def Text.check string, pattern, doctype
+    def Text.check string, pattern, doctype = nil
 
       # illegal anywhere
       if !string.match?(VALID_XML_CHARS)
diff --git a/test/test_text_check.rb b/test/test_text_check.rb
index 11cf65a3..3f2f7864 100644
--- a/test/test_text_check.rb
+++ b/test/test_text_check.rb
@@ -4,7 +4,7 @@ module REXMLTests
   class TextCheckTester < Test::Unit::TestCase
 
     def check(string)
-      REXML::Text.check(string, REXML::Text::NEEDS_A_SECOND_CHECK, nil)
+      REXML::Text.check(string, REXML::Text::NEEDS_A_SECOND_CHECK)
     end
 
     def assert_check(string)