Philipp Hörist pushed to branch master at gajim / python-nbxmpp


Commits:
f9a1cb90 by Philipp Hörist at 2025-08-07T23:24:45+02:00
imprv: Better cache invalid xml regex

- - - - -


3 changed files:

- nbxmpp/dispatcher.py
- nbxmpp/old_dispatcher.py
- nbxmpp/util.py


Changes:

=====================================
nbxmpp/dispatcher.py
=====================================
@@ -12,7 +12,6 @@ from typing import overload
 from typing import TYPE_CHECKING
 
 import logging
-import re
 import time
 from collections.abc import Callable
 from xml.parsers.expat import ExpatError
@@ -91,8 +90,8 @@ from nbxmpp.protocol import StreamErrorNode
 from nbxmpp.simplexml import Node
 from nbxmpp.simplexml import NodeBuilder
 from nbxmpp.structs import StanzaHandler
-from nbxmpp.util import get_invalid_xml_regex
 from nbxmpp.util import get_properties_struct
+from nbxmpp.util import INVALID_XML_RX
 from nbxmpp.util import is_websocket_close
 from nbxmpp.util import is_websocket_stream_error
 from nbxmpp.util import LogAdapter
@@ -251,8 +250,6 @@ class StanzaDispatcher(Observable):
             "error": StreamErrorNode,
         }
 
-        self.invalid_chars_re = get_invalid_xml_regex()
-
         self._register_namespace("unknown")
         self._register_namespace(Namespace.STREAMS)
         self._register_namespace(Namespace.CLIENT)
@@ -449,7 +446,7 @@ class StanzaDispatcher(Observable):
         self._parser.dispatch = self.dispatch
 
     def replace_non_character(self, data: str) -> str:
-        return re.sub(self.invalid_chars_re, "\ufffd", data)
+        return INVALID_XML_RX.sub("\ufffd", data)
 
     def process_data(self, data: str) -> None:
         # Parse incoming data


=====================================
nbxmpp/old_dispatcher.py
=====================================
@@ -10,7 +10,6 @@ different handlers to different XMPP stanzas and namespaces
 
 import inspect
 import logging
-import re
 import sys
 import uuid
 from xml.parsers.expat import ExpatError
@@ -69,6 +68,7 @@ from nbxmpp.protocol import Presence
 from nbxmpp.protocol import Protocol
 from nbxmpp.simplexml import NodeBuilder
 from nbxmpp.util import get_properties_struct
+from nbxmpp.util import INVALID_XML_RX
 
 log = logging.getLogger("nbxmpp.dispatcher")
 
@@ -145,24 +145,6 @@ class XMPPDispatcher(PlugIn):
             self.get_module,
         ]
 
-        # \ufddo -> \ufdef range
-        c = "\ufdd0"
-        r = c
-        while c < "\ufdef":
-            c = chr(ord(c) + 1)
-            r += "|" + c
-
-        # \ufffe-\uffff, \u1fffe-\u1ffff, ..., \u10fffe-\u10ffff
-        c = "\ufffe"
-        r += "|" + c
-        r += "|" + chr(ord(c) + 1)
-        while c < "\U0010fffe":
-            c = chr(ord(c) + 0x10000)
-            r += "|" + c
-            r += "|" + chr(ord(c) + 1)
-
-        self.invalid_chars_re = re.compile(r)
-
     def getAnID(self):
         return str(uuid.uuid4())
 
@@ -293,7 +275,7 @@ class XMPPDispatcher(PlugIn):
             )
 
     def replace_non_character(self, data):
-        return re.sub(self.invalid_chars_re, "\ufffd", data)
+        return INVALID_XML_RX.sub("\ufffd", data)
 
     def ProcessNonBlocking(self, data):
         """


=====================================
nbxmpp/util.py
=====================================
@@ -376,6 +376,9 @@ def get_invalid_xml_regex() -> re.Pattern[str]:
     return re.compile(r)
 
 
+INVALID_XML_RX = get_invalid_xml_regex()
+
+
 def get_tls_error_phrase(tls_error: Gio.TlsCertificateFlags) -> str | None:
     phrase = GIO_TLS_ERRORS.get(tls_error)
     if phrase is None:



View it on GitLab: 
https://dev.gajim.org/gajim/python-nbxmpp/-/commit/f9a1cb90a7d3fea2a0b63f237e2077666d6df609

-- 
View it on GitLab: 
https://dev.gajim.org/gajim/python-nbxmpp/-/commit/f9a1cb90a7d3fea2a0b63f237e2077666d6df609
You're receiving this email because of your account on dev.gajim.org.


_______________________________________________
Commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to