♻️(back) replace Ypy by pycrdt · ag-python/docs@a5b9169 · GitHub
[go: up one dir, main page]

Skip to content

Commit a5b9169

Browse files
committed
♻️(back) replace Ypy by pycrdt
Ypy is deprecated and unmaintained, and we have problems parsing existing documents with it. We replace it with pycrdt, an actively maintained library that does not have the issues we have with Ypy.
1 parent c0dfb4b commit a5b9169

7 files changed

+55
-123
lines changed

src/backend/core/tests/documents/test_api_documents_duplicate.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
from django.core.files.storage import default_storage
1212
from django.utils import timezone
1313

14+
import pycrdt
1415
import pytest
1516
import requests
16-
import y_py
1717
from rest_framework.test import APIClient
1818

1919
from core import factories, models
@@ -84,13 +84,14 @@ def test_api_documents_duplicate_success(index):
8484
image_refs = [get_image_refs(doc_id) for doc_id in document_ids]
8585

8686
# Create document content with the first image only
87-
ydoc = y_py.YDoc() # pylint: disable=no-member
88-
with ydoc.begin_transaction() as txn:
89-
xml_fragment = ydoc.get_xml_element("document-store")
90-
xml_fragment.push_xml_element(txn, "image").set_attribute(
91-
txn, "src", image_refs[0][1]
92-
)
93-
update = y_py.encode_state_as_update(ydoc) # pylint: disable=no-member
87+
ydoc = pycrdt.Doc()
88+
fragment = pycrdt.XmlFragment(
89+
[
90+
pycrdt.XmlElement("img", {"src": image_refs[0][1]}),
91+
]
92+
)
93+
ydoc["document-store"] = fragment
94+
update = ydoc.get_update()
9495
base64_content = base64.b64encode(update).decode("utf-8")
9596

9697
# Create documents

src/backend/core/tests/documents/test_api_documents_update_extract_attachments.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
import base64
66
from uuid import uuid4
77

8+
import pycrdt
89
import pytest
9-
import y_py
1010
from rest_framework.test import APIClient
1111

1212
from core import factories
@@ -16,14 +16,15 @@
1616

1717
def get_ydoc_with_mages(image_keys):
1818
"""Return a ydoc from text for testing purposes."""
19-
ydoc = y_py.YDoc() # pylint: disable=no-member
20-
with ydoc.begin_transaction() as txn:
21-
xml_fragment = ydoc.get_xml_element("document-store")
22-
for key in image_keys:
23-
xml_image = xml_fragment.push_xml_element(txn, "image")
24-
xml_image.set_attribute(txn, "src", f"http://localhost/media/{key:s}")
25-
26-
update = y_py.encode_state_as_update(ydoc) # pylint: disable=no-member
19+
ydoc = pycrdt.Doc()
20+
fragment = pycrdt.XmlFragment(
21+
[
22+
pycrdt.XmlElement("img", {"src": f"http://localhost/media/{key:s}"})
23+
for key in image_keys
24+
]
25+
)
26+
ydoc["document-store"] = fragment
27+
update = ydoc.get_update()
2728
return base64.b64encode(update).decode("utf-8")
2829

2930

src/backend/core/tests/migrations/test_migrations_0020_remove_is_public_add_field_attachments_and_duplicated_from.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
from django.core.files.base import ContentFile
55
from django.core.files.storage import default_storage
66

7+
import pycrdt
78
import pytest
8-
import y_py
99

1010
from core import models
1111

@@ -27,14 +27,13 @@ def test_populate_attachments_on_all_documents(migrator):
2727

2828
# Create document content with an image
2929
file_key = f"{old_doc_with_attachments.id!s}/file"
30-
ydoc = y_py.YDoc() # pylint: disable=no-member
3130
image_key = f"{old_doc_with_attachments.id!s}/attachments/{uuid.uuid4()!s}.png"
32-
with ydoc.begin_transaction() as txn:
33-
xml_fragment = ydoc.get_xml_element("document-store")
34-
xml_fragment.push_xml_element(txn, "image").set_attribute(
35-
txn, "src", f"http://localhost/media/{image_key:s}"
36-
)
37-
update = y_py.encode_state_as_update(ydoc) # pylint: disable=no-member
31+
ydoc = pycrdt.Doc()
32+
fragment = pycrdt.XmlFragment(
33+
[pycrdt.XmlElement("img", {"src": f"http://localhost/media/{image_key:s}"})]
34+
)
35+
ydoc["document-store"] = fragment
36+
update = ydoc.get_update()
3837
base64_content = base64.b64encode(update).decode("utf-8")
3938
bytes_content = base64_content.encode("utf-8")
4039
content_file = ContentFile(bytes_content)

src/backend/core/tests/test_utils.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import base64
44
import uuid
55

6-
import y_py
6+
import pycrdt
77

88
from core import utils
99

@@ -29,17 +29,22 @@
2929

3030
def test_utils_base64_yjs_to_text():
3131
"""Test extract text from saved yjs document"""
32-
assert utils.base64_yjs_to_text(TEST_BASE64_STRING) == "Hello world"
32+
assert utils.base64_yjs_to_text(TEST_BASE64_STRING) == "Hello w or ld"
3333

3434

3535
def test_utils_base64_yjs_to_xml():
3636
"""Test extract xml from saved yjs document"""
3737
content = utils.base64_yjs_to_xml(TEST_BASE64_STRING)
3838
assert (
39-
'<heading "level"="1" "textAlignment"="left">Hello</heading>' in content
40-
or '<heading "textAlignment"="left" "level"="1">Hello</heading>' in content
39+
'<heading textAlignment="left" level="1"><italic>Hello</italic></heading>'
40+
in content
41+
or '<heading level="1" textAlignment="left"><italic>Hello</italic></heading>'
42+
in content
43+
)
44+
assert (
45+
'<bulletListItem textAlignment="left">w<bold>or</bold>ld</bulletListItem>'
46+
in content
4147
)
42-
assert '<bulletListItem "textAlignment"="left">world</bulletListItem>' in content
4348

4449

4550
def test_utils_extract_attachments():
@@ -56,22 +61,17 @@ def test_utils_extract_attachments():
5661
image_key3 = f"{uuid.uuid4()!s}/attachments/{uuid.uuid4()!s}.png"
5762
image_url3 = f"http://localhost/media/{image_key3:s}"
5863

59-
ydoc = y_py.YDoc() # pylint: disable=no-member
60-
with ydoc.begin_transaction() as txn:
61-
xml_fragment = ydoc.get_xml_element("document-store")
62-
63-
xml_image = xml_fragment.push_xml_element(txn, "image")
64-
xml_image.set_attribute(txn, "src", image_url1)
65-
66-
xml_image = xml_fragment.push_xml_element(txn, "image")
67-
xml_image.set_attribute(txn, "src", image_url2)
68-
69-
xml_paragraph = xml_fragment.push_xml_element(txn, "paragraph")
70-
xml_text = xml_paragraph.push_xml_text(txn)
71-
xml_text.push(txn, image_url3)
64+
ydoc = pycrdt.Doc()
65+
frag = pycrdt.XmlFragment(
66+
[
67+
pycrdt.XmlElement("img", {"src": image_url1}),
68+
pycrdt.XmlElement("img", {"src": image_url2}),
69+
pycrdt.XmlElement("p", {}, [pycrdt.XmlText(image_url3)]),
70+
]
71+
)
72+
ydoc["document-store"] = frag
7273

73-
update = y_py.encode_state_as_update(ydoc) # pylint: disable=no-member
74+
update = ydoc.get_update()
7475
base64_string = base64.b64encode(update).decode("utf-8")
75-
7676
# image_key2 is missing the "/media/" part and shouldn't get extracted
7777
assert utils.extract_attachments(base64_string) == [image_key1, image_key3]

src/backend/core/tests/test_utils_base64_yjs_to_text.py

Lines changed: 0 additions & 70 deletions
This file was deleted.

src/backend/core/utils.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import base64
44
import re
55

6-
import y_py as Y
6+
import pycrdt
77
from bs4 import BeautifulSoup
88

99
from core import enums
@@ -52,19 +52,19 @@ def base64_yjs_to_xml(base64_string):
5252
"""Extract xml from base64 yjs document."""
5353

5454
decoded_bytes = base64.b64decode(base64_string)
55-
uint8_array = bytearray(decoded_bytes)
55+
# uint8_array = bytearray(decoded_bytes)
5656

57-
doc = Y.YDoc() # pylint: disable=E1101
58-
Y.apply_update(doc, uint8_array) # pylint: disable=E1101
59-
return str(doc.get_xml_element("document-store"))
57+
doc = pycrdt.Doc()
58+
doc.apply_update(decoded_bytes)
59+
return str(doc.get("document-store", type=pycrdt.XmlFragment))
6060

6161

6262
def base64_yjs_to_text(base64_string):
6363
"""Extract text from base64 yjs document."""
6464

6565
blocknote_structure = base64_yjs_to_xml(base64_string)
66-
soup = BeautifulSoup(blocknote_structure, "html.parser")
67-
return soup.get_text(separator=" ").strip()
66+
soup = BeautifulSoup(blocknote_structure, "lxml-xml")
67+
return soup.get_text(separator=" ", strip=True)
6868

6969

7070
def extract_attachments(content):

src/backend/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,18 +47,19 @@ dependencies = [
4747
"factory_boy==3.3.3",
4848
"gunicorn==23.0.0",
4949
"jsonschema==4.23.0",
50+
"lxml==5.3.1",
5051
"markdown==3.7",
5152
"mozilla-django-oidc==4.0.1",
5253
"nested-multipart-parser==1.5.0",
5354
"openai==1.68.2",
5455
"psycopg[binary]==3.2.6",
56+
"pycrdt==0.12.10",
5557
"PyJWT==2.10.1",
5658
"python-magic==0.4.27",
5759
"requests==2.32.3",
5860
"sentry-sdk==2.24.0",
5961
"url-normalize==1.4.3",
6062
"whitenoise==6.9.0",
61-
"y-py==0.6.2",
6263
]
6364

6465
[project.urls]

0 commit comments

Comments
 (0)
0