@@ -404,6 +404,260 @@ public function testAcceptsJsonPath()
404
404
$ this ->assertSame ('red ' , $ result [0 ]['color ' ]);
405
405
}
406
406
407
/**
 * Runs each double-quoted JSONPath expression from the provider against the
 * Unicode fixture document and asserts the exact list of matched values.
 *
 * @dataProvider provideUnicodeEscapeSequencesProvider
 */
public function testUnicodeEscapeSequences(string $jsonPath, array $expected)
{
    $this->assertSame($expected, self::getUnicodeDocumentCrawler()->find($jsonPath));
}
414
+
415
/**
 * Data sets for testUnicodeEscapeSequences().
 *
 * Every path is a single-quoted PHP string, so sequences such as \u00e9, \t
 * or \n reach the JSONPath parser as literal backslash escapes rather than
 * being interpreted by PHP.
 *
 * @return array<string, array{0: string, 1: array}> JSONPath expression and expected result list
 */
public static function provideUnicodeEscapeSequencesProvider(): array
{
    return [
        'e-acute escape' => [
            '$["caf\u00e9"]',
            ['coffee'],
        ],
        'CJK escapes' => [
            '$["\u65e5\u672c"]',
            ['Japan'],
        ],
        // "Müller" only exists as a user name in the fixture, never as a top-level key.
        'escaped key that does not exist' => [
            '$["M\u00fcller"]',
            [],
        ],

        'emoji surrogate pair' => [
            '$["emoji\ud83d\ude00"]',
            ['smiley'],
        ],

        'tab escape' => [
            '$["tab\there"]',
            ['with tab'],
        ],
        'newline escape' => [
            '$["new\nline"]',
            ['with newline'],
        ],
        'escaped double quote' => [
            '$["quote\"here"]',
            ['with quote'],
        ],
        'escaped backslash' => [
            '$["backslash\\\\here"]',
            ['with backslash'],
        ],
        'apostrophe inside double quotes' => [
            '$["apostrophe\'here"]',
            ['with apostrophe'],
        ],

        'control character escape' => [
            '$["control\u0001char"]',
            ['with control char'],
        ],

        'escaped ASCII letter mixed with e-acute' => [
            '$["\u0063af\u00e9"]',
            ['coffee'],
        ],
    ];
}
468
+
469
/**
 * Runs each single-quoted JSONPath expression from the provider against the
 * Unicode fixture document and asserts the exact list of matched values.
 *
 * @dataProvider provideSingleQuotedStringProvider
 */
public function testSingleQuotedStrings(string $jsonPath, array $expected)
{
    $this->assertSame($expected, self::getUnicodeDocumentCrawler()->find($jsonPath));
}
476
+
477
/**
 * Data sets for testSingleQuotedStrings().
 *
 * The paths are double-quoted PHP strings so the JSONPath name selector can
 * use apostrophes without PHP-level escaping; every backslash meant for the
 * JSONPath parser is therefore doubled.
 *
 * @return array<string, array{0: string, 1: array}> JSONPath expression and expected result list
 */
public static function provideSingleQuotedStringProvider(): array
{
    return [
        'e-acute escape' => [
            "$['caf\\u00e9']",
            ['coffee'],
        ],
        'CJK escapes' => [
            "$['\\u65e5\\u672c']",
            ['Japan'],
        ],
        'unescaped double quote' => [
            "$['quote\"here']",
            ['with quote'],
        ],
        // "Müller" only exists as a user name in the fixture, never as a top-level key.
        'escaped key that does not exist' => [
            "$['M\\u00fcller']",
            [],
        ],

        'emoji surrogate pair' => [
            "$['emoji\\ud83d\\ude00']",
            ['smiley'],
        ],

        'tab escape' => [
            "$['tab\\there']",
            ['with tab'],
        ],
        'escaped double quote' => [
            "$['quote\\\"here']",
            ['with quote'],
        ],
        'escaped backslash' => [
            "$['backslash\\\\here']",
            ['with backslash'],
        ],
        'escaped apostrophe' => [
            "$['apostrophe\\'here']",
            ['with apostrophe'],
        ],

        'control character escape' => [
            "$['control\\u0001char']",
            ['with control char'],
        ],

        'escaped ASCII letter mixed with e-acute' => [
            "$['\\u0063af\\u00e9']",
            ['coffee'],
        ],
    ];
}
530
+
531
/**
 * Runs a filter expression against the Unicode fixture document, asserts the
 * number of matches and, when at least one match is expected, the "country"
 * of the first matched entry.
 *
 * @dataProvider provideFilterWithUnicodeProvider
 */
public function testFilterWithUnicodeStrings(string $jsonPath, int $expectedCount, string $expectedCountry)
{
    $result = self::getUnicodeDocumentCrawler()->find($jsonPath);

    $this->assertCount($expectedCount, $result);

    // With zero expected matches there is no first entry to inspect.
    if ($expectedCount > 0) {
        $this->assertSame($expectedCountry, $result[0]['country']);
    }
}
544
+
545
/**
 * Data sets for testFilterWithUnicodeStrings().
 *
 * @return array<string, array{0: string, 1: int, 2: string}> filter expression, expected match count,
 *                                                            expected country of the first match
 *                                                            (empty string when no match is expected)
 */
public static function provideFilterWithUnicodeProvider(): array
{
    return [
        'name with e-acute' => [
            '$.users[?(@.name == "caf\u00e9")]',
            1,
            'France',
        ],
        'Japanese name' => [
            '$.users[?(@.name == "\u65e5\u672c\u592a\u90ce")]',
            1,
            'Japan',
        ],
        'Spanish name' => [
            '$.users[?(@.name == "Jos\u00e9")]',
            1,
            'Spain',
        ],
        'plain ASCII name' => [
            '$.users[?(@.name == "John")]',
            1,
            'USA',
        ],
        'no matching user' => [
            '$.users[?(@.name == "NonExistent\u0020Name")]',
            0,
            '',
        ],
    ];
}
575
+
576
/**
 * Asserts that a path containing a malformed \u escape still yields an array
 * from find() instead of raising an error.
 *
 * @dataProvider provideInvalidUnicodeSequenceProvider
 */
public function testInvalidUnicodeSequencesAreProcessedAsLiterals(string $jsonPath)
{
    $this->assertIsArray(self::getUnicodeDocumentCrawler()->find($jsonPath), 'invalid unicode sequence should be treated as literal and not throw');
}
583
+
584
/**
 * Data sets for testInvalidUnicodeSequencesAreProcessedAsLiterals().
 *
 * Each path carries a \u escape that is not followed by four hexadecimal digits.
 *
 * @return array<string, array{0: string}>
 */
public static function provideInvalidUnicodeSequenceProvider(): array
{
    return [
        'non-hex digits' => [
            '$["test\uZZZZ"]',
        ],
        'only three hex digits' => [
            '$["test\u123"]',
        ],
        'no digits' => [
            '$["test\u"]',
        ],
    ];
}
598
+
599
/**
 * Builds a document whose keys are Japanese words and emoji-suffixed names,
 * then asserts that \u-escaped paths from the provider return the expected values.
 *
 * @dataProvider provideComplexUnicodePath
 */
public function testComplexUnicodePaths(string $jsonPath, array $expected)
{
    $complexJson = [
        'データ' => [
            'ユーザー' => [
                ['名前' => 'テスト', 'ID' => 1],
                ['名前' => 'サンプル', 'ID' => 2],
            ],
        ],
        'special🔑' => [
            'value💎' => 'treasure',
        ],
    ];

    // JSON_THROW_ON_ERROR: fail loudly if the fixture cannot be encoded instead of
    // handing `false` to the crawler.
    $crawler = new JsonCrawler(json_encode($complexJson, \JSON_THROW_ON_ERROR));

    $this->assertSame($expected, $crawler->find($jsonPath));
}
620
+
621
/**
 * Data sets for testComplexUnicodePaths().
 *
 * The escaped segments spell the keys of that test's inline document:
 * \u30c7\u30fc\u30bf is "データ", \u30e6\u30fc\u30b6\u30fc is "ユーザー",
 * \u540d\u524d is "名前", and the surrogate pairs encode the 🔑 and 💎 emoji.
 *
 * @return array<string, array{0: string, 1: array}> JSONPath expression and expected result list
 */
public static function provideComplexUnicodePath(): array
{
    return [
        'nested escaped keys with index' => [
            '$["\u30c7\u30fc\u30bf"]["\u30e6\u30fc\u30b6\u30fc"][0]["\u540d\u524d"]',
            ['テスト'],
        ],
        'emoji keys via surrogate pairs' => [
            '$["special\ud83d\udd11"]["value\ud83d\udc8e"]',
            ['treasure'],
        ],
        'nested escaped keys with wildcard' => [
            '$["\u30c7\u30fc\u30bf"]["\u30e6\u30fc\u30b6\u30fc"][*]["\u540d\u524d"]',
            ['テスト', 'サンプル'],
        ],
    ];
}
638
+
639
/**
 * Asserts that a key made entirely of characters outside the Basic
 * Multilingual Plane can be addressed through \u surrogate-pair escapes.
 */
public function testSurrogatePairHandling()
{
    $json = ['𝒽𝑒𝓁𝓁𝑜' => 'mathematical script hello'];
    // JSON_THROW_ON_ERROR: fail loudly if the fixture cannot be encoded.
    $crawler = new JsonCrawler(json_encode($json, \JSON_THROW_ON_ERROR));

    // mathematical script "hello" requires surrogate pairs for each character
    $result = $crawler->find('$["\ud835\udcbd\ud835\udc52\ud835\udcc1\ud835\udcc1\ud835\udc5c"]');
    $this->assertSame(['mathematical script hello'], $result);
}
648
+
649
/**
 * Asserts that a key containing double quotes can be selected with a
 * single-quoted name selector, and a key containing apostrophes with a
 * double-quoted one, without escaping in the JSONPath itself.
 */
public function testMixedQuoteTypes()
{
    $json = ['key"with"quotes' => 'value1', "key'with'apostrophes" => 'value2'];
    // JSON_THROW_ON_ERROR: fail loudly if the fixture cannot be encoded.
    $crawler = new JsonCrawler(json_encode($json, \JSON_THROW_ON_ERROR));

    // JSONPath seen by the crawler: $['key"with"quotes']
    $result = $crawler->find('$[\'key"with"quotes\']');
    $this->assertSame(['value1'], $result);

    // JSONPath seen by the crawler: $["key'with'apostrophes"]
    $result = $crawler->find('$["key\'with\'apostrophes"]');
    $this->assertSame(['value2'], $result);
}
660
+
407
661
private static function getBookstoreCrawler (): JsonCrawler
408
662
{
409
663
return new JsonCrawler (<<<JSON
@@ -453,4 +707,28 @@ private static function getSimpleCollectionCrawler(): JsonCrawler
453
707
{"a": [3, 5, 1, 2, 4, 6]}
454
708
JSON );
455
709
}
710
+
711
/**
 * Builds the shared fixture used by the Unicode/escape tests: a document
 * with accented, CJK, emoji, whitespace, quote, backslash and control
 * character keys at the top level, plus a "users" list of name/country pairs.
 */
private static function getUnicodeDocumentCrawler(): JsonCrawler
{
    $json = [
        'café' => 'coffee',
        '日本' => 'Japan',
        'emoji😀' => 'smiley',
        // Written with an explicit \t so the tab survives editors and copy/paste.
        "tab\there" => 'with tab',
        "new\nline" => 'with newline',
        'quote"here' => 'with quote',
        'backslash\\here' => 'with backslash',
        'apostrophe\'here' => 'with apostrophe',
        "control\x01char" => 'with control char',
        'users' => [
            ['name' => 'café', 'country' => 'France'],
            ['name' => '日本太郎', 'country' => 'Japan'],
            ['name' => 'John', 'country' => 'USA'],
            ['name' => 'Müller', 'country' => 'Germany'],
            ['name' => 'José', 'country' => 'Spain'],
        ],
    ];

    // JSON_THROW_ON_ERROR: fail loudly if the fixture cannot be encoded instead of
    // handing `false` to the crawler.
    return new JsonCrawler(json_encode($json, \JSON_THROW_ON_ERROR));
}
456
734
}
0 commit comments