@@ -404,6 +404,228 @@ public function testAcceptsJsonPath()
404
404
$ this ->assertSame ('red ' , $ result [0 ]['color ' ]);
405
405
}
406
406
407
/**
 * Resolves a JSONPath expression against the shared Unicode fixture document
 * and compares the outcome strictly with the expected values.
 *
 * @param string $jsonPath JSONPath expression under test
 * @param array  $expected values the crawler is expected to return, in order
 *
 * @dataProvider provideUnicodeEscapeSequencesProvider
 */
public function testUnicodeEscapeSequences(string $jsonPath, array $expected)
{
    $crawler = self::getUnicodeDocumentCrawler();
    $actual = $crawler->find($jsonPath);

    $this->assertSame($expected, $actual);
}
414
+
415
/**
 * Supplies JSONPath expressions whose double-quoted names contain escape
 * sequences, paired with the values expected from the Unicode fixture document.
 *
 * Every expression is a single-quoted PHP string, so sequences such as
 * \u00e9, \t and \n are not decoded by PHP and reach the JSONPath parser
 * verbatim.
 *
 * @return list<array{string, list<string>}> each entry is [jsonPath, expected]
 */
public static function provideUnicodeEscapeSequencesProvider(): array
{
    return [
        // \uXXXX escapes for code points inside the Basic Multilingual Plane.
        ['$["caf\u00e9"]', ['coffee']],
        ['$["\u65e5\u672c"]', ['Japan']],
        // The fixture has no root-level "Müller" key (it only appears as a
        // user name), so a valid escape that matches nothing yields [].
        ['$["M\u00fcller"]', []],

        // Surrogate pair \ud83d\ude00 encodes U+1F600.
        ['$["emoji\ud83d\ude00"]', ['smiley']],

        // Single-character escapes.
        ['$["tab\there"]', ['with tab']],
        ['$["new\nline"]', ['with newline']],
        ['$["quote\"here"]', ['with quote']],
        ['$["backslash\\\\here"]', ['with backslash']],
        ['$["apostrophe\'here"]', ['with apostrophe']],

        // Control character written as a \u escape.
        ['$["control\u0001char"]', ['with control char']],

        // Escaped ASCII letter (\u0063 = "c") mixed with an escaped accent.
        ['$["\u0063af\u00e9"]', ['coffee']],
    ];
}
468
+
469
/**
 * Resolves a JSONPath expression that uses single-quoted name selectors
 * against the shared Unicode fixture document and compares the outcome
 * strictly with the expected values.
 *
 * @param string $jsonPath JSONPath expression under test
 * @param array  $expected values the crawler is expected to return, in order
 *
 * @dataProvider provideSingleQuotedStringProvider
 */
public function testSingleQuotedStrings(string $jsonPath, array $expected)
{
    $this->assertSame($expected, self::getUnicodeDocumentCrawler()->find($jsonPath));
}
476
+
477
/**
 * Supplies JSONPath expressions that select names with single-quoted string
 * literals, paired with the values expected from the Unicode fixture document.
 *
 * The PHP literals are single-quoted too, hence the \' around each name; the
 * \u sequences are left untouched by PHP and reach the JSONPath parser as-is.
 *
 * @return list<array{string, list<string>}> each entry is [jsonPath, expected]
 */
public static function provideSingleQuotedStringProvider(): array
{
    return [
        ['$[\'caf\u00e9\']', ['coffee']],
        ['$[\'\u65e5\u672c\']', ['Japan']],
        // A double quote needs no escaping inside a single-quoted name.
        ['$[\'quote"here\']', ['with quote']],
        // \\\' produces the two characters \' in the path: an escaped apostrophe.
        ['$[\'apostrophe\\\'here\']', ['with apostrophe']],
    ];
}
498
+
499
/**
 * Runs a filter expression against the shared Unicode fixture document and
 * checks the number of matches and, when there is at least one, the country
 * of the first match.
 *
 * @param string $jsonPath        filter expression under test
 * @param int    $expectedCount   number of entries the filter must return
 * @param string $expectedCountry "country" of the first match; not checked
 *                                when $expectedCount is 0
 *
 * @dataProvider provideFilterWithUnicodeProvider
 */
public function testFilterWithUnicodeStrings(string $jsonPath, int $expectedCount, string $expectedCountry)
{
    $result = self::getUnicodeDocumentCrawler()->find($jsonPath);

    $this->assertCount($expectedCount, $result);

    // With no match there is no first element to inspect.
    if ($expectedCount > 0) {
        $this->assertSame($expectedCountry, $result[0]['country']);
    }
}
512
+
513
/**
 * Supplies filter expressions that compare the "name" of each user with a
 * string literal containing \u escapes, together with the expected number of
 * matches and the country of the first match.
 *
 * @return list<array{string, int, string}> each entry is
 *                                          [jsonPath, expectedCount, expectedCountry]
 */
public static function provideFilterWithUnicodeProvider(): array
{
    return [
        ['$.users[?(@.name == "caf\u00e9")]', 1, 'France'],
        ['$.users[?(@.name == "\u65e5\u672c\u592a\u90ce")]', 1, 'Japan'],
        ['$.users[?(@.name == "Jos\u00e9")]', 1, 'Spain'],
        // Plain ASCII literal, no escapes involved.
        ['$.users[?(@.name == "John")]', 1, 'USA'],
        // \u0020 is a space; no user carries this name, so nothing matches
        // and the country is left empty.
        ['$.users[?(@.name == "NonExistent\u0020Name")]', 0, ''],
    ];
}
543
+
544
/**
 * Checks that a path containing a malformed \u escape still produces an array.
 *
 * Only the type of the result is asserted; an exception raised while
 * evaluating the path would fail the test.
 *
 * @param string $jsonPath JSONPath expression containing a malformed \u escape
 *
 * @dataProvider provideInvalidUnicodeSequenceProvider
 */
public function testInvalidUnicodeSequencesAreProcessedAsLiterals(string $jsonPath)
{
    $result = self::getUnicodeDocumentCrawler()->find($jsonPath);

    $this->assertIsArray($result, 'invalid unicode sequence should be treated as literal and not throw');
}
551
+
552
/**
 * Supplies JSONPath expressions whose \u escape is malformed.
 *
 * @return list<array{string}> each entry holds a single jsonPath
 */
public static function provideInvalidUnicodeSequenceProvider(): array
{
    return [
        // Four characters follow \u, but none is a hexadecimal digit.
        ['$["test\uZZZZ"]'],
        // Only three hexadecimal digits follow \u.
        ['$["test\u123"]'],
        // Nothing follows \u before the closing quote.
        ['$["test\u"]'],
    ];
}
566
+
567
/**
 * Resolves multi-segment paths written with \u escapes against a nested
 * document whose keys are Japanese words and emoji-suffixed names.
 *
 * @param string $jsonPath JSONPath expression under test
 * @param array  $expected values the crawler is expected to return, in order
 *
 * @dataProvider provideComplexUnicodePath
 */
public function testComplexUnicodePaths(string $jsonPath, array $expected)
{
    $complexJson = [
        'データ' => [
            'ユーザー' => [
                ['名前' => 'テスト', 'ID' => 1],
                ['名前' => 'サンプル', 'ID' => 2],
            ],
        ],
        'special🔑' => [
            'value💎' => 'treasure',
        ],
    ];

    // JSON_THROW_ON_ERROR surfaces an encoding failure as an exception
    // instead of handing `false` to the crawler.
    $crawler = new JsonCrawler(json_encode($complexJson, JSON_THROW_ON_ERROR));

    $this->assertSame($expected, $crawler->find($jsonPath));
}
588
+
589
/**
 * Supplies multi-segment JSONPath expressions whose names are written
 * entirely or partly as \u escapes, paired with the expected values.
 *
 * @return list<array{string, list<string>}> each entry is [jsonPath, expected]
 */
public static function provideComplexUnicodePath(): array
{
    return [
        // データ -> ユーザー -> first element -> 名前
        [
            '$["\u30c7\u30fc\u30bf"]["\u30e6\u30fc\u30b6\u30fc"][0]["\u540d\u524d"]',
            ['テスト'],
        ],
        // Emoji suffixes written as surrogate pairs (U+1F511 and U+1F48E).
        [
            '$["special\ud83d\udd11"]["value\ud83d\udc8e"]',
            ['treasure'],
        ],
        // Same path as the first case, with a wildcard over all elements.
        [
            '$["\u30c7\u30fc\u30bf"]["\u30e6\u30fc\u30b6\u30fc"][*]["\u540d\u524d"]',
            ['テスト', 'サンプル'],
        ],
    ];
}
606
+
607
/**
 * Checks that a name made up solely of characters outside the Basic
 * Multilingual Plane can be selected with consecutive surrogate-pair escapes.
 */
public function testSurrogatePairHandling()
{
    $json = ['𝒽𝑒𝓁𝓁𝑜' => 'mathematical script hello'];

    // JSON_THROW_ON_ERROR surfaces an encoding failure as an exception
    // instead of handing `false` to the crawler.
    $crawler = new JsonCrawler(json_encode($json, JSON_THROW_ON_ERROR));

    // Each of the five characters needs its own \uD835\uDCxx surrogate pair.
    $result = $crawler->find('$["\ud835\udcbd\ud835\udc52\ud835\udcc1\ud835\udcc1\ud835\udc5c"]');

    $this->assertSame(['mathematical script hello'], $result);
}
616
+
617
/**
 * Checks that a name containing one kind of quote can be selected by
 * delimiting it with the other kind, without escaping inside the path.
 */
public function testMixedQuoteTypes()
{
    $json = ['key"with"quotes' => 'value1', "key'with'apostrophes" => 'value2'];

    // JSON_THROW_ON_ERROR surfaces an encoding failure as an exception
    // instead of handing `false` to the crawler.
    $crawler = new JsonCrawler(json_encode($json, JSON_THROW_ON_ERROR));

    // Double quotes inside a single-quoted name.
    $result = $crawler->find('$[\'key"with"quotes\']');
    $this->assertSame(['value1'], $result);

    // Apostrophes inside a double-quoted name.
    $result = $crawler->find('$["key\'with\'apostrophes"]');
    $this->assertSame(['value2'], $result);
}
628
+
407
629
private static function getBookstoreCrawler (): JsonCrawler
408
630
{
409
631
return new JsonCrawler (<<<JSON
@@ -453,4 +675,28 @@ private static function getSimpleCollectionCrawler(): JsonCrawler
453
675
{"a": [3, 5, 1, 2, 4, 6]}
454
676
JSON );
455
677
}
678
+
679
/**
 * Builds a crawler over the fixture document shared by the Unicode tests.
 *
 * The root object holds keys with accented, CJK and emoji characters, keys
 * containing a tab, a newline, a double quote, a backslash, an apostrophe and
 * a control character, plus a "users" list of name/country pairs used by the
 * filter tests.
 */
private static function getUnicodeDocumentCrawler(): JsonCrawler
{
    $json = [
        'café' => 'coffee',
        '日本' => 'Japan',
        'emoji😀' => 'smiley',
        // Written with an escape so the tab survives editors and copy/paste.
        "tab\there" => 'with tab',
        "new\nline" => 'with newline',
        'quote"here' => 'with quote',
        'backslash\\here' => 'with backslash',
        'apostrophe\'here' => 'with apostrophe',
        "control\x01char" => 'with control char',
        'users' => [
            ['name' => 'café', 'country' => 'France'],
            ['name' => '日本太郎', 'country' => 'Japan'],
            ['name' => 'John', 'country' => 'USA'],
            ['name' => 'Müller', 'country' => 'Germany'],
            ['name' => 'José', 'country' => 'Spain'],
        ],
    ];

    // JSON_THROW_ON_ERROR surfaces an encoding failure as an exception
    // instead of handing `false` to the crawler.
    return new JsonCrawler(json_encode($json, JSON_THROW_ON_ERROR));
}
456
702
}
0 commit comments