[PRISM] Support UTF-8 symbols · ruby/ruby@955dfef · GitHub
[go: up one dir, main page]

Skip to content

Commit 955dfef

Browse files
committed
[PRISM] Support UTF-8 symbols
Fixes ruby/prism#2242.
1 parent 59bb78e commit 955dfef

File tree

2 files changed

+16
-9
lines changed

2 files changed

+16
-9
lines changed

prism_compile.c

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -200,23 +200,24 @@ parse_string_encoded(const pm_node_t *node, const pm_string_t *string, const pm_
200200
}
201201

202202
static inline ID
203-
parse_symbol(const uint8_t *start, const uint8_t *end, const pm_parser_t *parser)
203+
parse_symbol(const uint8_t *start, const uint8_t *end, const char *encoding)
204204
{
205-
rb_encoding *enc = rb_enc_from_index(rb_enc_find_index(parser->encoding->name));
205+
rb_encoding *enc = rb_enc_from_index(rb_enc_find_index(encoding));
206206
return rb_intern3((const char *) start, end - start, enc);
207207
}
208208

209209
static inline ID
210-
parse_string_symbol(const pm_string_t *string, const pm_parser_t *parser)
210+
parse_string_symbol(const pm_symbol_node_t *symbol, const pm_parser_t *parser)
211211
{
212-
const uint8_t *start = pm_string_source(string);
213-
return parse_symbol(start, start + pm_string_length(string), parser);
212+
const char *encoding = symbol->base.flags & PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING ? "UTF-8" : parser->encoding->name;
213+
const uint8_t *start = pm_string_source(&symbol->unescaped);
214+
return parse_symbol(start, start + pm_string_length(&symbol->unescaped), encoding);
214215
}
215216

216217
static inline ID
217218
parse_location_symbol(const pm_location_t *location, const pm_parser_t *parser)
218219
{
219-
return parse_symbol(location->start, location->end, parser);
220+
return parse_symbol(location->start, location->end, parser->encoding->name);
220221
}
221222

222223
static int
@@ -395,7 +396,7 @@ pm_static_literal_value(const pm_node_t *node, const pm_scope_node_t *scope_node
395396
case PM_STRING_NODE:
396397
return parse_string(&((pm_string_node_t *) node)->unescaped, parser);
397398
case PM_SYMBOL_NODE:
398-
return ID2SYM(parse_string_symbol(&((pm_symbol_node_t *) node)->unescaped, parser));
399+
return ID2SYM(parse_string_symbol((pm_symbol_node_t *)node, parser));
399400
case PM_TRUE_NODE:
400401
return Qtrue;
401402
default:
@@ -1870,7 +1871,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t
18701871
const pm_node_t *key = ((const pm_assoc_node_t *) element)->key;
18711872
assert(PM_NODE_TYPE_P(key, PM_SYMBOL_NODE));
18721873

1873-
VALUE symbol = ID2SYM(parse_string_symbol(&((const pm_symbol_node_t *) key)->unescaped, scope_node->parser));
1874+
VALUE symbol = ID2SYM(parse_string_symbol((const pm_symbol_node_t *)key, scope_node->parser));
18741875
rb_ary_push(keys, symbol);
18751876
}
18761877
}
@@ -1915,7 +1916,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t
19151916
const pm_node_t *key = assoc->key;
19161917
assert(PM_NODE_TYPE_P(key, PM_SYMBOL_NODE));
19171918

1918-
VALUE symbol = ID2SYM(parse_string_symbol(&((const pm_symbol_node_t *) key)->unescaped, scope_node->parser));
1919+
VALUE symbol = ID2SYM(parse_string_symbol((const pm_symbol_node_t *)key, scope_node->parser));
19191920
ADD_INSN(ret, &line.node, dup);
19201921
ADD_INSN1(ret, &line.node, putobject, symbol);
19211922
ADD_SEND(ret, &line.node, rb_intern("key?"), INT2FIX(1));

test/ruby/test_compile_prism.rb

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -779,6 +779,12 @@ def test_StringNode
779779

780780
def test_SymbolNode
781781
assert_prism_eval(":pit")
782+
783+
# Test UTF-8 symbol in a US-ASCII file
784+
assert_prism_eval(<<~'RUBY', raw: true)
785+
# -*- coding: us-ascii -*-
786+
:"\u{e9}"
787+
RUBY
782788
end
783789

784790
def test_XStringNode

0 commit comments

Comments
 (0)
0