From 3bc41f4f0b0823e37ac0e89f7943dfe181e005b9 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 16 Nov 2023 12:45:35 -0500 Subject: [PATCH 01/21] [ruby/prism] Add macGreek encoding https://github.com/ruby/prism/commit/c36d3fc647 --- prism/enc/pm_encoding.h | 1 + prism/enc/pm_tables.c | 35 +++++++++++++++++++++++++++++++++++ prism/prism.c | 1 + test/prism/encoding_test.rb | 1 + 4 files changed, 38 insertions(+) diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h index 4a6a6a5142c26c..7dca9382418163 100644 --- a/prism/enc/pm_encoding.h +++ b/prism/enc/pm_encoding.h @@ -190,6 +190,7 @@ extern pm_encoding_t pm_encoding_iso_8859_14; extern pm_encoding_t pm_encoding_iso_8859_15; extern pm_encoding_t pm_encoding_iso_8859_16; extern pm_encoding_t pm_encoding_koi8_r; +extern pm_encoding_t pm_encoding_mac_greek; extern pm_encoding_t pm_encoding_mac_iceland; extern pm_encoding_t pm_encoding_mac_romania; extern pm_encoding_t pm_encoding_shift_jis; diff --git a/prism/enc/pm_tables.c b/prism/enc/pm_tables.c index ed4b219dbfeffd..8463c3efdb7d89 100644 --- a/prism/enc/pm_tables.c +++ b/prism/enc/pm_tables.c @@ -720,6 +720,30 @@ static uint8_t pm_encoding_koi8_r_table[256] = { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Fx }; +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding macGreek character. + */ +static uint8_t pm_encoding_mac_greek_table[256] = { +// 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x + 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx +}; + /** * Each element of the following table contains a bitfield that indicates a * piece of information about the corresponding macIceland character. @@ -1078,6 +1102,7 @@ PRISM_ENCODING_TABLE(iso_8859_14) PRISM_ENCODING_TABLE(iso_8859_15) PRISM_ENCODING_TABLE(iso_8859_16) PRISM_ENCODING_TABLE(koi8_r) +PRISM_ENCODING_TABLE(mac_greek) PRISM_ENCODING_TABLE(mac_iceland) PRISM_ENCODING_TABLE(mac_romania) PRISM_ENCODING_TABLE(windows_1250) @@ -1402,6 +1427,16 @@ pm_encoding_t pm_encoding_koi8_r = { .multibyte = false }; +/** macGreek */ +pm_encoding_t pm_encoding_mac_greek = { + .name = "macGreek", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_mac_greek_alnum_char, + .alpha_char = pm_encoding_mac_greek_alpha_char, + .isupper_char = pm_encoding_mac_greek_isupper_char, + .multibyte = false +}; + /** macIceland */ pm_encoding_t pm_encoding_mac_iceland = { .name = "macIceland", diff --git a/prism/prism.c b/prism/prism.c index 882a2688876292..ac2b572808b7a1 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -6135,6 +6135,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star ENCODING1("locale", pm_encoding_utf_8); break; case 'M': case 'm': + ENCODING1("macGreek", pm_encoding_mac_greek); ENCODING1("macIceland", pm_encoding_mac_iceland); ENCODING1("macRomania", pm_encoding_mac_romania); break; diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb index 8d0ff4ce28f264..4be230015a1215 100644 --- a/test/prism/encoding_test.rb +++ b/test/prism/encoding_test.rb @@ -38,6 +38,7 @@ class EncodingTest < TestCase Encoding::ISO_8859_15 => 0x00...0x100, Encoding::ISO_8859_16 => 0x00...0x100, Encoding::KOI8_R => 0x00...0x100, + Encoding::MACGREEK => 0x00...0x100, Encoding::MACICELAND => 0x00...0x100, Encoding::MACROMANIA => 0x00...0x100, Encoding::Windows_1250 => 0x00...0x100, From 498b086c374608005278c0f7d105df1925e13a22 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Fri, 17 Nov 2023 11:01:55 +0900 Subject: [PATCH 02/21] Skip test_ForwardingArgumentsNode due to a failure on a CI http://ci.rvm.jp/results/trunk-iseq_binary@ruby-sp2-docker/4779277 ``` expected: == disasm: #:2 (2,8)-(4,11)> local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: 1]) [ 1] "..."@0 0000 putself ( 3) 0001 getlocal_WC_0 ?@-2 0003 splatarray false 0005 getblockparamproxy ?@-1, 0 0008 send , nil 0011 leave ( 2) actual: == disasm: #:2 (2,8)-(4,11)> local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: 1]) [ 1] "..."@0 0000 putself ( 3) 0001 getlocal_WC_0 ?@-2 0003 splatarray false 0005 getblockparamproxy "!"@-1, 0 0008 send , nil 0011 leave ( 2) /tmp/ruby/src/trunk-iseq_binary/tool/lib/iseq_loader_checker.rb:36:in `exit': exit (SystemExit) ``` --- test/ruby/test_compile_prism.rb | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/test/ruby/test_compile_prism.rb b/test/ruby/test_compile_prism.rb index 6656889388951c..269b6953ca3e10 100644 --- a/test/ruby/test_compile_prism.rb +++ b/test/ruby/test_compile_prism.rb @@ -862,6 +862,31 @@ def PrismTestSubclass.test_call_operator_write_node=(val) end def test_ForwardingArgumentsNode + # http://ci.rvm.jp/results/trunk-iseq_binary@ruby-sp2-docker/4779277 + # + # expected: + # == disasm: #:2 (2,8)-(4,11)> + # local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: 1]) + # [ 1] "..."@0 + # 0000 putself ( 3) + # 0001 getlocal_WC_0 ?@-2 + # 0003 splatarray false + # 0005 getblockparamproxy ?@-1, 0 + # 0008 send , nil + # 0011 leave ( 2) + # actual: + # == disasm: #:2 (2,8)-(4,11)> + # local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: 1]) + # [ 1] "..."@0 + # 0000 putself ( 3) + # 0001 getlocal_WC_0 ?@-2 + # 0003 splatarray false + # 0005 getblockparamproxy "!"@-1, 0 + # 0008 send , nil + # 0011 leave ( 2) + + omit "fails on trunk-iseq_binary" + assert_prism_eval(<<-CODE) def prism_test_forwarding_arguments_node(...); end; def prism_test_forwarding_arguments_node1(...) From 94c9f166632a901e563463933efd42e618432d70 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 16 Nov 2023 17:50:21 +0100 Subject: [PATCH 03/21] Refactor rb_obj_evacuate_ivs_to_hash_table That function is a bit too low level to called from multiple places. It's always used in tandem with `rb_shape_set_too_complex` and both have to know how the object is laid out to update the `iv_ptr`. So instead we can provide two higher level function: - `rb_obj_copy_ivs_to_hash_table` to prepare a `st_table` from an arbitrary oject. - `rb_obj_convert_to_too_complex` to assign the new `st_table` to the old object, and safely free the old `iv_ptr`. Unfortunately both can't be combined into one, because `rb_obj_copy_ivar` need `rb_obj_copy_ivs_to_hash_table` to copy from one object to another. --- internal/variable.h | 3 +- object.c | 14 +++---- shape.c | 7 ---- shape.h | 1 - variable.c | 90 +++++++++++++++++++++++++-------------------- 5 files changed, 57 insertions(+), 58 deletions(-) diff --git a/internal/variable.h b/internal/variable.h index 021e7cc6a8e220..63b074a30884d6 100644 --- a/internal/variable.h +++ b/internal/variable.h @@ -47,7 +47,8 @@ VALUE rb_mod_set_temporary_name(VALUE, VALUE); struct gen_ivtbl; int rb_gen_ivtbl_get(VALUE obj, ID id, struct gen_ivtbl **ivtbl); -int rb_obj_evacuate_ivs_to_hash_table(ID key, VALUE val, st_data_t arg); +void rb_obj_copy_ivs_to_hash_table(VALUE obj, st_table *table); +void rb_obj_convert_to_too_complex(VALUE obj, st_table *table); void rb_evict_ivars_to_hash(VALUE obj); RUBY_SYMBOL_EXPORT_BEGIN diff --git a/object.c b/object.c index f442a562ea9983..a8090d0763db06 100644 --- a/object.c +++ b/object.c @@ -293,12 +293,10 @@ rb_obj_copy_ivar(VALUE dest, VALUE obj) rb_shape_t * src_shape = rb_shape_get_shape(obj); if (rb_shape_obj_too_complex(obj)) { + // obj is TOO_COMPLEX so we can copy its iv_hash st_table * table = rb_st_init_numtable_with_size(rb_st_table_size(ROBJECT_IV_HASH(obj))); - - rb_ivar_foreach(obj, rb_obj_evacuate_ivs_to_hash_table, (st_data_t)table); - rb_shape_set_too_complex(dest); - - ROBJECT(dest)->as.heap.ivptr = (VALUE *)table; + st_replace(table, ROBJECT_IV_HASH(obj)); + rb_obj_convert_to_too_complex(dest, table); return; } @@ -328,10 +326,8 @@ rb_obj_copy_ivar(VALUE dest, VALUE obj) shape_to_set_on_dest = rb_shape_rebuild_shape(initial_shape, src_shape); if (UNLIKELY(rb_shape_id(shape_to_set_on_dest) == OBJ_TOO_COMPLEX_SHAPE_ID)) { st_table * table = rb_st_init_numtable_with_size(src_num_ivs); - - rb_ivar_foreach(obj, rb_obj_evacuate_ivs_to_hash_table, (st_data_t)table); - rb_shape_set_too_complex(dest); - ROBJECT(dest)->as.heap.ivptr = (VALUE *)table; + rb_obj_copy_ivs_to_hash_table(obj, table); + rb_obj_convert_to_too_complex(dest, table); return; } diff --git a/shape.c b/shape.c index 588c24604db69a..7ccf82b246a6d2 100644 --- a/shape.c +++ b/shape.c @@ -915,13 +915,6 @@ rb_shape_obj_too_complex(VALUE obj) return rb_shape_get_shape_id(obj) == OBJ_TOO_COMPLEX_SHAPE_ID; } -void -rb_shape_set_too_complex(VALUE obj) -{ - RUBY_ASSERT(!rb_shape_obj_too_complex(obj)); - rb_shape_set_shape_id(obj, OBJ_TOO_COMPLEX_SHAPE_ID); -} - size_t rb_shape_edges_count(rb_shape_t *shape) { diff --git a/shape.h b/shape.h index f1823258da88c9..5b8420a402ed24 100644 --- a/shape.h +++ b/shape.h @@ -226,7 +226,6 @@ rb_shape_t *rb_shape_traverse_from_new_root(rb_shape_t *initial_shape, rb_shape_ bool rb_shape_set_shape_id(VALUE obj, shape_id_t shape_id); VALUE rb_obj_debug_shape(VALUE self, VALUE obj); -void rb_shape_set_too_complex(VALUE obj); // For ext/objspace RUBY_SYMBOL_EXPORT_BEGIN diff --git a/variable.c b/variable.c index d5c425472be3c6..793516209869e2 100644 --- a/variable.c +++ b/variable.c @@ -1371,55 +1371,59 @@ rb_attr_delete(VALUE obj, ID id) } void -rb_evict_ivars_to_hash(VALUE obj) +rb_obj_convert_to_too_complex(VALUE obj, st_table *table) { RUBY_ASSERT(!rb_shape_obj_too_complex(obj)); - st_table *table = st_init_numtable_with_size(rb_ivar_count(obj)); - - // Evacuate all previous values from shape into id_table - rb_ivar_foreach(obj, rb_obj_evacuate_ivs_to_hash_table, (st_data_t)table); + VALUE *old_ivptr = NULL; switch (BUILTIN_TYPE(obj)) { - case T_OBJECT: - rb_shape_set_too_complex(obj); - - if (!(RBASIC(obj)->flags & ROBJECT_EMBED)) { - xfree(ROBJECT(obj)->as.heap.ivptr); - } - - ROBJECT_SET_IV_HASH(obj, table); - break; - case T_CLASS: - case T_MODULE: - rb_shape_set_too_complex(obj); - - xfree(RCLASS_IVPTR(obj)); - RCLASS_SET_IV_HASH(obj, table); - break; - default: - RB_VM_LOCK_ENTER(); - { - struct st_table *gen_ivs = generic_ivtbl_no_ractor_check(obj); - st_data_t old_ivtbl; - struct gen_ivtbl *ivtbl = NULL; - - if (st_delete(gen_ivs, &obj, &old_ivtbl)) { - ivtbl = (struct gen_ivtbl *)old_ivtbl; - } - - ivtbl = xrealloc(ivtbl, sizeof(struct gen_ivtbl)); - ivtbl->as.complex.table = table; + case T_OBJECT: + if (!(RBASIC(obj)->flags & ROBJECT_EMBED)) { + old_ivptr = ROBJECT_IVPTR(obj); + } + rb_shape_set_shape_id(obj, OBJ_TOO_COMPLEX_SHAPE_ID); + ROBJECT_SET_IV_HASH(obj, table); + break; + case T_CLASS: + case T_MODULE: + old_ivptr = RCLASS_IVPTR(obj); + rb_shape_set_shape_id(obj, OBJ_TOO_COMPLEX_SHAPE_ID); + RCLASS_SET_IV_HASH(obj, table); + break; + default: + RB_VM_LOCK_ENTER(); + { + struct st_table *gen_ivs = generic_ivtbl_no_ractor_check(obj); + st_lookup(gen_ivs, (st_data_t)&obj, (st_data_t *)&old_ivptr); + + struct gen_ivtbl *ivtbl = xmalloc(sizeof(struct gen_ivtbl)); + ivtbl->as.complex.table = table; #if SHAPE_IN_BASIC_FLAGS - rb_shape_set_too_complex(obj); + rb_shape_set_shape_id(obj, OBJ_TOO_COMPLEX_SHAPE_ID); #else - ivtbl->shape_id = OBJ_TOO_COMPLEX_SHAPE_ID; + ivtbl->shape_id = OBJ_TOO_COMPLEX_SHAPE_ID; #endif + st_insert(gen_ivs, (st_data_t)obj, (st_data_t)ivtbl); + } + RB_VM_LOCK_LEAVE(); + } - st_insert(gen_ivs, (st_data_t)obj, (st_data_t)ivtbl); - } - RB_VM_LOCK_LEAVE(); + if (old_ivptr) { + xfree(old_ivptr); } +} + +void +rb_evict_ivars_to_hash(VALUE obj) +{ + RUBY_ASSERT(!rb_shape_obj_too_complex(obj)); + + st_table *table = st_init_numtable_with_size(rb_ivar_count(obj)); + + // Evacuate all previous values from shape into id_table + rb_obj_copy_ivs_to_hash_table(obj, table); + rb_obj_convert_to_too_complex(obj, table); RUBY_ASSERT(rb_shape_obj_too_complex(obj)); } @@ -1637,12 +1641,18 @@ rb_ensure_iv_list_size(VALUE obj, uint32_t current_capacity, uint32_t new_capaci } int -rb_obj_evacuate_ivs_to_hash_table(ID key, VALUE val, st_data_t arg) +rb_obj_copy_ivs_to_hash_table_i(ID key, VALUE val, st_data_t arg) { st_insert((st_table *)arg, (st_data_t)key, (st_data_t)val); return ST_CONTINUE; } +void +rb_obj_copy_ivs_to_hash_table(VALUE obj, st_table *table) +{ + rb_ivar_foreach(obj, rb_obj_copy_ivs_to_hash_table_i, (st_data_t)table); +} + static VALUE * obj_ivar_set_shape_ivptr(VALUE obj, void *_data) { From 940f2e7f1893af17be6e35ef8f7ad09949a12709 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 17 Nov 2023 13:40:37 +0100 Subject: [PATCH 04/21] size_pool_idx_for_size: Include debugging info in error message We ran into that case on our CI, including some sizes would help debug it much easier. --- gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gc.c b/gc.c index 7710fec7d444cd..de0b9bf79e46d5 100644 --- a/gc.c +++ b/gc.c @@ -2769,7 +2769,7 @@ size_pool_idx_for_size(size_t size) size_t size_pool_idx = 64 - nlz_int64(slot_count - 1); if (size_pool_idx >= SIZE_POOL_COUNT) { - rb_bug("size_pool_idx_for_size: allocation size too large"); + rb_bug("size_pool_idx_for_size: allocation size too large (size=%lu, size_pool_idx=%lu)", size, size_pool_idx); } #if RGENGC_CHECK_MODE From 4a26a65e428fbe4088c86068d5525d1f343702ee Mon Sep 17 00:00:00 2001 From: Haldun Bayhantopcu Date: Thu, 16 Nov 2023 19:32:21 +0100 Subject: [PATCH 05/21] [ruby/prism] Add macTurkish https://github.com/ruby/prism/commit/2232d4b6a0 --- prism/enc/pm_encoding.h | 1 + prism/enc/pm_tables.c | 35 +++++++++++++++++++++++++++++++++++ prism/prism.c | 1 + test/prism/encoding_test.rb | 1 + 4 files changed, 38 insertions(+) diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h index 7dca9382418163..1408423a1091d0 100644 --- a/prism/enc/pm_encoding.h +++ b/prism/enc/pm_encoding.h @@ -193,6 +193,7 @@ extern pm_encoding_t pm_encoding_koi8_r; extern pm_encoding_t pm_encoding_mac_greek; extern pm_encoding_t pm_encoding_mac_iceland; extern pm_encoding_t pm_encoding_mac_romania; +extern pm_encoding_t pm_encoding_mac_turkish; extern pm_encoding_t pm_encoding_shift_jis; extern pm_encoding_t pm_encoding_utf_8; extern pm_encoding_t pm_encoding_utf8_mac; diff --git a/prism/enc/pm_tables.c b/prism/enc/pm_tables.c index 8463c3efdb7d89..d6d5df97f76b6c 100644 --- a/prism/enc/pm_tables.c +++ b/prism/enc/pm_tables.c @@ -792,6 +792,30 @@ static uint8_t pm_encoding_mac_romania_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx }; +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding macTurkish character. + */ +static uint8_t pm_encoding_mac_turkish_table[256] = { +// 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x + 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx +}; + /** * Each element of the following table contains a bitfield that indicates a * piece of information about the corresponding windows-1250 character. @@ -1105,6 +1129,7 @@ PRISM_ENCODING_TABLE(koi8_r) PRISM_ENCODING_TABLE(mac_greek) PRISM_ENCODING_TABLE(mac_iceland) PRISM_ENCODING_TABLE(mac_romania) +PRISM_ENCODING_TABLE(mac_turkish) PRISM_ENCODING_TABLE(windows_1250) PRISM_ENCODING_TABLE(windows_1251) PRISM_ENCODING_TABLE(windows_1252) @@ -1457,6 +1482,16 @@ pm_encoding_t pm_encoding_mac_romania = { .multibyte = false }; +/** macTurkish */ +pm_encoding_t pm_encoding_mac_turkish = { + .name = "macTurkish", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_mac_turkish_alnum_char, + .alpha_char = pm_encoding_mac_turkish_alpha_char, + .isupper_char = pm_encoding_mac_turkish_isupper_char, + .multibyte = false +}; + /** Windows-1250 */ pm_encoding_t pm_encoding_windows_1250 = { .name = "Windows-1250", diff --git a/prism/prism.c b/prism/prism.c index ac2b572808b7a1..b8b58ddf4b99f3 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -6138,6 +6138,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star ENCODING1("macGreek", pm_encoding_mac_greek); ENCODING1("macIceland", pm_encoding_mac_iceland); ENCODING1("macRomania", pm_encoding_mac_romania); + ENCODING1("macTurkish", pm_encoding_mac_turkish); break; case 'P': case 'p': ENCODING1("PCK", pm_encoding_windows_31j); diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb index 4be230015a1215..21c87b6c6c0d38 100644 --- a/test/prism/encoding_test.rb +++ b/test/prism/encoding_test.rb @@ -41,6 +41,7 @@ class EncodingTest < TestCase Encoding::MACGREEK => 0x00...0x100, Encoding::MACICELAND => 0x00...0x100, Encoding::MACROMANIA => 0x00...0x100, + Encoding::MACTURKISH => 0x00...0x100, Encoding::Windows_1250 => 0x00...0x100, Encoding::Windows_1251 => 0x00...0x100, Encoding::Windows_1252 => 0x00...0x100, From 9ba49c61c28691c131e97d5ea0daf35ab9ea353f Mon Sep 17 00:00:00 2001 From: MSP-Greg Date: Thu, 16 Nov 2023 14:46:27 -0600 Subject: [PATCH 06/21] mingw.yml - remove encoding, run tests in cmd shell --- .github/workflows/mingw.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mingw.yml b/.github/workflows/mingw.yml index 5ddcb59e76c3bb..597fc44a671a25 100644 --- a/.github/workflows/mingw.yml +++ b/.github/workflows/mingw.yml @@ -125,6 +125,7 @@ jobs: - name: test timeout-minutes: 30 run: make test + shell: cmd env: GNUMAKEFLAGS: '' RUBY_TESTOPTS: '-v --tty=no' @@ -132,9 +133,8 @@ jobs: - name: test-all timeout-minutes: 45 + shell: cmd run: | - # Actions uses UTF8, causes test failures, similar to normal OS setup - chcp.com 437 make ${{ StartsWith(matrix.test_task, 'test/') && matrix.test_task || 'test-all' }} env: RUBY_TESTOPTS: >- @@ -147,6 +147,7 @@ jobs: timeout-minutes: 10 run: | make ${{ StartsWith(matrix.test_task, 'spec/') && matrix.test_task || 'test-spec' }} + shell: cmd if: ${{ matrix.test_task == 'check' || matrix.test_task == 'test-spec' || StartsWith(matrix.test_task, 'spec/') }} - uses: ./src/.github/actions/slack From c2f2090da6ef28764f7ba3cdeb984aec7243f4ca Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 17 Nov 2023 09:50:24 -0500 Subject: [PATCH 07/21] [ruby/prism] Do not test locale encoding on windows https://github.com/ruby/prism/commit/8f40536431 --- test/prism/encoding_test.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb index 21c87b6c6c0d38..657b157e2f8a1b 100644 --- a/test/prism/encoding_test.rb +++ b/test/prism/encoding_test.rb @@ -70,7 +70,10 @@ class EncodingTest < TestCase end encodings.each do |encoding, range| - encoding.names.each do |name| + names = encoding.names + names.delete("locale") if encoding == Encoding::UTF_8 && RUBY_PLATFORM.include?("mingw") + + names.each do |name| define_method(:"test_encoding_#{name}") do assert_encoding(encoding, name, range) end From db4303f953c85b2b59d3d1af177727240353c2e5 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 17 Nov 2023 10:27:58 -0500 Subject: [PATCH 08/21] [ruby/prism] Never test locale encoding https://github.com/ruby/prism/commit/f0f057b055 --- test/prism/encoding_test.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb index 657b157e2f8a1b..a0303be536e1b7 100644 --- a/test/prism/encoding_test.rb +++ b/test/prism/encoding_test.rb @@ -70,10 +70,9 @@ class EncodingTest < TestCase end encodings.each do |encoding, range| - names = encoding.names - names.delete("locale") if encoding == Encoding::UTF_8 && RUBY_PLATFORM.include?("mingw") + encoding.names.each do |name| + next if name == "locale" - names.each do |name| define_method(:"test_encoding_#{name}") do assert_encoding(encoding, name, range) end From e5d6b4099e9f4027dbaaeb8b825ada572279b066 Mon Sep 17 00:00:00 2001 From: Haldun Bayhantopcu Date: Mon, 13 Nov 2023 10:12:54 +0100 Subject: [PATCH 09/21] [ruby/prism] Do not allow trailing commas in calls without parenthesis https://github.com/ruby/prism/commit/f1d56da58f --- prism/prism.c | 7 +++++++ test/prism/errors_test.rb | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/prism/prism.c b/prism/prism.c index b8b58ddf4b99f3..7bae22f07269b7 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -11922,6 +11922,13 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept // argument to this method call. parse_arguments(parser, arguments, true, PM_TOKEN_EOF); + // If we have done with the arguments and still not consumed the comma, + // then we have a trailing comma where we need to check whether it is + // allowed or not. + if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) { + pm_parser_err_previous(parser, PM_ERR_EXPECT_ARGUMENT); + } + pm_accepts_block_stack_pop(parser); } diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb index 5d4b95a2240068..f13327c198cfff 100644 --- a/test/prism/errors_test.rb +++ b/test/prism/errors_test.rb @@ -1676,6 +1676,12 @@ def test_void_value_expression_in_binary_call ], compare_ripper: false # Ripper does not check 'void value expression'. end + def test_trailing_comma_in_calls + assert_errors expression("foo 1,"), "foo 1,", [ + ["Expected an argument", 5..6] + ] + end + private def assert_errors(expected, source, errors, compare_ripper: RUBY_ENGINE == "ruby") From 85dcfef23a16ce75575177b24e6726f8ab12d276 Mon Sep 17 00:00:00 2001 From: Haldun Bayhantopcu Date: Fri, 17 Nov 2023 16:44:07 +0100 Subject: [PATCH 10/21] [ruby/prism] Add macUkraine https://github.com/ruby/prism/commit/440557fddc --- prism/enc/pm_encoding.h | 1 + prism/enc/pm_tables.c | 35 +++++++++++++++++++++++++++++++++++ prism/prism.c | 1 + 3 files changed, 37 insertions(+) diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h index 1408423a1091d0..38a0cfc811e60e 100644 --- a/prism/enc/pm_encoding.h +++ b/prism/enc/pm_encoding.h @@ -194,6 +194,7 @@ extern pm_encoding_t pm_encoding_mac_greek; extern pm_encoding_t pm_encoding_mac_iceland; extern pm_encoding_t pm_encoding_mac_romania; extern pm_encoding_t pm_encoding_mac_turkish; +extern pm_encoding_t pm_encoding_mac_ukraine; extern pm_encoding_t pm_encoding_shift_jis; extern pm_encoding_t pm_encoding_utf_8; extern pm_encoding_t pm_encoding_utf8_mac; diff --git a/prism/enc/pm_tables.c b/prism/enc/pm_tables.c index d6d5df97f76b6c..3b3871beb81c1e 100644 --- a/prism/enc/pm_tables.c +++ b/prism/enc/pm_tables.c @@ -816,6 +816,30 @@ static uint8_t pm_encoding_mac_turkish_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx }; +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding macUkraine character. + */ +static uint8_t pm_encoding_mac_ukraine_table[256] = { +// 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x + 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx +}; + /** * Each element of the following table contains a bitfield that indicates a * piece of information about the corresponding windows-1250 character. @@ -1130,6 +1154,7 @@ PRISM_ENCODING_TABLE(mac_greek) PRISM_ENCODING_TABLE(mac_iceland) PRISM_ENCODING_TABLE(mac_romania) PRISM_ENCODING_TABLE(mac_turkish) +PRISM_ENCODING_TABLE(mac_ukraine) PRISM_ENCODING_TABLE(windows_1250) PRISM_ENCODING_TABLE(windows_1251) PRISM_ENCODING_TABLE(windows_1252) @@ -1492,6 +1517,16 @@ pm_encoding_t pm_encoding_mac_turkish = { .multibyte = false }; +/** macUkraine */ +pm_encoding_t pm_encoding_mac_ukraine = { + .name = "macUkraine", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_mac_ukraine_alnum_char, + .alpha_char = pm_encoding_mac_ukraine_alpha_char, + .isupper_char = pm_encoding_mac_ukraine_isupper_char, + .multibyte = false +}; + /** Windows-1250 */ pm_encoding_t pm_encoding_windows_1250 = { .name = "Windows-1250", diff --git a/prism/prism.c b/prism/prism.c index 7bae22f07269b7..5442a33e0fdb67 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -6139,6 +6139,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star ENCODING1("macIceland", pm_encoding_mac_iceland); ENCODING1("macRomania", pm_encoding_mac_romania); ENCODING1("macTurkish", pm_encoding_mac_turkish); + ENCODING1("macUkraine", pm_encoding_mac_ukraine); break; case 'P': case 'p': ENCODING1("PCK", pm_encoding_windows_31j); From 0a081a33eb036101e7e61cf61e6390481cfe73c3 Mon Sep 17 00:00:00 2001 From: Haldun Bayhantopcu Date: Fri, 17 Nov 2023 16:58:58 +0100 Subject: [PATCH 11/21] [ruby/prism] Add macRoman https://github.com/ruby/prism/commit/42b20ee399 --- prism/enc/pm_encoding.h | 1 + prism/enc/pm_tables.c | 35 +++++++++++++++++++++++++++++++++++ prism/prism.c | 1 + test/prism/encoding_test.rb | 1 + 4 files changed, 38 insertions(+) diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h index 38a0cfc811e60e..e377f6caa7b72c 100644 --- a/prism/enc/pm_encoding.h +++ b/prism/enc/pm_encoding.h @@ -192,6 +192,7 @@ extern pm_encoding_t pm_encoding_iso_8859_16; extern pm_encoding_t pm_encoding_koi8_r; extern pm_encoding_t pm_encoding_mac_greek; extern pm_encoding_t pm_encoding_mac_iceland; +extern pm_encoding_t pm_encoding_mac_roman; extern pm_encoding_t pm_encoding_mac_romania; extern pm_encoding_t pm_encoding_mac_turkish; extern pm_encoding_t pm_encoding_mac_ukraine; diff --git a/prism/enc/pm_tables.c b/prism/enc/pm_tables.c index 3b3871beb81c1e..814a4510a3e1a9 100644 --- a/prism/enc/pm_tables.c +++ b/prism/enc/pm_tables.c @@ -768,6 +768,30 @@ static uint8_t pm_encoding_mac_iceland_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx }; +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding macRoman character. + */ +static uint8_t pm_encoding_mac_roman_table[256] = { +// 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x + 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx +}; + /** * Each element of the following table contains a bitfield that indicates a * piece of information about the corresponding macRomania character. @@ -1152,6 +1176,7 @@ PRISM_ENCODING_TABLE(iso_8859_16) PRISM_ENCODING_TABLE(koi8_r) PRISM_ENCODING_TABLE(mac_greek) PRISM_ENCODING_TABLE(mac_iceland) +PRISM_ENCODING_TABLE(mac_roman) PRISM_ENCODING_TABLE(mac_romania) PRISM_ENCODING_TABLE(mac_turkish) PRISM_ENCODING_TABLE(mac_ukraine) @@ -1497,6 +1522,16 @@ pm_encoding_t pm_encoding_mac_iceland = { .multibyte = false }; +/** macRoman */ +pm_encoding_t pm_encoding_mac_roman = { + .name = "macRoman", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_mac_roman_alnum_char, + .alpha_char = pm_encoding_mac_roman_alpha_char, + .isupper_char = pm_encoding_mac_roman_isupper_char, + .multibyte = false +}; + /** macRomania */ pm_encoding_t pm_encoding_mac_romania = { .name = "macRomania", diff --git a/prism/prism.c b/prism/prism.c index 5442a33e0fdb67..f8d5d6876f53f4 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -6137,6 +6137,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star case 'M': case 'm': ENCODING1("macGreek", pm_encoding_mac_greek); ENCODING1("macIceland", pm_encoding_mac_iceland); + ENCODING1("macRoman", pm_encoding_mac_roman); ENCODING1("macRomania", pm_encoding_mac_romania); ENCODING1("macTurkish", pm_encoding_mac_turkish); ENCODING1("macUkraine", pm_encoding_mac_ukraine); diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb index a0303be536e1b7..3b9f5436e59c90 100644 --- a/test/prism/encoding_test.rb +++ b/test/prism/encoding_test.rb @@ -40,6 +40,7 @@ class EncodingTest < TestCase Encoding::KOI8_R => 0x00...0x100, Encoding::MACGREEK => 0x00...0x100, Encoding::MACICELAND => 0x00...0x100, + Encoding::MACROMAN => 0x00...0x100, Encoding::MACROMANIA => 0x00...0x100, Encoding::MACTURKISH => 0x00...0x100, Encoding::Windows_1250 => 0x00...0x100, From 50b7b927a3a99d5959c94f48e8b084bab937fea1 Mon Sep 17 00:00:00 2001 From: Haldun Bayhantopcu Date: Fri, 17 Nov 2023 16:53:48 +0100 Subject: [PATCH 12/21] [ruby/prism] Add macThai https://github.com/ruby/prism/commit/f654058f50 --- prism/enc/pm_encoding.h | 1 + prism/enc/pm_tables.c | 35 +++++++++++++++++++++++++++++++++++ prism/prism.c | 1 + test/prism/encoding_test.rb | 1 + 4 files changed, 38 insertions(+) diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h index e377f6caa7b72c..cb45b02b5cdd8b 100644 --- a/prism/enc/pm_encoding.h +++ b/prism/enc/pm_encoding.h @@ -194,6 +194,7 @@ extern pm_encoding_t pm_encoding_mac_greek; extern pm_encoding_t pm_encoding_mac_iceland; extern pm_encoding_t pm_encoding_mac_roman; extern pm_encoding_t pm_encoding_mac_romania; +extern pm_encoding_t pm_encoding_mac_thai; extern pm_encoding_t pm_encoding_mac_turkish; extern pm_encoding_t pm_encoding_mac_ukraine; extern pm_encoding_t pm_encoding_shift_jis; diff --git a/prism/enc/pm_tables.c b/prism/enc/pm_tables.c index 814a4510a3e1a9..dd502100344d4c 100644 --- a/prism/enc/pm_tables.c +++ b/prism/enc/pm_tables.c @@ -816,6 +816,30 @@ static uint8_t pm_encoding_mac_romania_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx }; +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding macThai character. + */ +static uint8_t pm_encoding_mac_thai_table[256] = { +// 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x + 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx +}; + /** * Each element of the following table contains a bitfield that indicates a * piece of information about the corresponding macTurkish character. @@ -1178,6 +1202,7 @@ PRISM_ENCODING_TABLE(mac_greek) PRISM_ENCODING_TABLE(mac_iceland) PRISM_ENCODING_TABLE(mac_roman) PRISM_ENCODING_TABLE(mac_romania) +PRISM_ENCODING_TABLE(mac_thai) PRISM_ENCODING_TABLE(mac_turkish) PRISM_ENCODING_TABLE(mac_ukraine) PRISM_ENCODING_TABLE(windows_1250) @@ -1542,6 +1567,16 @@ pm_encoding_t pm_encoding_mac_romania = { .multibyte = false }; +/** macThai */ +pm_encoding_t pm_encoding_mac_thai = { + .name = "macThai", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_mac_thai_alnum_char, + .alpha_char = pm_encoding_mac_thai_alpha_char, + .isupper_char = pm_encoding_mac_thai_isupper_char, + .multibyte = false +}; + /** macTurkish */ pm_encoding_t pm_encoding_mac_turkish = { .name = "macTurkish", diff --git a/prism/prism.c b/prism/prism.c index f8d5d6876f53f4..9f0881751ce9fd 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -6139,6 +6139,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star ENCODING1("macIceland", pm_encoding_mac_iceland); ENCODING1("macRoman", pm_encoding_mac_roman); ENCODING1("macRomania", pm_encoding_mac_romania); + ENCODING1("macThai", pm_encoding_mac_thai); ENCODING1("macTurkish", pm_encoding_mac_turkish); ENCODING1("macUkraine", pm_encoding_mac_ukraine); break; diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb index 3b9f5436e59c90..78b2e7195545ca 100644 --- a/test/prism/encoding_test.rb +++ b/test/prism/encoding_test.rb @@ -42,6 +42,7 @@ class EncodingTest < TestCase Encoding::MACICELAND => 0x00...0x100, Encoding::MACROMAN => 0x00...0x100, Encoding::MACROMANIA => 0x00...0x100, + Encoding::MACTHAI => 0x00...0x100, Encoding::MACTURKISH => 0x00...0x100, Encoding::Windows_1250 => 0x00...0x100, Encoding::Windows_1251 => 0x00...0x100, From 229f6e5bb42d24838afb3f5820a5e951f8115788 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 17 Nov 2023 15:03:22 -0500 Subject: [PATCH 13/21] [ruby/prism] Update spacing in encoding_test.rb https://github.com/ruby/prism/commit/56508c2201 --- test/prism/encoding_test.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb index 78b2e7195545ca..1e69bc9bd6b55b 100644 --- a/test/prism/encoding_test.rb +++ b/test/prism/encoding_test.rb @@ -42,7 +42,7 @@ class EncodingTest < TestCase Encoding::MACICELAND => 0x00...0x100, Encoding::MACROMAN => 0x00...0x100, Encoding::MACROMANIA => 0x00...0x100, - Encoding::MACTHAI => 0x00...0x100, + Encoding::MACTHAI => 0x00...0x100, Encoding::MACTURKISH => 0x00...0x100, Encoding::Windows_1250 => 0x00...0x100, Encoding::Windows_1251 => 0x00...0x100, From 585fdfe1f59951bcfe5c426601330c113c5a1e06 Mon Sep 17 00:00:00 2001 From: Peter Cai <222655+pcai@users.noreply.github.com> Date: Fri, 17 Nov 2023 04:55:41 +0000 Subject: [PATCH 14/21] [ruby/prism] add Windows-874 encoding https://github.com/ruby/prism/commit/0670dd3b9a --- prism/enc/pm_encoding.h | 1 + prism/enc/pm_tables.c | 35 +++++++++++++++++++++++++++++++++++ prism/prism.c | 2 ++ test/prism/encoding_test.rb | 1 + 4 files changed, 39 insertions(+) diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h index cb45b02b5cdd8b..cfc90b4d96d121 100644 --- a/prism/enc/pm_encoding.h +++ b/prism/enc/pm_encoding.h @@ -210,5 +210,6 @@ extern pm_encoding_t pm_encoding_windows_1256; extern pm_encoding_t pm_encoding_windows_1257; extern pm_encoding_t pm_encoding_windows_1258; extern pm_encoding_t pm_encoding_windows_31j; +extern pm_encoding_t pm_encoding_windows_874; #endif diff --git a/prism/enc/pm_tables.c b/prism/enc/pm_tables.c index dd502100344d4c..2bec68f4588735 100644 --- a/prism/enc/pm_tables.c +++ b/prism/enc/pm_tables.c @@ -1104,6 +1104,30 @@ static uint8_t pm_encoding_windows_1258_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx }; +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding windows-874 character. + */ +static uint8_t pm_encoding_windows_874_table[256] = { +// 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x + 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx +}; + /** * Returns the size of the next character in the ASCII encoding. This basically * means that if the top bit is not set, the character is 1 byte long. @@ -1214,6 +1238,7 @@ PRISM_ENCODING_TABLE(windows_1255) PRISM_ENCODING_TABLE(windows_1256) PRISM_ENCODING_TABLE(windows_1257) PRISM_ENCODING_TABLE(windows_1258) +PRISM_ENCODING_TABLE(windows_874) #undef PRISM_ENCODING_TABLE @@ -1686,3 +1711,13 @@ pm_encoding_t pm_encoding_windows_1258 = { .isupper_char = pm_encoding_windows_1258_isupper_char, .multibyte = false }; + +/** Windows-874 */ +pm_encoding_t pm_encoding_windows_874 = { + .name = "Windows-874", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_windows_874_alnum_char, + .alpha_char = pm_encoding_windows_874_alpha_char, + .isupper_char = pm_encoding_windows_874_isupper_char, + .multibyte = false +}; diff --git a/prism/prism.c b/prism/prism.c index 9f0881751ce9fd..3df36b0d2e29a9 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -6075,6 +6075,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star ENCODING1("CP860", pm_encoding_ibm860); ENCODING1("CP861", pm_encoding_ibm861); ENCODING1("CP862", pm_encoding_ibm862); + ENCODING1("CP874", pm_encoding_windows_874); ENCODING1("CP878", pm_encoding_koi8_r); ENCODING2("CP932", "csWindows31J", pm_encoding_windows_31j); ENCODING1("CP936", pm_encoding_gbk); @@ -6156,6 +6157,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star break; case 'W': case 'w': ENCODING1("Windows-31J", pm_encoding_windows_31j); + ENCODING1("Windows-874", pm_encoding_windows_874); ENCODING1("Windows-1250", pm_encoding_windows_1250); ENCODING1("Windows-1251", pm_encoding_windows_1251); ENCODING1("Windows-1252", pm_encoding_windows_1252); diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb index 1e69bc9bd6b55b..07f43312eb278e 100644 --- a/test/prism/encoding_test.rb +++ b/test/prism/encoding_test.rb @@ -53,6 +53,7 @@ class EncodingTest < TestCase Encoding::Windows_1256 => 0x00...0x100, Encoding::Windows_1257 => 0x00...0x100, Encoding::Windows_1258 => 0x00...0x100, + Encoding::Windows_874 => 0x00...0x100, Encoding::Big5 => 0x00...0x10000, Encoding::CP51932 => 0x00...0x10000, Encoding::GBK => 0x00...0x10000, From cbdac2f031888fb1cf5fcbabb80c180c3c547adb Mon Sep 17 00:00:00 2001 From: Haldun Bayhantopcu Date: Fri, 17 Nov 2023 16:22:09 +0100 Subject: [PATCH 15/21] [ruby/prism] Silence clang analyzer warnings for the memory leaks https://github.com/ruby/prism/commit/68112c556e --- prism/prism.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/prism/prism.c b/prism/prism.c index 3df36b0d2e29a9..f1b7088ecd55e1 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -10445,6 +10445,8 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) { name[length] = '='; // Now switch the name to the new string. + // This silences clang analyzer warning about leak of memory pointed by `name`. + // NOLINTNEXTLINE(clang-analyzer-*) *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1); } @@ -15707,6 +15709,9 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t * if (memory == NULL) abort(); memcpy(memory, source, length); + + // This silences clang analyzer warning about leak of memory pointed by `memory`. + // NOLINTNEXTLINE(clang-analyzer-*) local = pm_parser_local_add_owned(parser, (const uint8_t *) memory, length); if (token_is_numbered_parameter(source, source + length)) { From 7c99e43c3f050244b06dbd18de4f605ea70d234c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 13 Nov 2023 15:05:49 -0500 Subject: [PATCH 16/21] [ruby/prism] Ensure serialized file is little endian https://github.com/ruby/prism/commit/0c762ee68a --- prism/defines.h | 10 +++++++++ prism/templates/lib/prism/serialize.rb.erb | 8 +++---- prism/templates/src/serialize.c.erb | 25 +++++++++++++++++----- 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/prism/defines.h b/prism/defines.h index f89a0bed8e49da..28f4da11dfd810 100644 --- a/prism/defines.h +++ b/prism/defines.h @@ -74,4 +74,14 @@ # define snprintf _snprintf #endif +/** + * Defined PRISM_WORDS_BIGENDIAN so we can ensure our serialization happens in + * little endian format regardless of platform. + */ +#if defined(WORDS_BIGENDIAN) +# define PRISM_WORDS_BIGENDIAN +#elif defined(AC_APPLE_UNIVERSAL_BUILD) && defined(__BIG_ENDIAN__) +# define PRISM_WORDS_BIGENDIAN +#endif + #endif diff --git a/prism/templates/lib/prism/serialize.rb.erb b/prism/templates/lib/prism/serialize.rb.erb index 28375045439616..517d4e8a24ae82 100644 --- a/prism/templates/lib/prism/serialize.rb.erb +++ b/prism/templates/lib/prism/serialize.rb.erb @@ -137,7 +137,7 @@ module Prism comments, magic_comments, errors, warnings = load_metadata - @constant_pool_offset = io.read(4).unpack1("L") + @constant_pool_offset = io.read(4).unpack1("L<") @constant_pool = Array.new(load_varint, nil) [load_node, comments, magic_comments, errors, warnings] @@ -167,7 +167,7 @@ module Prism end def load_serialized_length - io.read(4).unpack1("L") + io.read(4).unpack1("L<") end def load_optional_node @@ -206,8 +206,8 @@ module Prism unless constant offset = constant_pool_offset + index * 8 - start = serialized.unpack1("L", offset: offset) - length = serialized.unpack1("L", offset: offset + 4) + start = serialized.unpack1("L<", offset: offset) + length = serialized.unpack1("L<", offset: offset + 4) constant = if start.nobits?(1 << 31) diff --git a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb index db4c91e0cd88ee..2ecf7299c6b5db 100644 --- a/prism/templates/src/serialize.c.erb +++ b/prism/templates/src/serialize.c.erb @@ -47,6 +47,21 @@ pm_serialize_string(pm_parser_t *parser, pm_string_t *string, pm_buffer_t *buffe } } +/** + * Serialize a 32-bit integer to the given address always in little-endian. + */ +static void +pm_serialize_32(char *address, uint32_t value) { +#ifdef PRISM_WORDS_BIGENDIAN + address[0] = (char) ((value >> 24) & 0xFF); + address[1] = (char) ((value >> 16) & 0xFF); + address[2] = (char) ((value >> 8) & 0xFF); + address[3] = (char) (value & 0xFF); +#else + memcpy(address, &value, sizeof(uint32_t)); +#endif +} + static void pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node)); @@ -118,7 +133,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { <%- if node.needs_serialized_length? -%> // serialize length uint32_t length = pm_sizet_to_u32(buffer->length - offset - sizeof(uint32_t)); - memcpy(buffer->value + length_offset, &length, sizeof(uint32_t)); + pm_serialize_32(buffer->value + length_offset, length); <%- end -%> break; } @@ -231,7 +246,7 @@ pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) // Now we're going to serialize the offset of the constant pool back where // we left space for it. uint32_t length = pm_sizet_to_u32(buffer->length); - memcpy(buffer->value + offset, &length, sizeof(uint32_t)); + pm_serialize_32(buffer->value + offset, length); // Now we're going to serialize the constant pool. offset = buffer->length; @@ -258,18 +273,18 @@ pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) assert(content_offset < owned_mask); content_offset |= owned_mask; - memcpy(buffer->value + buffer_offset, &content_offset, 4); + pm_serialize_32(buffer->value + buffer_offset, content_offset); pm_buffer_append_bytes(buffer, constant->start, constant->length); } else { // Since this is a shared constant, we are going to write its // source offset directly into the buffer. uint32_t source_offset = pm_ptrdifft_to_u32(constant->start - parser->start); - memcpy(buffer->value + buffer_offset, &source_offset, 4); + pm_serialize_32(buffer->value + buffer_offset, source_offset); } // Now we can write the length of the constant into the buffer. uint32_t constant_length = pm_sizet_to_u32(constant->length); - memcpy(buffer->value + buffer_offset + 4, &constant_length, 4); + pm_serialize_32(buffer->value + buffer_offset + 4, constant_length); } } } From 3dd77bc056de19a1cc0ad3fafdb8e49c429dec5a Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 17 Nov 2023 14:05:37 -0500 Subject: [PATCH 17/21] Fix corruption when out of shape during ivar remove Reproduction script: ``` o = Object.new 10.times { |i| o.instance_variable_set(:"@a#{i}", i) } i = 0 a = Object.new while RubyVM::Shape.shapes_available > 2 a.instance_variable_set(:"@i#{i}", 1) i += 1 end o.remove_instance_variable(:@a0) puts o.instance_variable_get(:@a1) ``` Before this patch, it would incorrectly output `2` and now it correctly outputs `1`. --- shape.c | 83 ++++++++++++++++------------------------ test/ruby/test_shapes.rb | 21 ++++++++++ 2 files changed, 55 insertions(+), 49 deletions(-) diff --git a/shape.c b/shape.c index 7ccf82b246a6d2..5a4a271b309d22 100644 --- a/shape.c +++ b/shape.c @@ -528,29 +528,8 @@ rb_shape_frozen_shape_p(rb_shape_t* shape) return SHAPE_FROZEN == (enum shape_type)shape->type; } -static void -move_iv(VALUE obj, ID id, attr_index_t from, attr_index_t to) -{ - switch(BUILTIN_TYPE(obj)) { - case T_CLASS: - case T_MODULE: - RCLASS_IVPTR(obj)[to] = RCLASS_IVPTR(obj)[from]; - break; - case T_OBJECT: - RUBY_ASSERT(!rb_shape_obj_too_complex(obj)); - ROBJECT_IVPTR(obj)[to] = ROBJECT_IVPTR(obj)[from]; - break; - default: { - struct gen_ivtbl *ivtbl; - rb_gen_ivtbl_get(obj, id, &ivtbl); - ivtbl->as.shape.ivptr[to] = ivtbl->as.shape.ivptr[from]; - break; - } - } -} - static rb_shape_t * -remove_shape_recursive(VALUE obj, ID id, rb_shape_t * shape, VALUE * removed) +remove_shape_recursive(rb_shape_t *shape, ID id, rb_shape_t **removed_shape) { if (shape->parent_id == INVALID_SHAPE_ID) { // We've hit the top of the shape tree and couldn't find the @@ -559,30 +538,13 @@ remove_shape_recursive(VALUE obj, ID id, rb_shape_t * shape, VALUE * removed) } else { if (shape->type == SHAPE_IVAR && shape->edge_name == id) { - // We've hit the edge we wanted to remove, return it's _parent_ - // as the new parent while we go back down the stack. - attr_index_t index = shape->next_iv_index - 1; - - switch(BUILTIN_TYPE(obj)) { - case T_CLASS: - case T_MODULE: - *removed = RCLASS_IVPTR(obj)[index]; - break; - case T_OBJECT: - *removed = ROBJECT_IVPTR(obj)[index]; - break; - default: { - struct gen_ivtbl *ivtbl; - rb_gen_ivtbl_get(obj, id, &ivtbl); - *removed = ivtbl->as.shape.ivptr[index]; - break; - } - } + *removed_shape = shape; + return rb_shape_get_parent(shape); } else { // This isn't the IV we want to remove, keep walking up. - rb_shape_t * new_parent = remove_shape_recursive(obj, id, rb_shape_get_parent(shape), removed); + rb_shape_t *new_parent = remove_shape_recursive(rb_shape_get_parent(shape), id, removed_shape); // We found a new parent. Create a child of the new parent that // has the same attributes as this shape. @@ -592,17 +554,13 @@ remove_shape_recursive(VALUE obj, ID id, rb_shape_t * shape, VALUE * removed) } bool dont_care; - rb_shape_t * new_child = get_next_shape_internal(new_parent, shape->edge_name, shape->type, &dont_care, true); + rb_shape_t *new_child = get_next_shape_internal(new_parent, shape->edge_name, shape->type, &dont_care, true); if (UNLIKELY(new_child->type == SHAPE_OBJ_TOO_COMPLEX)) { return new_child; } RUBY_ASSERT(new_child->capacity <= shape->capacity); - if (new_child->type == SHAPE_IVAR) { - move_iv(obj, id, shape->next_iv_index - 1, new_child->next_iv_index - 1); - } - return new_child; } else { @@ -615,18 +573,45 @@ remove_shape_recursive(VALUE obj, ID id, rb_shape_t * shape, VALUE * removed) } bool -rb_shape_transition_shape_remove_ivar(VALUE obj, ID id, rb_shape_t *shape, VALUE * removed) +rb_shape_transition_shape_remove_ivar(VALUE obj, ID id, rb_shape_t *shape, VALUE *removed) { if (UNLIKELY(shape->type == SHAPE_OBJ_TOO_COMPLEX)) { return false; } - rb_shape_t * new_shape = remove_shape_recursive(obj, id, shape, removed); + rb_shape_t *removed_shape = NULL; + rb_shape_t *new_shape = remove_shape_recursive(shape, id, &removed_shape); if (new_shape) { + RUBY_ASSERT(removed_shape != NULL); + if (UNLIKELY(new_shape->type == SHAPE_OBJ_TOO_COMPLEX)) { return false; } + RUBY_ASSERT(new_shape->next_iv_index == shape->next_iv_index - 1); + + VALUE *ivptr; + switch(BUILTIN_TYPE(obj)) { + case T_CLASS: + case T_MODULE: + ivptr = RCLASS_IVPTR(obj); + break; + case T_OBJECT: + ivptr = ROBJECT_IVPTR(obj); + break; + default: { + struct gen_ivtbl *ivtbl; + rb_gen_ivtbl_get(obj, id, &ivtbl); + ivptr = ivtbl->as.shape.ivptr; + break; + } + } + + *removed = ivptr[removed_shape->next_iv_index - 1]; + + memmove(&ivptr[removed_shape->next_iv_index - 1], &ivptr[removed_shape->next_iv_index], + ((new_shape->next_iv_index + 1) - removed_shape->next_iv_index) * sizeof(VALUE)); + rb_shape_set_shape(obj, new_shape); } return true; diff --git a/test/ruby/test_shapes.rb b/test/ruby/test_shapes.rb index 9f455e4389655e..f985f8c611e9df 100644 --- a/test/ruby/test_shapes.rb +++ b/test/ruby/test_shapes.rb @@ -414,6 +414,27 @@ module A assert_equal true, A.instance_variable_defined?(:@a) end; end + + def test_run_out_of_shape_during_remove_instance_variable + assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}") + begin; + o = Object.new + 10.times { |i| o.instance_variable_set(:"@a#{i}", i) } + + i = 0 + a = Object.new + while RubyVM::Shape.shapes_available > 2 + a.instance_variable_set(:"@i#{i}", 1) + i += 1 + end + + o.remove_instance_variable(:@a0) + (1...10).each do |i| + assert_equal(i, o.instance_variable_get(:"@a#{i}")) + end + end; + end + def test_run_out_of_shape_remove_instance_variable assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}") begin; From ef72970a046270cc4b1d4ed029128876a9cbab88 Mon Sep 17 00:00:00 2001 From: Dana Sherson Date: Sat, 18 Nov 2023 10:41:30 +1300 Subject: [PATCH 18/21] Fix File.directory? doc hidding File::Stat#directory? doc Now the documentation that was already in the codebase for `File::Stat#directory?` shows up. --- file.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/file.c b/file.c index 1b526219284e81..6a314c80694013 100644 --- a/file.c +++ b/file.c @@ -1593,8 +1593,6 @@ rb_access(VALUE fname, int mode) */ /* - * Document-method: directory? - * * call-seq: * File.directory?(path) -> true or false * From 24fe22a5da21c9df8584a4ce6b6d1ce18ac41cc2 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Fri, 17 Nov 2023 17:57:25 -0500 Subject: [PATCH 19/21] Fix ordering for auto compaction in get_overloaded_cme() Found through GC.stress + GC.auto_compact crashes in GH-8932. Previously, the compaction run within `rb_method_entry_alloc()` could move the `def->body.iseq.cref` and `iseqptr` set up before the call and leave the `def` pointing to moved addresses. Nothing was marking `def` during that GC run. Low probability reproducer: GC.stress = true GC.auto_compact = true arr = [] alloc = 1000.times.map { [] } alloc = nil a = arr.first GC.start --- vm_method.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vm_method.c b/vm_method.c index d2b642e6e156e0..245c58ac3e8dfc 100644 --- a/vm_method.c +++ b/vm_method.c @@ -1033,15 +1033,15 @@ get_overloaded_cme(const rb_callable_method_entry_t *cme) else { // create rb_method_definition_t *def = rb_method_definition_create(VM_METHOD_TYPE_ISEQ, cme->def->original_id); - def->body.iseq.cref = cme->def->body.iseq.cref; - def->body.iseq.iseqptr = ISEQ_BODY(cme->def->body.iseq.iseqptr)->mandatory_only_iseq; - rb_method_entry_t *me = rb_method_entry_alloc(cme->called_id, cme->owner, cme->defined_class, def, false); + RB_OBJ_WRITE(me, &def->body.iseq.cref, cme->def->body.iseq.cref); + RB_OBJ_WRITE(me, &def->body.iseq.iseqptr, ISEQ_BODY(cme->def->body.iseq.iseqptr)->mandatory_only_iseq); + ASSERT_vm_locking(); st_insert(overloaded_cme_table(), (st_data_t)cme, (st_data_t)me); From f479e629ab497f325091096819fa5bf60c0d03b2 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 17 Nov 2023 20:28:11 -0500 Subject: [PATCH 20/21] [ruby/prism] Revert "Ensure serialized file is little endian" https://github.com/ruby/prism/commit/4cec275fff --- prism/defines.h | 10 --------- prism/templates/lib/prism/serialize.rb.erb | 8 +++---- prism/templates/src/serialize.c.erb | 25 +++++----------------- 3 files changed, 9 insertions(+), 34 deletions(-) diff --git a/prism/defines.h b/prism/defines.h index 28f4da11dfd810..f89a0bed8e49da 100644 --- a/prism/defines.h +++ b/prism/defines.h @@ -74,14 +74,4 @@ # define snprintf _snprintf #endif -/** - * Defined PRISM_WORDS_BIGENDIAN so we can ensure our serialization happens in - * little endian format regardless of platform. - */ -#if defined(WORDS_BIGENDIAN) -# define PRISM_WORDS_BIGENDIAN -#elif defined(AC_APPLE_UNIVERSAL_BUILD) && defined(__BIG_ENDIAN__) -# define PRISM_WORDS_BIGENDIAN -#endif - #endif diff --git a/prism/templates/lib/prism/serialize.rb.erb b/prism/templates/lib/prism/serialize.rb.erb index 517d4e8a24ae82..28375045439616 100644 --- a/prism/templates/lib/prism/serialize.rb.erb +++ b/prism/templates/lib/prism/serialize.rb.erb @@ -137,7 +137,7 @@ module Prism comments, magic_comments, errors, warnings = load_metadata - @constant_pool_offset = io.read(4).unpack1("L<") + @constant_pool_offset = io.read(4).unpack1("L") @constant_pool = Array.new(load_varint, nil) [load_node, comments, magic_comments, errors, warnings] @@ -167,7 +167,7 @@ module Prism end def load_serialized_length - io.read(4).unpack1("L<") + io.read(4).unpack1("L") end def load_optional_node @@ -206,8 +206,8 @@ module Prism unless constant offset = constant_pool_offset + index * 8 - start = serialized.unpack1("L<", offset: offset) - length = serialized.unpack1("L<", offset: offset + 4) + start = serialized.unpack1("L", offset: offset) + length = serialized.unpack1("L", offset: offset + 4) constant = if start.nobits?(1 << 31) diff --git a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb index 2ecf7299c6b5db..db4c91e0cd88ee 100644 --- a/prism/templates/src/serialize.c.erb +++ b/prism/templates/src/serialize.c.erb @@ -47,21 +47,6 @@ pm_serialize_string(pm_parser_t *parser, pm_string_t *string, pm_buffer_t *buffe } } -/** - * Serialize a 32-bit integer to the given address always in little-endian. - */ -static void -pm_serialize_32(char *address, uint32_t value) { -#ifdef PRISM_WORDS_BIGENDIAN - address[0] = (char) ((value >> 24) & 0xFF); - address[1] = (char) ((value >> 16) & 0xFF); - address[2] = (char) ((value >> 8) & 0xFF); - address[3] = (char) (value & 0xFF); -#else - memcpy(address, &value, sizeof(uint32_t)); -#endif -} - static void pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node)); @@ -133,7 +118,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { <%- if node.needs_serialized_length? -%> // serialize length uint32_t length = pm_sizet_to_u32(buffer->length - offset - sizeof(uint32_t)); - pm_serialize_32(buffer->value + length_offset, length); + memcpy(buffer->value + length_offset, &length, sizeof(uint32_t)); <%- end -%> break; } @@ -246,7 +231,7 @@ pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) // Now we're going to serialize the offset of the constant pool back where // we left space for it. uint32_t length = pm_sizet_to_u32(buffer->length); - pm_serialize_32(buffer->value + offset, length); + memcpy(buffer->value + offset, &length, sizeof(uint32_t)); // Now we're going to serialize the constant pool. offset = buffer->length; @@ -273,18 +258,18 @@ pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) assert(content_offset < owned_mask); content_offset |= owned_mask; - pm_serialize_32(buffer->value + buffer_offset, content_offset); + memcpy(buffer->value + buffer_offset, &content_offset, 4); pm_buffer_append_bytes(buffer, constant->start, constant->length); } else { // Since this is a shared constant, we are going to write its // source offset directly into the buffer. uint32_t source_offset = pm_ptrdifft_to_u32(constant->start - parser->start); - pm_serialize_32(buffer->value + buffer_offset, source_offset); + memcpy(buffer->value + buffer_offset, &source_offset, 4); } // Now we can write the length of the constant into the buffer. uint32_t constant_length = pm_sizet_to_u32(constant->length); - pm_serialize_32(buffer->value + buffer_offset + 4, constant_length); + memcpy(buffer->value + buffer_offset + 4, &constant_length, 4); } } } From 97fb07db518e768d621dda9e4962e3179bacee07 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 16 Nov 2023 18:13:13 +0100 Subject: [PATCH 21/21] Pin object ivars while they are being copied in a hash When transitioning an object to TOO_COMPLEX we copy all its ivar in a table, but if GC (and compaction) trigger in the middle of the transition, the old `iv_ptr` might still be the canonical ivar list and will be updated by the GC, but the reference we copied in the table will be outdated. So we must pin these reference until they are all copied and the object `iv_ptr` is pointing to the table. To do this we allocate a temporary TOO_COMPLEX object which we use as the host for the copy. TOO_COMPLEX objects are marked with `mark_tbl` which does pin references. Co-Authored-By: Alan Wu --- internal/variable.h | 3 +- object.c | 3 +- test/ruby/test_shapes.rb | 101 ++++++++++++++++++++++++++++++++++++++- variable.c | 49 ++++++++++++++++--- 4 files changed, 146 insertions(+), 10 deletions(-) diff --git a/internal/variable.h b/internal/variable.h index 63b074a30884d6..14a5d4bbb41e48 100644 --- a/internal/variable.h +++ b/internal/variable.h @@ -47,7 +47,8 @@ VALUE rb_mod_set_temporary_name(VALUE, VALUE); struct gen_ivtbl; int rb_gen_ivtbl_get(VALUE obj, ID id, struct gen_ivtbl **ivtbl); -void rb_obj_copy_ivs_to_hash_table(VALUE obj, st_table *table); +VALUE rb_obj_copy_ivs_to_hash_table(VALUE obj, st_table *table); +void rb_obj_copy_ivs_to_hash_table_complete(VALUE ivar_pinner); void rb_obj_convert_to_too_complex(VALUE obj, st_table *table); void rb_evict_ivars_to_hash(VALUE obj); diff --git a/object.c b/object.c index a8090d0763db06..709e8ae1df9052 100644 --- a/object.c +++ b/object.c @@ -326,8 +326,9 @@ rb_obj_copy_ivar(VALUE dest, VALUE obj) shape_to_set_on_dest = rb_shape_rebuild_shape(initial_shape, src_shape); if (UNLIKELY(rb_shape_id(shape_to_set_on_dest) == OBJ_TOO_COMPLEX_SHAPE_ID)) { st_table * table = rb_st_init_numtable_with_size(src_num_ivs); - rb_obj_copy_ivs_to_hash_table(obj, table); + VALUE ivar_pinner = rb_obj_copy_ivs_to_hash_table(obj, table); rb_obj_convert_to_too_complex(dest, table); + rb_obj_copy_ivs_to_hash_table_complete(ivar_pinner); return; } diff --git a/test/ruby/test_shapes.rb b/test/ruby/test_shapes.rb index f985f8c611e9df..8c6dfa9f8e70a5 100644 --- a/test/ruby/test_shapes.rb +++ b/test/ruby/test_shapes.rb @@ -256,9 +256,9 @@ def test_run_out_of_shape_for_class_ivar assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}") begin; i = 0 + o = Object.new while RubyVM::Shape.shapes_available > 0 - c = Class.new - c.instance_variable_set(:"@i#{i}", 1) + o.instance_variable_set(:"@i#{i}", 1) i += 1 end @@ -275,6 +275,103 @@ def test_run_out_of_shape_for_class_ivar end; end + def test_evacuate_class_ivar_and_compaction + assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}") + + begin; + count = 20 + + c = Class.new + count.times do |ivar| + c.instance_variable_set("@i#{ivar}", "ivar-#{ivar}") + end + + i = 0 + o = Object.new + while RubyVM::Shape.shapes_available > 0 + o.instance_variable_set("@i#{i}", 1) + i += 1 + end + + GC.auto_compact = true + GC.stress = true + # Cause evacuation + c.instance_variable_set(:@a, o = Object.new) + assert_equal(o, c.instance_variable_get(:@a)) + GC.stress = false + + count.times do |ivar| + assert_equal "ivar-#{ivar}", c.instance_variable_get("@i#{ivar}") + end + end; + end + + def test_evacuate_generic_ivar_and_compaction + assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}") + + begin; + count = 20 + + c = Hash.new + count.times do |ivar| + c.instance_variable_set("@i#{ivar}", "ivar-#{ivar}") + end + + i = 0 + o = Object.new + while RubyVM::Shape.shapes_available > 0 + o.instance_variable_set("@i#{i}", 1) + i += 1 + end + + GC.auto_compact = true + GC.stress = true + + # Cause evacuation + c.instance_variable_set(:@a, o = Object.new) + assert_equal(o, c.instance_variable_get(:@a)) + + GC.stress = false + + count.times do |ivar| + assert_equal "ivar-#{ivar}", c.instance_variable_get("@i#{ivar}") + end + end; + end + + def test_evacuate_object_ivar_and_compaction + assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}") + + begin; + count = 20 + + c = Object.new + count.times do |ivar| + c.instance_variable_set("@i#{ivar}", "ivar-#{ivar}") + end + + i = 0 + o = Object.new + while RubyVM::Shape.shapes_available > 0 + o.instance_variable_set("@i#{i}", 1) + i += 1 + end + + GC.auto_compact = true + GC.stress = true + + # Cause evacuation + c.instance_variable_set(:@a, o = Object.new) + assert_equal(o, c.instance_variable_get(:@a)) + + GC.stress = false + + count.times do |ivar| + assert_equal "ivar-#{ivar}", c.instance_variable_get("@i#{ivar}") + end + end; + end + def test_run_out_of_shape_for_module_ivar assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}") begin; diff --git a/variable.c b/variable.c index 793516209869e2..efc435114f0485 100644 --- a/variable.c +++ b/variable.c @@ -1370,6 +1370,13 @@ rb_attr_delete(VALUE obj, ID id) return rb_ivar_delete(obj, id, Qnil); } +static int +rb_obj_write_barrier_ivars_i(ID key, VALUE val, st_data_t arg) +{ + RB_OBJ_WRITTEN((VALUE)arg, Qundef, val); + return ST_CONTINUE; +} + void rb_obj_convert_to_too_complex(VALUE obj, st_table *table) { @@ -1377,6 +1384,8 @@ rb_obj_convert_to_too_complex(VALUE obj, st_table *table) VALUE *old_ivptr = NULL; + rb_ivar_foreach(obj, rb_obj_write_barrier_ivars_i, (st_data_t)obj); + switch (BUILTIN_TYPE(obj)) { case T_OBJECT: if (!(RBASIC(obj)->flags & ROBJECT_EMBED)) { @@ -1422,8 +1431,9 @@ rb_evict_ivars_to_hash(VALUE obj) st_table *table = st_init_numtable_with_size(rb_ivar_count(obj)); // Evacuate all previous values from shape into id_table - rb_obj_copy_ivs_to_hash_table(obj, table); + VALUE ivar_pinner = rb_obj_copy_ivs_to_hash_table(obj, table); rb_obj_convert_to_too_complex(obj, table); + rb_obj_copy_ivs_to_hash_table_complete(ivar_pinner); RUBY_ASSERT(rb_shape_obj_too_complex(obj)); } @@ -1640,17 +1650,44 @@ rb_ensure_iv_list_size(VALUE obj, uint32_t current_capacity, uint32_t new_capaci } } -int -rb_obj_copy_ivs_to_hash_table_i(ID key, VALUE val, st_data_t arg) +struct rb_evacuate_arg { + st_table *table; + VALUE host; +}; + +static int +copy_ivs_to_hash_table_i(ID key, VALUE val, st_data_t arg) { - st_insert((st_table *)arg, (st_data_t)key, (st_data_t)val); + struct rb_evacuate_arg *evac_arg = (struct rb_evacuate_arg *)arg; + st_insert(evac_arg->table, (st_data_t)key, (st_data_t)val); + RB_OBJ_WRITTEN(evac_arg->host, Qundef, val); return ST_CONTINUE; } -void +VALUE rb_obj_copy_ivs_to_hash_table(VALUE obj, st_table *table) { - rb_ivar_foreach(obj, rb_obj_copy_ivs_to_hash_table_i, (st_data_t)table); + // There can be compaction runs between each ivar we copy out of obj, so we + // need an object to mark each ivar to make sure every reference is valid + // by the time we're done. Use a special T_OBJECT for marking. + VALUE ivar_pinner = rb_wb_protected_newobj_of(GET_EC(), rb_cBasicObject, T_OBJECT | ROBJECT_EMBED, RVALUE_SIZE); + rb_shape_set_shape_id(ivar_pinner, OBJ_TOO_COMPLEX_SHAPE_ID); + ROBJECT_SET_IV_HASH(ivar_pinner, table); + + // Evacuate all previous values from shape into id_table + struct rb_evacuate_arg evac_arg = { .table = table, .host = ivar_pinner }; + rb_ivar_foreach(obj, copy_ivs_to_hash_table_i, (st_data_t)&evac_arg); + + return ivar_pinner; +} + +void +rb_obj_copy_ivs_to_hash_table_complete(VALUE ivar_pinner) +{ + // done with pinning, now set pinner to 0 ivars + ROBJECT_SET_IV_HASH(ivar_pinner, NULL); + rb_shape_set_shape(ivar_pinner, rb_shape_get_root_shape()); + RB_GC_GUARD(ivar_pinner); } static VALUE *