From cba70c3532c34803bae065745b799103635ec67a Mon Sep 17 00:00:00 2001 From: Vladimir Dementyev Date: Tue, 21 Apr 2026 23:34:44 +0900 Subject: [PATCH 1/2] - iseq.c: fix passing frozen option to compile_file_prism --- iseq.c | 18 ++++++++++++++++-- test/ruby/test_iseq.rb | 14 ++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/iseq.c b/iseq.c index eb85a995e28877..9c8ec0d1381ac9 100644 --- a/iseq.c +++ b/iseq.c @@ -1916,16 +1916,30 @@ iseqw_s_compile_file_prism(int argc, VALUE *argv, VALUE self) rb_execution_context_t *ec = GET_EC(); VALUE v = rb_vm_push_frame_fname(ec, file); + make_compile_option(&option, opt); + pm_parse_result_t result; pm_parse_result_init(&result); result.node.coverage_enabled = 1; + switch (option.frozen_string_literal) { + case ISEQ_FROZEN_STRING_LITERAL_UNSET: + break; + case ISEQ_FROZEN_STRING_LITERAL_DISABLED: + pm_options_frozen_string_literal_set(result.options, false); + break; + case ISEQ_FROZEN_STRING_LITERAL_ENABLED: + pm_options_frozen_string_literal_set(result.options, true); + break; + default: + rb_bug("iseqw_s_compile_file_prism: invalid frozen_string_literal=%d", option.frozen_string_literal); + break; + } + VALUE script_lines; VALUE error = pm_load_parse_file(&result, file, ruby_vm_keep_script_lines ? &script_lines : NULL); if (error == Qnil) { - make_compile_option(&option, opt); - int error_state; rb_iseq_t *iseq = pm_iseq_new_with_opt(&result.node, rb_fstring_lit("
"), file, diff --git a/test/ruby/test_iseq.rb b/test/ruby/test_iseq.rb index 43b1f0f6206410..9b69ddbed912f7 100644 --- a/test/ruby/test_iseq.rb +++ b/test/ruby/test_iseq.rb @@ -355,6 +355,20 @@ def test_frozen_string_literal_compile_option assert_not_predicate(s4, :frozen?) end + def test_frozen_string_literal_compile_option_file + Tempfile.create(%w[fsl .rb]) do |f| + f.write("['foo', 'foo', \"\#{$f}foo\", \"\#{'foo'}\"]\n") + f.flush + $f = 'f' + s1, s2, s3, s4 = RubyVM::InstructionSequence + .compile_file(f.path, frozen_string_literal: true).eval + assert_predicate(s1, :frozen?) + assert_predicate(s2, :frozen?) + assert_not_predicate(s3, :frozen?) + assert_not_predicate(s4, :frozen?) + end + end + # Safe call chain is not optimized when Coverage is running. # So we can test it only when Coverage is not running. def test_safe_call_chain From 5bd3e85ea1e3c3af51e30222406852ea931a65cb Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Sun, 19 Apr 2026 11:25:27 +0900 Subject: [PATCH 2/2] Make `rb_enc_str_coderange` inlinable by default This is a generalization of the optimization done in re.c as part of d0fbdb005cecd8513aeacb234365d71f9a9b521e. Code that deals with coderange can benefit significantly from avoiding that function call, assuming coderange is often already known. 
Ref: https://github.com/ruby/json/pull/974 --- include/ruby/internal/encoding/string.h | 29 +++++++++++++++++++++++++ re.c | 16 +++----------- string.c | 13 +++++++++-- tool/leaked-globals | 2 +- 4 files changed, 44 insertions(+), 16 deletions(-) diff --git a/include/ruby/internal/encoding/string.h b/include/ruby/internal/encoding/string.h index 2cfa91c01e1e81..ea78cf23f31e60 100644 --- a/include/ruby/internal/encoding/string.h +++ b/include/ruby/internal/encoding/string.h @@ -263,6 +263,14 @@ VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to); */ VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts); +/** + * @private + * + * This is an implementation detail of rb_enc_str_coderange(). Don't use this + * directly. + **/ +int rbimpl_enc_str_coderange_scan(VALUE str); + /** * Scans the passed string to collect its code range. Because a Ruby's string * is mutable, its contents change from time to time; so does its code range. @@ -274,6 +282,27 @@ VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ec */ int rb_enc_str_coderange(VALUE str); +/** + * Scans the passed string to collect its code range. Because a Ruby's string + * is mutable, its contents change from time to time; so does its code range. + * A long-lived string tends to fall back to ::RUBY_ENC_CODERANGE_UNKNOWN. + * This API scans it and re-assigns a fine-grained code range constant. + * + * @param[out] str A string. + * @return An enum ::ruby_coderange_type. + */ +static inline int +rb_enc_str_coderange_inline(VALUE str) +{ + int cr = ENC_CODERANGE(str); + if (cr == ENC_CODERANGE_UNKNOWN) { + cr = rbimpl_enc_str_coderange_scan(str); + } + return cr; +} + +#define rb_enc_str_coderange rb_enc_str_coderange_inline + /** * Scans the passed string until it finds something odd. Returns the number of * bytes scanned. As the name implies this is suitable for repeated call. 
One diff --git a/re.c b/re.c index 55a9cb4c88d237..45a8aea5875bed 100644 --- a/re.c +++ b/re.c @@ -1584,21 +1584,11 @@ reg_enc_error(VALUE re, VALUE str) rb_enc_inspect_name(rb_enc_get(str))); } -static inline int -str_coderange(VALUE str) -{ - int cr = ENC_CODERANGE(str); - if (cr == ENC_CODERANGE_UNKNOWN) { - cr = rb_enc_str_coderange(str); - } - return cr; -} - static rb_encoding* rb_reg_prepare_enc(VALUE re, VALUE str, int warn) { rb_encoding *enc = 0; - int cr = str_coderange(str); + int cr = rb_enc_str_coderange(str); if (cr == ENC_CODERANGE_BROKEN) { rb_raise(rb_eArgError, @@ -3276,7 +3266,7 @@ rb_reg_preprocess_dregexp(VALUE ary, int options) src_enc = rb_enc_get(str); if (options & ARG_ENCODING_NONE && src_enc != ascii8bit) { - if (str_coderange(str) != ENC_CODERANGE_7BIT) + if (rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) rb_raise(rb_eRegexpError, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script"); else src_enc = ascii8bit; @@ -3397,7 +3387,7 @@ rb_reg_initialize_str(VALUE obj, VALUE str, int options, onig_errmsg_buffer err, if (options & ARG_ENCODING_NONE) { rb_encoding *ascii8bit = rb_ascii8bit_encoding(); if (enc != ascii8bit) { - if (str_coderange(str) != ENC_CODERANGE_7BIT) { + if (rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) { errcpy(err, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script"); return -1; } diff --git a/string.c b/string.c index 106e6562eaf390..0875f73cc80674 100644 --- a/string.c +++ b/string.c @@ -941,17 +941,26 @@ rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc) return enc_coderange_scan(str, enc); } +int +rbimpl_enc_str_coderange_scan(VALUE str) +{ + int cr = enc_coderange_scan(str, get_encoding(str)); + ENC_CODERANGE_SET(str, cr); + return cr; +} + +#undef rb_enc_str_coderange int rb_enc_str_coderange(VALUE str) { int cr = ENC_CODERANGE(str); if (cr == ENC_CODERANGE_UNKNOWN) { - cr = enc_coderange_scan(str, get_encoding(str)); - ENC_CODERANGE_SET(str, cr); + cr = 
rbimpl_enc_str_coderange_scan(str); } return cr; } +#define rb_enc_str_coderange rb_enc_str_coderange_inline static inline bool rb_enc_str_asciicompat(VALUE str) diff --git a/tool/leaked-globals b/tool/leaked-globals index 6118cd56e8153b..73da769318ad37 100755 --- a/tool/leaked-globals +++ b/tool/leaked-globals @@ -96,7 +96,7 @@ Pipe.new(NM + ARGV).each do |line| next when /\Aruby_static_id_/ next unless so - when /\A(?:RUBY_|ruby_|rb_)/ + when /\A(?:RUBY_|ruby_|rb_|rbimpl_)/ next unless so and /_(threadptr|ec)_/ =~ n when *SYMBOLS_IN_EMPTYLIB next