diff --git a/include/ruby/internal/encoding/string.h b/include/ruby/internal/encoding/string.h index 2cfa91c01e1e81..ea78cf23f31e60 100644 --- a/include/ruby/internal/encoding/string.h +++ b/include/ruby/internal/encoding/string.h @@ -263,6 +263,14 @@ VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to); */ VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts); +/** + * @private + * + * This is an implementation detail of rb_enc_str_coderange(). Don't use this + * directly. + **/ +int rbimpl_enc_str_coderange_scan(VALUE str); + /** * Scans the passed string to collect its code range. Because a Ruby's string * is mutable, its contents change from time to time; so does its code range. @@ -274,6 +282,27 @@ VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ec */ int rb_enc_str_coderange(VALUE str); +/** + * Scans the passed string to collect its code range. Because a Ruby's string + * is mutable, its contents change from time to time; so does its code range. + * A long-lived string tends to fall back to ::RUBY_ENC_CODERANGE_UNKNOWN. + * This API scans it and re-assigns a fine-grained code range constant. + * + * @param[out] str A string. + * @return An enum ::ruby_coderange_type. + */ +static inline int +rb_enc_str_coderange_inline(VALUE str) +{ + int cr = ENC_CODERANGE(str); + if (cr == ENC_CODERANGE_UNKNOWN) { + cr = rbimpl_enc_str_coderange_scan(str); + } + return cr; +} + +#define rb_enc_str_coderange rb_enc_str_coderange_inline + /** * Scans the passed string until it finds something odd. Returns the number of * bytes scanned. As the name implies this is suitable for repeated call. One diff --git a/iseq.c b/iseq.c index eb85a995e28877..9c8ec0d1381ac9 100644 --- a/iseq.c +++ b/iseq.c @@ -1916,16 +1916,30 @@ iseqw_s_compile_file_prism(int argc, VALUE *argv, VALUE self) rb_execution_context_t *ec = GET_EC(); VALUE v = rb_vm_push_frame_fname(ec, file); + make_compile_option(&option, opt); + pm_parse_result_t result; pm_parse_result_init(&result); result.node.coverage_enabled = 1; + switch (option.frozen_string_literal) { + case ISEQ_FROZEN_STRING_LITERAL_UNSET: + break; + case ISEQ_FROZEN_STRING_LITERAL_DISABLED: + pm_options_frozen_string_literal_set(result.options, false); + break; + case ISEQ_FROZEN_STRING_LITERAL_ENABLED: + pm_options_frozen_string_literal_set(result.options, true); + break; + default: + rb_bug("iseqw_s_compile_file_prism: invalid frozen_string_literal=%d", option.frozen_string_literal); + break; + } + VALUE script_lines; VALUE error = pm_load_parse_file(&result, file, ruby_vm_keep_script_lines ? &script_lines : NULL); if (error == Qnil) { - make_compile_option(&option, opt); - int error_state; rb_iseq_t *iseq = pm_iseq_new_with_opt(&result.node, rb_fstring_lit("
"), file, diff --git a/re.c b/re.c index 55a9cb4c88d237..45a8aea5875bed 100644 --- a/re.c +++ b/re.c @@ -1584,21 +1584,11 @@ reg_enc_error(VALUE re, VALUE str) rb_enc_inspect_name(rb_enc_get(str))); } -static inline int -str_coderange(VALUE str) -{ - int cr = ENC_CODERANGE(str); - if (cr == ENC_CODERANGE_UNKNOWN) { - cr = rb_enc_str_coderange(str); - } - return cr; -} - static rb_encoding* rb_reg_prepare_enc(VALUE re, VALUE str, int warn) { rb_encoding *enc = 0; - int cr = str_coderange(str); + int cr = rb_enc_str_coderange(str); if (cr == ENC_CODERANGE_BROKEN) { rb_raise(rb_eArgError, @@ -3276,7 +3266,7 @@ rb_reg_preprocess_dregexp(VALUE ary, int options) src_enc = rb_enc_get(str); if (options & ARG_ENCODING_NONE && src_enc != ascii8bit) { - if (str_coderange(str) != ENC_CODERANGE_7BIT) + if (rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) rb_raise(rb_eRegexpError, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script"); else src_enc = ascii8bit; @@ -3397,7 +3387,7 @@ rb_reg_initialize_str(VALUE obj, VALUE str, int options, onig_errmsg_buffer err, if (options & ARG_ENCODING_NONE) { rb_encoding *ascii8bit = rb_ascii8bit_encoding(); if (enc != ascii8bit) { - if (str_coderange(str) != ENC_CODERANGE_7BIT) { + if (rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) { errcpy(err, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script"); return -1; } diff --git a/string.c b/string.c index 106e6562eaf390..0875f73cc80674 100644 --- a/string.c +++ b/string.c @@ -941,17 +941,26 @@ rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc) return enc_coderange_scan(str, enc); } +int +rbimpl_enc_str_coderange_scan(VALUE str) +{ + int cr = enc_coderange_scan(str, get_encoding(str)); + ENC_CODERANGE_SET(str, cr); + return cr; +} + +#undef rb_enc_str_coderange int rb_enc_str_coderange(VALUE str) { int cr = ENC_CODERANGE(str); if (cr == ENC_CODERANGE_UNKNOWN) { - cr = enc_coderange_scan(str, get_encoding(str)); - ENC_CODERANGE_SET(str, cr); + cr = rbimpl_enc_str_coderange_scan(str); } return cr; } +#define rb_enc_str_coderange rb_enc_str_coderange_inline static inline bool rb_enc_str_asciicompat(VALUE str) diff --git a/test/ruby/test_iseq.rb b/test/ruby/test_iseq.rb index 43b1f0f6206410..9b69ddbed912f7 100644 --- a/test/ruby/test_iseq.rb +++ b/test/ruby/test_iseq.rb @@ -355,6 +355,20 @@ def test_frozen_string_literal_compile_option assert_not_predicate(s4, :frozen?) end + def test_frozen_string_literal_compile_option_file + Tempfile.create(%w[fsl .rb]) do |f| + f.write("['foo', 'foo', \"\#{$f}foo\", \"\#{'foo'}\"]\n") + f.flush + $f = 'f' + s1, s2, s3, s4 = RubyVM::InstructionSequence + .compile_file(f.path, frozen_string_literal: true).eval + assert_predicate(s1, :frozen?) + assert_predicate(s2, :frozen?) + assert_not_predicate(s3, :frozen?) + assert_not_predicate(s4, :frozen?) + end + end + # Safe call chain is not optimized when Coverage is running. # So we can test it only when Coverage is not running. def test_safe_call_chain diff --git a/tool/leaked-globals b/tool/leaked-globals index 6118cd56e8153b..73da769318ad37 100755 --- a/tool/leaked-globals +++ b/tool/leaked-globals @@ -96,7 +96,7 @@ Pipe.new(NM + ARGV).each do |line| next when /\Aruby_static_id_/ next unless so - when /\A(?:RUBY_|ruby_|rb_)/ + when /\A(?:RUBY_|ruby_|rb_|rbimpl_)/ next unless so and /_(threadptr|ec)_/ =~ n when *SYMBOLS_IN_EMPTYLIB next