diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java index 1341e6bfc..600e5e030 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java @@ -1384,6 +1384,8 @@ public void visit(StringNode node) { short opcode; if (node.isVString) { opcode = Opcodes.LOAD_VSTRING; + } else if (node.forceByteString) { + opcode = Opcodes.LOAD_BYTE_STRING; } else if (emitterContext != null && emitterContext.symbolTable != null && !emitterContext.symbolTable.isStrictOptionEnabled(Strict.HINT_UTF8) && !emitterContext.compilerOptions.isUnicodeSource) { diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java index 226b7099b..96a2af895 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java @@ -2056,7 +2056,10 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c Opcodes.ABS, Opcodes.BINARY_NOT, Opcodes.BITWISE_NOT, Opcodes.INTEGER_BITWISE_NOT, Opcodes.ORD, Opcodes.ORD_BYTES, Opcodes.OCT, Opcodes.HEX, Opcodes.SRAND, Opcodes.CHR, Opcodes.CHR_BYTES, Opcodes.LENGTH_BYTES, Opcodes.QUOTEMETA, Opcodes.FC, Opcodes.LC, - Opcodes.LCFIRST, Opcodes.UC, Opcodes.UCFIRST, Opcodes.SLEEP, Opcodes.TELL, + Opcodes.LCFIRST, Opcodes.UC, Opcodes.UCFIRST, Opcodes.FC_BYTES, Opcodes.LC_BYTES, + Opcodes.LCFIRST_BYTES, Opcodes.UC_BYTES, Opcodes.UCFIRST_BYTES, Opcodes.FC_UNICODE, + Opcodes.LC_UNICODE, Opcodes.LCFIRST_UNICODE, Opcodes.UC_UNICODE, Opcodes.UCFIRST_UNICODE, + Opcodes.TO_BYTES_STRING, Opcodes.SLEEP, Opcodes.TELL, Opcodes.RMDIR, Opcodes.CLOSEDIR, Opcodes.REWINDDIR, Opcodes.TELLDIR, Opcodes.CHDIR, Opcodes.EXIT -> { pc = ScalarUnaryOpcodeHandler.execute(opcode, bytecode, pc, registers); diff --git a/src/main/java/org/perlonjava/backend/bytecode/CompileOperator.java b/src/main/java/org/perlonjava/backend/bytecode/CompileOperator.java index d3b015015..160ee033b 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/CompileOperator.java +++ b/src/main/java/org/perlonjava/backend/bytecode/CompileOperator.java @@ -20,6 +20,16 @@ private static void compileScalarOperand(BytecodeCompiler bc, OperatorNode node, } } + private static short selectCaseOpcode(BytecodeCompiler bc, short normalOpcode, short bytesOpcode, short unicodeOpcode) { + if (bc.isBytesEnabled()) { + return bytesOpcode; + } + if (bc.symbolTable != null && bc.symbolTable.isFeatureCategoryEnabled("unicode_strings")) { + return unicodeOpcode; + } + return normalOpcode; + } + private static int compileArrayForExistsDelete(BytecodeCompiler bc, BinaryOperatorNode arrayAccess, int tokenIndex) { if (!(arrayAccess.left instanceof OperatorNode leftOp) || !leftOp.operator.equals("$") || !(leftOp.operand instanceof IdentifierNode)) { @@ -682,11 +692,16 @@ public static void visitOperator(BytecodeCompiler bytecodeCompiler, OperatorNode case "chrBytes" -> visitSimpleUnaryWithDefault(bytecodeCompiler, node, Opcodes.CHR_BYTES); case "lengthBytes" -> visitSimpleUnaryWithDefault(bytecodeCompiler, node, Opcodes.LENGTH_BYTES); case "quotemeta" -> visitSimpleUnaryWithDefault(bytecodeCompiler, node, Opcodes.QUOTEMETA); - case "fc" -> visitSimpleUnaryWithDefault(bytecodeCompiler, node, bytecodeCompiler.isBytesEnabled() ? Opcodes.FC_BYTES : Opcodes.FC); - case "lc" -> visitSimpleUnaryWithDefault(bytecodeCompiler, node, bytecodeCompiler.isBytesEnabled() ? Opcodes.LC_BYTES : Opcodes.LC); - case "lcfirst" -> visitSimpleUnaryWithDefault(bytecodeCompiler, node, bytecodeCompiler.isBytesEnabled() ? Opcodes.LCFIRST_BYTES : Opcodes.LCFIRST); - case "uc" -> visitSimpleUnaryWithDefault(bytecodeCompiler, node, bytecodeCompiler.isBytesEnabled() ? Opcodes.UC_BYTES : Opcodes.UC); - case "ucfirst" -> visitSimpleUnaryWithDefault(bytecodeCompiler, node, bytecodeCompiler.isBytesEnabled() ? Opcodes.UCFIRST_BYTES : Opcodes.UCFIRST); + case "fc" -> visitSimpleUnaryWithDefault(bytecodeCompiler, node, + selectCaseOpcode(bytecodeCompiler, Opcodes.FC, Opcodes.FC_BYTES, Opcodes.FC_UNICODE)); + case "lc" -> visitSimpleUnaryWithDefault(bytecodeCompiler, node, + selectCaseOpcode(bytecodeCompiler, Opcodes.LC, Opcodes.LC_BYTES, Opcodes.LC_UNICODE)); + case "lcfirst" -> visitSimpleUnaryWithDefault(bytecodeCompiler, node, + selectCaseOpcode(bytecodeCompiler, Opcodes.LCFIRST, Opcodes.LCFIRST_BYTES, Opcodes.LCFIRST_UNICODE)); + case "uc" -> visitSimpleUnaryWithDefault(bytecodeCompiler, node, + selectCaseOpcode(bytecodeCompiler, Opcodes.UC, Opcodes.UC_BYTES, Opcodes.UC_UNICODE)); + case "ucfirst" -> visitSimpleUnaryWithDefault(bytecodeCompiler, node, + selectCaseOpcode(bytecodeCompiler, Opcodes.UCFIRST, Opcodes.UCFIRST_BYTES, Opcodes.UCFIRST_UNICODE)); case "tell" -> visitSimpleUnaryWithDefault(bytecodeCompiler, node, Opcodes.TELL); case "rmdir" -> visitSimpleUnaryWithDefault(bytecodeCompiler, node, Opcodes.RMDIR); case "closedir" -> visitSimpleUnaryWithDefault(bytecodeCompiler, node, Opcodes.CLOSEDIR); diff --git a/src/main/java/org/perlonjava/backend/bytecode/Disassemble.java b/src/main/java/org/perlonjava/backend/bytecode/Disassemble.java index d440a10ed..1faceb126 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/Disassemble.java +++ b/src/main/java/org/perlonjava/backend/bytecode/Disassemble.java @@ -1574,14 +1574,19 @@ public static String disassemble(InterpretedCode interpretedCode) { case Opcodes.QUOTEMETA: case Opcodes.FC: case Opcodes.FC_BYTES: + case Opcodes.FC_UNICODE: case Opcodes.LC: case Opcodes.LC_BYTES: + case Opcodes.LC_UNICODE: case Opcodes.LCFIRST: case Opcodes.LCFIRST_BYTES: + case Opcodes.LCFIRST_UNICODE: case Opcodes.UC: case Opcodes.UC_BYTES: + case Opcodes.UC_UNICODE: case Opcodes.UCFIRST: case Opcodes.UCFIRST_BYTES: + case Opcodes.UCFIRST_UNICODE: case Opcodes.TO_BYTES_STRING: case Opcodes.SLEEP: case Opcodes.TELL: diff --git a/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java b/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java index 5cf146072..5bb1c6feb 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java +++ b/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java @@ -2321,6 +2321,31 @@ public class Opcodes { */ public static final short HASH_DEREF_FETCH_NONSTRICT_FOR_LOCAL = 484; + /** + * Fold case under unicode_strings: rd = StringOperators.fcUnicode(rs) + */ + public static final short FC_UNICODE = 485; + + /** + * Lowercase under unicode_strings: rd = StringOperators.lcUnicode(rs) + */ + public static final short LC_UNICODE = 486; + + /** + * Lowercase first under unicode_strings: rd = StringOperators.lcfirstUnicode(rs) + */ + public static final short LCFIRST_UNICODE = 487; + + /** + * Uppercase under unicode_strings: rd = StringOperators.ucUnicode(rs) + */ + public static final short UC_UNICODE = 488; + + /** + * Uppercase first under unicode_strings: rd = StringOperators.ucfirstUnicode(rs) + */ + public static final short UCFIRST_UNICODE = 489; + private Opcodes() { } // Utility class - no instantiation } diff --git a/src/main/java/org/perlonjava/backend/bytecode/ScalarUnaryOpcodeHandler.java b/src/main/java/org/perlonjava/backend/bytecode/ScalarUnaryOpcodeHandler.java index 8aefeaac1..4aedd386b 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/ScalarUnaryOpcodeHandler.java +++ b/src/main/java/org/perlonjava/backend/bytecode/ScalarUnaryOpcodeHandler.java @@ -43,14 +43,19 @@ public static int execute(int opcode, int[] bytecode, int pc, case Opcodes.QUOTEMETA -> StringOperators.quotemeta((RuntimeScalar) registers[rs]); case Opcodes.FC -> StringOperators.fc((RuntimeScalar) registers[rs]); case Opcodes.FC_BYTES -> StringOperators.fcBytes((RuntimeScalar) registers[rs]); + case Opcodes.FC_UNICODE -> StringOperators.fcUnicode((RuntimeScalar) registers[rs]); case Opcodes.LC -> StringOperators.lc((RuntimeScalar) registers[rs]); case Opcodes.LC_BYTES -> StringOperators.lcBytes((RuntimeScalar) registers[rs]); + case Opcodes.LC_UNICODE -> StringOperators.lcUnicode((RuntimeScalar) registers[rs]); case Opcodes.LCFIRST -> StringOperators.lcfirst((RuntimeScalar) registers[rs]); case Opcodes.LCFIRST_BYTES -> StringOperators.lcfirstBytes((RuntimeScalar) registers[rs]); + case Opcodes.LCFIRST_UNICODE -> StringOperators.lcfirstUnicode((RuntimeScalar) registers[rs]); case Opcodes.UC -> StringOperators.uc((RuntimeScalar) registers[rs]); case Opcodes.UC_BYTES -> StringOperators.ucBytes((RuntimeScalar) registers[rs]); + case Opcodes.UC_UNICODE -> StringOperators.ucUnicode((RuntimeScalar) registers[rs]); case Opcodes.UCFIRST -> StringOperators.ucfirst((RuntimeScalar) registers[rs]); case Opcodes.UCFIRST_BYTES -> StringOperators.ucfirstBytes((RuntimeScalar) registers[rs]); + case Opcodes.UCFIRST_UNICODE -> StringOperators.ucfirstUnicode((RuntimeScalar) registers[rs]); case Opcodes.TO_BYTES_STRING -> StringOperators.toBytesString((RuntimeScalar) registers[rs]); case Opcodes.SLEEP -> Time.sleep((RuntimeScalar) registers[rs]); case Opcodes.TELL -> IOOperator.tell((RuntimeScalar) registers[rs]); @@ -104,18 +109,28 @@ public static int disassemble(int opcode, int[] bytecode, int pc, case Opcodes.FC -> sb.append("FC r").append(rd).append(" = fc(r").append(rs).append(")\n"); case Opcodes.FC_BYTES -> sb.append("FC_BYTES r").append(rd).append(" = fcBytes(r").append(rs).append(")\n"); + case Opcodes.FC_UNICODE -> + sb.append("FC_UNICODE r").append(rd).append(" = fcUnicode(r").append(rs).append(")\n"); case Opcodes.LC -> sb.append("LC r").append(rd).append(" = lc(r").append(rs).append(")\n"); case Opcodes.LC_BYTES -> sb.append("LC_BYTES r").append(rd).append(" = lcBytes(r").append(rs).append(")\n"); + case Opcodes.LC_UNICODE -> + sb.append("LC_UNICODE r").append(rd).append(" = lcUnicode(r").append(rs).append(")\n"); case Opcodes.LCFIRST -> sb.append("LCFIRST r").append(rd).append(" = lcfirst(r").append(rs).append(")\n"); case Opcodes.LCFIRST_BYTES -> sb.append("LCFIRST_BYTES r").append(rd).append(" = lcfirstBytes(r").append(rs).append(")\n"); + case Opcodes.LCFIRST_UNICODE -> + sb.append("LCFIRST_UNICODE r").append(rd).append(" = lcfirstUnicode(r").append(rs).append(")\n"); case Opcodes.UC -> sb.append("UC r").append(rd).append(" = uc(r").append(rs).append(")\n"); case Opcodes.UC_BYTES -> sb.append("UC_BYTES r").append(rd).append(" = ucBytes(r").append(rs).append(")\n"); + case Opcodes.UC_UNICODE -> + sb.append("UC_UNICODE r").append(rd).append(" = ucUnicode(r").append(rs).append(")\n"); case Opcodes.UCFIRST -> sb.append("UCFIRST r").append(rd).append(" = ucfirst(r").append(rs).append(")\n"); case Opcodes.UCFIRST_BYTES -> sb.append("UCFIRST_BYTES r").append(rd).append(" = ucfirstBytes(r").append(rs).append(")\n"); + case Opcodes.UCFIRST_UNICODE -> + sb.append("UCFIRST_UNICODE r").append(rd).append(" = ucfirstUnicode(r").append(rs).append(")\n"); case Opcodes.TO_BYTES_STRING -> sb.append("TO_BYTES_STRING r").append(rd).append(" = toBytesString(r").append(rs).append(")\n"); case Opcodes.SLEEP -> sb.append("SLEEP r").append(rd).append(" = sleep(r").append(rs).append(")\n"); diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitLiteral.java b/src/main/java/org/perlonjava/backend/jvm/EmitLiteral.java index fc341ab18..8fc5c06ed 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitLiteral.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitLiteral.java @@ -255,7 +255,8 @@ public static void emitString(EmitterContext ctx, StringNode node) { return; } - if (!ctx.symbolTable.isStrictOptionEnabled(HINT_UTF8) && !ctx.compilerOptions.isUnicodeSource) { + if (node.forceByteString + || (!ctx.symbolTable.isStrictOptionEnabled(HINT_UTF8) && !ctx.compilerOptions.isUnicodeSource)) { // Under `no utf8` - create an octet string, unless it contains wide characters (> 255) // Wide characters (like \x{100}) force the string to be UTF-8 even without `use utf8` boolean hasWideChars = false; diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitOperator.java b/src/main/java/org/perlonjava/backend/jvm/EmitOperator.java index 681d25675..73f168c6d 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitOperator.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitOperator.java @@ -1507,7 +1507,7 @@ static void handleFcOperator(OperatorNode node, EmitterVisitor emitterVisitor) { } else { mv.visitMethodInsn(Opcodes.INVOKESTATIC, "org/perlonjava/runtime/operators/StringOperators", - "fc", + isUnicodeStringsEnabled(emitterVisitor) ? "fcUnicode" : "fc", "(Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;)Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;", false); } @@ -1531,7 +1531,7 @@ static void handleLcOperator(OperatorNode node, EmitterVisitor emitterVisitor) { } else { mv.visitMethodInsn(Opcodes.INVOKESTATIC, "org/perlonjava/runtime/operators/StringOperators", - "lc", + isUnicodeStringsEnabled(emitterVisitor) ? "lcUnicode" : "lc", "(Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;)Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;", false); } @@ -1555,7 +1555,7 @@ static void handleUcOperator(OperatorNode node, EmitterVisitor emitterVisitor) { } else { mv.visitMethodInsn(Opcodes.INVOKESTATIC, "org/perlonjava/runtime/operators/StringOperators", - "uc", + isUnicodeStringsEnabled(emitterVisitor) ? "ucUnicode" : "uc", "(Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;)Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;", false); } @@ -1579,7 +1579,7 @@ static void handleLcfirstOperator(OperatorNode node, EmitterVisitor emitterVisit } else { mv.visitMethodInsn(Opcodes.INVOKESTATIC, "org/perlonjava/runtime/operators/StringOperators", - "lcfirst", + isUnicodeStringsEnabled(emitterVisitor) ? "lcfirstUnicode" : "lcfirst", "(Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;)Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;", false); } @@ -1603,13 +1603,18 @@ static void handleUcfirstOperator(OperatorNode node, EmitterVisitor emitterVisit } else { mv.visitMethodInsn(Opcodes.INVOKESTATIC, "org/perlonjava/runtime/operators/StringOperators", - "ucfirst", + isUnicodeStringsEnabled(emitterVisitor) ? "ucfirstUnicode" : "ucfirst", "(Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;)Lorg/perlonjava/runtime/runtimetypes/RuntimeScalar;", false); } handleVoidContext(emitterVisitor); } + private static boolean isUnicodeStringsEnabled(EmitterVisitor emitterVisitor) { + return emitterVisitor.ctx.symbolTable != null + && emitterVisitor.ctx.symbolTable.isFeatureCategoryEnabled("unicode_strings"); + } + /** * Handles array-specific unary builtin operators. * diff --git a/src/main/java/org/perlonjava/frontend/astnode/StringNode.java b/src/main/java/org/perlonjava/frontend/astnode/StringNode.java index 16d035eb2..cdb210b27 100644 --- a/src/main/java/org/perlonjava/frontend/astnode/StringNode.java +++ b/src/main/java/org/perlonjava/frontend/astnode/StringNode.java @@ -20,6 +20,13 @@ public class StringNode extends AbstractNode { */ public final boolean isVString; + /** + * Force this literal to be emitted as a byte string even in a C + * scope. Perl keeps ASCII and fixed-byte escapes such as "\xFC" unupgraded; + * actual non-ASCII source characters still use normal UTF-8 string emission. + */ + public final boolean forceByteString; + /** * Constructs a new StringNode with the specified string value. * @@ -29,6 +36,7 @@ public StringNode(String value, int tokenIndex) { this.value = value; this.tokenIndex = tokenIndex; this.isVString = false; + this.forceByteString = false; } /** @@ -42,6 +50,14 @@ public StringNode(String value, boolean isVString, int tokenIndex) { this.value = value; this.tokenIndex = tokenIndex; this.isVString = isVString; + this.forceByteString = false; + } + + public StringNode(String value, boolean isVString, boolean forceByteString, int tokenIndex) { + this.value = value; + this.tokenIndex = tokenIndex; + this.isVString = isVString; + this.forceByteString = forceByteString; } /** @@ -67,4 +83,3 @@ public void accept(Visitor visitor) { visitor.visit(this); } } - diff --git a/src/main/java/org/perlonjava/frontend/parser/ParserTables.java b/src/main/java/org/perlonjava/frontend/parser/ParserTables.java index 82a4dc8a8..28bb66c57 100644 --- a/src/main/java/org/perlonjava/frontend/parser/ParserTables.java +++ b/src/main/java/org/perlonjava/frontend/parser/ParserTables.java @@ -30,7 +30,7 @@ public class ParserTables { "die", "do", "dump", "exec", "exit", "fork", - "getpwuid", "glob", + "gethostbyname", "getpwuid", "glob", "hex", "kill", "oct", "open", diff --git a/src/main/java/org/perlonjava/frontend/parser/StringSegmentParser.java b/src/main/java/org/perlonjava/frontend/parser/StringSegmentParser.java index 54d9760dc..7506b1361 100644 --- a/src/main/java/org/perlonjava/frontend/parser/StringSegmentParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/StringSegmentParser.java @@ -20,6 +20,7 @@ import static org.perlonjava.frontend.parser.ParseBlock.parseBlock; import static org.perlonjava.frontend.parser.Variable.parseArrayHashAccess; +import static org.perlonjava.runtime.perlmodule.Strict.HINT_UTF8; /** * Base class for parsing strings with segments and variable interpolation. @@ -74,6 +75,9 @@ public abstract class StringSegmentParser { * Buffer for accumulating literal text segments */ protected final StringBuilder currentSegment; + private boolean currentSegmentHasSourceNonAscii = false; + private boolean inRegexCharClass = false; + private boolean regexCharClassFirst = false; /** * List of AST nodes representing string segments (literals and interpolated expressions) */ @@ -128,6 +132,35 @@ protected void appendToCurrentSegment(String text) { currentSegment.append(text); } + protected void appendLiteralToCurrentSegment(String text) { + appendToCurrentSegment(text); + for (int i = 0; i < text.length(); i++) { + char c = text.charAt(i); + updateRegexCharClassState(c); + if (c > 127) { + currentSegmentHasSourceNonAscii = true; + } + } + } + + protected boolean isInsideRegexCharClass() { + return isRegex && inRegexCharClass; + } + + private void updateRegexCharClassState(char c) { + if (!isRegex) { + return; + } + if (c == '[' && !inRegexCharClass) { + inRegexCharClass = true; + regexCharClassFirst = true; + } else if (c == ']' && inRegexCharClass && !regexCharClassFirst) { + inRegexCharClass = false; + } else if (inRegexCharClass && regexCharClassFirst && c != '^') { + regexCharClassFirst = false; + } + } + /** * Adds a string segment node to the segments list. * @@ -150,9 +183,28 @@ protected void addStringSegment(Node node) { */ protected void flushCurrentSegment() { if (!currentSegment.isEmpty()) { - addStringSegment(new StringNode(currentSegment.toString(), tokenIndex)); + String value = currentSegment.toString(); + boolean forceByteString = shouldForceByteStringLiteral(value); + addStringSegment(new StringNode(value, false, forceByteString, tokenIndex)); currentSegment.setLength(0); + currentSegmentHasSourceNonAscii = false; + } + } + + private boolean shouldForceByteStringLiteral(String value) { + if (!ctx.symbolTable.isStrictOptionEnabled(HINT_UTF8) + && !ctx.compilerOptions.isUnicodeSource) { + return false; + } + if (currentSegmentHasSourceNonAscii) { + return false; + } + for (int i = 0; i < value.length(); i++) { + if (value.charAt(i) > 255) { + return false; + } } + return true; } /** @@ -639,7 +691,7 @@ public Node parse() { continue; } else { // No heredocs pending, append the newline normally - appendToCurrentSegment(token.text); + appendLiteralToCurrentSegment(token.text); } continue; } @@ -650,7 +702,7 @@ public Node parse() { } // Default: append literal text to current segment - appendToCurrentSegment(text); + appendLiteralToCurrentSegment(text); } if (CompilerOptions.DEBUG_ENABLED) ctx.logDebug("StringSegmentParser.parse: Finished parsing, segments count: " + segments.size()); @@ -1337,4 +1389,4 @@ void handleUnicodeNameEscape() { appendToCurrentSegment("N{" + nameBuilder); } } -} \ No newline at end of file +} diff --git a/src/main/java/org/perlonjava/runtime/operators/Directory.java b/src/main/java/org/perlonjava/runtime/operators/Directory.java index abe071f92..92d828966 100644 --- a/src/main/java/org/perlonjava/runtime/operators/Directory.java +++ b/src/main/java/org/perlonjava/runtime/operators/Directory.java @@ -92,8 +92,14 @@ public static RuntimeScalar chdir(RuntimeScalar runtimeScalar) { File absoluteDir = RuntimeIO.resolveFile(dirName); if (absoluteDir.exists() && absoluteDir.isDirectory()) { - // Normalize the path to remove redundant . and .. components - System.setProperty("user.dir", absoluteDir.toPath().normalize().toString()); + try { + // Match getcwd(3): collapse . and .., and resolve symlinks like + // macOS /var -> /private/var after chdir(). + System.setProperty("user.dir", absoluteDir.getCanonicalPath()); + } catch (IOException e) { + handleIOException(e, "chdir failed"); + return scalarFalse; + } return scalarTrue; } else { // Set errno to ENOENT (No such file or directory) diff --git a/src/main/java/org/perlonjava/runtime/operators/StringOperators.java b/src/main/java/org/perlonjava/runtime/operators/StringOperators.java index 2ed00e770..9cf9e321a 100644 --- a/src/main/java/org/perlonjava/runtime/operators/StringOperators.java +++ b/src/main/java/org/perlonjava/runtime/operators/StringOperators.java @@ -101,12 +101,21 @@ public static RuntimeScalar toBytesString(RuntimeScalar runtimeScalar) { */ private static RuntimeScalar makeStringResult(String value, RuntimeScalar source) { RuntimeScalar result = new RuntimeScalar(value); - if (source.type == RuntimeScalarType.BYTE_STRING) { + if (source.type == RuntimeScalarType.BYTE_STRING && canRepresentAsByteString(value)) { result.type = RuntimeScalarType.BYTE_STRING; } return result; } + private static boolean canRepresentAsByteString(String value) { + for (int i = 0; i < value.length(); i++) { + if (value.charAt(i) > 0xFF) { + return false; + } + } + return true; + } + /** * Escapes all non-alphanumeric characters in the string representation of the given {@link RuntimeScalar}. * @@ -142,6 +151,17 @@ public static RuntimeScalar quotemeta(RuntimeScalar runtimeScalar) { * @return a {@link RuntimeScalar} with the case-folded string */ public static RuntimeScalar fc(RuntimeScalar runtimeScalar) { + if (runtimeScalar.type == RuntimeScalarType.BYTE_STRING) { + return caseFoldBytesAsciiOnly(runtimeScalar); + } + return fcUnicode(runtimeScalar); + } + + /** + * Performs full Unicode case folding, including Latin-1 byte strings. + * This is used under the unicode_strings feature. + */ + public static RuntimeScalar fcUnicode(RuntimeScalar runtimeScalar) { String str = runtimeScalar.toString(); // Perform full Unicode case folding using ICU4J CaseMap // Note: We do NOT use NFKC normalization because Perl's fc() preserves @@ -174,6 +194,17 @@ public static RuntimeScalar fcBytes(RuntimeScalar runtimeScalar) { * @return a {@link RuntimeScalar} with the lowercase string */ public static RuntimeScalar lc(RuntimeScalar runtimeScalar) { + if (runtimeScalar.type == RuntimeScalarType.BYTE_STRING) { + return caseFoldBytesAsciiOnly(runtimeScalar); + } + return lcUnicode(runtimeScalar); + } + + /** + * Converts to lowercase using Unicode semantics, including Latin-1 byte strings. + * This is used under the unicode_strings feature. + */ + public static RuntimeScalar lcUnicode(RuntimeScalar runtimeScalar) { // Convert the string to lowercase using ICU4J for proper Unicode handling String str = UCharacter.toLowerCase(runtimeScalar.toString()); return makeStringResult(str, runtimeScalar); @@ -196,6 +227,17 @@ public static RuntimeScalar lcBytes(RuntimeScalar runtimeScalar) { * @return a {@link RuntimeScalar} with the first character in lowercase */ public static RuntimeScalar lcfirst(RuntimeScalar runtimeScalar) { + if (runtimeScalar.type == RuntimeScalarType.BYTE_STRING) { + return lcfirstBytes(runtimeScalar); + } + return lcfirstUnicode(runtimeScalar); + } + + /** + * Converts the first character to lowercase using Unicode semantics. + * This is used under the unicode_strings feature. + */ + public static RuntimeScalar lcfirstUnicode(RuntimeScalar runtimeScalar) { String str = runtimeScalar.toString(); // Check if the string is empty if (str.isEmpty()) { @@ -218,6 +260,17 @@ public static RuntimeScalar lcfirst(RuntimeScalar runtimeScalar) { * @return a {@link RuntimeScalar} with the uppercase string */ public static RuntimeScalar uc(RuntimeScalar runtimeScalar) { + if (runtimeScalar.type == RuntimeScalarType.BYTE_STRING) { + return uppercaseBytesAsciiOnly(runtimeScalar); + } + return ucUnicode(runtimeScalar); + } + + /** + * Converts to uppercase using Unicode semantics, including Latin-1 byte strings. + * This is used under the unicode_strings feature. + */ + public static RuntimeScalar ucUnicode(RuntimeScalar runtimeScalar) { // Convert the string to uppercase using ICU4J for proper Unicode handling String str = UCharacter.toUpperCase(runtimeScalar.toString()); return makeStringResult(str, runtimeScalar); @@ -232,10 +285,21 @@ public static RuntimeScalar uc(RuntimeScalar runtimeScalar) { * @return a {@link RuntimeScalar} with the first character in titlecase */ public static RuntimeScalar ucfirst(RuntimeScalar runtimeScalar) { + if (runtimeScalar.type == RuntimeScalarType.BYTE_STRING) { + return ucfirstBytes(runtimeScalar); + } + return ucfirstUnicode(runtimeScalar); + } + + /** + * Converts the first character to titlecase using Unicode semantics. + * This is used under the unicode_strings feature. + */ + public static RuntimeScalar ucfirstUnicode(RuntimeScalar runtimeScalar) { String str = runtimeScalar.toString(); // Check if the string is empty if (str.isEmpty()) { - return new RuntimeScalar(str); + return makeStringResult(str, runtimeScalar); } int firstCodePoint = str.codePointAt(0); int charCount = Character.charCount(firstCodePoint); diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/Encode.java b/src/main/java/org/perlonjava/runtime/perlmodule/Encode.java index 3485cdc8e..351fdfd98 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/Encode.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/Encode.java @@ -583,6 +583,7 @@ private static RuntimeScalar encodeWithCharset(String string, Charset charset, S StringBuilder result = new StringBuilder(); CharBuffer input = CharBuffer.wrap(string); ByteBuffer output = ByteBuffer.allocate((int) (string.length() * encoder.maxBytesPerChar()) + 4); + boolean stoppedOnError = false; while (input.hasRemaining()) { encoder.reset(); @@ -600,6 +601,7 @@ private static RuntimeScalar encodeWithCharset(String string, Charset charset, S int badChar = input.get(); // consume the bad character String replacement = handleEncodingError(check, codeRef, badChar, encodingName, true); if (replacement == null) { + stoppedOnError = true; // FB_QUIET: stop processing, put back unprocessed chars if ((check & LEAVE_SRC) == 0 && srcArgs != null && srcArgs.size() > srcArgIndex) { StringBuilder remaining = new StringBuilder(); @@ -635,7 +637,7 @@ private static RuntimeScalar encodeWithCharset(String string, Charset charset, S resultScalar.value = result.toString(); // Update source if LEAVE_SRC is not set (remove processed chars) - if ((check & LEAVE_SRC) == 0 && (check & RETURN_ON_ERR) == 0 + if ((check & LEAVE_SRC) == 0 && !stoppedOnError && srcArgs != null && srcArgs.size() > srcArgIndex) { srcArgs.get(srcArgIndex).set(""); } @@ -707,6 +709,7 @@ private static RuntimeScalar decodeWithCharset(byte[] bytes, Charset charset, St ByteBuffer input = ByteBuffer.wrap(bytes); CharBuffer output = CharBuffer.allocate(bytes.length * 2 + 4); StringBuilder result = new StringBuilder(); + boolean stoppedOnError = false; while (input.hasRemaining()) { decoder.reset(); @@ -724,6 +727,7 @@ private static RuntimeScalar decodeWithCharset(byte[] bytes, Charset charset, St } String replacement = handleEncodingError(check, codeRef, badBytes, encodingName, false); if (replacement == null) { + stoppedOnError = true; // FB_QUIET: stop processing if ((check & LEAVE_SRC) == 0 && srcArgs != null && srcArgs.size() > srcArgIndex) { byte[] remaining = new byte[input.remaining() + malformedLen]; @@ -751,7 +755,7 @@ private static RuntimeScalar decodeWithCharset(byte[] bytes, Charset charset, St result.append(output); // Update source if LEAVE_SRC is not set - if ((check & LEAVE_SRC) == 0 && (check & RETURN_ON_ERR) == 0 + if ((check & LEAVE_SRC) == 0 && !stoppedOnError && srcArgs != null && srcArgs.size() > srcArgIndex) { srcArgs.get(srcArgIndex).set(""); } diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/storable/Hooks.java b/src/main/java/org/perlonjava/runtime/perlmodule/storable/Hooks.java index 12f4d7949..b3f149129 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/storable/Hooks.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/storable/Hooks.java @@ -4,6 +4,8 @@ import java.util.ArrayList; import java.util.List; +import org.perlonjava.runtime.operators.ModuleOperators; +import org.perlonjava.runtime.runtimetypes.GlobalVariable; import org.perlonjava.runtime.mro.InheritanceResolver; import org.perlonjava.runtime.operators.ReferenceOperators; import org.perlonjava.runtime.runtimetypes.RuntimeArray; @@ -130,6 +132,8 @@ public static RuntimeScalar readHook(StorableReader r, StorableContext c) { } } + requireClassForHook(classname); + // Step 6a: if the class defines STORABLE_attach, prefer that over // STORABLE_thaw. The attach hook is a CLASS method that returns a // fully-formed object; we replace the placeholder with the @@ -253,6 +257,19 @@ private static String readClassname(StorableContext c, int flags) { return name; } + private static void requireClassForHook(String classname) { + if (classname == null || classname.isEmpty()) return; + if (classname.equals("main") || classname.equals("UNIVERSAL")) return; + String filename = classname.replace("::", "/").replace("'", "/") + ".pm"; + RuntimeHash inc = GlobalVariable.getGlobalHash("main::INC"); + if (inc.exists(new RuntimeScalar(filename)).getBoolean()) return; + try { + ModuleOperators.require(new RuntimeScalar(filename)); + } catch (Exception ignored) { + // Some blessed data-only packages have no loadable module. + } + } + private static void invokeThaw(String classname, RuntimeScalar self, String frozen, List extraRefs) { RuntimeScalar thawMethod = InheritanceResolver.findMethodInHierarchy( diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/storable/Refs.java b/src/main/java/org/perlonjava/runtime/perlmodule/storable/Refs.java index 1339dc4f7..a4606877c 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/storable/Refs.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/storable/Refs.java @@ -1,7 +1,5 @@ package org.perlonjava.runtime.perlmodule.storable; -import org.perlonjava.runtime.runtimetypes.RuntimeArray; -import org.perlonjava.runtime.runtimetypes.RuntimeHash; import org.perlonjava.runtime.runtimetypes.RuntimeScalar; import org.perlonjava.runtime.runtimetypes.WeakRefRegistry; import org.perlonjava.runtime.runtimetypes.RuntimeScalarType; @@ -154,15 +152,12 @@ public static RuntimeScalar readWeakOverload(StorableReader r, StorableContext c */ private static void installReferent(RuntimeScalar refScalar, RuntimeScalar referent, boolean bodyWasBare) { if (bodyWasBare) { - // Bare-container body: collapse the redundant SX_REF wrap. - // The fresh reference we attach must point at the SAME - // underlying RuntimeArray/RuntimeHash as `referent` so - // mutations through either alias (or backref tags pointing - // at the seen-table entry of the container) stay coherent. - if (referent.value instanceof RuntimeArray arr) { - refScalar.set(arr.createReference()); - } else if (referent.value instanceof RuntimeHash hash) { - refScalar.set(hash.createReference()); + // Bare body: collapse the redundant SX_REF wrap. The fresh + // reference we attach must preserve the SAME reference shape + // and underlying referent, so mutations and blessing remain + // coherent for arrays, hashes, scalar hooks, and backrefs. + if (RuntimeScalarType.isReference(referent)) { + refScalar.set(referent); } else { // Bare flag set but not a recognised container — fall // back to a fresh scalar reference. Defensive; should diff --git a/src/main/java/org/perlonjava/runtime/regex/RegexQuoteMeta.java b/src/main/java/org/perlonjava/runtime/regex/RegexQuoteMeta.java index 12264e054..d63e060bf 100644 --- a/src/main/java/org/perlonjava/runtime/regex/RegexQuoteMeta.java +++ b/src/main/java/org/perlonjava/runtime/regex/RegexQuoteMeta.java @@ -1,17 +1,50 @@ package org.perlonjava.runtime.regex; +import org.perlonjava.runtime.operators.WarnDie; +import org.perlonjava.runtime.runtimetypes.RuntimeScalar; + public class RegexQuoteMeta { public static String escapeQ(String s) { StringBuilder sb = new StringBuilder(); int len = s.length(); int offset = 0; + boolean inCharClass = false; + boolean charClassFirst = false; + boolean escaped = false; // Predefined set of regex metacharacters final String regexMetacharacters = "-.+*?[](){}^$|\\"; while (offset < len) { char c = s.charAt(offset); + if (escaped) { + if (inCharClass && (c == 'Q' || c == 'E')) { + warnUnrecognizedCharClassEscape(c); + sb.append(c); + if (charClassFirst && c != '^') { + charClassFirst = false; + } + escaped = false; + offset++; + continue; + } + sb.append('\\'); + sb.append(c); + escaped = false; + offset++; + continue; + } + if (c == '\\' && offset + 1 < len && s.charAt(offset + 1) == 'Q') { + if (inCharClass) { + warnUnrecognizedCharClassEscape('Q'); + sb.append('Q'); + if (charClassFirst) { + charClassFirst = false; + } + offset += 2; + continue; + } // Skip past \Q offset += 2; @@ -32,11 +65,34 @@ public static String escapeQ(String s) { offset++; } } else { + if (c == '\\') { + escaped = true; + offset++; + continue; + } + if (c == '[' && !inCharClass) { + inCharClass = true; + charClassFirst = true; + } else if (c == ']' && inCharClass && !charClassFirst) { + inCharClass = false; + } else if (inCharClass && charClassFirst && c != '^') { + charClassFirst = false; + } sb.append(c); offset++; } } + if (escaped) { + sb.append('\\'); + } return sb.toString(); } + + private static void warnUnrecognizedCharClassEscape(char c) { + WarnDie.warn( + new RuntimeScalar("Unrecognized escape \\" + c + + " in character class passed through in regex\n"), + new RuntimeScalar("")); + } } diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java index 20e82bcb6..f8b3f6f50 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java @@ -2011,13 +2011,18 @@ private static RuntimeList callCachedInner(int callsiteId, } } - // Prefer PerlSubroutine interface over MethodHandle - if (cachedCode.subroutine != null) { - return cachedCode.subroutine.apply(a, callContext); - } else if (cachedCode.isStatic) { - return (RuntimeList) cachedCode.methodHandle.invoke(a, callContext); - } else { - return (RuntimeList) cachedCode.methodHandle.invoke(cachedCode.codeObject, a, callContext); + MortalList.pushMark(); + try { + // Prefer PerlSubroutine interface over MethodHandle + if (cachedCode.subroutine != null) { + return cachedCode.subroutine.apply(a, callContext); + } else if (cachedCode.isStatic) { + return (RuntimeList) cachedCode.methodHandle.invoke(a, callContext); + } else { + return (RuntimeList) cachedCode.methodHandle.invoke(cachedCode.codeObject, a, callContext); + } + } finally { + MortalList.popMark(); } } catch (Throwable e) { if (e instanceof RuntimeException) throw (RuntimeException) e; diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java index 6e662aa20..339009887 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java @@ -1261,10 +1261,11 @@ private RuntimeScalar setLargeRefCounted(RuntimeScalar value) { // Update ownership: this scalar now owns a refCount iff we incremented. this.refCountOwned = newOwned; - // Flush deferred mortal decrements. This is the primary flush point for - // the mortal mechanism — called after every assignment involving references. + // Flush deferred mortal decrements from the current function scope. + // Entries below the current MortalList mark belong to the caller, and + // must survive through nested calls such as chained AUTOLOAD dispatch. // Cost when MortalList.active is false: one boolean check (trivially predicted). - MortalList.flush(); + MortalList.flushAboveMark(); return this; } diff --git a/src/test/resources/unit/autoload.t b/src/test/resources/unit/autoload.t index 7e0c16fa8..428663ec9 100644 --- a/src/test/resources/unit/autoload.t +++ b/src/test/resources/unit/autoload.t @@ -3,7 +3,7 @@ use warnings; package X; -use Test::More tests => 5; +use Test::More tests => 6; sub x { callme(@_); @@ -53,4 +53,30 @@ b(789); "inherited AUTOLOAD works for multiple method calls"); } +# Regression: cached AUTOLOAD method calls must keep method-chain +# temporaries alive until the AUTOLOAD body has read $AUTOLOAD. Without +# the cached-call mortal boundary, the temporary invocant's DESTROY +# AUTOLOAD overwrote $AUTOLOAD between dispatch and entry. +{ + package ChainAutoloadBase; + sub missing; + sub AUTOLOAD { + our $AUTOLOAD; + my $method = $AUTOLOAD; + $method =~ s/.*:://; + return if $method eq 'DESTROY'; + return "$method:" . ref($_[0]); + } + + package ChainAutoloadChild; + our @ISA = ("ChainAutoloadBase"); + sub new { bless {}, shift } + + package main; + my $first = ChainAutoloadChild->new->missing; + my $second = ChainAutoloadChild->new->missing; + X::is($second, "missing:ChainAutoloadChild", + "cached inherited AUTOLOAD resets \$AUTOLOAD before chained call body"); +} + 1; diff --git a/src/test/resources/unit/encode_fb_quiet.t b/src/test/resources/unit/encode_fb_quiet.t new file mode 100644 index 000000000..5e371b112 --- /dev/null +++ b/src/test/resources/unit/encode_fb_quiet.t @@ -0,0 +1,20 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use Test::More tests => 6; +use Encode qw(encode decode find_encoding FB_QUIET LEAVE_SRC); + +my $enc_src = "abc"; +my $encoded = encode("UTF-8", $enc_src, FB_QUIET); +is($encoded, "abc", "encode FB_QUIET returns encoded bytes"); +is($enc_src, "", "encode FB_QUIET consumes source on success"); + +my $dec_src = "abc"; +my $decoded = decode("UTF-8", $dec_src, FB_QUIET); +is($decoded, "abc", "decode FB_QUIET returns decoded string"); +is($dec_src, "", "decode FB_QUIET consumes source on success"); + +my $leave_src = "abc"; +is(decode("UTF-8", $leave_src, FB_QUIET | LEAVE_SRC), "abc", + "decode FB_QUIET with LEAVE_SRC still decodes"); +is($leave_src, "abc", "LEAVE_SRC preserves source"); diff --git a/src/test/resources/unit/operator_overrides.t b/src/test/resources/unit/operator_overrides.t index c9c670df5..524a58f13 100644 --- a/src/test/resources/unit/operator_overrides.t +++ b/src/test/resources/unit/operator_overrides.t @@ -189,4 +189,20 @@ subtest 'sleep operator override' => sub { is_deeply(\@sleep_args, [5], 'sleep override saw the right argument'); }; +subtest 'gethostbyname operator override' => sub { + plan tests => 2; + + BEGIN { + *CORE::GLOBAL::gethostbyname = sub { + die "unexpected list context" if wantarray; + return "mocked:$_[0]"; + }; + } + + is(gethostbyname("www.perl.org."), "mocked:www.perl.org.", + 'gethostbyname overridden globally'); + ok(defined CORE::gethostbyname("localhost"), + 'CORE::gethostbyname still bypasses override'); +}; + done_testing(); diff --git a/src/test/resources/unit/regex/regex_charclass.t b/src/test/resources/unit/regex/regex_charclass.t index 4e4dd588f..731f90069 100644 --- a/src/test/resources/unit/regex/regex_charclass.t +++ b/src/test/resources/unit/regex/regex_charclass.t @@ -60,4 +60,38 @@ subtest 'bracketed \c? matches DEL only' => sub { ok("color" =~ /colou?r/, "? quantifier still works (absent)"); }; +subtest 'bracketed \Q...\E applies quotemeta' => sub { + my @warnings; + local $SIG{__WARN__} = sub { push @warnings, @_ }; + + my $re = qr/[\Qabc\E]/; + ok("a" =~ $re, '\Q...\E contents match in a character class'); + ok("b" =~ $re, 'middle contents match'); + ok("c" =~ $re, 'last contents match'); + ok("Q" !~ $re, '\Q marker is not a literal Q'); + ok("E" !~ $re, '\E marker is not a literal E'); + ok("d" !~ $re, 'characters outside the class do not match'); + + my $bracket = qr/[a\Q]\E]c/; + ok("ac" =~ $bracket, 'plain character still matches'); + ok("]c" =~ $bracket, 'quoted closing bracket remains inside the class'); + is(join("", @warnings), "", 'no warnings are emitted'); +}; + +subtest 'interpolated bracketed \Q and \E are literal with warnings' => sub { + my @warnings; + local $SIG{__WARN__} = sub { push @warnings, @_ }; + + my $chars = '\Qabc\E'; + my $re = qr/[$chars]/; + ok("Q" =~ $re, 'interpolated \Q is passed through as literal Q'); + ok("E" =~ $re, 'interpolated \E is passed through as literal E'); + ok("a" =~ $re, 'interpolated character class contents still match'); + ok("d" !~ $re, 'characters outside the interpolated class do not match'); + like(join("", @warnings), qr/Unrecognized escape \\Q in character class passed through in regex/, + 'interpolated \Q warning is emitted'); + like(join("", @warnings), qr/Unrecognized escape \\E in character class passed through in regex/, + 'interpolated \E warning is emitted'); +}; + done_testing(); diff --git a/src/test/resources/unit/storable.t b/src/test/resources/unit/storable.t index 7381df039..e6d6f5f42 100644 --- a/src/test/resources/unit/storable.t +++ b/src/test/resources/unit/storable.t @@ -7,7 +7,7 @@ use File::Temp qw(tempfile); use Storable qw(store retrieve nstore freeze thaw nfreeze dclone); # Test plan -plan tests => 10; +plan tests => 11; subtest 'Basic scalar serialization' => sub { plan tests => 6; @@ -319,4 +319,36 @@ subtest 'STORABLE_freeze nested hook cookie round-trip (binary-safe)' => sub { isa_ok($thawed->{inner}, '_StTestInner', 'inner hooked object survives'); }; +subtest 'STORABLE_freeze scalar hook keeps one ref level' => sub { + plan tests => 3; + + package _StScalarHook; + use overload '""' => sub { ${ $_[0] } }, fallback => 1; + + sub new { + my ($class, $value) = @_; + return bless \$value, $class; + } + + sub STORABLE_freeze { + my ($self, $cloning) = @_; + return $$self; + } + + sub STORABLE_thaw { + my ($self, $cloning, $ice) = @_; + $$self = $ice; + } + + package main; + + my $frozen = freeze([ _StScalarHook->new("http://search.cpan.org") ]); + my $thawed = thaw($frozen); + my $obj = $thawed->[0]; + + is(ref($obj), '_StScalarHook', 'scalar hook object class survives'); + is($$obj, 'http://search.cpan.org', 'scalar hook referent survives'); + is("$obj", 'http://search.cpan.org', 'scalar hook overload sees one ref level'); +}; + done_testing(); diff --git a/src/test/resources/unit/utf8_pragma.t b/src/test/resources/unit/utf8_pragma.t index 5e3d733ba..513b2a290 100644 --- a/src/test/resources/unit/utf8_pragma.t +++ b/src/test/resources/unit/utf8_pragma.t @@ -179,11 +179,18 @@ subtest 'Escape sequences vs source encoding' => sub { my $hex = "\x{100}"; # Creates Unicode string is(length($hex), 1, 'Unicode escape creates single character with utf8'); is(ord($hex), 256, 'Unicode escape value with utf8'); - + + my $byte_escape = "B\xFCcher"; + ok(!utf8::is_utf8($byte_escape), 'fixed byte escape is not UTF-8 flagged under utf8'); + is(lc("\xC3\xBCri"), "\xC3\xBCri", 'lc preserves non-ASCII bytes from fixed byte escapes'); + # Literal source is treated as characters my $literal = "Ā"; is(length($literal), 1, 'Literal Ā is 1 character with utf8'); is(ord($literal), 256, 'Literal Ā has correct value with utf8'); + + my $latin1_literal = "ü"; + ok(utf8::is_utf8($latin1_literal), 'literal non-ASCII source is UTF-8 flagged under utf8'); } }; @@ -257,4 +264,3 @@ subtest 'Octet vs character semantics' => sub { }; done_testing(); -