Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1384,6 +1384,8 @@ public void visit(StringNode node) {
short opcode;
if (node.isVString) {
opcode = Opcodes.LOAD_VSTRING;
} else if (node.forceByteString) {
opcode = Opcodes.LOAD_BYTE_STRING;
} else if (emitterContext != null && emitterContext.symbolTable != null
&& !emitterContext.symbolTable.isStrictOptionEnabled(Strict.HINT_UTF8)
&& !emitterContext.compilerOptions.isUnicodeSource) {
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/org/perlonjava/backend/jvm/EmitLiteral.java
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,8 @@ public static void emitString(EmitterContext ctx, StringNode node) {
return;
}

if (!ctx.symbolTable.isStrictOptionEnabled(HINT_UTF8) && !ctx.compilerOptions.isUnicodeSource) {
if (node.forceByteString
|| (!ctx.symbolTable.isStrictOptionEnabled(HINT_UTF8) && !ctx.compilerOptions.isUnicodeSource)) {
// Under `no utf8` - create an octet string, unless it contains wide characters (> 255)
// Wide characters (like \x{100}) force the string to be UTF-8 even without `use utf8`
boolean hasWideChars = false;
Expand Down
17 changes: 16 additions & 1 deletion src/main/java/org/perlonjava/frontend/astnode/StringNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,13 @@ public class StringNode extends AbstractNode {
*/
public final boolean isVString;

/**
* Force this literal to be emitted as a byte string even in a {@code use utf8}
* scope. Perl keeps ASCII and fixed-byte escapes such as "\xFC" unupgraded;
* actual non-ASCII source characters still use normal UTF-8 string emission.
*/
public final boolean forceByteString;

/**
* Constructs a new StringNode with the specified string value.
*
Expand All @@ -29,6 +36,7 @@ public StringNode(String value, int tokenIndex) {
this.value = value;
this.tokenIndex = tokenIndex;
this.isVString = false;
this.forceByteString = false;
}

/**
Expand All @@ -42,6 +50,14 @@ public StringNode(String value, boolean isVString, int tokenIndex) {
this.value = value;
this.tokenIndex = tokenIndex;
this.isVString = isVString;
this.forceByteString = false;
}

/**
 * Constructs a new StringNode with every flag stated explicitly.
 *
 * @param value           the string value of the literal
 * @param isVString       whether the literal is a v-string
 * @param forceByteString whether the literal must be emitted as a byte string
 *                        even inside a {@code use utf8} scope
 * @param tokenIndex      the index of the token in the source
 */
public StringNode(String value, boolean isVString, boolean forceByteString, int tokenIndex) {
    // Flags first, then payload; the four assignments are independent.
    this.isVString = isVString;
    this.forceByteString = forceByteString;
    this.value = value;
    this.tokenIndex = tokenIndex;
}

/**
Expand All @@ -67,4 +83,3 @@ public void accept(Visitor visitor) {
visitor.visit(this);
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public class ParserTables {
"die", "do", "dump",
"exec", "exit",
"fork",
"getpwuid", "glob",
"gethostbyname", "getpwuid", "glob",
"hex",
"kill",
"oct", "open",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,10 @@ private void parseDoubleQuotedEscapesRegex() {
switch (escape) {
// Case modification end marker
case "E" -> {
if (isInsideRegexCharClass()) {
appendToCurrentSegment("\\E");
return;
}
// Flush any pending literal text
flushCurrentSegment();
// Pop and apply the most recent case modifier
Expand All @@ -436,6 +440,10 @@ private void parseDoubleQuotedEscapesRegex() {

// Quotemeta modifier
case "Q" -> {
if (isInsideRegexCharClass()) {
appendToCurrentSegment("\\Q");
return;
}
flushCurrentSegment();
caseModifiers.push(new CaseModifier("Q", false));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import static org.perlonjava.frontend.parser.ParseBlock.parseBlock;
import static org.perlonjava.frontend.parser.Variable.parseArrayHashAccess;
import static org.perlonjava.runtime.perlmodule.Strict.HINT_UTF8;

/**
* Base class for parsing strings with segments and variable interpolation.
Expand Down Expand Up @@ -74,6 +75,9 @@ public abstract class StringSegmentParser {
* Buffer for accumulating literal text segments
*/
protected final StringBuilder currentSegment;
private boolean currentSegmentHasSourceNonAscii = false;
private boolean inRegexCharClass = false;
private boolean regexCharClassFirst = false;
/**
* List of AST nodes representing string segments (literals and interpolated expressions)
*/
Expand Down Expand Up @@ -128,6 +132,35 @@ protected void appendToCurrentSegment(String text) {
currentSegment.append(text);
}

/**
 * Appends text that was taken verbatim from the source to the current segment
 * and updates the per-character bookkeeping for it.
 *
 * <p>Each character is fed to the regex character-class tracker, and any
 * character above the ASCII range (&gt; 127) marks the current segment as
 * containing source-level non-ASCII text.
 *
 * @param text the literal source text to append
 */
protected void appendLiteralToCurrentSegment(String text) {
    appendToCurrentSegment(text);
    for (char ch : text.toCharArray()) {
        updateRegexCharClassState(ch);
        if (ch > 0x7F) {
            currentSegmentHasSourceNonAscii = true;
        }
    }
}

/**
 * Reports whether the parser is currently inside a bracketed character class
 * of a regex.
 *
 * @return {@code true} only when a regex is being parsed and the
 *         character-class tracker says a {@code [...]} is open
 */
protected boolean isInsideRegexCharClass() {
    if (!isRegex) {
        return false;
    }
    return inRegexCharClass;
}

/**
 * Advances the bracketed-character-class tracker by one literal character.
 * Does nothing unless a regex is being parsed.
 *
 * <ul>
 *   <li>Outside a class, {@code '['} opens one and marks the "first position".</li>
 *   <li>At the first position, {@code '^'} keeps the first-position mark; any
 *       other character (including {@code ']'} and {@code '['}) clears it and
 *       is treated as a class member.</li>
 *   <li>Past the first position, {@code ']'} closes the class.</li>
 * </ul>
 *
 * <p>NOTE(review): because every {@code '^'} keeps the first-position mark, a
 * {@code ']'} after two carets (as in {@code [^^]}) is taken as a member rather
 * than the terminator; and the inner {@code ']'} of a POSIX class such as
 * {@code [[:alpha:]]} closes the class early. Confirm whether these inputs
 * reach this method and whether that matters to callers.
 *
 * @param c the literal character just appended
 */
private void updateRegexCharClassState(char c) {
    if (!isRegex) {
        return;
    }

    if (!inRegexCharClass) {
        // Only an opening bracket changes anything while outside a class.
        if (c == '[') {
            inRegexCharClass = true;
            regexCharClassFirst = true;
        }
        return;
    }

    if (regexCharClassFirst) {
        // Still at the first position: only '^' leaves the mark in place.
        if (c != '^') {
            regexCharClassFirst = false;
        }
        return;
    }

    if (c == ']') {
        inRegexCharClass = false;
    }
}

/**
* Adds a string segment node to the segments list.
*
Expand All @@ -150,9 +183,28 @@ protected void addStringSegment(Node node) {
*/
protected void flushCurrentSegment() {
    // Nothing buffered: emit no node and leave all per-segment state untouched.
    if (currentSegment.isEmpty()) {
        return;
    }

    String value = currentSegment.toString();
    // Decide byte-string emission before the non-ASCII flag is reset below;
    // shouldForceByteStringLiteral reads that flag.
    boolean forceByteString = shouldForceByteStringLiteral(value);

    // Emit exactly one node per flushed segment. The older two-argument
    // StringNode emission is superseded by this call; keeping both would add
    // the same text to the segment list twice.
    addStringSegment(new StringNode(value, false, forceByteString, tokenIndex));

    // Reset the buffer and its per-segment flag for the next literal run.
    currentSegment.setLength(0);
    currentSegmentHasSourceNonAscii = false;
}

/**
 * Decides whether a flushed literal segment should be marked for byte-string
 * emission.
 *
 * <p>The answer is {@code true} only when all of the following hold:
 * <ul>
 *   <li>the {@code HINT_UTF8} strict option is enabled or the compiler options
 *       mark the source as Unicode;</li>
 *   <li>the segment received no source-level non-ASCII characters;</li>
 *   <li>no character of {@code value} is above 255.</li>
 * </ul>
 *
 * @param value the text of the segment being flushed
 * @return whether the resulting node should force byte-string emission
 */
private boolean shouldForceByteStringLiteral(String value) {
    boolean utf8Scope = ctx.symbolTable.isStrictOptionEnabled(HINT_UTF8)
            || ctx.compilerOptions.isUnicodeSource;
    if (!utf8Scope || currentSegmentHasSourceNonAscii) {
        return false;
    }
    // Wide characters (> 255) cannot be represented in a byte string.
    return value.chars().noneMatch(ch -> ch > 255);
}

/**
Expand Down Expand Up @@ -639,7 +691,7 @@ public Node parse() {
continue;
} else {
// No heredocs pending, append the newline normally
appendToCurrentSegment(token.text);
appendLiteralToCurrentSegment(token.text);
}
continue;
}
Expand All @@ -650,7 +702,7 @@ public Node parse() {
}

// Default: append literal text to current segment
appendToCurrentSegment(text);
appendLiteralToCurrentSegment(text);
}

if (CompilerOptions.DEBUG_ENABLED) ctx.logDebug("StringSegmentParser.parse: Finished parsing, segments count: " + segments.size());
Expand Down Expand Up @@ -1337,4 +1389,4 @@ void handleUnicodeNameEscape() {
appendToCurrentSegment("N{" + nameBuilder);
}
}
}
}
10 changes: 8 additions & 2 deletions src/main/java/org/perlonjava/runtime/operators/Directory.java
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,14 @@ public static RuntimeScalar chdir(RuntimeScalar runtimeScalar) {
File absoluteDir = RuntimeIO.resolveFile(dirName);

if (absoluteDir.exists() && absoluteDir.isDirectory()) {
// Normalize the path to remove redundant . and .. components
System.setProperty("user.dir", absoluteDir.toPath().normalize().toString());
try {
// Match getcwd(3): collapse . and .., and resolve symlinks like
// macOS /var -> /private/var after chdir().
System.setProperty("user.dir", absoluteDir.getCanonicalPath());
} catch (IOException e) {
handleIOException(e, "chdir failed");
return scalarFalse;
}
return scalarTrue;
} else {
// Set errno to ENOENT (No such file or directory)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ public static RuntimeScalar quotemeta(RuntimeScalar runtimeScalar) {
* @return a {@link RuntimeScalar} with the case-folded string
*/
public static RuntimeScalar fc(RuntimeScalar runtimeScalar) {
if (runtimeScalar.type == RuntimeScalarType.BYTE_STRING) {
return caseFoldBytesAsciiOnly(runtimeScalar);
}
String str = runtimeScalar.toString();
// Perform full Unicode case folding using ICU4J CaseMap
// Note: We do NOT use NFKC normalization because Perl's fc() preserves
Expand Down Expand Up @@ -174,6 +177,9 @@ public static RuntimeScalar fcBytes(RuntimeScalar runtimeScalar) {
* @return a {@link RuntimeScalar} with the lowercase string
*/
public static RuntimeScalar lc(RuntimeScalar runtimeScalar) {
if (runtimeScalar.type == RuntimeScalarType.BYTE_STRING) {
return caseFoldBytesAsciiOnly(runtimeScalar);
}
// Convert the string to lowercase using ICU4J for proper Unicode handling
String str = UCharacter.toLowerCase(runtimeScalar.toString());
return makeStringResult(str, runtimeScalar);
Expand All @@ -196,6 +202,9 @@ public static RuntimeScalar lcBytes(RuntimeScalar runtimeScalar) {
* @return a {@link RuntimeScalar} with the first character in lowercase
*/
public static RuntimeScalar lcfirst(RuntimeScalar runtimeScalar) {
if (runtimeScalar.type == RuntimeScalarType.BYTE_STRING) {
return lcfirstBytes(runtimeScalar);
}
String str = runtimeScalar.toString();
// Check if the string is empty
if (str.isEmpty()) {
Expand All @@ -218,6 +227,9 @@ public static RuntimeScalar lcfirst(RuntimeScalar runtimeScalar) {
* @return a {@link RuntimeScalar} with the uppercase string
*/
public static RuntimeScalar uc(RuntimeScalar runtimeScalar) {
if (runtimeScalar.type == RuntimeScalarType.BYTE_STRING) {
return uppercaseBytesAsciiOnly(runtimeScalar);
}
// Convert the string to uppercase using ICU4J for proper Unicode handling
String str = UCharacter.toUpperCase(runtimeScalar.toString());
return makeStringResult(str, runtimeScalar);
Expand All @@ -232,6 +244,9 @@ public static RuntimeScalar uc(RuntimeScalar runtimeScalar) {
* @return a {@link RuntimeScalar} with the first character in titlecase
*/
public static RuntimeScalar ucfirst(RuntimeScalar runtimeScalar) {
if (runtimeScalar.type == RuntimeScalarType.BYTE_STRING) {
return ucfirstBytes(runtimeScalar);
}
String str = runtimeScalar.toString();
// Check if the string is empty
if (str.isEmpty()) {
Expand Down
8 changes: 6 additions & 2 deletions src/main/java/org/perlonjava/runtime/perlmodule/Encode.java
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,7 @@ private static RuntimeScalar encodeWithCharset(String string, Charset charset, S
StringBuilder result = new StringBuilder();
CharBuffer input = CharBuffer.wrap(string);
ByteBuffer output = ByteBuffer.allocate((int) (string.length() * encoder.maxBytesPerChar()) + 4);
boolean stoppedOnError = false;

while (input.hasRemaining()) {
encoder.reset();
Expand All @@ -600,6 +601,7 @@ private static RuntimeScalar encodeWithCharset(String string, Charset charset, S
int badChar = input.get(); // consume the bad character
String replacement = handleEncodingError(check, codeRef, badChar, encodingName, true);
if (replacement == null) {
stoppedOnError = true;
// FB_QUIET: stop processing, put back unprocessed chars
if ((check & LEAVE_SRC) == 0 && srcArgs != null && srcArgs.size() > srcArgIndex) {
StringBuilder remaining = new StringBuilder();
Expand Down Expand Up @@ -635,7 +637,7 @@ private static RuntimeScalar encodeWithCharset(String string, Charset charset, S
resultScalar.value = result.toString();

// Update source if LEAVE_SRC is not set (remove processed chars)
if ((check & LEAVE_SRC) == 0 && (check & RETURN_ON_ERR) == 0
if ((check & LEAVE_SRC) == 0 && !stoppedOnError
&& srcArgs != null && srcArgs.size() > srcArgIndex) {
srcArgs.get(srcArgIndex).set("");
}
Expand Down Expand Up @@ -707,6 +709,7 @@ private static RuntimeScalar decodeWithCharset(byte[] bytes, Charset charset, St
ByteBuffer input = ByteBuffer.wrap(bytes);
CharBuffer output = CharBuffer.allocate(bytes.length * 2 + 4);
StringBuilder result = new StringBuilder();
boolean stoppedOnError = false;

while (input.hasRemaining()) {
decoder.reset();
Expand All @@ -724,6 +727,7 @@ private static RuntimeScalar decodeWithCharset(byte[] bytes, Charset charset, St
}
String replacement = handleEncodingError(check, codeRef, badBytes, encodingName, false);
if (replacement == null) {
stoppedOnError = true;
// FB_QUIET: stop processing
if ((check & LEAVE_SRC) == 0 && srcArgs != null && srcArgs.size() > srcArgIndex) {
byte[] remaining = new byte[input.remaining() + malformedLen];
Expand Down Expand Up @@ -751,7 +755,7 @@ private static RuntimeScalar decodeWithCharset(byte[] bytes, Charset charset, St
result.append(output);

// Update source if LEAVE_SRC is not set
if ((check & LEAVE_SRC) == 0 && (check & RETURN_ON_ERR) == 0
if ((check & LEAVE_SRC) == 0 && !stoppedOnError
&& srcArgs != null && srcArgs.size() > srcArgIndex) {
srcArgs.get(srcArgIndex).set("");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import java.util.ArrayList;
import java.util.List;

import org.perlonjava.runtime.operators.ModuleOperators;
import org.perlonjava.runtime.runtimetypes.GlobalVariable;
import org.perlonjava.runtime.mro.InheritanceResolver;
import org.perlonjava.runtime.operators.ReferenceOperators;
import org.perlonjava.runtime.runtimetypes.RuntimeArray;
Expand Down Expand Up @@ -130,6 +132,8 @@ public static RuntimeScalar readHook(StorableReader r, StorableContext c) {
}
}

requireClassForHook(classname);

// Step 6a: if the class defines STORABLE_attach, prefer that over
// STORABLE_thaw. The attach hook is a CLASS method that returns a
// fully-formed object; we replace the placeholder with the
Expand Down Expand Up @@ -253,6 +257,19 @@ private static String readClassname(StorableContext c, int flags) {
return name;
}

/**
 * Best-effort load of the module that backs {@code classname}, so that its
 * hook methods can be found afterwards.
 *
 * <p>Nothing is attempted for a null or empty name, for {@code main} or
 * {@code UNIVERSAL}, or when the derived file name is already a key of
 * {@code %INC}. The file name is the class name with {@code ::} and
 * {@code '} separators turned into {@code /}, plus a {@code .pm} suffix.
 * A failing require is deliberately ignored.
 *
 * @param classname the Perl package name read from the stream; may be null
 */
private static void requireClassForHook(String classname) {
    boolean nothingToLoad = classname == null
            || classname.isEmpty()
            || classname.equals("main")
            || classname.equals("UNIVERSAL");
    if (nothingToLoad) {
        return;
    }

    String modulePath = classname.replace("::", "/").replace("'", "/") + ".pm";

    RuntimeHash loadedModules = GlobalVariable.getGlobalHash("main::INC");
    boolean alreadyLoaded = loadedModules.exists(new RuntimeScalar(modulePath)).getBoolean();
    if (alreadyLoaded) {
        return;
    }

    try {
        ModuleOperators.require(new RuntimeScalar(modulePath));
    } catch (Exception ignored) {
        // Some blessed data-only packages have no loadable module.
    }
}

private static void invokeThaw(String classname, RuntimeScalar self,
String frozen, List<RuntimeScalar> extraRefs) {
RuntimeScalar thawMethod = InheritanceResolver.findMethodInHierarchy(
Expand Down
17 changes: 6 additions & 11 deletions src/main/java/org/perlonjava/runtime/perlmodule/storable/Refs.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
package org.perlonjava.runtime.perlmodule.storable;

import org.perlonjava.runtime.runtimetypes.RuntimeArray;
import org.perlonjava.runtime.runtimetypes.RuntimeHash;
import org.perlonjava.runtime.runtimetypes.RuntimeScalar;
import org.perlonjava.runtime.runtimetypes.WeakRefRegistry;
import org.perlonjava.runtime.runtimetypes.RuntimeScalarType;
Expand Down Expand Up @@ -154,15 +152,12 @@ public static RuntimeScalar readWeakOverload(StorableReader r, StorableContext c
*/
private static void installReferent(RuntimeScalar refScalar, RuntimeScalar referent, boolean bodyWasBare) {
if (bodyWasBare) {
// Bare-container body: collapse the redundant SX_REF wrap.
// The fresh reference we attach must point at the SAME
// underlying RuntimeArray/RuntimeHash as `referent` so
// mutations through either alias (or backref tags pointing
// at the seen-table entry of the container) stay coherent.
if (referent.value instanceof RuntimeArray arr) {
refScalar.set(arr.createReference());
} else if (referent.value instanceof RuntimeHash hash) {
refScalar.set(hash.createReference());
// Bare body: collapse the redundant SX_REF wrap. The fresh
// reference we attach must preserve the SAME reference shape
// and underlying referent, so mutations and blessing remain
// coherent for arrays, hashes, scalar hooks, and backrefs.
if (RuntimeScalarType.isReference(referent)) {
refScalar.set(referent);
} else {
// Bare flag set but not a recognised container — fall
// back to a fresh scalar reference. Defensive; should
Expand Down
Loading
Loading