Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,23 @@ PALTEST(locale_info_MultiByteToWideChar_test4_paltest_multibytetowidechar_test4,
free(wideBuffer);
}

{
// Regression check: convert one non-ASCII character from UTF-8 to UTF-16.
//
// U+6F22 — code unit > U+00FF catches long-code-path bugs that move
// only the low byte instead of performing a full 16-bit byte swap.
// E6 BC A2 is the three-byte UTF-8 encoding of U+6F22.
const char utf8[] = "\xE6\xBC\xA2";
WCHAR wide[1] = { 0 };
// sizeof(utf8) - 1 drops the literal's terminating NUL, so exactly the three
// encoded bytes are passed in; the destination has room for one code unit.
// Flags == 0 requests no special byte-order handling.
// NOTE(review): the return value is presumably the number of UTF-16 code
// units produced — confirm against the minipal declaration.
size_t n = minipal_convert_utf8_to_utf16(utf8, sizeof(utf8) - 1, (CHAR16_T*)wide, 1, 0);
// Expect a single code unit whose natively-read value is 0x6F22.
if (n != 1 || wide[0] != 0x6F22)
Fail("utf8->utf16 produced 0x%04x (n=%zu)\n", wide[0], n);

#if BIGENDIAN
// Big-endian builds only: repeat the conversion with
// MINIPAL_TREAT_AS_LITTLE_ENDIAN. Both bytes of the code unit must be
// swapped, so reading the output as a native 16-bit word yields 0x226F.
// The output slot is cleared first so a stale 0x6F22 cannot mask a failure.
wide[0] = 0;
n = minipal_convert_utf8_to_utf16(utf8, sizeof(utf8) - 1, (CHAR16_T*)wide, 1, MINIPAL_TREAT_AS_LITTLE_ENDIAN);
if (n != 1 || wide[0] != 0x226F)
Fail("treat-as-LE utf8->utf16 produced 0x%04x (n=%zu)\n", wide[0], n);
#endif
}

#if BIGENDIAN
{
const char* ascii = "ABCDEFGHIJKLMNOPQRSTUVWXYZ123456";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,27 @@ PALTEST(locale_info_WideCharToMultiByte_test5_paltest_widechartomultibyte_test5,
free(utf8Buffer);
}

{
// Regression check: convert one non-ASCII character from UTF-16 to UTF-8.
//
// U+6F22 — code unit > U+00FF catches long-code-path bugs that move
// only the low byte instead of performing a full 16-bit byte swap.
const WCHAR srcNative[1] = { 0x6F22 };
// Four zeroed bytes: room for the three expected UTF-8 bytes plus one spare.
CHAR utf8[4] = { 0 };
// Flags == 0: the source code unit is supplied in native byte order.
// NOTE(review): the return value is presumably the number of UTF-8 bytes
// produced — confirm against the minipal declaration.
size_t n = minipal_convert_utf16_to_utf8((const CHAR16_T*)srcNative, 1, utf8, sizeof(utf8), 0);
// Expect exactly E6 BC A2, the three-byte UTF-8 encoding of U+6F22.
if (n != 3 || memcmp(utf8, "\xE6\xBC\xA2", 3) != 0)
Fail("utf16->utf8 produced %02x %02x %02x (n=%zu)\n",
(unsigned char)utf8[0], (unsigned char)utf8[1], (unsigned char)utf8[2], n);

#if BIGENDIAN
// Big-endian builds only: feed the same character as little-endian data and
// pass MINIPAL_TREAT_AS_LITTLE_ENDIAN; the UTF-8 output must be unchanged.
// Stored little-endian (22 6F) reads as BE word 0x226F.
const WCHAR srcLE[1] = { 0x226F };
// Clear the previous result so leftover bytes cannot satisfy the memcmp.
memset(utf8, 0, sizeof(utf8));
n = minipal_convert_utf16_to_utf8((const CHAR16_T*)srcLE, 1, utf8, sizeof(utf8), MINIPAL_TREAT_AS_LITTLE_ENDIAN);
if (n != 3 || memcmp(utf8, "\xE6\xBC\xA2", 3) != 0)
Fail("treat-as-LE utf16->utf8 produced %02x %02x %02x (n=%zu)\n",
(unsigned char)utf8[0], (unsigned char)utf8[1], (unsigned char)utf8[2], n);
#endif
}

#if BIGENDIAN
{
const char* expected = "ABCDEFGHIJKLMNOPQRSTUVWXYZ123456";
Expand Down
15 changes: 6 additions & 9 deletions src/native/minipal/utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -983,7 +983,7 @@ static size_t GetChars(UTF8Encoding* self, unsigned char* bytes, size_t byteCoun
}
#if BIGENDIAN
if (self->treatAsLE)
*pTarget = ((CHAR16_T)ch)<<8;
*pTarget = (((CHAR16_T)ch) << 8 | ((CHAR16_T)ch) >> 8);
else
#endif
*pTarget = (CHAR16_T)ch;
Expand Down Expand Up @@ -1438,7 +1438,7 @@ static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, un
// compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
#if BIGENDIAN
if (self->treatAsLE)
ch = (*pSrc) >> 8;
ch = (CHAR16_T)(((*pSrc) >> 8) | ((*pSrc) << 8));
else
#endif
ch = *pSrc;
Expand Down Expand Up @@ -1581,7 +1581,7 @@ static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, un
{
#if BIGENDIAN
if (self->treatAsLE)
ch = (*pSrc) >> 8;
ch = (CHAR16_T)(((*pSrc) >> 8) | ((*pSrc) << 8));
else
#endif
ch = *pSrc;
Expand Down Expand Up @@ -1619,7 +1619,7 @@ static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, un
{
#if BIGENDIAN
if (self->treatAsLE)
ch = (*pSrc) >> 8;
ch = (CHAR16_T)(((*pSrc) >> 8) | ((*pSrc) << 8));
else
#endif
ch = *pSrc;
Expand All @@ -1635,7 +1635,7 @@ static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, un
{
#if BIGENDIAN
if (self->treatAsLE)
ch = (*pSrc) >> 8;
ch = (CHAR16_T)(((*pSrc) >> 8) | ((*pSrc) << 8));
else
#endif
ch = *pSrc;
Expand All @@ -1652,10 +1652,7 @@ static size_t GetBytes(UTF8Encoding* self, CHAR16_T* chars, size_t charCount, un
ch = *(int*)pSrc;
int chc = *(int*)(pSrc + 2);
#if BIGENDIAN
if (self->treatAsLE){
if (((ch | chc) & (int)0x80FF80FF) != 0) goto LongCodeWithMask;
}
else
if (((ch | chc) & (int)0x80FF80FF) != 0) goto LongCodeWithMask;
#else
if (((ch | chc) & (int)0xFF80FF80) != 0) goto LongCodeWithMask;
#endif
Expand Down
Loading