diff --git a/src/util/shiftjis.cpp b/src/util/shiftjis.cpp index 669a9bb13..593d6110b 100644 --- a/src/util/shiftjis.cpp +++ b/src/util/shiftjis.cpp @@ -3,274 +3,9 @@ #include #include -// https://github.com/bucanero/apollo-ps3/commit/b8e52b021239d40f2ba6945d7352345f4457b7b7 -extern const unsigned char shiftJIS_convTable[25088]; - -void sjis2ascii(char* bData) -{ - std::uint16_t ch; - int i, j = 0; - int len = static_cast(std::strlen(bData)); - - for (i = 0; i < len; i += 2) - { - ch = (bData[i] << 8) | bData[i + 1]; - - // 'A' .. 'Z' - // '0' .. '9' - if ((ch >= 0x8260 && ch <= 0x8279) || (ch >= 0x824F && ch <= 0x8258)) - { - bData[j++] = (ch & 0xFF) - 0x1F; - continue; - } - - // 'a' .. 'z' - if (ch >= 0x8281 && ch <= 0x829A) - { - bData[j++] = (ch & 0xFF) - 0x20; - continue; - } - - switch (ch) - { - case 0x0000: // End of the string - bData[j] = 0; - return; - - case 0x8140: - bData[j++] = ' '; - break; - - case 0x8143: - bData[j++] = ','; - break; - - case 0x8144: - bData[j++] = '.'; - break; - - case 0x8145: - bData[j++] = '\xFA'; - break; - - case 0x8146: - bData[j++] = ':'; - break; - - case 0x8147: - bData[j++] = ';'; - break; - - case 0x8148: - bData[j++] = '?'; - break; - - case 0x8149: - bData[j++] = '!'; - break; - - case 0x814F: - bData[j++] = '^'; - break; - - case 0x8151: - bData[j++] = '_'; - break; - - case 0x815B: - case 0x815C: - case 0x815D: - bData[j++] = '-'; - break; - - case 0x815E: - bData[j++] = '/'; - break; - - case 0x815F: - bData[j++] = '\\'; - break; - - case 0x8160: - bData[j++] = '~'; - break; - - case 0x8161: - bData[j++] = '|'; - break; - - case 0x8168: - bData[j++] = '"'; - break; - - case 0x8169: - bData[j++] = '('; - break; - - case 0x816A: - bData[j++] = ')'; - break; - - case 0x816D: - bData[j++] = '['; - break; - - case 0x816E: - bData[j++] = ']'; - break; - - case 0x816F: - bData[j++] = '{'; - break; - - case 0x8170: - bData[j++] = '}'; - break; - - case 0x817B: - bData[j++] = '+'; - break; - - case 0x817C: - bData[j++] = '-'; - break; - - case 0x817D: - bData[j++] = '\xF1'; - break; - - case 0x817E: - bData[j++] = '*'; - break; - - case 0x8180: - bData[j++] = '\xF6'; - break; - - case 0x8181: - bData[j++] = '='; - break; - - case 0x8183: - bData[j++] = '<'; - break; - - case 0x8184: - bData[j++] = '>'; - break; - - case 0x818A: - bData[j++] = '\xF8'; - break; - - case 0x818B: - bData[j++] = '\''; - break; - - case 0x818C: - bData[j++] = '"'; - break; - - case 0x8190: - bData[j++] = '$'; - break; - - case 0x8193: - bData[j++] = '%'; - break; - - case 0x8194: - bData[j++] = '#'; - break; - - case 0x8195: - bData[j++] = '&'; - break; - - case 0x8196: - bData[j++] = '*'; - break; - - case 0x8197: - bData[j++] = '@'; - break; - - // Character not found - default: - bData[j++] = bData[i]; - bData[j++] = bData[i + 1]; - break; - } - } - - bData[j] = 0; - return; -} - -char* sjis2utf8(char* input) -{ - // Simplify the input and decode standard ASCII characters - sjis2ascii(input); - - size_t len = static_cast(std::strlen(input)); - char* output = reinterpret_cast( - std::malloc(3 * len)); // ShiftJis won't give 4byte UTF8, so max. 3 byte per input char are needed - size_t indexInput = 0, indexOutput = 0; - - while (indexInput < len) - { - char arraySection = ((uint8_t)input[indexInput]) >> 4; - - size_t arrayOffset; - if (arraySection == 0x8) - arrayOffset = 0x100; // these are two-byte shiftjis - else if (arraySection == 0x9) - arrayOffset = 0x1100; - else if (arraySection == 0xE) - arrayOffset = 0x2100; - else - arrayOffset = 0; // this is one byte shiftjis - - // determining real array offset - if (arrayOffset) - { - arrayOffset += (((uint8_t)input[indexInput]) & 0xf) << 8; - indexInput++; - if (indexInput >= len) - break; - } - arrayOffset += (uint8_t)input[indexInput++]; - arrayOffset <<= 1; - - // unicode number is... - uint16_t unicodeValue = (shiftJIS_convTable[arrayOffset] << 8) | shiftJIS_convTable[arrayOffset + 1]; - - // converting to UTF8 - if (unicodeValue < 0x80) - { - output[indexOutput++] = static_cast(unicodeValue); - } - else if (unicodeValue < 0x800) - { - output[indexOutput++] = 0xC0 | static_cast((unicodeValue >> 6)); - output[indexOutput++] = 0x80 | static_cast((unicodeValue & 0x3f)); - } - else - { - output[indexOutput++] = 0xE0 | static_cast((unicodeValue >> 12)); - output[indexOutput++] = 0x80 | static_cast(((unicodeValue & 0xfff) >> 6)); - output[indexOutput++] = 0x80 | static_cast((unicodeValue & 0x3f)); - } - } - - // remove the unnecessary bytes - output[indexOutput] = 0; - return output; -} - // https://stackoverflow.com/questions/33165171/c-shiftjis-to-utf8-conversion -const unsigned char shiftJIS_convTable[25088] = { +static const unsigned char shiftJIS_convTable[25088] = { 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08, 0x00, 0x09, 0x00, 0x0a, 0x00, 0x0b, 0x00, 0x0c, 0x00, 0x0d, 0x00, 0x0e, 0x00, 0x0f, 0x00, 0x10, 0x00, 0x11, 0x00, 0x12, 0x00, 0x13, 0x00, 0x14, 0x00, 0x15, 0x00, 0x16, 0x00, 0x17, 0x00, 0x18, 0x00, 0x19, 0x00, 0x1a, 0x00, 0x1b, 0x00, @@ -1593,4 +1328,268 @@ const unsigned char shiftJIS_convTable[25088] = { 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, -}; \ No newline at end of file +}; + +// https://github.com/bucanero/apollo-ps3/commit/b8e52b021239d40f2ba6945d7352345f4457b7b7 + +void sjis2ascii(char* bData) +{ + std::uint16_t ch; + int i, j = 0; + int len = static_cast(std::strlen(bData)); + + for (i = 0; i < len; i += 2) + { + ch = (bData[i] << 8) | bData[i + 1]; + + // 'A' .. 'Z' + // '0' .. '9' + if ((ch >= 0x8260 && ch <= 0x8279) || (ch >= 0x824F && ch <= 0x8258)) + { + bData[j++] = (ch & 0xFF) - 0x1F; + continue; + } + + // 'a' .. 'z' + if (ch >= 0x8281 && ch <= 0x829A) + { + bData[j++] = (ch & 0xFF) - 0x20; + continue; + } + + switch (ch) + { + case 0x0000: // End of the string + bData[j] = 0; + return; + + case 0x8140: + bData[j++] = ' '; + break; + + case 0x8143: + bData[j++] = ','; + break; + + case 0x8144: + bData[j++] = '.'; + break; + + case 0x8145: + bData[j++] = '\xFA'; + break; + + case 0x8146: + bData[j++] = ':'; + break; + + case 0x8147: + bData[j++] = ';'; + break; + + case 0x8148: + bData[j++] = '?'; + break; + + case 0x8149: + bData[j++] = '!'; + break; + + case 0x814F: + bData[j++] = '^'; + break; + + case 0x8151: + bData[j++] = '_'; + break; + + case 0x815B: + case 0x815C: + case 0x815D: + bData[j++] = '-'; + break; + + case 0x815E: + bData[j++] = '/'; + break; + + case 0x815F: + bData[j++] = '\\'; + break; + + case 0x8160: + bData[j++] = '~'; + break; + + case 0x8161: + bData[j++] = '|'; + break; + + case 0x8168: + bData[j++] = '"'; + break; + + case 0x8169: + bData[j++] = '('; + break; + + case 0x816A: + bData[j++] = ')'; + break; + + case 0x816D: + bData[j++] = '['; + break; + + case 0x816E: + bData[j++] = ']'; + break; + + case 0x816F: + bData[j++] = '{'; + break; + + case 0x8170: + bData[j++] = '}'; + break; + + case 0x817B: + bData[j++] = '+'; + break; + + case 0x817C: + bData[j++] = '-'; + break; + + case 0x817D: + bData[j++] = '\xF1'; + break; + + case 0x817E: + bData[j++] = '*'; + break; + + case 0x8180: + bData[j++] = '\xF6'; + break; + + case 0x8181: + bData[j++] = '='; + break; + + case 0x8183: + bData[j++] = '<'; + break; + + case 0x8184: + bData[j++] = '>'; + break; + + case 0x818A: + bData[j++] = '\xF8'; + break; + + case 0x818B: + bData[j++] = '\''; + break; + + case 0x818C: + bData[j++] = '"'; + break; + + case 0x8190: + bData[j++] = '$'; + break; + + case 0x8193: + bData[j++] = '%'; + break; + + case 0x8194: + bData[j++] = '#'; + break; + + case 0x8195: + bData[j++] = '&'; + break; + + case 0x8196: + bData[j++] = '*'; + break; + + case 0x8197: + bData[j++] = '@'; + break; + + // Character not found + default: + bData[j++] = bData[i]; + bData[j++] = bData[i + 1]; + break; + } + } + + bData[j] = 0; + return; +} + +char* sjis2utf8(char* input) +{ + // Simplify the input and decode standard ASCII characters + sjis2ascii(input); + + size_t len = static_cast(std::strlen(input)); + char* output = reinterpret_cast( + std::malloc(3 * len)); // ShiftJis won't give 4byte UTF8, so max. 3 byte per input char are needed + size_t indexInput = 0, indexOutput = 0; + + while (indexInput < len) + { + char arraySection = ((uint8_t)input[indexInput]) >> 4; + + size_t arrayOffset; + if (arraySection == 0x8) + arrayOffset = 0x100; // these are two-byte shiftjis + else if (arraySection == 0x9) + arrayOffset = 0x1100; + else if (arraySection == 0xE) + arrayOffset = 0x2100; + else + arrayOffset = 0; // this is one byte shiftjis + + // determining real array offset + if (arrayOffset) + { + arrayOffset += (((uint8_t)input[indexInput]) & 0xf) << 8; + indexInput++; + if (indexInput >= len) + break; + } + arrayOffset += (uint8_t)input[indexInput++]; + arrayOffset <<= 1; + + // unicode number is... + uint16_t unicodeValue = (shiftJIS_convTable[arrayOffset] << 8) | shiftJIS_convTable[arrayOffset + 1]; + + // converting to UTF8 + if (unicodeValue < 0x80) + { + output[indexOutput++] = static_cast(unicodeValue); + } + else if (unicodeValue < 0x800) + { + output[indexOutput++] = 0xC0 | static_cast((unicodeValue >> 6)); + output[indexOutput++] = 0x80 | static_cast((unicodeValue & 0x3f)); + } + else + { + output[indexOutput++] = 0xE0 | static_cast((unicodeValue >> 12)); + output[indexOutput++] = 0x80 | static_cast(((unicodeValue & 0xfff) >> 6)); + output[indexOutput++] = 0x80 | static_cast((unicodeValue & 0x3f)); + } + } + + // remove the unnecessary bytes + output[indexOutput] = 0; + return output; +} \ No newline at end of file