From c08e23dc747c21c56df7d7910ad7bf1a61345fc3 Mon Sep 17 00:00:00 2001
From: IonutParau
Date: Sun, 29 Jun 2025 12:08:57 +0200
Subject: [PATCH] stuff

---
Review notes (text between the "---" marker and the first "diff --git" line
is not part of the commit message):

Recovered layout
  * This patch was recovered from a copy whose line breaks were collapsed.
    Added blank lines are exact; indentation and the placement of blank
    context lines are a best-effort reconstruction, so apply with whitespace
    tolerance and check the result against the tree.
  * The author address and the header names of the three #include lines in
    src/unicode.c were missing from that copy. The added include is restored
    as <assert.h> because the patch introduces assert(); the two context
    includes are left as found. TODO: restore them from the tree.

Changes made during review (added lines only, line counts unchanged)
  * src/sandbox.lua: unicode.isWide now looks at the first character only.
    The submitted version compared wlen and len of the whole string, which
    reports true when any character is wide.
  * src/emulator.c: removed the stray ";;;;;" after the added printf().
  * src/unicode.c: nn_unicode_codepointToChar now yields "?" with length 1
    for codepoints above U+10FFFF instead of length 0, matching the size of
    1 that nn_unicode_codepointSize reports for them. The replaced code took
    its length from nn_unicode_codepointSize, so the two used to agree; the
    mismatch would presumably trip the new assert in nn_unicode_char.
  * The "index" lines below still name the blobs of the original commit.

Open questions for the author
  * key_up is queued immediately after key_down in the same branch, so a
    held key cannot be represented. Confirm this is intended.
  * The replaced implementations are commented out rather than deleted, and
    "point" in nn_unicode_codepointAt is no longer used by live code.
  * nn_unicode_codepointToChar returns a static buffer; every call
    overwrites the previous result.
  * nn_unicode_is_continuation becomes static; verify that no header still
    declares it with external linkage.

 src/emulator.c    |  12 +++-
 src/sandbox.lua   |   1 +
 src/testLuaArch.c |   5 +-
 src/unicode.c     | 158 +++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 170 insertions(+), 6 deletions(-)

diff --git a/src/emulator.c b/src/emulator.c
index b8417d8..82c58a1 100644
--- a/src/emulator.c
+++ b/src/emulator.c
@@ -502,7 +502,7 @@ int main() {
         .read = (void *)ne_fs_read,
         .seek = NULL,
     };
-    nn_addFileSystem(computer, "frostos", 1, &genericFS);
+    nn_addFileSystem(computer, "OpenOS", 1, &genericFS);
 
     nn_screen *s = nn_newScreen(80, 32, 16, 16, 256);
     nn_addKeyboard(s, "shitty keyboard");
@@ -558,7 +558,7 @@ int main() {
 
             nn_value values[5];
             values[0] = nn_values_cstring("key_down");
-            values[1] = nn_values_cstring("my ass");
+            values[1] = nn_values_cstring("shitty keyboard");
             values[2] = nn_values_integer(unicode);
             values[3] = nn_values_integer(keycode_to_oc(keycode));
             values[4] = nn_values_cstring("USER");
@@ -569,6 +569,14 @@ int main() {
                 // well fuck
                 printf("error happened when eventing the keyboarding: %s\n", error);;;;;;
             }
+
+            values[0] = nn_values_cstring("key_up");
+            error = nn_pushSignal(computer, values, 5);
+
+            if (error != NULL) {
+                // well fuck
+                printf("error happened when eventing the keyboarding: %s\n", error);
+            }
         }
 
         double now = nn_realTime();
diff --git a/src/sandbox.lua b/src/sandbox.lua
index 48c6156..deb8f78 100644
--- a/src/sandbox.lua
+++ b/src/sandbox.lua
@@ -397,6 +397,7 @@ sandbox = {
         wtrunc = function(s, count)
             return unicode.sub(s, 1, count)
         end,
+        isWide = function(s) return unicode.wlen(unicode.sub(s, 1, 1)) > 1 end,
     }),
     checkArg = checkArg,
     component = libcomponent,
diff --git a/src/testLuaArch.c b/src/testLuaArch.c
index 3674fe1..125a8b4 100644
--- a/src/testLuaArch.c
+++ b/src/testLuaArch.c
@@ -456,7 +456,10 @@ int testLuaArch_unicode_sub(lua_State *L) {
     }
     if(start < 0) start = 0;
     if(stop < 0) stop = 0;
-    if(start >= len) start = len - 1;
+    if(start >= len) {
+        lua_pushstring(L, "");
+        return 1;
+    }
     if(stop >= len) stop = len - 1;
     if(stop < start) {
         lua_pushstring(L, "");
diff --git a/src/unicode.c b/src/unicode.c
index 32746d6..6cf64c7 100644
--- a/src/unicode.c
+++ b/src/unicode.c
@@ -1,8 +1,9 @@
 #include "neonucleus.h"
+#include <assert.h>
 #include
 #include
 
-bool nn_unicode_is_continuation(unsigned char byte) {
+static bool nn_unicode_is_continuation(unsigned char byte) {
     return (byte >> 6) == 0b10;
 }
 
@@ -36,6 +37,8 @@ bool nn_unicode_validate(const char *b) {
                 return false;
             }
             s += 4;
+        } else {
+            return false;
         }
     }
     return true;
@@ -54,7 +57,6 @@ char *nn_unicode_char(unsigned int *codepoints, size_t codepointCount) {
 
     char *buf = nn_malloc(len+1);
     if (buf == NULL) return buf;
-    buf[len] = '\0';
 
     size_t j = 0;
     for (size_t i = 0; i < codepointCount; i++) {
@@ -64,6 +66,8 @@ char *nn_unicode_char(unsigned int *codepoints, size_t codepointCount) {
         memcpy(buf + j, c, codepointLen);
         j += codepointLen;
     }
+    buf[j] = '\0';
+    assert(j == len); // better safe than sorry
     return buf;
 }
 
@@ -103,6 +107,93 @@ size_t nn_unicode_len(const char *b) {
 unsigned int nn_unicode_codepointAt(const char *s, size_t byteOffset) {
     unsigned int point = 0;
     const unsigned char *b = (const unsigned char *)s + byteOffset;
+
+    const unsigned char *text = b;
+
+    int codepoint = 0x3f; // Codepoint (defaults to '?')
+    int octet = (unsigned char)(text[0]); // The first UTF8 octet
+
+    if (octet <= 0x7f)
+    {
+        // Only one octet (ASCII range x00-7F)
+        codepoint = text[0];
+    }
+    else if ((octet & 0xe0) == 0xc0)
+    {
+        // Two octets
+
+        // [0]xC2-DF [1]UTF8-tail(x80-BF)
+        unsigned char octet1 = text[1];
+
+        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { return codepoint; } // Unexpected sequence
+
+        if ((octet >= 0xc2) && (octet <= 0xdf))
+        {
+            codepoint = ((octet & 0x1f) << 6) | (octet1 & 0x3f);
+        }
+    }
+    else if ((octet & 0xf0) == 0xe0)
+    {
+        // Three octets
+        unsigned char octet1 = text[1];
+        unsigned char octet2 = '\0';
+
+        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { return codepoint; } // Unexpected sequence
+
+        octet2 = text[2];
+
+        if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { return codepoint; } // Unexpected sequence
+
+        // [0]xE0 [1]xA0-BF [2]UTF8-tail(x80-BF)
+        // [0]xE1-EC [1]UTF8-tail [2]UTF8-tail(x80-BF)
+        // [0]xED [1]x80-9F [2]UTF8-tail(x80-BF)
+        // [0]xEE-EF [1]UTF8-tail [2]UTF8-tail(x80-BF)
+
+        if (((octet == 0xe0) && !((octet1 >= 0xa0) && (octet1 <= 0xbf))) ||
+            ((octet == 0xed) && !((octet1 >= 0x80) && (octet1 <= 0x9f)))) { return codepoint; }
+
+        if ((octet >= 0xe0) && (octet <= 0xef))
+        {
+            codepoint = ((octet & 0xf) << 12) | ((octet1 & 0x3f) << 6) | (octet2 & 0x3f);
+        }
+    }
+    else if ((octet & 0xf8) == 0xf0)
+    {
+        // Four octets
+        if (octet > 0xf4) return codepoint;
+
+        unsigned char octet1 = text[1];
+        unsigned char octet2 = '\0';
+        unsigned char octet3 = '\0';
+
+        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { return codepoint; } // Unexpected sequence
+
+        octet2 = text[2];
+
+        if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { return codepoint; } // Unexpected sequence
+
+        octet3 = text[3];
+
+        if ((octet3 == '\0') || ((octet3 >> 6) != 2)) { return codepoint; } // Unexpected sequence
+
+        // [0]xF0 [1]x90-BF [2]UTF8-tail [3]UTF8-tail
+        // [0]xF1-F3 [1]UTF8-tail [2]UTF8-tail [3]UTF8-tail
+        // [0]xF4 [1]x80-8F [2]UTF8-tail [3]UTF8-tail
+
+        if (((octet == 0xf0) && !((octet1 >= 0x90) && (octet1 <= 0xbf))) ||
+            ((octet == 0xf4) && !((octet1 >= 0x80) && (octet1 <= 0x8f)))) { return codepoint; } // Unexpected sequence
+
+        if (octet >= 0xf0)
+        {
+            codepoint = ((octet & 0x7) << 18) | ((octet1 & 0x3f) << 12) | ((octet2 & 0x3f) << 6) | (octet3 & 0x3f);
+        }
+    }
+
+    if (codepoint > 0x10ffff) codepoint = 0x3f; // Codepoints after U+10ffff are invalid
+
+    return codepoint;
+
+/*
     const unsigned char subpartMask = 0b111111;
     // look into nn_unicode_codepointToChar as well.
     if(b[0] <= 0x7F) {
@@ -121,9 +212,32 @@ unsigned int nn_unicode_codepointAt(const char *s, size_t byteOffset) {
         point += ((unsigned int)(b[3] & subpartMask));
     }
     return point;
+*/
 }
 
 size_t nn_unicode_codepointSize(unsigned int codepoint) {
+    int size = 1;
+
+    if (codepoint <= 0x7f)
+    {
+        size = 1;
+    }
+    else if (codepoint <= 0x7ff)
+    {
+        size = 2;
+    }
+    else if (codepoint <= 0xffff)
+    {
+        size = 3;
+    }
+    else if (codepoint <= 0x10ffff)
+    {
+        size = 4;
+    }
+
+    return size;
+
+/*
     if (codepoint <= 0x007f) {
         return 1;
     } else if (codepoint <= 0x07ff) {
@@ -134,10 +248,47 @@ size_t nn_unicode_codepointSize(unsigned int codepoint) {
         return 4;
     }
 
-    return 0;
+    return 1;
+*/
 }
 
 const char *nn_unicode_codepointToChar(unsigned int codepoint, size_t *len) {
+
+    static char utf8[6] = { 0 };
+    memset(utf8, 0, 6); // Clear static array
+    int size = 1; // Byte size of codepoint; stays 1 for codepoints above U+10ffff
+    utf8[0] = '?'; // Fallback output, keeps *len equal to nn_unicode_codepointSize
+    if (codepoint <= 0x7f)
+    {
+        utf8[0] = (char)codepoint;
+        size = 1;
+    }
+    else if (codepoint <= 0x7ff)
+    {
+        utf8[0] = (char)(((codepoint >> 6) & 0x1f) | 0xc0);
+        utf8[1] = (char)((codepoint & 0x3f) | 0x80);
+        size = 2;
+    }
+    else if (codepoint <= 0xffff)
+    {
+        utf8[0] = (char)(((codepoint >> 12) & 0x0f) | 0xe0);
+        utf8[1] = (char)(((codepoint >> 6) & 0x3f) | 0x80);
+        utf8[2] = (char)((codepoint & 0x3f) | 0x80);
+        size = 3;
+    }
+    else if (codepoint <= 0x10ffff)
+    {
+        utf8[0] = (char)(((codepoint >> 18) & 0x07) | 0xf0);
+        utf8[1] = (char)(((codepoint >> 12) & 0x3f) | 0x80);
+        utf8[2] = (char)(((codepoint >> 6) & 0x3f) | 0x80);
+        utf8[3] = (char)((codepoint & 0x3f) | 0x80);
+        size = 4;
+    }
+
+    *len = size;
+
+    return utf8;
+/*
     size_t codepointSize = nn_unicode_codepointSize(codepoint);
     *len = codepointSize;
 
@@ -160,6 +311,7 @@ const char *nn_unicode_codepointToChar(unsigned int codepoint, size_t *len) {
     }
 
     return buffer;
+*/
 }
 
 size_t nn_unicode_charWidth(unsigned int codepoint);