stuff

2026-02-15 04:03:49 +01:00 · 2025-06-29 12:08:57 +02:00 · 2025-06-29 12:08:57 +02:00 · c08e23dc74
commit c08e23dc74
parent 230de47515
4 changed files with 170 additions and 6 deletions
--- a/src/emulator.c
+++ b/src/emulator.c
@ -502,7 +502,7 @@ int main() {
        .read = (void *)ne_fs_read,
        .seek = NULL,
    };
-    nn_addFileSystem(computer, "frostos", 1, &genericFS);
+    nn_addFileSystem(computer, "OpenOS", 1, &genericFS);

    nn_screen *s = nn_newScreen(80, 32, 16, 16, 256);
    nn_addKeyboard(s, "shitty keyboard");
@ -558,7 +558,7 @@ int main() {
            nn_value values[5];

            values[0] = nn_values_cstring("key_down");
-            values[1] = nn_values_cstring("my ass");
+            values[1] = nn_values_cstring("shitty keyboard");
            values[2] = nn_values_integer(unicode);
            values[3] = nn_values_integer(keycode_to_oc(keycode));
            values[4] = nn_values_cstring("USER");
@ -569,6 +569,14 @@ int main() {
                // well fuck
                printf("error happened when eventing the keyboarding: %s\n", error);;;;;;
            }
+           
+            values[0] = nn_values_cstring("key_up");
+            error = nn_pushSignal(computer, values, 5);
+
+            if (error != NULL) {
+                // well fuck
+                printf("error happened when eventing the keyboarding: %s\n", error);;;;;;
+            }
        }

        double now = nn_realTime();
--- a/src/sandbox.lua
+++ b/src/sandbox.lua
@ -397,6 +397,7 @@ sandbox = {
        wtrunc = function(s, count)
            return unicode.sub(s, 1, count)
        end,
+        isWide = function(s) return unicode.wlen(s) > unicode.len(s) end,
    }),
    checkArg = checkArg,
    component = libcomponent,
--- a/src/testLuaArch.c
+++ b/src/testLuaArch.c
@ -456,7 +456,10 @@ int testLuaArch_unicode_sub(lua_State *L) {
    }
    if(start < 0) start = 0;
    if(stop < 0) stop = 0;
-    if(start >= len) start = len - 1;
+    if(start >= len) {
+        lua_pushstring(L, "");
+        return 1;
+    }
    if(stop >= len) stop = len - 1;
    if(stop < start) {
        lua_pushstring(L, "");
--- a/src/unicode.c
+++ b/src/unicode.c
@ -1,8 +1,9 @@
 #include "neonucleus.h"
+#include <assert.h>
 #include <stdio.h>
 #include <string.h>

-bool nn_unicode_is_continuation(unsigned char byte) {
+static bool nn_unicode_is_continuation(unsigned char byte) { // True when byte has the 10xxxxxx bit pattern (UTF-8 continuation byte)
    return (byte >> 6) == 0b10; // NOTE(review): 0b literals are a compiler extension before C23
 }

@ -36,6 +37,8 @@ bool nn_unicode_validate(const char *b) {
                return false;
            }
            s += 4;
+        } else {
+            return false;
        }
    }
    return true;
@ -54,7 +57,6 @@ char *nn_unicode_char(unsigned int *codepoints, size_t codepointCount) {

    char *buf = nn_malloc(len+1);
    if (buf == NULL) return buf;
-    buf[len] = '\0';

    size_t j = 0;
    for (size_t i = 0; i < codepointCount; i++) {
@ -64,6 +66,8 @@ char *nn_unicode_char(unsigned int *codepoints, size_t codepointCount) {
        memcpy(buf + j, c, codepointLen);
        j += codepointLen;
    }
+    buf[j] = '\0';
+    assert(j == len); // better safe than sorry

    return buf;
 }
@ -103,6 +107,93 @@ size_t nn_unicode_len(const char *b) {
 unsigned int nn_unicode_codepointAt(const char *s, size_t byteOffset) { // Decodes the UTF-8 sequence starting at s + byteOffset; yields '?' (0x3f) when the sequence is malformed
    unsigned int point = 0; // Not read by the live decoder below; only the commented-out implementation uses it
    const unsigned char *b = (const unsigned char *)s + byteOffset;
+
+    const unsigned char *text = b; // Alias of b used by the decoder below
+
+    int codepoint = 0x3f;   // Codepoint (defaults to '?')
+    int octet = (unsigned char)(text[0]); // The first UTF8 octet
+
+    if (octet <= 0x7f)
+    {
+        // Only one octet (ASCII range x00-7F)
+        codepoint = text[0];
+    }
+    else if ((octet & 0xe0) == 0xc0)
+    {
+        // Two octets
+
+        // [0]xC2-DF    [1]UTF8-tail(x80-BF)
+        unsigned char octet1 = text[1];
+
+        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { return codepoint; } // Unexpected sequence
+
+        if ((octet >= 0xc2) && (octet <= 0xdf)) // Lead bytes 0xc0/0xc1 (overlong forms) keep the '?' default
+        {
+            codepoint = ((octet & 0x1f) << 6) | (octet1 & 0x3f);
+        }
+    }
+    else if ((octet & 0xf0) == 0xe0)
+    {
+        // Three octets
+        unsigned char octet1 = text[1];
+        unsigned char octet2 = '\0';
+
+        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { return codepoint; } // Unexpected sequence
+
+        octet2 = text[2]; // Read only after octet1 was validated, so a NUL terminator is never stepped over
+
+        if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { return codepoint; } // Unexpected sequence
+
+        // [0]xE0    [1]xA0-BF       [2]UTF8-tail(x80-BF)
+        // [0]xE1-EC [1]UTF8-tail    [2]UTF8-tail(x80-BF)
+        // [0]xED    [1]x80-9F       [2]UTF8-tail(x80-BF)
+        // [0]xEE-EF [1]UTF8-tail    [2]UTF8-tail(x80-BF)
+
+        if (((octet == 0xe0) && !((octet1 >= 0xa0) && (octet1 <= 0xbf))) ||
+            ((octet == 0xed) && !((octet1 >= 0x80) && (octet1 <= 0x9f)))) { return codepoint; } // Rejects overlong E0 80-9F and surrogate ED A0-BF sequences
+
+        if ((octet >= 0xe0) && (octet <= 0xef)) // Always true in this branch: (octet & 0xf0) == 0xe0
+        {
+            codepoint = ((octet & 0xf) << 12) | ((octet1 & 0x3f) << 6) | (octet2 & 0x3f);
+        }
+    }
+    else if ((octet & 0xf8) == 0xf0)
+    {
+        // Four octets
+        if (octet > 0xf4) return codepoint; // Lead bytes F5-F7 would decode above U+10FFFF
+
+        unsigned char octet1 = text[1];
+        unsigned char octet2 = '\0';
+        unsigned char octet3 = '\0';
+
+        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { return codepoint; }  // Unexpected sequence
+
+        octet2 = text[2];
+
+        if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { return codepoint; }  // Unexpected sequence
+
+        octet3 = text[3];
+
+        if ((octet3 == '\0') || ((octet3 >> 6) != 2)) { return codepoint; }  // Unexpected sequence
+
+        // [0]xF0       [1]x90-BF       [2]UTF8-tail  [3]UTF8-tail
+        // [0]xF1-F3    [1]UTF8-tail    [2]UTF8-tail  [3]UTF8-tail
+        // [0]xF4       [1]x80-8F       [2]UTF8-tail  [3]UTF8-tail
+
+        if (((octet == 0xf0) && !((octet1 >= 0x90) && (octet1 <= 0xbf))) ||
+            ((octet == 0xf4) && !((octet1 >= 0x80) && (octet1 <= 0x8f)))) { return codepoint; } // Unexpected sequence
+
+        if (octet >= 0xf0) // Always true in this branch: (octet & 0xf8) == 0xf0
+        {
+            codepoint = ((octet & 0x7) << 18) | ((octet1 & 0x3f) << 12) | ((octet2 & 0x3f) << 6) | (octet3 & 0x3f);
+        }
+    }
+
+    if (codepoint > 0x10ffff) codepoint = 0x3f;     // Codepoints after U+10ffff are invalid
+
+    return codepoint; // Bytes 0x80-0xBF and 0xF8-0xFF as the first octet match no branch and return '?'
+
+/*
    const unsigned char subpartMask = 0b111111;
    // look into nn_unicode_codepointToChar as well.
    if(b[0] <= 0x7F) {
@ -121,9 +212,32 @ unsigned int nn_unicode_codepointAt(const char *s, size_t byteOffset) {
        point += ((unsigned int)(b[3] & subpartMask));
    }
    return point;
+*/
 }

 size_t nn_unicode_codepointSize(unsigned int codepoint) { // Number of UTF-8 bytes (1-4) used to encode codepoint
+    int size = 1; // Fallback: a codepoint above 0x10ffff matches no branch below and reports 1
+
+    if (codepoint <= 0x7f)
+    {
+        size = 1;
+    }
+    else if (codepoint <= 0x7ff)
+    {
+        size = 2;
+    }
+    else if (codepoint <= 0xffff)
+    {
+        size = 3;
+    }
+    else if (codepoint <= 0x10ffff)
+    {
+        size = 4;
+    }
+
+    return size; // int converted to the size_t return type
+
+/*
    if (codepoint <= 0x007f) {
        return 1;
    } else if (codepoint <= 0x07ff) {
@ -134,10 +248,47 @ size_t nn_unicode_codepointSize(unsigned int codepoint) {
        return 4;
    }

-    return 0;
+    return 1;
+*/
 }

 const char *nn_unicode_codepointToChar(unsigned int codepoint, size_t *len) { // Encodes codepoint as UTF-8 and stores the byte count in *len
+
+    static char utf8[6] = { 0 }; // Shared static buffer: the returned pointer's contents are overwritten by the next call
+    memset(utf8, 0, 6); // Clear static array (also leaves the result NUL-terminated)
+    int size = 0;       // Byte size of codepoint; stays 0 when codepoint > 0x10ffff, whereas nn_unicode_codepointSize reports 1 - NOTE(review): confirm callers tolerate the mismatch
+
+    if (codepoint <= 0x7f)
+    {
+        utf8[0] = (char)codepoint;
+        size = 1;
+    }
+    else if (codepoint <= 0x7ff)
+    {
+        utf8[0] = (char)(((codepoint >> 6) & 0x1f) | 0xc0);
+        utf8[1] = (char)((codepoint & 0x3f) | 0x80);
+        size = 2;
+    }
+    else if (codepoint <= 0xffff) // No surrogate (0xd800-0xdfff) check here, unlike the decoder in nn_unicode_codepointAt
+    {
+        utf8[0] = (char)(((codepoint >> 12) & 0x0f) | 0xe0);
+        utf8[1] = (char)(((codepoint >>  6) & 0x3f) | 0x80);
+        utf8[2] = (char)((codepoint & 0x3f) | 0x80);
+        size = 3;
+    }
+    else if (codepoint <= 0x10ffff)
+    {
+        utf8[0] = (char)(((codepoint >> 18) & 0x07) | 0xf0);
+        utf8[1] = (char)(((codepoint >> 12) & 0x3f) | 0x80);
+        utf8[2] = (char)(((codepoint >>  6) & 0x3f) | 0x80);
+        utf8[3] = (char)((codepoint & 0x3f) | 0x80);
+        size = 4;
+    }
+
+    *len = size; // len is dereferenced unconditionally, so it must not be NULL
+
+    return utf8; // Empty string when no branch above matched
+/*
    size_t codepointSize = nn_unicode_codepointSize(codepoint);
    *len = codepointSize;

@ -160,6 +311,7 @ const char *nn_unicode_codepointToChar(unsigned int codepoint, size_t *len) {
    }

    return buffer;
+*/
 }

 size_t nn_unicode_charWidth(unsigned int codepoint);