From 643173660a893f536cdb583be80ab943dfaf55d8 Mon Sep 17 00:00:00 2001
From: Blendi-Goose <87442375+Blendi-Goose@users.noreply.github.com>
Date: Sun, 29 Jun 2025 15:26:30 +0200
Subject: [PATCH] Mostly revert the atoms changes to src/unicode.c

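Remove the replacement bodies of nn_unicode_codepointAt,
nn_unicode_codepointSize and nn_unicode_codepointToChar and re-enable
the earlier implementations that had been left commented out.

The only addition is a memset that clears the static buffer in
nn_unicode_codepointToChar on every call.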
---
 src/unicode.c | 148 +-------------------------------------------------
 1 file changed, 1 insertion(+), 147 deletions(-)

diff --git a/src/unicode.c b/src/unicode.c
index 6cf64c7..f9ca12d 100644
--- a/src/unicode.c
+++ b/src/unicode.c
@@ -108,92 +108,6 @@ unsigned int nn_unicode_codepointAt(const char *s, size_t byteOffset) {
     unsigned int point = 0;
     const unsigned char *b = (const unsigned char *)s + byteOffset;
 
-    const unsigned char *text = b;
-
-    int codepoint = 0x3f;   // Codepoint (defaults to '?')
-    int octet = (unsigned char)(text[0]); // The first UTF8 octet
-
-    if (octet <= 0x7f)
-    {
-        // Only one octet (ASCII range x00-7F)
-        codepoint = text[0];
-    }
-    else if ((octet & 0xe0) == 0xc0)
-    {
-        // Two octets
-
-        // [0]xC2-DF    [1]UTF8-tail(x80-BF)
-        unsigned char octet1 = text[1];
-
-        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { return codepoint; } // Unexpected sequence
-
-        if ((octet >= 0xc2) && (octet <= 0xdf))
-        {
-            codepoint = ((octet & 0x1f) << 6) | (octet1 & 0x3f);
-        }
-    }
-    else if ((octet & 0xf0) == 0xe0)
-    {
-        // Three octets
-        unsigned char octet1 = text[1];
-        unsigned char octet2 = '\0';
-
-        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { return codepoint; } // Unexpected sequence
-
-        octet2 = text[2];
-
-        if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { return codepoint; } // Unexpected sequence
-
-        // [0]xE0    [1]xA0-BF       [2]UTF8-tail(x80-BF)
-        // [0]xE1-EC [1]UTF8-tail    [2]UTF8-tail(x80-BF)
-        // [0]xED    [1]x80-9F       [2]UTF8-tail(x80-BF)
-        // [0]xEE-EF [1]UTF8-tail    [2]UTF8-tail(x80-BF)
-
-        if (((octet == 0xe0) && !((octet1 >= 0xa0) && (octet1 <= 0xbf))) ||
-            ((octet == 0xed) && !((octet1 >= 0x80) && (octet1 <= 0x9f)))) { return codepoint; }
-
-        if ((octet >= 0xe0) && (octet <= 0xef))
-        {
-            codepoint = ((octet & 0xf) << 12) | ((octet1 & 0x3f) << 6) | (octet2 & 0x3f);
-        }
-    }
-    else if ((octet & 0xf8) == 0xf0)
-    {
-        // Four octets
-        if (octet > 0xf4) return codepoint;
-
-        unsigned char octet1 = text[1];
-        unsigned char octet2 = '\0';
-        unsigned char octet3 = '\0';
-
-        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { return codepoint; }  // Unexpected sequence
-
-        octet2 = text[2];
-
-        if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { return codepoint; }  // Unexpected sequence
-
-        octet3 = text[3];
-
-        if ((octet3 == '\0') || ((octet3 >> 6) != 2)) { return codepoint; }  // Unexpected sequence
-
-        // [0]xF0       [1]x90-BF       [2]UTF8-tail  [3]UTF8-tail
-        // [0]xF1-F3    [1]UTF8-tail    [2]UTF8-tail  [3]UTF8-tail
-        // [0]xF4       [1]x80-8F       [2]UTF8-tail  [3]UTF8-tail
-
-        if (((octet == 0xf0) && !((octet1 >= 0x90) && (octet1 <= 0xbf))) ||
-            ((octet == 0xf4) && !((octet1 >= 0x80) && (octet1 <= 0x8f)))) { return codepoint; } // Unexpected sequence
-
-        if (octet >= 0xf0)
-        {
-            codepoint = ((octet & 0x7) << 18) | ((octet1 & 0x3f) << 12) | ((octet2 & 0x3f) << 6) | (octet3 & 0x3f);
-        }
-    }
-
-    if (codepoint > 0x10ffff) codepoint = 0x3f;     // Codepoints after U+10ffff are invalid
-
-    return codepoint;
-
-/*
     const unsigned char subpartMask = 0b111111;
     // look into nn_unicode_codepointToChar as well.
     if(b[0] <= 0x7F) {
@@ -212,32 +126,9 @@ unsigned int nn_unicode_codepointAt(const char *s, size_t byteOffset) {
         point += ((unsigned int)(b[3] & subpartMask));
     }
     return point;
-*/
 }
 
 size_t nn_unicode_codepointSize(unsigned int codepoint) {
-    int size = 1;
-
-    if (codepoint <= 0x7f)
-    {
-        size = 1;
-    }
-    else if (codepoint <= 0x7ff)
-    {
-        size = 2;
-    }
-    else if (codepoint <= 0xffff)
-    {
-        size = 3;
-    }
-    else if (codepoint <= 0x10ffff)
-    {
-        size = 4;
-    }
-
-    return size;
-
-/*
     if (codepoint <= 0x007f) {
         return 1;
     } else if (codepoint <= 0x07ff) {
@@ -249,50 +140,14 @@ size_t nn_unicode_codepointSize(unsigned int codepoint) {
     }
 
     return 1;
-*/
 }
 
 const char *nn_unicode_codepointToChar(unsigned int codepoint, size_t *len) {
-
-    static char utf8[6] = { 0 };
-    memset(utf8, 0, 6); // Clear static array
-    int size = 0;       // Byte size of codepoint
-
-    if (codepoint <= 0x7f)
-    {
-        utf8[0] = (char)codepoint;
-        size = 1;
-    }
-    else if (codepoint <= 0x7ff)
-    {
-        utf8[0] = (char)(((codepoint >> 6) & 0x1f) | 0xc0);
-        utf8[1] = (char)((codepoint & 0x3f) | 0x80);
-        size = 2;
-    }
-    else if (codepoint <= 0xffff)
-    {
-        utf8[0] = (char)(((codepoint >> 12) & 0x0f) | 0xe0);
-        utf8[1] = (char)(((codepoint >>  6) & 0x3f) | 0x80);
-        utf8[2] = (char)((codepoint & 0x3f) | 0x80);
-        size = 3;
-    }
-    else if (codepoint <= 0x10ffff)
-    {
-        utf8[0] = (char)(((codepoint >> 18) & 0x07) | 0xf0);
-        utf8[1] = (char)(((codepoint >> 12) & 0x3f) | 0x80);
-        utf8[2] = (char)(((codepoint >>  6) & 0x3f) | 0x80);
-        utf8[3] = (char)((codepoint & 0x3f) | 0x80);
-        size = 4;
-    }
-
-    *len = size;
-
-    return utf8;
-/*
     size_t codepointSize = nn_unicode_codepointSize(codepoint);
     *len = codepointSize;
 
     static char buffer[4];
+    memset(buffer, 0, 4); // Clear static array
 
     if (codepointSize == 1) {
         buffer[0] = (char)codepoint;
@@ -311,7 +166,6 @@ const char *nn_unicode_codepointToChar(unsigned int codepoint, size_t *len) {
     }
 
     return buffer;
-*/
 }
 
 size_t nn_unicode_charWidth(unsigned int codepoint);