// UTF-8 validation helpers (src/unicode.c).
//
// NOTE(review): this text was reconstructed from a patch whose line breaks
// were lost; the targets of the file's two original #include lines could not
// be recovered. Restore the project header include(s) here if they are needed.
#include <stdbool.h>

// Returns true when `byte` is a UTF-8 continuation byte, i.e. its top two
// bits are 10 (value range 0x80..0xBF).
bool nn_unicode_is_continuation(char byte) {
    // Plain char may be signed; go through unsigned char so that bytes
    // >= 0x80 are not seen as negative values.
    return ((unsigned char)byte & 0xC0u) == 0x80u;
}

// Checks that the NUL-terminated string `s` is well-formed UTF-8 as defined
// by RFC 3629.
//
// `s` must be a non-NULL pointer to a NUL-terminated byte string.
//
// Returns true if every byte sequence up to the terminator is a valid
// encoding of a Unicode scalar value. Returns false on:
//   - a stray continuation byte or an impossible lead byte (0xC0, 0xC1,
//     0xF5..0xFF),
//   - a sequence cut short by the terminator or by a non-continuation byte,
//   - an overlong encoding,
//   - an encoded UTF-16 surrogate (U+D800..U+DFFF),
//   - a code point above U+10FFFF.
// The empty string is valid.
bool nn_unicode_validate(const char *s) {
    while (*s != '\0') {
        const unsigned char lead = (unsigned char)s[0];

        if (lead <= 0x7F) {
            // Single-byte (ASCII) sequence.
            s += 1;
            continue;
        }

        // Number of bytes that must follow the lead byte, and the allowed
        // range for the first of them. The range is narrower than the
        // generic 0x80..0xBF for a few lead bytes; that is what excludes
        // overlong forms, surrogates and values beyond U+10FFFF.
        int trailing;
        unsigned char second_min = 0x80;
        unsigned char second_max = 0xBF;

        if (lead >= 0xC2 && lead <= 0xDF) {
            // 0xC0 and 0xC1 would only encode overlong ASCII.
            trailing = 1;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            trailing = 2;
            if (lead == 0xE0) {
                second_min = 0xA0; // below this is an overlong form
            } else if (lead == 0xED) {
                second_max = 0x9F; // above this is a UTF-16 surrogate
            }
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            trailing = 3;
            if (lead == 0xF0) {
                second_min = 0x90; // below this is an overlong form
            } else if (lead == 0xF4) {
                second_max = 0x8F; // above this exceeds U+10FFFF
            }
        } else {
            // Stray continuation byte (0x80..0xBF) or invalid lead byte.
            return false;
        }

        // A NUL terminator in any trailing position fails the range or
        // continuation check, so we return before reading past the end.
        const unsigned char second = (unsigned char)s[1];
        if (second < second_min || second > second_max) {
            return false;
        }
        for (int i = 2; i <= trailing; i++) {
            if (!nn_unicode_is_continuation(s[i])) {
                return false;
            }
        }

        s += trailing + 1;
    }
    return true;
}