From 0f0fc2db3549c599063c9dfc0dbde76cda238eff Mon Sep 17 00:00:00 2001
From: Blendi-Goose <87442375+Blendi-Goose@users.noreply.github.com>
Date: Sun, 8 Jun 2025 23:56:45 +0200
Subject: [PATCH] absolutely awful progress that needs to be checked by every
 developer before it can possibly ever be trusted because i wrote it while
 very much not properly awake, and i'm not awake now to fix it either.

---
Review notes (this section is not part of the commit message):
* nn_unicode_codepointToChar: the lead byte now carries the highest bits
  and each continuation byte the next lower 6 bits. The submitted version
  packed them in the reverse order, which does not produce UTF-8
  (U+00E9 came out as C9 87 instead of C3 A9).
* nn_unicode_codepointToChar: hex masks replace the 0b literals, and the
  static buffer is NUL-terminated.
* nn_unicode_codepointSize: negative values now report 0 bytes.
* Line breaks and indentation of this patch were reconstructed (4 spaces
  assumed) and the index hash was not recomputed; if the context does not
  match your tree, try `git am --ignore-whitespace`.

 src/unicode.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 56 insertions(+), 5 deletions(-)

diff --git a/src/unicode.c b/src/unicode.c
index a4cf285..6912f1b 100644
--- a/src/unicode.c
+++ b/src/unicode.c
@@ -12,17 +12,17 @@ bool nn_unicode_validate(const char *s) {
 
 char *nn_unicode_char(int *codepoints, size_t codepointCount) {
     size_t len = 0;
-    for(size_t i = 0; i < codepointCount; i++) {
+    for (size_t i = 0; i < codepointCount; i++) {
         int codepoint = codepoints[i];
         len += nn_unicode_codepointSize(codepoint);
     }
 
     char *buf = nn_malloc(len+1);
-    if(buf == NULL) return buf;
+    if (buf == NULL) return buf;
     buf[len] = '\0';
 
     size_t j = 0;
-    for(size_t i = 0; i < codepointCount; i++) {
+    for (size_t i = 0; i < codepointCount; i++) {
         int codepoint = codepoints[i];
         size_t codepointLen = 0;
         const char *c = nn_unicode_codepointToChar(codepoint, &codepointLen);
@@ -45,9 +45,60 @@ size_t nn_unicode_len(const char *s) {
 
 int nn_unicode_codepointAt(const char *s, size_t byteOffset);
 
-size_t nn_unicode_codepointSize(int codepoint);
+/*
+ * Returns how many bytes the UTF-8 encoding of `codepoint` occupies, or 0
+ * when the value lies outside 0..0x10ffff and cannot be encoded.
+ */
+size_t nn_unicode_codepointSize(int codepoint) {
+    if (codepoint < 0) {
+        return 0;
+    } else if (codepoint <= 0x007f) {
+        return 1;
+    } else if (codepoint <= 0x07ff) {
+        return 2;
+    } else if (codepoint <= 0xffff) {
+        return 3;
+    } else if (codepoint <= 0x10ffff) {
+        return 4;
+    }
+
+    return 0;
+}
 
-const char *nn_unicode_codepointToChar(int codepoint, size_t *len);
+/*
+ * Encodes `codepoint` as UTF-8 and stores the byte count in `*len` (0 when
+ * the codepoint cannot be encoded). The bytes are NUL-terminated.
+ *
+ * The returned pointer refers to a static buffer that the next call
+ * overwrites, so copy the bytes out before calling again.
+ */
+const char *nn_unicode_codepointToChar(int codepoint, size_t *len) {
+    size_t codepointSize = nn_unicode_codepointSize(codepoint);
+    *len = codepointSize;
+
+    static char buffer[5];
+
+    /* The lead byte holds the highest bits; every continuation byte holds
+     * the next lower 6 bits behind a 10xxxxxx marker. */
+    if (codepointSize == 1) {
+        buffer[0] = (char)codepoint;
+    } else if (codepointSize == 2) {
+        buffer[0] = (char)(0xc0 | (codepoint >> 6));
+        buffer[1] = (char)(0x80 | (codepoint & 0x3f));
+    } else if (codepointSize == 3) {
+        buffer[0] = (char)(0xe0 | (codepoint >> 12));
+        buffer[1] = (char)(0x80 | ((codepoint >> 6) & 0x3f));
+        buffer[2] = (char)(0x80 | (codepoint & 0x3f));
+    } else if (codepointSize == 4) {
+        buffer[0] = (char)(0xf0 | (codepoint >> 18));
+        buffer[1] = (char)(0x80 | ((codepoint >> 12) & 0x3f));
+        buffer[2] = (char)(0x80 | ((codepoint >> 6) & 0x3f));
+        buffer[3] = (char)(0x80 | (codepoint & 0x3f));
+    }
+    buffer[codepointSize] = '\0';
+
+    return buffer;
+}
 
 size_t nn_unicode_charWidth(int codepoint);
 