diff --git a/TODO.md b/TODO.md index 519ec95..b2cc707 100644 --- a/TODO.md +++ b/TODO.md @@ -1,5 +1,6 @@ # Parity with Vanilla OC (only the stuff that makes sense for an emulator) +- make the `unicode` library in testLuaArch support invalid UTF-8 (WHY IS IT OK WITH THAT) - in-memory version of `filesystem` - complete the GPU implementation (screen buffers and missing methods) - complete the screen implementation (bunch of missing methods) diff --git a/src/components/gpu.c b/src/components/gpu.c index abf0f89..97e33c8 100644 --- a/src/components/gpu.c +++ b/src/components/gpu.c @@ -131,22 +131,23 @@ void nni_gpu_set(nni_gpu *gpu, void *_, nn_component *component, nn_computer *co return; } - if(!nn_unicode_validate(s)) { - nn_setCError(computer, "invalid utf-8"); - return; - } - int current = 0; int len = 0; while(s[current] != 0) { - int codepoint = nn_unicode_codepointAt(s, current); - nn_setPixel(gpu->currentScreen, x, y, nni_gpu_makePixel(gpu, s + current)); + if(nn_unicode_isValidCodepoint(s + current)) { + int codepoint = nn_unicode_codepointAt(s, current); + nn_setPixel(gpu->currentScreen, x, y, nni_gpu_makePixel(gpu, s + current)); + current += nn_unicode_codepointSize(codepoint); + } else { + unsigned int codepoint = (unsigned char)s[current]; + nn_setPixel(gpu->currentScreen, x, y, nni_gpu_makePixel(gpu, s + current)); + current++; + } if(isVertical) { y++; } else { x++; } - current += nn_unicode_codepointSize(codepoint); len++; } diff --git a/src/neonucleus.h b/src/neonucleus.h index 7d2b788..96e65c5 100644 --- a/src/neonucleus.h +++ b/src/neonucleus.h @@ -300,6 +300,8 @@ nn_bool_t nn_decRef(nn_refc *refc); // Unicode (more specifically, UTF-8) stuff nn_bool_t nn_unicode_validate(const char *s); +// expects NULL terminator +nn_bool_t nn_unicode_isValidCodepoint(const char *s); // returned string must be nn_deallocStr()'d char *nn_unicode_char(nn_Alloc *alloc, unsigned int *codepoints, nn_size_t codepointCount); // returned array must be nn_dealloc()'d diff 
--git a/src/sandbox.lua b/src/sandbox.lua
index c3e377a..e1c9b73 100644
--- a/src/sandbox.lua
+++ b/src/sandbox.lua
@@ -17,6 +17,16 @@ local function copy(v, p)
     end
 end
 
+-- pcall wrapper: returns f's results on success, or nil plus the error value.
+-- table.pack records the result count in t.n, so nil results are not dropped.
+local function spcall(f, ...)
+    local t = table.pack(pcall(f, ...))
+    if t[1] then
+        return table.unpack(t, 2, t.n)
+    end
+    return nil, t[2]
+end
+
 local function nextDeadline()
     return computer.uptime() + 5
 end
@@ -392,40 +402,9 @@
 sandbox = {
     utf8 = copy(utf8),
     unicode = copy(unicode, {
-        wtrunc = function (str,space)
-            space = space - 1
-            return str:sub(1,(space >= utf8.len(str)) and (#str) or (utf8.offset(str,space+1)-1))
-        end,
         isWide = function(s) return unicode.wlen(s) > unicode.len(s) end,
         upper = string.upper,
         lower = string.lower,
-        --[[
-        sub = function (str,a,b)
-            if not b then b = utf8.len(str) end
-            if not a then a = 1 end
-            -- a = math.max(a,1)
-
-            if a < 0 then
-                -- negative
-
-                a = utf8.len(str) + a + 1
-            end
-
-            if b < 0 then
-                b = utf8.len(str) + b + 1
-            end
-
-            if a > b then return "" end
-
-            if b >= utf8.len(str) then b = #str else b = utf8.offset(str,b+1)-1 end
-
-            if a > utf8.len(str) then return "" end
-            a = utf8.offset(str,a)
-
-            return str:sub(a,b)
-            -- return str:sub(a, b)
-        end,
-        ]]
     }),
     checkArg = checkArg,
     component = libcomponent,
diff --git a/src/unicode.c b/src/unicode.c
index 0878400..dd53708 100644
--- a/src/unicode.c
+++ b/src/unicode.c
@@ -162,6 +162,36 @@ static nn_bool_t nn_unicode_is_continuation(unsigned char byte) {
     return (byte >> 6) == 0b10;
 }
 
+// Checks whether s starts with a structurally well-formed UTF-8 sequence:
+// a lead byte followed by the number of continuation bytes it announces.
+// Overlong forms, surrogates and values above U+10FFFF are not rejected here.
+// s must be NUL-terminated: a NUL is never a continuation byte, so the checks
+// stop at the terminator instead of reading past it.
+nn_bool_t nn_unicode_isValidCodepoint(const char *s) {
+    // plain char may be signed; compare and shift the bytes as unsigned so
+    // values >= 0x80 are not seen as negative
+    const unsigned char *b = (const unsigned char *)s;
+    int continuationCount;
+    if(b[0] <= 0x7F) {
+        return true;
+    } else if((b[0] >> 5) == 0b110) {
+        continuationCount = 1;
+    } else if((b[0] >> 4) == 0b1110) {
+        continuationCount = 2;
+    } else if((b[0] >> 3) == 0b11110) {
+        continuationCount = 3;
+    } else {
+        // stray continuation byte or a byte that never appears in UTF-8
+        return false;
+    }
+    for(int i = 1; i <= continuationCount; i++) {
+        if(!nn_unicode_is_continuation(b[i])) {
+            return false;
+        }
+    }
+    return true;
+}
+
 nn_bool_t nn_unicode_validate(const char *b) {
     const unsigned char* s = (const unsigned char*)b;
     while (*s) {