mirror of
https://github.com/NeoFlock/neonucleus.git
synced 2025-09-24 09:03:32 +02:00
unicode now allows invalid unicode
This commit is contained in:
parent
2621554165
commit
64a6b84b30
3
TODO.md
3
TODO.md
@ -1,10 +1,8 @@
|
|||||||
# Parity with Vanilla OC (only the stuff that makes sense for an emulator)
|
# Parity with Vanilla OC (only the stuff that makes sense for an emulator)
|
||||||
|
|
||||||
- make the `unicode` library in testLuaArch support invalid UTF-8 (WHY IS IT OK WITH THAT)
|
|
||||||
- in-memory version of `filesystem`
|
- in-memory version of `filesystem`
|
||||||
- complete the GPU implementation (screen buffers and missing methods)
|
- complete the GPU implementation (screen buffers and missing methods)
|
||||||
- complete the screen implementation (bunch of missing methods)
|
- complete the screen implementation (bunch of missing methods)
|
||||||
- support invalid UTF-8 for GPU set and fill, which should pretend the byte value is the codepoint.
|
|
||||||
- `hologram` component
|
- `hologram` component
|
||||||
- `computer` component
|
- `computer` component
|
||||||
- `modem` component
|
- `modem` component
|
||||||
@ -20,6 +18,7 @@
|
|||||||
|
|
||||||
- Rework filesystem component to pre-process paths to ensure proper sandboxing and not allow arbitrary remote file access
|
- Rework filesystem component to pre-process paths to ensure proper sandboxing and not allow arbitrary remote file access
|
||||||
- Do a huge audit at some point
|
- Do a huge audit at some point
|
||||||
|
- `nn_unicode_charWidth` appears to be bugged, look into that.
|
||||||
|
|
||||||
# The extra components
|
# The extra components
|
||||||
|
|
||||||
|
@ -242,7 +242,16 @@ void nn_eeprom_getChecksum(nn_eeprom *eeprom, void *_, nn_component *component,
|
|||||||
nn_data_crc32(buf, dataLen + codeLen, hash);
|
nn_data_crc32(buf, dataLen + codeLen, hash);
|
||||||
nn_dealloc(alloc, buf, dataCap + codeCap);
|
nn_dealloc(alloc, buf, dataCap + codeCap);
|
||||||
|
|
||||||
nn_return_string(computer, hash, sizeof(hash));
|
char encoded[8];
|
||||||
|
|
||||||
|
const char *hex = "0123456789abcdef";
|
||||||
|
for(int i = 0; i < 4; i++) {
|
||||||
|
unsigned char b = hash[i];
|
||||||
|
encoded[i*2] = hex[b >> 4];
|
||||||
|
encoded[i*2+1] = hex[b & 0xF];
|
||||||
|
}
|
||||||
|
|
||||||
|
nn_return_string(computer, encoded, sizeof(encoded));
|
||||||
|
|
||||||
nn_eeprom_readCost(component, dataLen + codeLen);
|
nn_eeprom_readCost(component, dataLen + codeLen);
|
||||||
}
|
}
|
||||||
|
@ -131,24 +131,17 @@ void nni_gpu_set(nni_gpu *gpu, void *_, nn_component *component, nn_computer *co
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int current = 0;
|
nn_size_t current = 0;
|
||||||
int len = 0;
|
|
||||||
while(s[current] != 0) {
|
while(s[current] != 0) {
|
||||||
if(nn_unicode_isValidCodepoint(s + current)) {
|
unsigned int codepoint = nn_unicode_nextCodepointPermissive(s, &current);
|
||||||
int codepoint = nn_unicode_codepointAt(s, current);
|
char buf[NN_MAXIMUM_UNICODE_BUFFER];
|
||||||
nn_setPixel(gpu->currentScreen, x, y, nni_gpu_makePixel(gpu, s + current));
|
nn_unicode_codepointToChar(buf, codepoint, NULL);
|
||||||
current += nn_unicode_codepointSize(codepoint);
|
nn_setPixel(gpu->currentScreen, x, y, nni_gpu_makePixel(gpu, buf));
|
||||||
} else {
|
|
||||||
unsigned int codepoint = (unsigned char)s[current];
|
|
||||||
nn_setPixel(gpu->currentScreen, x, y, nni_gpu_makePixel(gpu, s + current));
|
|
||||||
current++;
|
|
||||||
}
|
|
||||||
if(isVertical) {
|
if(isVertical) {
|
||||||
y++;
|
y++;
|
||||||
} else {
|
} else {
|
||||||
x++;
|
x++;
|
||||||
}
|
}
|
||||||
len++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
nn_simulateBufferedIndirect(component, 1, gpu->ctrl.screenSetsPerTick);
|
nn_simulateBufferedIndirect(component, 1, gpu->ctrl.screenSetsPerTick);
|
||||||
@ -297,12 +290,9 @@ void nni_gpu_fill(nni_gpu *gpu, void *_, nn_component *component, nn_computer *c
|
|||||||
nn_setCError(computer, "bad argument #5 (character expected)");
|
nn_setCError(computer, "bad argument #5 (character expected)");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if(!nn_unicode_validate(s)) {
|
|
||||||
nn_setCError(computer, "invalid utf-8");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
int codepoint = nn_unicode_codepointAt(s, 0);
|
nn_size_t startIdx = 0;
|
||||||
|
int codepoint = nn_unicode_nextCodepointPermissive(s, &startIdx);
|
||||||
|
|
||||||
// prevent DoS
|
// prevent DoS
|
||||||
if(x < 0) x = 0;
|
if(x < 0) x = 0;
|
||||||
|
@ -319,6 +319,16 @@ unsigned int nn_unicode_lowerCodepoint(unsigned int codepoint);
|
|||||||
// returned string must be nn_deallocStr()'d
|
// returned string must be nn_deallocStr()'d
|
||||||
char *nn_unicode_lower(nn_Alloc *alloc, const char *s);
|
char *nn_unicode_lower(nn_Alloc *alloc, const char *s);
|
||||||
|
|
||||||
|
// permissive means it allows invalid UTF-8, in which case each byte is treated as a codepoint
|
||||||
|
|
||||||
|
// it returns the codepoint starting at byte *index and advances *index to the first byte after that codepoint
|
||||||
|
// since it is permissive, it supports invalid UTF-8
|
||||||
|
unsigned int nn_unicode_nextCodepointPermissive(const char *s, nn_size_t *index);
|
||||||
|
nn_size_t nn_unicode_lenPermissive(const char *s);
|
||||||
|
nn_size_t nn_unicode_wlenPermissive(const char *s);
|
||||||
|
// if not found, it will return -1. This is why it is an nn_intptr_t
|
||||||
|
nn_intptr_t nn_unicode_indexPermissive(const char *s, nn_size_t codepointIndex);
|
||||||
|
|
||||||
// Data card stuff
|
// Data card stuff
|
||||||
|
|
||||||
// Hashing
|
// Hashing
|
||||||
|
@ -400,9 +400,16 @@ sandbox = {
|
|||||||
|
|
||||||
utf8 = copy(utf8),
|
utf8 = copy(utf8),
|
||||||
unicode = copy(unicode, {
|
unicode = copy(unicode, {
|
||||||
isWide = function(s) return unicode.wlen(s) > unicode.len(s) end,
|
isWide = function(s)
|
||||||
|
local c = unicode.sub(s, 1, 1)
|
||||||
|
return unicode.wlen(c) > unicode.len(c)
|
||||||
|
end,
|
||||||
upper = string.upper,
|
upper = string.upper,
|
||||||
lower = string.lower,
|
lower = string.lower,
|
||||||
|
wtrunc = function (str,space)
|
||||||
|
space = space - 1
|
||||||
|
return str:sub(1,(space >= utf8.len(str)) and (#str) or (utf8.offset(str,space+1)-1))
|
||||||
|
end,
|
||||||
}),
|
}),
|
||||||
checkArg = checkArg,
|
checkArg = checkArg,
|
||||||
component = libcomponent,
|
component = libcomponent,
|
||||||
|
@ -469,10 +469,7 @@ int testLuaArch_unicode_sub(lua_State *L) {
|
|||||||
const char *s = luaL_checkstring(L, 1);
|
const char *s = luaL_checkstring(L, 1);
|
||||||
nn_Alloc *alloc = testLuaArch_getAlloc(L);
|
nn_Alloc *alloc = testLuaArch_getAlloc(L);
|
||||||
int start = luaL_checkinteger(L, 2);
|
int start = luaL_checkinteger(L, 2);
|
||||||
if(!nn_unicode_validate(s)) {
|
int len = nn_unicode_lenPermissive(s);
|
||||||
luaL_error(L, "invalid utf-8");
|
|
||||||
}
|
|
||||||
int len = nn_unicode_len(s);
|
|
||||||
if(len < 0) {
|
if(len < 0) {
|
||||||
luaL_error(L, "length overflow");
|
luaL_error(L, "length overflow");
|
||||||
}
|
}
|
||||||
@ -503,22 +500,9 @@ int testLuaArch_unicode_sub(lua_State *L) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// there is a way to do it without an allocation
|
nn_size_t startByte = nn_unicode_indexPermissive(s, start - 1);
|
||||||
// however, I'm lazy
|
nn_size_t termByte = nn_unicode_indexPermissive(s, stop);
|
||||||
size_t pointLen;
|
const char *res = testLuaArch_pushlstring(L, s + startByte, termByte - startByte);
|
||||||
unsigned int *points = nn_unicode_codepoints(alloc, s, &pointLen);
|
|
||||||
if(points == NULL) {
|
|
||||||
luaL_error(L, "out of memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
char *sub = nn_unicode_char(alloc, points + start - 1, stop - start + 1);
|
|
||||||
if(sub == NULL) {
|
|
||||||
nn_dealloc(alloc, points, sizeof(unsigned int) * pointLen);
|
|
||||||
luaL_error(L, "out of memory");
|
|
||||||
}
|
|
||||||
const char *res = testLuaArch_pushstring(L, sub);
|
|
||||||
nn_deallocStr(alloc, sub);
|
|
||||||
nn_dealloc(alloc, points, sizeof(unsigned int) * pointLen);
|
|
||||||
if (!res) {
|
if (!res) {
|
||||||
luaL_error(L, "out of memory");
|
luaL_error(L, "out of memory");
|
||||||
}
|
}
|
||||||
@ -555,10 +539,13 @@ int testLuaArch_unicode_char(lua_State *L) {
|
|||||||
|
|
||||||
// Lua binding for unicode.len(s).
// Pushes the number of codepoints in s as counted by nn_unicode_lenPermissive,
// so malformed UTF-8 no longer raises an error.
int testLuaArch_unicode_len(lua_State *L) {
    const char *s = luaL_checkstring(L, 1);
    lua_pushinteger(L, nn_unicode_lenPermissive(s));
    return 1;
}

// Lua binding for unicode.wlen(s).
// Pushes the width of s as computed by nn_unicode_wlenPermissive.
// This must not reuse nn_unicode_lenPermissive: the sandbox's isWide helper
// compares wlen against len and could never report a wide character otherwise.
int testLuaArch_unicode_wlen(lua_State *L) {
    const char *s = luaL_checkstring(L, 1);
    lua_pushinteger(L, nn_unicode_wlenPermissive(s));
    return 1;
}
|
||||||
|
|
||||||
@ -653,7 +640,7 @@ void testLuaArch_loadEnv(lua_State *L) {
|
|||||||
lua_setfield(L, unicode, "sub");
|
lua_setfield(L, unicode, "sub");
|
||||||
lua_pushcfunction(L, testLuaArch_unicode_len);
|
lua_pushcfunction(L, testLuaArch_unicode_len);
|
||||||
lua_setfield(L, unicode, "len");
|
lua_setfield(L, unicode, "len");
|
||||||
lua_pushcfunction(L, testLuaArch_unicode_len);
|
lua_pushcfunction(L, testLuaArch_unicode_wlen);
|
||||||
lua_setfield(L, unicode, "wlen");
|
lua_setfield(L, unicode, "wlen");
|
||||||
lua_pushcfunction(L, testLuaArch_unicode_char);
|
lua_pushcfunction(L, testLuaArch_unicode_char);
|
||||||
lua_setfield(L, unicode, "char");
|
lua_setfield(L, unicode, "char");
|
||||||
|
@ -329,7 +329,7 @@ nn_size_t nn_unicode_codepointSize(unsigned int codepoint) {
|
|||||||
|
|
||||||
void nn_unicode_codepointToChar(char *buffer, unsigned int codepoint, nn_size_t *len) {
|
void nn_unicode_codepointToChar(char *buffer, unsigned int codepoint, nn_size_t *len) {
|
||||||
nn_size_t codepointSize = nn_unicode_codepointSize(codepoint);
|
nn_size_t codepointSize = nn_unicode_codepointSize(codepoint);
|
||||||
*len = codepointSize;
|
if(len != NULL) *len = codepointSize;
|
||||||
|
|
||||||
nn_memset(buffer, 0, 4); // Clear static array
|
nn_memset(buffer, 0, 4); // Clear static array
|
||||||
|
|
||||||
@ -391,3 +391,45 @@ unsigned int nn_unicode_upperCodepoint(unsigned int codepoint);
|
|||||||
char *nn_unicode_upper(nn_Alloc *alloc, const char *s);
|
char *nn_unicode_upper(nn_Alloc *alloc, const char *s);
|
||||||
unsigned int nn_unicode_lowerCodepoint(unsigned int codepoint);
|
unsigned int nn_unicode_lowerCodepoint(unsigned int codepoint);
|
||||||
char *nn_unicode_lower(nn_Alloc *alloc, const char *s);
|
char *nn_unicode_lower(nn_Alloc *alloc, const char *s);
|
||||||
|
|
||||||
|
// Reads one codepoint from s starting at byte *index and moves *index past it.
//
// When nn_unicode_isValidCodepoint accepts the bytes at that position, the
// decoded codepoint is returned. Otherwise the single byte at *index is
// returned as if it were the codepoint, and *index advances by one.
//
// s:     NUL-terminated string; it may contain malformed UTF-8.
// index: in/out byte offset into s; must not be NULL.
unsigned int nn_unicode_nextCodepointPermissive(const char *s, nn_size_t *index) {
    nn_size_t i = *index;
    unsigned char lead = (unsigned char)s[i];

    if(!nn_unicode_isValidCodepoint(s + i)) {
        // Malformed sequence: the raw byte value stands in for the codepoint.
        *index = i + 1;
        return lead;
    }

    unsigned int p = nn_unicode_codepointAt(s, i);

    // Advance by the length the lead byte announces rather than by the minimal
    // encoding size of the decoded value. The two only differ for a
    // non-shortest ("suboptimal") encoding, where the minimal size would leave
    // the cursor in the middle of the sequence that was just consumed.
    nn_size_t size;
    if(lead < 0x80) {
        size = 1;
    } else if((lead & 0xE0) == 0xC0) {
        size = 2;
    } else if((lead & 0xF0) == 0xE0) {
        size = 3;
    } else if((lead & 0xF8) == 0xF0) {
        size = 4;
    } else {
        // Unexpected lead byte for an accepted sequence: keep the previous behaviour.
        size = nn_unicode_codepointSize(p);
    }

    *index = i + size;
    return p;
}
|
||||||
|
|
||||||
|
// Counts how many nn_unicode_nextCodepointPermissive steps it takes to walk
// from the start of b to its NUL terminator.
nn_size_t nn_unicode_lenPermissive(const char *b) {
    nn_size_t count = 0;
    for(nn_size_t pos = 0; b[pos] != '\0'; count++) {
        // Only the cursor movement matters here; the decoded value is ignored.
        nn_unicode_nextCodepointPermissive(b, &pos);
    }
    return count;
}
|
||||||
|
|
||||||
|
// Sums nn_unicode_charWidth over every codepoint that
// nn_unicode_nextCodepointPermissive yields while walking s up to its NUL
// terminator.
nn_size_t nn_unicode_wlenPermissive(const char *s) {
    nn_size_t total = 0;
    nn_size_t pos = 0;
    for(;;) {
        if(s[pos] == '\0') break;
        unsigned int cp = nn_unicode_nextCodepointPermissive(s, &pos);
        total += nn_unicode_charWidth(cp);
    }
    return total;
}
|
||||||
|
|
||||||
|
// Converts a 0-based codepoint index into a byte offset within s, stepping
// through the string with nn_unicode_nextCodepointPermissive.
//
// Returns the byte offset at which codepoint number codepointIndex begins.
// An index equal to the number of codepoints yields the offset of the NUL
// terminator, so the result can serve as an exclusive end position.
// Returns -1 when s ends before codepointIndex codepoints have been consumed.
nn_intptr_t nn_unicode_indexPermissive(const char *s, nn_size_t codepointIndex) {
    nn_size_t bytes = 0;
    while(codepointIndex > 0) {
        // Stop at the terminator instead of decoding past the end of the string.
        if(s[bytes] == '\0') return -1;
        nn_unicode_nextCodepointPermissive(s, &bytes);
        codepointIndex--;
    }
    return (nn_intptr_t)bytes;
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user