// istuf8 - what are the odds of random bytes being valid UTF-8?
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
// doas pkg_add libutf8proc
#include <libutf8proc/utf8proc.h>
double
byteaudit(size_t len, size_t trials)
{
size_t pass = 0;
assert(len > 0);
utf8proc_int32_t ref;
utf8proc_uint8_t *in = malloc(len);
if (!in) abort();
for (size_t i = 0; i < trials; i++) {
// PORTABILITY this may need libbsd or something else
arc4random_buf(in, len);
utf8proc_uint8_t *ip = in;
utf8proc_ssize_t left = (utf8proc_ssize_t) len;
while (1) {
utf8proc_ssize_t ret = utf8proc_iterate(ip, left, &ref);
if (ret < 1) break;
ip += ret;
left -= ret;
if (left == 0) {
pass++;
break;
}
}
}
free(in);
return (double) pass / (double) trials * 100.0;
}
// For each buffer length from 1 to 8 bytes, print the length followed by
// the percentage byteaudit() reports over ten million random trials.
int
main(void)
{
	for (size_t i = 1; i <= 8; i++) {
		// %zu is the conversion for size_t; %lu is only correct on
		// platforms where size_t happens to be unsigned long.
		printf("%zu %.2lf\n", i, byteaudit(i, 10000000U));
	}
	return 0;
}
Response:
20 (Success), text/plain