One hack is to assume that all bytes above 127 in a UTF-8 encoded string are letters. It mostly works :-)
Am I misunderstanding you? I've always thought that's what the mbtowc(3) family of functions was for.
if (isalpha(*s)) { *d++ = *s++; while (isalnum(*s)) *d++ = *s++; }
if (iswalpha(decode(&s))) { encode(&d, advance(&s)); while (iswalnum(decode(&s))) encode(&d, advance(&s)); }
if (iswalpha(c = utf8(&s, &n))) { encode(&d, c); s = n; while (iswalnum(c = utf8(&s, &n))) { encode(&d, c); s = n; } }
if ('A' == utf8(&s, &t) && 'B' == utf8(&t, &s) && 'C' == utf8(&s, &t)) // we have 'ABC'.