* text = *textCursor, first = *text;
if (first == '\0') { return 0u; }
if (first <= 127u) { ++(*textCursor); return first; }
// Doing || sequences in order is safe due to early exit.
if ((first >> 5u) == 6u) {
if ((text[1u] >> 6u) != 2u) { ++(*textCursor); return abText_InvalidSubstitute; }
*textCursor += 2u;
return ((abTextU32) (first & 31u) << 6u) | (text[1u] & 63u);
}
if ((first >> 4u) == 14u) {
if ((text[1u] >> 6u) != 2u || (text[2u] >> 6u) != 2u) { ++(*textCursor); return abText_InvalidSubstitute; }
*textCursor += 3u;
abTextU32 cp = ((abTextU32) (first & 15u) << 12u) | ((abTextU32) (text[1u] & 63u) << 6u) | (text[2u] & 63u);
return ((cp & ~((abTextU32) 0x7ffu)) != 0xd800u) ? cp : abText_InvalidSubstitute;
}
if ((first >> 3u) == 30u) {
if ((text[1u] >> 6u) != 2u || (text[2u] >> 6u) != 2u || (text[3u] >> 6u) != 2u) { ++(*textCursor); return abText_InvalidSubstitute; }
*textCursor += 4u;
abTextU32 cp = ((abTextU32) (first & 7u) << 18u) | ((abTextU32) (text[1u] & 63u) << 12u) | ((abTextU32) (text[2u] & 63u) << 6u) | (text[3u] & 63u);
return (cp <= 0x10ffff) ? cp : abText_InvalidSubstitute;
}
++(*textCursor); return abText_InvalidSubstitute;
}
как-то так :D
/// Decodes the UTF-8 sequence that starts at byte index `*i` of `m_text`.
///
/// @param i       In/out byte index. On entry: index of the lead byte. On
///                return: index of the LAST byte consumed, so a caller loop of
///                the form `for (i = 0; i < m_text.size(); ++i)` steps to the
///                next sequence with its own `++i`.
/// @param m_text  Byte string to decode from.
/// @return The decoded code point; 0 when `*i` is at or past the end of the
///         string (`*i` is left unchanged); U+FFFD when the lead byte announces
///         more bytes than the string still holds (`*i` is moved to the last
///         byte of the string, i.e. the truncated sequence eats the remainder).
///
/// The decoder is deliberately lenient, as before: continuation bytes are not
/// validated, overlong forms and surrogates are not rejected, and the legacy
/// 5- and 6-byte forms are still decoded. A stray continuation byte
/// (0x80..0xBF) in lead position is returned as its own byte value.
uint32_t GetUtf8Char(uint32_t* i, const std::string &m_text) {
    // Accumulated lead/continuation marker bits to subtract, indexed by the
    // number of trailing bytes.
    static const uint32_t offsetsFromUTF8[6] = {
        0x00000000UL, 0x00003080UL, 0x000E2080UL,
        0x03C82080UL, 0xFA082080UL, 0x82082080UL
    };
    const uint32_t replacementChar = 0xFFFDu;

    const std::string::size_type size = m_text.size();
    const std::string::size_type start = *i;
    if (start >= size) {
        // Nothing to decode; never index past the end of the string.
        return 0u;
    }

    // Number of bytes that follow the lead byte, derived from its high bits.
    const uint8_t lead = static_cast<uint8_t>(m_text[start]);
    unsigned extraBytesToRead = 0u;
    if (lead >= 0xFCu) {
        extraBytesToRead = 5u;
    } else if (lead >= 0xF8u) {
        extraBytesToRead = 4u;
    } else if (lead >= 0xF0u) {
        extraBytesToRead = 3u;
    } else if (lead >= 0xE0u) {
        extraBytesToRead = 2u;
    } else if (lead >= 0xC0u) {
        extraBytesToRead = 1u;
    }

    // Truncated sequence: the announced trailing bytes do not all fit.
    if (extraBytesToRead >= size - start) {
        *i = static_cast<uint32_t>(size - 1u);
        return replacementChar;
    }

    // Fold the bytes together six bits at a time; the marker bits that get
    // folded in along the way are removed in one subtraction at the end.
    uint32_t ch = 0u;
    for (unsigned k = 0u; k <= extraBytesToRead; ++k) {
        ch = (ch << 6) + static_cast<uint8_t>(m_text[start + k]);
    }

    *i = static_cast<uint32_t>(start + extraBytesToRead);
    return ch - offsetsFromUTF8[extraBytesToRead];
}
Обсуждают сегодня