void TestUtilities::testWrapRows()
{
UASSERT(wrap_rows("12345678",4) == "1234\n5678");
+ // test that wrap_rows doesn't wrap inside multibyte sequences
+ const unsigned char s[] = {
+ 0x2f, 0x68, 0x6f, 0x6d, 0x65, 0x2f, 0x72, 0x61, 0x70, 0x74, 0x6f,
+ 0x72, 0x2f, 0xd1, 0x82, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0x2f,
+ 0x6d, 0x69, 0x6e, 0x65, 0x74, 0x65, 0x73, 0x74, 0x2f, 0x62, 0x69,
+ 0x6e, 0x2f, 0x2e, 0x2e, 0};
+ std::string str((char *)s);
+ UASSERT(utf8_to_wide(wrap_rows(str, 20)) != L"<invalid UTF-8 string>");
}
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)
+// Checks whether a byte is an inner byte for an utf-8 multibyte sequence
+#define IS_UTF8_MULTB_INNER(x) (((unsigned char)x >= 0x80) && ((unsigned char)x <= 0xc0))
+
typedef std::map<std::string, std::string> StringMap;
struct FlagDesc {
* every \p row_len characters whether it breaks a word or not. It is
* intended to be used for, for example, showing paths in the GUI.
*
- * @param from The string to be wrapped into rows.
+ * @note This function doesn't wrap inside utf-8 multibyte sequences and also
+ * counts multibyte sequences correcly as single characters.
+ *
+ * @param from The (utf-8) string to be wrapped into rows.
* @param row_len The row length (in characters).
* @return A new string with the wrapping applied.
*/
{
std::string to;
+ size_t character_idx = 0;
for (size_t i = 0; i < from.size(); i++) {
- if (i != 0 && i % row_len == 0)
+ if (character_idx > 0 && character_idx % row_len == 0)
to += '\n';
+ if (!IS_UTF8_MULTB_INNER(from[i]))
+ character_idx++;
to += from[i];
}