www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
vt: support Unicode recomposition
author: Nicolas Pitre <npitre@baylibre.com>
Thu, 17 Apr 2025 18:45:11 +0000 (14:45 -0400)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 26 Apr 2025 09:22:04 +0000 (11:22 +0200)
Try replacing any decomposed Unicode sequence by the corresponding
recomposed code point. Code point to glyph correspondence works best
after recomposition, and this applies mostly to single-width code points;
therefore we can't preserve them in their decomposed form anyway.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20250417184849.475581-10-nico@fluxnic.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/tty/vt/ucs.c
drivers/tty/vt/vt.c
include/linux/consolemap.h

index 5f9f25bd201b2b871e4d901f6263333804a52984..bf25d63cea61a18fdfd79be55f3f8f38217c1e41 100644 (file)
@@ -59,3 +59,65 @@ bool ucs_is_double_width(u32 cp)
        return cp_in_range(cp, ucs_double_width_ranges,
                           ARRAY_SIZE(ucs_double_width_ranges));
 }
+
+/*
+ * Table entry: a base + combining mark pair and the code point it recomposes
+ * to. Using u16 to save space since all values are within BMP range.
+ */
+struct ucs_recomposition {
+       u16 base;       /* base character (first code point of the pair) */
+       u16 mark;       /* combining mark that follows the base */
+       u16 recomposed; /* single code point replacing base + mark */
+};
+
+#include "ucs_recompose_table.h"
+
+struct compare_key {   /* search key handed to recomposition_cmp() */
+       u16 base;       /* base character being looked up */
+       u16 mark;       /* combining mark being looked up */
+};
+
+static int recomposition_cmp(const void *key, const void *element)
+{
+       const struct compare_key *search_key = key;     /* pair being searched for */
+       const struct ucs_recomposition *entry = element;        /* table entry under test */
+
+       /* Compare base character first: it is the primary ordering key */
+       if (search_key->base < entry->base)
+               return -1;
+       if (search_key->base > entry->base)
+               return 1;
+
+       /* Base characters match, so the combining mark breaks the tie */
+       if (search_key->mark < entry->mark)
+               return -1;
+       if (search_key->mark > entry->mark)
+               return 1;
+
+       /* Both match: this entry holds the recomposition for the key */
+       return 0;
+}
+
+/**
+ * ucs_recompose() - Attempt to recompose two Unicode characters into a single character.
+ * @base: Base Unicode code point (UCS-4)
+ * @mark: Combining mark Unicode code point (UCS-4)
+ *
+ * Return: Recomposed Unicode code point, or 0 if no recomposition is possible
+ */
+u32 ucs_recompose(u32 base, u32 mark)
+{
+       /* Inclusive MIN..MAX check; in_range() wants (start, len), not (min, max) */
+       if (base < UCS_RECOMPOSE_MIN_BASE || base > UCS_RECOMPOSE_MAX_BASE ||
+           mark < UCS_RECOMPOSE_MIN_MARK || mark > UCS_RECOMPOSE_MAX_MARK)
+               return 0;
+
+       struct compare_key key = { base, mark };        /* narrowed to u16 after the check */
+       const struct ucs_recomposition *result =
+               __inline_bsearch(&key, ucs_recomposition_table,
+                                ARRAY_SIZE(ucs_recomposition_table),
+                                sizeof(*ucs_recomposition_table),
+                                recomposition_cmp);
+
+       return result ? result->recomposed : 0;
+}
index a989feffad5e97a655e9e8590f4c237160b0b7b2..76554c2040bfc590d373615240a503f9d6ee2ec2 100644 (file)
@@ -2925,9 +2925,9 @@ static void vc_con_rewind(struct vc_data *vc)
 
 #define UCS_VS16       0xfe0f  /* Variation Selector 16 */
 
-static int vc_process_ucs(struct vc_data *vc, int c, int *tc)
+static int vc_process_ucs(struct vc_data *vc, int *c, int *tc)
 {
-       u32 prev_c, curr_c = c;
+       u32 prev_c, curr_c = *c;
 
        if (ucs_is_double_width(curr_c))
                return 2;
@@ -2964,6 +2964,14 @@ static int vc_process_ucs(struct vc_data *vc, int c, int *tc)
                return 1;
        }
 
+       /* try recomposition */
+       prev_c = ucs_recompose(prev_c, curr_c);
+       if (prev_c != 0) {
+               vc_con_rewind(vc);
+               *tc = *c = prev_c;
+               return 1;
+       }
+
        /* Otherwise zero-width code points are ignored. */
        return 0;
 }
@@ -2978,7 +2986,7 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c,
        bool inverse = false;
 
        if (vc->vc_utf && !vc->vc_disp_ctrl) {
-               width = vc_process_ucs(vc, c, &tc);
+               width = vc_process_ucs(vc, &c, &tc);
                if (!width)
                        goto out;
        }
index b3a911866662d918527d8beaec350c1b00039916..8167494229db65df5ed6df985d88c3041f24d832 100644 (file)
@@ -30,6 +30,7 @@ int conv_uni_to_8bit(u32 uni);
 void console_map_init(void);
 bool ucs_is_double_width(uint32_t cp);
 bool ucs_is_zero_width(uint32_t cp);
+u32 ucs_recompose(u32 base, u32 mark);
 #else
 static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph,
                bool use_unicode)
@@ -69,6 +70,11 @@ static inline bool ucs_is_zero_width(uint32_t cp)
 {
        return false;
 }
+
+static inline u32 ucs_recompose(u32 base, u32 mark)
+{
+       return 0;       /* stub for !CONFIG_CONSOLE_TRANSLATIONS: never recomposes */
+}
 #endif /* CONFIG_CONSOLE_TRANSLATIONS */
 
 #endif /* __LINUX_CONSOLEMAP_H__ */