From 77c4a27c4c37b3620defcab94ffd1b2f536c88cb Mon Sep 17 00:00:00 2001 From: Jules Aguillon Date: Mon, 2 Feb 2026 00:20:00 +0100 Subject: Spell checking (#1137) This adds dictionary-based spell checking to the keyboard. The keyboard looks at the word being typed and matches it against a dictionary to either complete the rest of the word or find alternative spellings. The core of this feature is implemented in cdict, which is included as a submodule in vendor/cdict. Cdict is developed at https://github.com/Julow/cdict The dictionaries are hosted at https://github.com/Julow/Unexpected-Keyboard-dictionaries/ The wordlists used to build the dictionaries are the same ones used by HeliBoard from https://codeberg.org/Helium314/aosp-dictionaries - Add an activity accessible from the launcher app that lists available dictionaries with a download button. The DictionaryListView view shows the list of available dictionaries and handles downloading and installing them. - The Dictionaries class manages installed dictionaries. Dictionaries are installed as individual files into the app's private directory. - Available dictionaries are listed in dictionaries.xml, which is generated when building Unexpected-Keyboard-dictionaries. method.xml mentions the dictionary name for each locale. --- gen_method_xml.py | 63 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 15 deletions(-) (limited to 'gen_method_xml.py') diff --git a/gen_method_xml.py b/gen_method_xml.py index 3b1962f..c01f212 100644 --- a/gen_method_xml.py +++ b/gen_method_xml.py @@ -1,3 +1,4 @@ +import xml.etree.ElementTree as ET import itertools as it # This script generates res/xml/method.xml. 
@@ -78,37 +79,69 @@ LOCALES = [ ] # The locale that is at the beginning of the list -DEFAULT_LOCALE = loc("en", "latin", "latn_qwerty_us", tag="en") +DEFAULT_LOCALE = loc("en", "latin", "latn_qwerty_us", tag="en", dictionary="en_US") -def loc_to_subtype(loc): +def parse_dictionaries(): + tree = ET.parse("res/values/dictionaries.xml") + root = tree.getroot() + return set(( it.text for it in root.findall('*[@name="dictionaries_locale"]/item') )) + +# Available dictionares of the form "de" or "de_CH". +available_dictionaries = parse_dictionaries() + +def subtype_elem(root, loc): tag = loc["tag"].replace("_", "-") extra_keys = ",extra_keys=" + loc["extra_keys"] if "extra_keys" in loc else "" - return f'' + dictionaries = ",dictionary=" + loc["dictionary"] if loc["dictionary"] != None else "" + extra_value = f'script={loc["script"]},default_layout={loc["default_layout"]}{dictionaries}{extra_keys}' + ET.SubElement(root, "subtype", attrib={ + "android:label": "%s", + "android:languageTag": tag, + "android:imeSubtypeLocale": loc["name"], + "android:imeSubtypeMode": "keyboard", + "android:isAsciiCapable": "true", + "android:imeSubtypeExtraValue": extra_value + }) -# Return locales in sorted order with the 'tag' item added. -def compute_tags(): +# Return locales in sorted order with the "tag" and "dictionary" attributes +# added. 
+def compute_attrs(): + locales_grouped = {} # Locales grouped by language tag def lang(loc): return loc["name"].split("_")[0] - locales_grouped = { k: list(v) for k, v in it.groupby(sorted(LOCALES, key=lang), lang) } + for loc in LOCALES: + locales_grouped.setdefault(lang(loc), []).append(loc) def tag(loc): if "tag" in loc: return loc["tag"] l = lang(loc) if loc["name"] == f"{l}_{l.upper()}": return l # Locales like "fr_FR" # Return a short tag when it's not shared between several locales return l if len(locales_grouped[l]) == 1 else loc["name"] - return [ dict(tag=tag(loc), **loc) for loc in LOCALES ] + def dictionary(loc): + if loc["name"] in available_dictionaries: return loc["name"] + l = lang(loc) + if l in available_dictionaries: return l + return None + def add_attrs(loc): + return dict(tag=tag(loc), dictionary=dictionary(loc), **loc) + return map(add_attrs, LOCALES) def gen(): - locales = compute_tags() - print(f""" - - - {loc_to_subtype(DEFAULT_LOCALE)} - {"\n ".join(sorted(map(loc_to_subtype, locales)))} -""") + """)) + subtype_elem(root, DEFAULT_LOCALE) + for loc in sorted(locales, key=lambda loc: loc["name"]): + subtype_elem(root, loc) + ET.indent(root) + print(ET.tostring(root, encoding="utf-8", xml_declaration=True).decode("UTF-8")) gen() -- cgit v1.2.3