From 77c4a27c4c37b3620defcab94ffd1b2f536c88cb Mon Sep 17 00:00:00 2001
From: Jules Aguillon
Date: Mon, 2 Feb 2026 00:20:00 +0100
Subject: Spell checking (#1137)
This adds dictionary-based spell checking to the keyboard. The keyboard looks at the word being typed and matches it against a dictionary to either complete the rest of the word or find alternative spellings.
The core of this feature is implemented in cdict, which is included as a
submodule in vendor/cdict.
Cdict is developed at https://github.com/Julow/cdict
The dictionaries are hosted at https://github.com/Julow/Unexpected-Keyboard-dictionaries/
The wordlists used to build the dictionaries are the same ones used by
HeliBoard from https://codeberg.org/Helium314/aosp-dictionaries
- Add an activity accessible from the launcher app that lists available
dictionaries with a download button.
The DictionaryListView view shows the list of available dictionaries and
handles downloading and installing them.
- The Dictionaries class manages installed dictionaries. Dictionaries are
installed as individual files into the app's private directory.
- Available dictionaries are listed in dictionaries.xml, which is generated
when building Unexpected-Keyboard-dictionaries.
method.xml mentions the dictionary name for each locale.
---
gen_method_xml.py | 63 ++++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 48 insertions(+), 15 deletions(-)
(limited to 'gen_method_xml.py')
diff --git a/gen_method_xml.py b/gen_method_xml.py
index 3b1962f..c01f212 100644
--- a/gen_method_xml.py
+++ b/gen_method_xml.py
@@ -1,3 +1,4 @@
+import xml.etree.ElementTree as ET
import itertools as it
# This script generates res/xml/method.xml.
@@ -78,37 +79,69 @@ LOCALES = [
]
# The locale that is at the beginning of the list
-DEFAULT_LOCALE = loc("en", "latin", "latn_qwerty_us", tag="en")
+DEFAULT_LOCALE = loc("en", "latin", "latn_qwerty_us", tag="en", dictionary="en_US")
-def loc_to_subtype(loc):
+def parse_dictionaries():
+ tree = ET.parse("res/values/dictionaries.xml")
+ root = tree.getroot()
+ return set(( it.text for it in root.findall('*[@name="dictionaries_locale"]/item') ))
+
+# Available dictionaries of the form "de" or "de_CH".
+available_dictionaries = parse_dictionaries()
+
+def subtype_elem(root, loc):
tag = loc["tag"].replace("_", "-")
extra_keys = ",extra_keys=" + loc["extra_keys"] if "extra_keys" in loc else ""
- return f''
+ dictionaries = ",dictionary=" + loc["dictionary"] if loc["dictionary"] != None else ""
+ extra_value = f'script={loc["script"]},default_layout={loc["default_layout"]}{dictionaries}{extra_keys}'
+ ET.SubElement(root, "subtype", attrib={
+ "android:label": "%s",
+ "android:languageTag": tag,
+ "android:imeSubtypeLocale": loc["name"],
+ "android:imeSubtypeMode": "keyboard",
+ "android:isAsciiCapable": "true",
+ "android:imeSubtypeExtraValue": extra_value
+ })
-# Return locales in sorted order with the 'tag' item added.
-def compute_tags():
+# Return locales in sorted order with the "tag" and "dictionary" attributes
+# added.
+def compute_attrs():
+ locales_grouped = {} # Locales grouped by language tag
def lang(loc):
return loc["name"].split("_")[0]
- locales_grouped = { k: list(v) for k, v in it.groupby(sorted(LOCALES, key=lang), lang) }
+ for loc in LOCALES:
+ locales_grouped.setdefault(lang(loc), []).append(loc)
def tag(loc):
if "tag" in loc: return loc["tag"]
l = lang(loc)
if loc["name"] == f"{l}_{l.upper()}": return l # Locales like "fr_FR"
# Return a short tag when it's not shared between several locales
return l if len(locales_grouped[l]) == 1 else loc["name"]
- return [ dict(tag=tag(loc), **loc) for loc in LOCALES ]
+ def dictionary(loc):
+ if loc["name"] in available_dictionaries: return loc["name"]
+ l = lang(loc)
+ if l in available_dictionaries: return l
+ return None
+ def add_attrs(loc):
+ return dict(tag=tag(loc), dictionary=dictionary(loc), **loc)
+ return map(add_attrs, LOCALES)
def gen():
- locales = compute_tags()
- print(f"""
-
-
- {loc_to_subtype(DEFAULT_LOCALE)}
- {"\n ".join(sorted(map(loc_to_subtype, locales)))}
-""")
+ """))
+ subtype_elem(root, DEFAULT_LOCALE)
+ for loc in sorted(locales, key=lambda loc: loc["name"]):
+ subtype_elem(root, loc)
+ ET.indent(root)
+ print(ET.tostring(root, encoding="utf-8", xml_declaration=True).decode("UTF-8"))
gen()
--
cgit v1.2.3