diff options
| author | Quinn Cypher | 2024-05-08 07:02:19 -0400 |
|---|---|---|
| committer | GitHub | 2024-05-08 13:02:19 +0200 |
| commit | a91332a9030197f1f43a21d8ba6b4fd0aa279d85 (patch) | |
| tree | ec2dab094acd967481179ebe0103ab311b268c30 /gen_emoji.py | |
| parent | 53e04d57843b94cb89ad1d01bddbe19a98f7562d (diff) | |
| download | unexpected-keyboard-a91332a9030197f1f43a21d8ba6b4fd0aa279d85.tar.gz unexpected-keyboard-a91332a9030197f1f43a21d8ba6b4fd0aa279d85.zip | |
Pull the emoji list from unicode.org (#612)
- Removing unused information (names and descriptions) from the Emoji class
- Creating a Gradle task that generates a more efficient res/raw/emojis.txt file from the most recent Unicode standard
- Saving recently used emoji preferences as emoji values rather than names
- Migrating old user preferences to the new system
Diffstat (limited to 'gen_emoji.py')
| -rw-r--r-- | gen_emoji.py | 38 |
1 files changed, 38 insertions, 0 deletions
"""Generate res/raw/emojis.txt from the Unicode emoji-test.txt data file.

The output file contains one fully-qualified emoji per line, followed by an
empty line and a single line of space-separated integers. Each integer is the
index (into the emoji list) at which a new group starts.
"""

import urllib.request
import os.path

EMOJIS_PATH = 'res/raw/emojis.txt'
EMOJI_TEST_PATH = 'emoji-test.txt'
EMOJI_TEST_URL = 'https://unicode.org/Public/emoji/latest/emoji-test.txt'


def rawEmojiFromCodes(codes):
    """Return the string built from an iterable of hexadecimal code points.

    Example: ['263A', 'FE0F'] -> '\\u263a\\ufe0f'.
    Raises ValueError if an element is not a valid hexadecimal number.
    """
    return ''.join(chr(int(c, 16)) for c in codes)


def getEmojiTestContents():
    """Return the text of emoji-test.txt, downloading it if it is not cached.

    The file is looked up at EMOJI_TEST_PATH; when absent it is fetched from
    EMOJI_TEST_URL and stored there for later runs.
    """
    if os.path.exists(EMOJI_TEST_PATH):
        print(f'Using existing {EMOJI_TEST_PATH}')
    else:
        print(f'Downloading {EMOJI_TEST_URL}')
        # Download next to the final path and rename afterwards so that an
        # interrupted download is never mistaken for a complete cached file.
        partial_path = EMOJI_TEST_PATH + '.part'
        urllib.request.urlretrieve(EMOJI_TEST_URL, partial_path)
        os.replace(partial_path, EMOJI_TEST_PATH)
    # Use a context manager so the file handle is closed deterministically.
    with open(EMOJI_TEST_PATH, mode='r', encoding='UTF-8') as emoji_test:
        return emoji_test.read()


def parseEmojiList(contents):
    """Parse emoji-test.txt text into (emoji_list, group_indices).

    emoji_list holds every emoji whose status field is 'fully-qualified', in
    file order. group_indices holds, for each '# group:' header that is
    followed by at least one such emoji, the index of its first emoji.
    """
    emoji_list = []
    group_indices = []
    for line in contents.splitlines():
        if line.startswith('# group:'):
            # Skip groups that contributed no emoji, so that two consecutive
            # headers never record the same start index twice.
            if not group_indices or len(emoji_list) > group_indices[-1]:
                group_indices.append(len(emoji_list))
            continue
        if line.startswith('#'):
            continue
        # Data lines look like: "<code points> ; <status> # <comment>".
        codes, _, rest = line.partition(';')
        # Compare the status field itself rather than searching the whole
        # line, so text in the trailing comment cannot cause a false match.
        status = rest.partition('#')[0].strip()
        if status == 'fully-qualified':
            emoji_list.append(rawEmojiFromCodes(codes.split()))
    return emoji_list, group_indices


def writeEmojis(emoji_list, group_indices, path=EMOJIS_PATH):
    """Write the emoji list and the group start indices to *path*."""
    with open(path, 'w', encoding='UTF-8') as emojis:
        for e in emoji_list:
            emojis.write(f'{e}\n')
        # An empty line separates the emoji list from the group indices.
        emojis.write('\n')

        emojis.write(' '.join(str(g) for g in group_indices))
        emojis.write('\n')


def main():
    """Regenerate EMOJIS_PATH from the (possibly downloaded) emoji-test.txt."""
    emoji_list, group_indices = parseEmojiList(getEmojiTestContents())
    writeEmojis(emoji_list, group_indices)
    print(f'Parsed {len(emoji_list)} emojis in {len(group_indices)} groups')


if __name__ == '__main__':
    main()
