Skip to content

Commit

Permalink
Detect scripts in non-BMP code blocks
Browse files: browse the repository at this point in the history
Enabled local glyph rendering in some additional CJKV code blocks, as well as in Tangut, which, like CJKV, is prone to thrashing. Disabled letter spacing for additional cursive scripts.
  • Loading branch information
1ec5 committed Aug 13, 2024
1 parent 22bdf94 commit c3ec76d
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 50 deletions.
9 changes: 8 additions & 1 deletion src/render/glyph_manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -166,9 +166,16 @@ export class GlyphManager {
unicodeBlockLookup['Hangul Syllables'](id) ||
unicodeBlockLookup['Hiragana'](id) ||
unicodeBlockLookup['Ideographic Description Characters'](id) ||
unicodeBlockLookup['Kana Extended-A'](id) ||
unicodeBlockLookup['Kana Extended-B'](id) ||
unicodeBlockLookup['Kana Supplement'](id) ||
unicodeBlockLookup['Kangxi Radicals'](id) ||
unicodeBlockLookup['Katakana'](id) ||
unicodeBlockLookup['Katakana Phonetic Extensions'](id));
unicodeBlockLookup['Katakana Phonetic Extensions'](id) ||
unicodeBlockLookup['Small Kana Extension'](id) ||
unicodeBlockLookup['Tangut'](id) ||
unicodeBlockLookup['Tangut Components'](id) ||
unicodeBlockLookup['Tangut Supplement'](id));
/* eslint-enable new-cap */
}

Expand Down
98 changes: 49 additions & 49 deletions src/util/is_char_in_unicode_block.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ export const unicodeBlockLookup: UnicodeBlockLookup = {
// 'Armenian': (char) => char >= 0x0530 && char <= 0x058F,
//'Hebrew': (char) => char >= 0x0590 && char <= 0x05FF,
'Arabic': (char) => char >= 0x0600 && char <= 0x06FF,
//'Syriac': (char) => char >= 0x0700 && char <= 0x074F,
'Syriac': (char) => char >= 0x0700 && char <= 0x074F,
'Arabic Supplement': (char) => char >= 0x0750 && char <= 0x077F,
// 'Thaana': (char) => char >= 0x0780 && char <= 0x07BF,
// 'NKo': (char) => char >= 0x07C0 && char <= 0x07FF,
// 'Samaritan': (char) => char >= 0x0800 && char <= 0x083F,
// 'Mandaic': (char) => char >= 0x0840 && char <= 0x085F,
// 'Syriac Supplement': (char) => char >= 0x0860 && char <= 0x086F,
'Syriac Supplement': (char) => char >= 0x0860 && char <= 0x086F,
'Arabic Extended-B': (char) => char >= 0x0870 && char <= 0x089F,
'Arabic Extended-A': (char) => char >= 0x08A0 && char <= 0x08FF,
// 'Devanagari': (char) => char >= 0x0900 && char <= 0x097F,
Expand Down Expand Up @@ -53,7 +53,7 @@ export const unicodeBlockLookup: UnicodeBlockLookup = {
// 'Buhid': (char) => char >= 0x1740 && char <= 0x175F,
// 'Tagbanwa': (char) => char >= 0x1760 && char <= 0x177F,
'Khmer': (char) => char >= 0x1780 && char <= 0x17FF,
// 'Mongolian': (char) => char >= 0x1800 && char <= 0x18AF,
'Mongolian': (char) => char >= 0x1800 && char <= 0x18AF,
'Unified Canadian Aboriginal Syllabics Extended': (char) => char >= 0x18B0 && char <= 0x18FF,
// 'Limbu': (char) => char >= 0x1900 && char <= 0x194F,
// 'Tai Le': (char) => char >= 0x1950 && char <= 0x197F,
Expand Down Expand Up @@ -199,7 +199,7 @@ export const unicodeBlockLookup: UnicodeBlockLookup = {
// 'Hatran': (char) => char >= 0x108E0 && char <= 0x108FF,
// 'Phoenician': (char) => char >= 0x10900 && char <= 0x1091F,
// 'Lydian': (char) => char >= 0x10920 && char <= 0x1093F,
// 'Meroitic Hieroglyphs': (char) => char >= 0x10980 && char <= 0x1099F,
'Meroitic Hieroglyphs': (char) => char >= 0x10980 && char <= 0x1099F,
// 'Meroitic Cursive': (char) => char >= 0x109A0 && char <= 0x109FF,
// 'Kharoshthi': (char) => char >= 0x10A00 && char <= 0x10A5F,
// 'Old South Arabian': (char) => char >= 0x10A60 && char <= 0x10A7F,
Expand All @@ -218,7 +218,7 @@ export const unicodeBlockLookup: UnicodeBlockLookup = {
// 'Arabic Extended-C': (char) => char >= 0x10EC0 && char <= 0x10EFF,
// 'Old Sogdian': (char) => char >= 0x10F00 && char <= 0x10F2F,
// 'Sogdian': (char) => char >= 0x10F30 && char <= 0x10F6F,
// 'Old Uyghur': (char) => char >= 0x10F70 && char <= 0x10FAF,
'Old Uyghur': (char) => char >= 0x10F70 && char <= 0x10FAF,
// 'Chorasmian': (char) => char >= 0x10FB0 && char <= 0x10FDF,
// 'Elymaic': (char) => char >= 0x10FE0 && char <= 0x10FFF,
// 'Brahmi': (char) => char >= 0x11000 && char <= 0x1107F,
Expand All @@ -235,19 +235,19 @@ export const unicodeBlockLookup: UnicodeBlockLookup = {
// 'Tulu-Tigalari': (char) => char >= 0x11380 && char <= 0x113FF,
// 'Newa': (char) => char >= 0x11400 && char <= 0x1147F,
// 'Tirhuta': (char) => char >= 0x11480 && char <= 0x114DF,
// 'Siddham': (char) => char >= 0x11580 && char <= 0x115FF,
'Siddham': (char) => char >= 0x11580 && char <= 0x115FF,
// 'Modi': (char) => char >= 0x11600 && char <= 0x1165F,
// 'Mongolian Supplement': (char) => char >= 0x11660 && char <= 0x1167F,
'Mongolian Supplement': (char) => char >= 0x11660 && char <= 0x1167F,
// 'Takri': (char) => char >= 0x11680 && char <= 0x116CF,
// 'Myanmar Extended-C': (char) => char >= 0x116D0 && char <= 0x116FF,
// 'Ahom': (char) => char >= 0x11700 && char <= 0x1174F,
// 'Dogra': (char) => char >= 0x11800 && char <= 0x1184F,
// 'Warang Citi': (char) => char >= 0x118A0 && char <= 0x118FF,
// 'Dives Akuru': (char) => char >= 0x11900 && char <= 0x1195F,
// 'Nandinagari': (char) => char >= 0x119A0 && char <= 0x119FF,
// 'Zanabazar Square': (char) => char >= 0x11A00 && char <= 0x11A4F,
// 'Soyombo': (char) => char >= 0x11A50 && char <= 0x11AAF,
// 'Unified Canadian Aboriginal Syllabics Extended-A': (char) => char >= 0x11AB0 && char <= 0x11ABF,
'Zanabazar Square': (char) => char >= 0x11A00 && char <= 0x11A4F,
'Soyombo': (char) => char >= 0x11A50 && char <= 0x11AAF,
'Unified Canadian Aboriginal Syllabics Extended-A': (char) => char >= 0x11AB0 && char <= 0x11ABF,
// 'Pau Cin Hau': (char) => char >= 0x11AC0 && char <= 0x11AFF,
// 'Devanagari Extended-A': (char) => char >= 0x11B00 && char <= 0x11B5F,
// 'Sunuwar': (char) => char >= 0x11BC0 && char <= 0x11BFF,
Expand All @@ -263,10 +263,10 @@ export const unicodeBlockLookup: UnicodeBlockLookup = {
// 'Cuneiform Numbers and Punctuation': (char) => char >= 0x12400 && char <= 0x1247F,
// 'Early Dynastic Cuneiform': (char) => char >= 0x12480 && char <= 0x1254F,
// 'Cypro-Minoan': (char) => char >= 0x12F90 && char <= 0x12FFF,
// 'Egyptian Hieroglyphs': (char) => char >= 0x13000 && char <= 0x1342F,
// 'Egyptian Hieroglyph Format Controls': (char) => char >= 0x13430 && char <= 0x1345F,
// 'Egyptian Hieroglyphs Extended-A': (char) => char >= 0x13460 && char <= 0x143FF,
// 'Anatolian Hieroglyphs': (char) => char >= 0x14400 && char <= 0x1467F,
'Egyptian Hieroglyphs': (char) => char >= 0x13000 && char <= 0x1342F,
'Egyptian Hieroglyph Format Controls': (char) => char >= 0x13430 && char <= 0x1345F,
'Egyptian Hieroglyphs Extended-A': (char) => char >= 0x13460 && char <= 0x143FF,
'Anatolian Hieroglyphs': (char) => char >= 0x14400 && char <= 0x1467F,
// 'Gurung Khema': (char) => char >= 0x16100 && char <= 0x1613F,
// 'Bamum Supplement': (char) => char >= 0x16800 && char <= 0x16A3F,
// 'Mro': (char) => char >= 0x16A40 && char <= 0x16A6F,
Expand All @@ -276,29 +276,29 @@ export const unicodeBlockLookup: UnicodeBlockLookup = {
// 'Kirat Rai': (char) => char >= 0x16D40 && char <= 0x16D7F,
// 'Medefaidrin': (char) => char >= 0x16E40 && char <= 0x16E9F,
// 'Miao': (char) => char >= 0x16F00 && char <= 0x16F9F,
// 'Ideographic Symbols and Punctuation': (char) => char >= 0x16FE0 && char <= 0x16FFF,
// 'Tangut': (char) => char >= 0x17000 && char <= 0x187FF,
// 'Tangut Components': (char) => char >= 0x18800 && char <= 0x18AFF,
// 'Khitan Small Script': (char) => char >= 0x18B00 && char <= 0x18CFF,
// 'Tangut Supplement': (char) => char >= 0x18D00 && char <= 0x18D7F,
// 'Kana Extended-B': (char) => char >= 0x1AFF0 && char <= 0x1AFFF,
// 'Kana Supplement': (char) => char >= 0x1B000 && char <= 0x1B0FF,
// 'Kana Extended-A': (char) => char >= 0x1B100 && char <= 0x1B12F,
// 'Small Kana Extension': (char) => char >= 0x1B130 && char <= 0x1B16F,
// 'Nushu': (char) => char >= 0x1B170 && char <= 0x1B2FF,
// 'Duployan': (char) => char >= 0x1BC00 && char <= 0x1BC9F,
'Ideographic Symbols and Punctuation': (char) => char >= 0x16FE0 && char <= 0x16FFF,
'Tangut': (char) => char >= 0x17000 && char <= 0x187FF,
'Tangut Components': (char) => char >= 0x18800 && char <= 0x18AFF,
'Khitan Small Script': (char) => char >= 0x18B00 && char <= 0x18CFF,
'Tangut Supplement': (char) => char >= 0x18D00 && char <= 0x18D7F,
'Kana Extended-B': (char) => char >= 0x1AFF0 && char <= 0x1AFFF,
'Kana Supplement': (char) => char >= 0x1B000 && char <= 0x1B0FF,
'Kana Extended-A': (char) => char >= 0x1B100 && char <= 0x1B12F,
'Small Kana Extension': (char) => char >= 0x1B130 && char <= 0x1B16F,
'Nushu': (char) => char >= 0x1B170 && char <= 0x1B2FF,
'Duployan': (char) => char >= 0x1BC00 && char <= 0x1BC9F,
// 'Shorthand Format Controls': (char) => char >= 0x1BCA0 && char <= 0x1BCAF,
// 'Symbols for Legacy Computing Supplement': (char) => char >= 0x1CC00 && char <= 0x1CEBF,
// 'Znamenny Musical Notation': (char) => char >= 0x1CF00 && char <= 0x1CFCF,
// 'Byzantine Musical Symbols': (char) => char >= 0x1D000 && char <= 0x1D0FF,
// 'Musical Symbols': (char) => char >= 0x1D100 && char <= 0x1D1FF,
'Znamenny Musical Notation': (char) => char >= 0x1CF00 && char <= 0x1CFCF,
'Byzantine Musical Symbols': (char) => char >= 0x1D000 && char <= 0x1D0FF,
'Musical Symbols': (char) => char >= 0x1D100 && char <= 0x1D1FF,
// 'Ancient Greek Musical Notation': (char) => char >= 0x1D200 && char <= 0x1D24F,
// 'Kaktovik Numerals': (char) => char >= 0x1D2C0 && char <= 0x1D2DF,
// 'Mayan Numerals': (char) => char >= 0x1D2E0 && char <= 0x1D2FF,
// 'Tai Xuan Jing Symbols': (char) => char >= 0x1D300 && char <= 0x1D35F,
// 'Counting Rod Numerals': (char) => char >= 0x1D360 && char <= 0x1D37F,
'Mayan Numerals': (char) => char >= 0x1D2E0 && char <= 0x1D2FF,
'Tai Xuan Jing Symbols': (char) => char >= 0x1D300 && char <= 0x1D35F,
'Counting Rod Numerals': (char) => char >= 0x1D360 && char <= 0x1D37F,
// 'Mathematical Alphanumeric Symbols': (char) => char >= 0x1D400 && char <= 0x1D7FF,
// 'Sutton SignWriting': (char) => char >= 0x1D800 && char <= 0x1DAAF,
'Sutton SignWriting': (char) => char >= 0x1D800 && char <= 0x1DAAF,
// 'Latin Extended-G': (char) => char >= 0x1DF00 && char <= 0x1DFFF,
// 'Glagolitic Supplement': (char) => char >= 0x1E000 && char <= 0x1E02F,
// 'Cyrillic Extended-D': (char) => char >= 0x1E030 && char <= 0x1E08F,
Expand All @@ -312,22 +312,22 @@ export const unicodeBlockLookup: UnicodeBlockLookup = {
// 'Adlam': (char) => char >= 0x1E900 && char <= 0x1E95F,
// 'Indic Siyaq Numbers': (char) => char >= 0x1EC70 && char <= 0x1ECBF,
// 'Ottoman Siyaq Numbers': (char) => char >= 0x1ED00 && char <= 0x1ED4F,
// 'Arabic Mathematical Alphabetic Symbols': (char) => char >= 0x1EE00 && char <= 0x1EEFF,
// 'Mahjong Tiles': (char) => char >= 0x1F000 && char <= 0x1F02F,
// 'Domino Tiles': (char) => char >= 0x1F030 && char <= 0x1F09F,
// 'Playing Cards': (char) => char >= 0x1F0A0 && char <= 0x1F0FF,
// 'Enclosed Alphanumeric Supplement': (char) => char >= 0x1F100 && char <= 0x1F1FF,
'Arabic Mathematical Alphabetic Symbols': (char) => char >= 0x1EE00 && char <= 0x1EEFF,
'Mahjong Tiles': (char) => char >= 0x1F000 && char <= 0x1F02F,
'Domino Tiles': (char) => char >= 0x1F030 && char <= 0x1F09F,
'Playing Cards': (char) => char >= 0x1F0A0 && char <= 0x1F0FF,
'Enclosed Alphanumeric Supplement': (char) => char >= 0x1F100 && char <= 0x1F1FF,
'Enclosed Ideographic Supplement': (char) => char >= 0x1F200 && char <= 0x1F2FF,
// 'Miscellaneous Symbols and Pictographs': (char) => char >= 0x1F300 && char <= 0x1F5FF,
// 'Emoticons': (char) => char >= 0x1F600 && char <= 0x1F64F,
// 'Ornamental Dingbats': (char) => char >= 0x1F650 && char <= 0x1F67F,
// 'Transport and Map Symbols': (char) => char >= 0x1F680 && char <= 0x1F6FF,
// 'Alchemical Symbols': (char) => char >= 0x1F700 && char <= 0x1F77F,
// 'Geometric Shapes Extended': (char) => char >= 0x1F780 && char <= 0x1F7FF,
'Miscellaneous Symbols and Pictographs': (char) => char >= 0x1F300 && char <= 0x1F5FF,
'Emoticons': (char) => char >= 0x1F600 && char <= 0x1F64F,
'Ornamental Dingbats': (char) => char >= 0x1F650 && char <= 0x1F67F,
'Transport and Map Symbols': (char) => char >= 0x1F680 && char <= 0x1F6FF,
'Alchemical Symbols': (char) => char >= 0x1F700 && char <= 0x1F77F,
'Geometric Shapes Extended': (char) => char >= 0x1F780 && char <= 0x1F7FF,
// 'Supplemental Arrows-C': (char) => char >= 0x1F800 && char <= 0x1F8FF,
// 'Supplemental Symbols and Pictographs': (char) => char >= 0x1F900 && char <= 0x1F9FF,
// 'Chess Symbols': (char) => char >= 0x1FA00 && char <= 0x1FA6F,
// 'Symbols and Pictographs Extended-A': (char) => char >= 0x1FA70 && char <= 0x1FAFF,
'Supplemental Symbols and Pictographs': (char) => char >= 0x1F900 && char <= 0x1F9FF,
'Chess Symbols': (char) => char >= 0x1FA00 && char <= 0x1FA6F,
'Symbols and Pictographs Extended-A': (char) => char >= 0x1FA70 && char <= 0x1FAFF,
// 'Symbols for Legacy Computing': (char) => char >= 0x1FB00 && char <= 0x1FBFF,
'CJK Unified Ideographs Extension B': (char) => char >= 0x20000 && char <= 0x2A6DF,
'CJK Unified Ideographs Extension C': (char) => char >= 0x2A700 && char <= 0x2B73F,
Expand All @@ -337,9 +337,9 @@ export const unicodeBlockLookup: UnicodeBlockLookup = {
'CJK Unified Ideographs Extension I': (char) => char >= 0x2EBF0 && char <= 0x2EE5F,
'CJK Unified Ideographs Extension G': (char) => char >= 0x30000 && char <= 0x3134F,
'CJK Unified Ideographs Extension H': (char) => char >= 0x31350 && char <= 0x323AF,
'CJK Compatibility Ideographs Supplement': (char) => char >= 0x2F800 && char <= 0x2FA1F
'CJK Compatibility Ideographs Supplement': (char) => char >= 0x2F800 && char <= 0x2FA1F,
// 'Tags': (char) => char >= 0xE0000 && char <= 0xE007F,
// 'Variation Selectors Supplement': (char) => char >= 0xE0100 && char <= 0xE01EF,
// 'Supplementary Private Use Area-A': (char) => char >= 0xF0000 && char <= 0xFFFFF,
// 'Supplementary Private Use Area-B': (char) => char >= 0x100000 && char <= 0x10FFFF,
'Supplementary Private Use Area-A': (char) => char >= 0xF0000 && char <= 0xFFFFF,
'Supplementary Private Use Area-B': (char) => char >= 0x100000 && char <= 0x10FFFF
};
Loading

0 comments on commit c3ec76d

Please sign in to comment.