-
Notifications
You must be signed in to change notification settings - Fork 16
/
unicode-data.node.txt
55 lines (46 loc) · 3.92 KB
/
unicode-data.node.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
UNICODE-DATA
TODO: finish documentation of this
VERSION ==> #0.1.0
require('@unicode/unicode-X.Y.Z') #UNICODE
#UCD and emoji data.
#Latest X.Y.Z: 15.1.0
CNUM #Codepoint NUM
CCHAR #Codepoint 'CHAR'
CODEPOINT_MAP #MAP where key is CNUM and value is STR
require('.../FILE') #List of codepoints, with info spread across several FILEs:
# - code-points.js CNUM_ARR
# - symbols.js CCHAR_ARR
# - regex.js REGEXP
#.../FILE is '@unicode/unicode-X.Y.Z/PROPERTY/NAME/FILE', e.g. '@unicode/unicode-X.Y.Z/Script/SCRIPT/FILE'
#Applies to most UNICODE.* unless specified otherwise
UNICODE.Names #STR_ARR (empty)
import '@unicode/unicode-X.Y.Z/ #CODEPOINT_MAP of codepoint names, uppercased, e.g. 'EXCLAMATION MARK'
Names/index.js' #Some names are shared:
# - '<control>' (0-1f, 7f-9f)
# - '[Non ]Private Use High Surrogate' (d800-db7f, db80-dbff), 'Low Surrogate' (dc00-dfff)
# - 'Private Use' (e000-f8ff), 'Plane 15-16 Private Use' (f0000-10ffff)
# - 'CJK Ideograph[ Extension A-G|K]' (3400-4dbf, 4e00-9ffc, 3400-9999, 20000-2ebe0, 30000-3134a),
# 'Hangul Syllable' (ac00-d7a3), 'Tangut Ideograph' (17000-187f7)
UNICODE.Block #STR_ARR, i.e. grouping close codepoints together.
#Examples: 'Box_Drawing', 'Basic_Latin'
UNICODE.Script #STR_ARR, i.e. grouping codepoints by writing system.
#Examples: 'Arabic', 'Latin'
UNICODE.Script_Extensions #STR_ARR
UNICODE.General_Category #STR_ARR, i.e. general attributes.
#Examples: 'Letter', 'Punctuation', 'Number'
UNICODE.Binary_Property #STR_ARR, i.e. specific attributes.
#Examples: 'Cased', 'Dash', 'Alphabetic'
UNICODE.Sequence_Property #STR_ARR, i.e. emoji data.
#Example: 'RGI_Emoji' (main way to detect whether a codepoint sequence is an emoji)
UNICODE.Case_Folding #STR_ARR, among 'C', 'F', 'S', 'T'
UNICODE.Line_Break #STR_ARR
#Example: 'Break_After'
UNICODE.Word_Break #STR_ARR
#Example: 'Double_Quote'
UNICODE.Names #STR_ARR
#Example: 'Abbreviation'
UNICODE.Bidi_Class #STR_ARR
UNICODE.Bidi_Mirroring_Glyph #STR_ARR
UNICODE.Bidi_Paired_Bracket_Type #STR_ARR, among 'Open', 'Close', 'None'