;;;-*-Mode: LISP; Package: CCL -*-
;;;
;;;   Copyright (C) 2009 Clozure Associates
;;;   Copyright (C) 1994-2001 Digitool, Inc
;;;   This file is part of Clozure CL.
;;;
;;;   Clozure CL is licensed under the terms of the Lisp Lesser GNU Public
;;;   License , known as the LLGPL and distributed with Clozure CL as the
;;;   file "LICENSE".  The LLGPL consists of a preamble and the LGPL,
;;;   which is distributed with Clozure CL as the file "LGPL".  Where these
;;;   conflict, the preamble takes precedence.
;;;
;;;   Clozure CL is referenced in the preamble as the "LIBRARY."
;;;
;;;   The LLGPL is also available online at
;;;   http://opensource.franz.com/preamble.html

;;; READ and related functions.

(in-package "CCL")

;;; Compile-time-only table pairing each readtable-case keyword with a
;;; small integer code.  Because of the EVAL-WHEN situations used here,
;;; neither the constant nor the macro is defined when the compiled file
;;; is merely loaded; the macro expands into the quoted alist, so the
;;; table is embedded as literal data wherever the macro is used.
;;; (:STUDLY is not one of the four standard READTABLE-CASE values.)
(eval-when (:compile-toplevel :execute)
  (defconstant readtable-case-keywords
    '((:upcase . 1)
      (:downcase . 2)
      (:preserve . 0)
      (:invert . -1)
      (:studly . -2)))
  (defmacro readtable-case-keywords ()
    (list 'quote readtable-case-keywords)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;; A PACKAGE-REF is dumped as a form that calls REGISTER-PACKAGE-REF
;;; on the (quoted) name of the reference.  The environment argument
;;; is not used.
(defmethod make-load-form ((ref package-ref) &optional env)
  (declare (ignore env))
  (list 'register-package-ref
        (list 'quote (package-ref.name ref))))

;;; Unreadable printed representation (with type and identity) showing
;;; the reference's name followed by the value of its PKG slot in
;;; square brackets.
(defmethod print-object ((ref package-ref) stream)
  (print-unreadable-object (ref stream :type t :identity t)
    (let ((ref-name (package-ref.name ref))
          (ref-pkg (package-ref.pkg ref)))
      (format stream "for ~s [~s]" ref-name ref-pkg))))

;;; Maps character names to characters.  The EQUALP test makes string
;;; keys compare without regard to case.
(defvar *name->char* (make-hash-table :test #'equalp))

;;; Maps characters to (canonical) character names.
(defvar *char->name* (make-hash-table :test #'eql))

;;; Record NAME as a name for CHAR.  The name->char entry for NAME is
;;; always (re)written.  The char->name entry is written only when CHAR
;;; has no name recorded yet, so the first name registered for a
;;; character stays its canonical name.
;;;
;;; This isn't thread-safe.  If the user really wants to register character
;;; names from multiple threads, they should do their own locking.
(defun register-character-name (name char)
  (setf (gethash name *name->char*) char)
  (let ((canonical (gethash char *char->name*)))
    (if canonical
        nil
        (setf (gethash char *char->name*) name))))

(dolist (pair '(
                ;; Standard character names
                ("Newline" . #\012) ("Space" . #\040)
                ;; Semi-standard character names
                ("Rubout" . #\177) ("Page" . #\014) ("Tab" .
#\011) ("Backspace" . #\010) ("Return" . #\015) ("Linefeed" . #\012) ;; Other character names. (When available, standard ;; names should be used for printing in preference to ;; any non-standard names.) ("Null" . #\000) ("Nul" . #\000) ("SOH" . #\001) ("STX" . #\002) ("ETX" . #\003) ("EOT" . #\004) ("ENQ" . #\005) ("ACK" . #\006) ("Bell" . #\007) ("BEL" . #\007) ; ^G , used by Franz (and others with bells.) ("Delete" . #\010) ("BS" . #\010) ("HT" . #\011) ("LF" . #\012) ("NL" . #\012) ("PageUp" . #\013) ("VT" . #\013) ("PageDown" . #\014) ("Formfeed" . #\014) ("FF" . #\014) ("NP" . #\014) ("CR" . #\015) ("SO" . #\016) ("SI" . #\017) ("Sub" . #\032) ("ESC" . #\033) ("Escape" . #\033) ("Clear" . #\033) ("Altmode" . #\033) ("ALT" . #\033) ("Fs" . #\034) ("Gs" . #\035) ("Rs" . #\036) ("Us" . #\037) ("DEL" . #\177)("ForwardDelete" . #\177) ("No-Break_Space" . #\u+00a0) ("Inverted_Exclamation_Mark" . #\u+00a1) ("Cent_Sign" . #\u+00a2) ("Pound_Sign" . #\u+00a3) ("Currency_Sign" . #\u+00a4) ("Yen_Sign" . #\u+00a5) ("Broken_Bar" . #\u+00a6) ("Section_Sign" . #\u+00a7) ("Diaeresis" . #\u+00a8) ("Copyright_Sign" . #\u+00a9) ("Feminine_Ordinal_Indicator" . #\u+00aa) ("Left-Pointing_Double_Angle_Quotation_Mark" . #\u+00ab) ("Not_Sign" . #\u+00ac) ("Soft_Hyphen" . #\u+00ad) ("Registered_Sign" . #\u+00ae) ("Macron" . #\u+00af) ("Degree_Sign" . #\u+00b0) ("Plus-Minus_Sign" . #\u+00b1) ("Superscript_Two" . #\u+00b2) ("Superscript_Three" . #\u+00b3) ("Acute_Accent" . #\u+00b4) ("Micro_Sign" . #\u+00b5) ("Pilcrow_Sign" . #\u+00b6) ("Middle_Dot" . #\u+00b7) ("Cedilla" . #\u+00b8) ("Superscript_One" . #\u+00b9) ("Masculine_Ordinal_Indicator" . #\u+00ba) ("Right-Pointing_Double_Angle_Quotation_Mark" . #\u+00bb) ("Vulgar_Fraction_One_Quarter" . #\u+00bc) ("Vulgar_Fraction_One_Half" . #\u+00bd) ("Vulgar_Fraction_Three_Quarters" . #\u+00be) ("Inverted_Question_Mark" . #\u+00bf) ("Latin_Capital_Letter_A_With_Grave" . #\u+00c0) ("Latin_Capital_Letter_A_With_Acute" . 
#\u+00c1) ("Latin_Capital_Letter_A_With_Circumflex" . #\u+00c2) ("Latin_Capital_Letter_A_With_Tilde" . #\u+00c3) ("Latin_Capital_Letter_A_With_Diaeresis" . #\u+00c4) ("Latin_Capital_Letter_A_With_Ring_Above" . #\u+00c5) ("Latin_Capital_Letter_Ae" . #\u+00c6) ("Latin_Capital_Letter_C_With_Cedilla" . #\u+00c7) ("Latin_Capital_Letter_E_With_Grave" . #\u+00c8) ("Latin_Capital_Letter_E_With_Acute" . #\u+00c9) ("Latin_Capital_Letter_E_With_Circumflex" . #\u+00ca) ("Latin_Capital_Letter_E_With_Diaeresis" . #\u+00cb) ("Latin_Capital_Letter_I_With_Grave" . #\u+00cc) ("Latin_Capital_Letter_I_With_Acute" . #\u+00cd) ("Latin_Capital_Letter_I_With_Circumflex" . #\u+00ce) ("Latin_Capital_Letter_I_With_Diaeresis" . #\u+00cf) ("Latin_Capital_Letter_Eth" . #\u+00d0) ("Latin_Capital_Letter_N_With_Tilde" . #\u+00d1) ("Latin_Capital_Letter_O_With_Grave" . #\u+00d2) ("Latin_Capital_Letter_O_With_Acute" . #\u+00d3) ("Latin_Capital_Letter_O_With_Circumflex" . #\u+00d4) ("Latin_Capital_Letter_O_With_Tilde" . #\u+00d5) ("Latin_Capital_Letter_O_With_Diaeresis" . #\u+00d6) ("Multiplication_Sign" . #\u+00d7) ("Latin_Capital_Letter_O_With_Stroke" . #\u+00d8) ("Latin_Capital_Letter_U_With_Grave" . #\u+00d9) ("Latin_Capital_Letter_U_With_Acute" . #\u+00da) ("Latin_Capital_Letter_U_With_Circumflex" . #\u+00db) ("Latin_Capital_Letter_U_With_Diaeresis" . #\u+00dc) ("Latin_Capital_Letter_Y_With_Acute" . #\u+00dd) ("Latin_Capital_Letter_Thorn" . #\u+00de) ("Latin_Small_Letter_Sharp_S" . #\u+00df) ("Latin_Small_Letter_A_With_Grave" . #\u+00e0) ("Latin_Small_Letter_A_With_Acute" . #\u+00e1) ("Latin_Small_Letter_A_With_Circumflex" . #\u+00e2) ("Latin_Small_Letter_A_With_Tilde" . #\u+00e3) ("Latin_Small_Letter_A_With_Diaeresis" . #\u+00e4) ("Latin_Small_Letter_A_With_Ring_Above" . #\u+00e5) ("Latin_Small_Letter_Ae" . #\u+00e6) ("Latin_Small_Letter_C_With_Cedilla" . #\u+00e7) ("Latin_Small_Letter_E_With_Grave" . #\u+00e8) ("Latin_Small_Letter_E_With_Acute" . 
#\u+00e9) ("Latin_Small_Letter_E_With_Circumflex" . #\u+00ea) ("Latin_Small_Letter_E_With_Diaeresis" . #\u+00eb) ("Latin_Small_Letter_I_With_Grave" . #\u+00ec) ("Latin_Small_Letter_I_With_Acute" . #\u+00ed) ("Latin_Small_Letter_I_With_Circumflex" . #\u+00ee) ("Latin_Small_Letter_I_With_Diaeresis" . #\u+00ef) ("Latin_Small_Letter_Eth" . #\u+00f0) ("Latin_Small_Letter_N_With_Tilde" . #\u+00f1) ("Latin_Small_Letter_O_With_Grave" . #\u+00f2) ("Latin_Small_Letter_O_With_Acute" . #\u+00f3) ("Latin_Small_Letter_O_With_Circumflex" . #\u+00f4) ("Latin_Small_Letter_O_With_Tilde" . #\u+00f5) ("Latin_Small_Letter_O_With_Diaeresis" . #\u+00f6) ("Division_Sign" . #\u+00f7) ("Latin_Small_Letter_O_With_Stroke" . #\u+00f8) ("Latin_Small_Letter_U_With_Grave" . #\u+00f9) ("Latin_Small_Letter_U_With_Acute" . #\u+00fa) ("Latin_Small_Letter_U_With_Circumflex" . #\u+00fb) ("Latin_Small_Letter_U_With_Diaeresis" . #\u+00fc) ("Latin_Small_Letter_Y_With_Acute" . #\u+00fd) ("Latin_Small_Letter_Thorn" . #\u+00fe) ("Latin_Small_Letter_Y_With_Diaeresis" . #\u+00ff) ("Latin_Capital_Letter_A_With_Macron" . #\u+0100) ("Latin_Small_Letter_A_With_Macron" . #\u+0101) ("Latin_Capital_Letter_A_With_Breve" . #\u+0102) ("Latin_Small_Letter_A_With_Breve" . #\u+0103) ("Latin_Capital_Letter_A_With_Ogonek" . #\u+0104) ("Latin_Small_Letter_A_With_Ogonek" . #\u+0105) ("Latin_Capital_Letter_C_With_Acute" . #\u+0106) ("Latin_Small_Letter_C_With_Acute" . #\u+0107) ("Latin_Capital_Letter_C_With_Circumflex" . #\u+0108) ("Latin_Small_Letter_C_With_Circumflex" . #\u+0109) ("Latin_Capital_Letter_C_With_Dot_Above" . #\u+010a) ("Latin_Small_Letter_C_With_Dot_Above" . #\u+010b) ("Latin_Capital_Letter_C_With_Caron" . #\u+010c) ("Latin_Small_Letter_C_With_Caron" . #\u+010d) ("Latin_Capital_Letter_D_With_Caron" . #\u+010e) ("Latin_Small_Letter_D_With_Caron" . #\u+010f) ("Latin_Capital_Letter_D_With_Stroke" . #\u+0110) ("Latin_Small_Letter_D_With_Stroke" . #\u+0111) ("Latin_Capital_Letter_E_With_Macron" . 
#\u+0112) ("Latin_Small_Letter_E_With_Macron" . #\u+0113) ("Latin_Capital_Letter_E_With_Breve" . #\u+0114) ("Latin_Small_Letter_E_With_Breve" . #\u+0115) ("Latin_Capital_Letter_E_With_Dot_Above" . #\u+0116) ("Latin_Small_Letter_E_With_Dot_Above" . #\u+0117) ("Latin_Capital_Letter_E_With_Ogonek" . #\u+0118) ("Latin_Small_Letter_E_With_Ogonek" . #\u+0119) ("Latin_Capital_Letter_E_With_Caron" . #\u+011a) ("Latin_Small_Letter_E_With_Caron" . #\u+011b) ("Latin_Capital_Letter_G_With_Circumflex" . #\u+011c) ("Latin_Small_Letter_G_With_Circumflex" . #\u+011d) ("Latin_Capital_Letter_G_With_Breve" . #\u+011e) ("Latin_Small_Letter_G_With_Breve" . #\u+011f) ("Latin_Capital_Letter_G_With_Dot_Above" . #\u+0120) ("Latin_Small_Letter_G_With_Dot_Above" . #\u+0121) ("Latin_Capital_Letter_G_With_Cedilla" . #\u+0122) ("Latin_Small_Letter_G_With_Cedilla" . #\u+0123) ("Latin_Capital_Letter_H_With_Circumflex" . #\u+0124) ("Latin_Small_Letter_H_With_Circumflex" . #\u+0125) ("Latin_Capital_Letter_H_With_Stroke" . #\u+0126) ("Latin_Small_Letter_H_With_Stroke" . #\u+0127) ("Latin_Capital_Letter_I_With_Tilde" . #\u+0128) ("Latin_Small_Letter_I_With_Tilde" . #\u+0129) ("Latin_Capital_Letter_I_With_Macron" . #\u+012a) ("Latin_Small_Letter_I_With_Macron" . #\u+012b) ("Latin_Capital_Letter_I_With_Breve" . #\u+012c) ("Latin_Small_Letter_I_With_Breve" . #\u+012d) ("Latin_Capital_Letter_I_With_Ogonek" . #\u+012e) ("Latin_Small_Letter_I_With_Ogonek" . #\u+012f) ("Latin_Capital_Letter_I_With_Dot_Above" . #\u+0130) ("Latin_Small_Letter_Dotless_I" . #\u+0131) ("Latin_Capital_Ligature_Ij" . #\u+0132) ("Latin_Small_Ligature_Ij" . #\u+0133) ("Latin_Capital_Letter_J_With_Circumflex" . #\u+0134) ("Latin_Small_Letter_J_With_Circumflex" . #\u+0135) ("Latin_Capital_Letter_K_With_Cedilla" . #\u+0136) ("Latin_Small_Letter_K_With_Cedilla" . #\u+0137) ("Latin_Small_Letter_Kra" . #\u+0138) ("Latin_Capital_Letter_L_With_Acute" . #\u+0139) ("Latin_Small_Letter_L_With_Acute" . 
#\u+013a) ("Latin_Capital_Letter_L_With_Cedilla" . #\u+013b) ("Latin_Small_Letter_L_With_Cedilla" . #\u+013c) ("Latin_Capital_Letter_L_With_Caron" . #\u+013d) ("Latin_Small_Letter_L_With_Caron" . #\u+013e) ("Latin_Capital_Letter_L_With_Middle_Dot" . #\u+013f) ("Latin_Small_Letter_L_With_Middle_Dot" . #\u+0140) ("Latin_Capital_Letter_L_With_Stroke" . #\u+0141) ("Latin_Small_Letter_L_With_Stroke" . #\u+0142) ("Latin_Capital_Letter_N_With_Acute" . #\u+0143) ("Latin_Small_Letter_N_With_Acute" . #\u+0144) ("Latin_Capital_Letter_N_With_Cedilla" . #\u+0145) ("Latin_Small_Letter_N_With_Cedilla" . #\u+0146) ("Latin_Capital_Letter_N_With_Caron" . #\u+0147) ("Latin_Small_Letter_N_With_Caron" . #\u+0148) ("Latin_Small_Letter_N_Preceded_By_Apostrophe" . #\u+0149) ("Latin_Capital_Letter_Eng" . #\u+014a) ("Latin_Small_Letter_Eng" . #\u+014b) ("Latin_Capital_Letter_O_With_Macron" . #\u+014c) ("Latin_Small_Letter_O_With_Macron" . #\u+014d) ("Latin_Capital_Letter_O_With_Breve" . #\u+014e) ("Latin_Small_Letter_O_With_Breve" . #\u+014f) ("Latin_Capital_Letter_O_With_Double_Acute" . #\u+0150) ("Latin_Small_Letter_O_With_Double_Acute" . #\u+0151) ("Latin_Capital_Ligature_Oe" . #\u+0152) ("Latin_Small_Ligature_Oe" . #\u+0153) ("Latin_Capital_Letter_R_With_Acute" . #\u+0154) ("Latin_Small_Letter_R_With_Acute" . #\u+0155) ("Latin_Capital_Letter_R_With_Cedilla" . #\u+0156) ("Latin_Small_Letter_R_With_Cedilla" . #\u+0157) ("Latin_Capital_Letter_R_With_Caron" . #\u+0158) ("Latin_Small_Letter_R_With_Caron" . #\u+0159) ("Latin_Capital_Letter_S_With_Acute" . #\u+015a) ("Latin_Small_Letter_S_With_Acute" . #\u+015b) ("Latin_Capital_Letter_S_With_Circumflex" . #\u+015c) ("Latin_Small_Letter_S_With_Circumflex" . #\u+015d) ("Latin_Capital_Letter_S_With_Cedilla" . #\u+015e) ("Latin_Small_Letter_S_With_Cedilla" . #\u+015f) ("Latin_Capital_Letter_S_With_Caron" . #\u+0160) ("Latin_Small_Letter_S_With_Caron" . #\u+0161) ("Latin_Capital_Letter_T_With_Cedilla" . 
#\u+0162) ("Latin_Small_Letter_T_With_Cedilla" . #\u+0163) ("Latin_Capital_Letter_T_With_Caron" . #\u+0164) ("Latin_Small_Letter_T_With_Caron" . #\u+0165) ("Latin_Capital_Letter_T_With_Stroke" . #\u+0166) ("Latin_Small_Letter_T_With_Stroke" . #\u+0167) ("Latin_Capital_Letter_U_With_Tilde" . #\u+0168) ("Latin_Small_Letter_U_With_Tilde" . #\u+0169) ("Latin_Capital_Letter_U_With_Macron" . #\u+016a) ("Latin_Small_Letter_U_With_Macron" . #\u+016b) ("Latin_Capital_Letter_U_With_Breve" . #\u+016c) ("Latin_Small_Letter_U_With_Breve" . #\u+016d) ("Latin_Capital_Letter_U_With_Ring_Above" . #\u+016e) ("Latin_Small_Letter_U_With_Ring_Above" . #\u+016f) ("Latin_Capital_Letter_U_With_Double_Acute" . #\u+0170) ("Latin_Small_Letter_U_With_Double_Acute" . #\u+0171) ("Latin_Capital_Letter_U_With_Ogonek" . #\u+0172) ("Latin_Small_Letter_U_With_Ogonek" . #\u+0173) ("Latin_Capital_Letter_W_With_Circumflex" . #\u+0174) ("Latin_Small_Letter_W_With_Circumflex" . #\u+0175) ("Latin_Capital_Letter_Y_With_Circumflex" . #\u+0176) ("Latin_Small_Letter_Y_With_Circumflex" . #\u+0177) ("Latin_Capital_Letter_Y_With_Diaeresis" . #\u+0178) ("Latin_Capital_Letter_Z_With_Acute" . #\u+0179) ("Latin_Small_Letter_Z_With_Acute" . #\u+017a) ("Latin_Capital_Letter_Z_With_Dot_Above" . #\u+017b) ("Latin_Small_Letter_Z_With_Dot_Above" . #\u+017c) ("Latin_Capital_Letter_Z_With_Caron" . #\u+017d) ("Latin_Small_Letter_Z_With_Caron" . #\u+017e) ("Latin_Small_Letter_Long_S" . #\u+017f) ("Latin_Small_Letter_B_With_Stroke" . #\u+0180) ("Latin_Capital_Letter_B_With_Hook" . #\u+0181) ("Latin_Capital_Letter_B_With_Topbar" . #\u+0182) ("Latin_Small_Letter_B_With_Topbar" . #\u+0183) ("Latin_Capital_Letter_Tone_Six" . #\u+0184) ("Latin_Small_Letter_Tone_Six" . #\u+0185) ("Latin_Capital_Letter_Open_O" . #\u+0186) ("Latin_Capital_Letter_C_With_Hook" . #\u+0187) ("Latin_Small_Letter_C_With_Hook" . #\u+0188) ("Latin_Capital_Letter_African_D" . #\u+0189) ("Latin_Capital_Letter_D_With_Hook" . 
#\u+018a) ("Latin_Capital_Letter_D_With_Topbar" . #\u+018b) ("Latin_Small_Letter_D_With_Topbar" . #\u+018c) ("Latin_Small_Letter_Turned_Delta" . #\u+018d) ("Latin_Capital_Letter_Reversed_E" . #\u+018e) ("Latin_Capital_Letter_Schwa" . #\u+018f) ("Latin_Capital_Letter_Open_E" . #\u+0190) ("Latin_Capital_Letter_F_With_Hook" . #\u+0191) ("Latin_Small_Letter_F_With_Hook" . #\u+0192) ("Latin_Capital_Letter_G_With_Hook" . #\u+0193) ("Latin_Capital_Letter_Gamma" . #\u+0194) ("Latin_Small_Letter_Hv" . #\u+0195) ("Latin_Capital_Letter_Iota" . #\u+0196) ("Latin_Capital_Letter_I_With_Stroke" . #\u+0197) ("Latin_Capital_Letter_K_With_Hook" . #\u+0198) ("Latin_Small_Letter_K_With_Hook" . #\u+0199) ("Latin_Small_Letter_L_With_Bar" . #\u+019a) ("Latin_Small_Letter_Lambda_With_Stroke" . #\u+019b) ("Latin_Capital_Letter_Turned_M" . #\u+019c) ("Latin_Capital_Letter_N_With_Left_Hook" . #\u+019d) ("Latin_Small_Letter_N_With_Long_Right_Leg" . #\u+019e) ("Latin_Capital_Letter_O_With_Middle_Tilde" . #\u+019f) ("Latin_Capital_Letter_O_With_Horn" . #\u+01a0) ("Latin_Small_Letter_O_With_Horn" . #\u+01a1) ("Latin_Capital_Letter_Oi" . #\u+01a2) ("Latin_Small_Letter_Oi" . #\u+01a3) ("Latin_Capital_Letter_P_With_Hook" . #\u+01a4) ("Latin_Small_Letter_P_With_Hook" . #\u+01a5) ("Latin_Letter_Yr" . #\u+01a6) ("Latin_Capital_Letter_Tone_Two" . #\u+01a7) ("Latin_Small_Letter_Tone_Two" . #\u+01a8) ("Latin_Capital_Letter_Esh" . #\u+01a9) ("Latin_Letter_Reversed_Esh_Loop" . #\u+01aa) ("Latin_Small_Letter_T_With_Palatal_Hook" . #\u+01ab) ("Latin_Capital_Letter_T_With_Hook" . #\u+01ac) ("Latin_Small_Letter_T_With_Hook" . #\u+01ad) ("Latin_Capital_Letter_T_With_Retroflex_Hook" . #\u+01ae) ("Latin_Capital_Letter_U_With_Horn" . #\u+01af) ("Latin_Small_Letter_U_With_Horn" . #\u+01b0) ("Latin_Capital_Letter_Upsilon" . #\u+01b1) ("Latin_Capital_Letter_V_With_Hook" . #\u+01b2) ("Latin_Capital_Letter_Y_With_Hook" . #\u+01b3) ("Latin_Small_Letter_Y_With_Hook" . #\u+01b4) ("Latin_Capital_Letter_Z_With_Stroke" . 
#\u+01b5) ("Latin_Small_Letter_Z_With_Stroke" . #\u+01b6) ("Latin_Capital_Letter_Ezh" . #\u+01b7) ("Latin_Capital_Letter_Ezh_Reversed" . #\u+01b8) ("Latin_Small_Letter_Ezh_Reversed" . #\u+01b9) ("Latin_Small_Letter_Ezh_With_Tail" . #\u+01ba) ("Latin_Letter_Two_With_Stroke" . #\u+01bb) ("Latin_Capital_Letter_Tone_Five" . #\u+01bc) ("Latin_Small_Letter_Tone_Five" . #\u+01bd) ("Latin_Letter_Inverted_Glottal_Stop_With_Stroke" . #\u+01be) ("Latin_Letter_Wynn" . #\u+01bf) ("Latin_Letter_Dental_Click" . #\u+01c0) ("Latin_Letter_Lateral_Click" . #\u+01c1) ("Latin_Letter_Alveolar_Click" . #\u+01c2) ("Latin_Letter_Retroflex_Click" . #\u+01c3) ("Latin_Capital_Letter_Dz_With_Caron" . #\u+01c4) ("Latin_Capital_Letter_D_With_Small_Letter_Z_With_Caron" . #\u+01c5) ("Latin_Small_Letter_Dz_With_Caron" . #\u+01c6) ("Latin_Capital_Letter_Lj" . #\u+01c7) ("Latin_Capital_Letter_L_With_Small_Letter_J" . #\u+01c8) ("Latin_Small_Letter_Lj" . #\u+01c9) ("Latin_Capital_Letter_Nj" . #\u+01ca) ("Latin_Capital_Letter_N_With_Small_Letter_J" . #\u+01cb) ("Latin_Small_Letter_Nj" . #\u+01cc) ("Latin_Capital_Letter_A_With_Caron" . #\u+01cd) ("Latin_Small_Letter_A_With_Caron" . #\u+01ce) ("Latin_Capital_Letter_I_With_Caron" . #\u+01cf) ("Latin_Small_Letter_I_With_Caron" . #\u+01d0) ("Latin_Capital_Letter_O_With_Caron" . #\u+01d1) ("Latin_Small_Letter_O_With_Caron" . #\u+01d2) ("Latin_Capital_Letter_U_With_Caron" . #\u+01d3) ("Latin_Small_Letter_U_With_Caron" . #\u+01d4) ("Latin_Capital_Letter_U_With_Diaeresis_And_Macron" . #\u+01d5) ("Latin_Small_Letter_U_With_Diaeresis_And_Macron" . #\u+01d6) ("Latin_Capital_Letter_U_With_Diaeresis_And_Acute" . #\u+01d7) ("Latin_Small_Letter_U_With_Diaeresis_And_Acute" . #\u+01d8) ("Latin_Capital_Letter_U_With_Diaeresis_And_Caron" . #\u+01d9) ("Latin_Small_Letter_U_With_Diaeresis_And_Caron" . #\u+01da) ("Latin_Capital_Letter_U_With_Diaeresis_And_Grave" . #\u+01db) ("Latin_Small_Letter_U_With_Diaeresis_And_Grave" . #\u+01dc) ("Latin_Small_Letter_Turned_E" . 
#\u+01dd) ("Latin_Capital_Letter_A_With_Diaeresis_And_Macron" . #\u+01de) ("Latin_Small_Letter_A_With_Diaeresis_And_Macron" . #\u+01df) ("Latin_Capital_Letter_A_With_Dot_Above_And_Macron" . #\u+01e0) ("Latin_Small_Letter_A_With_Dot_Above_And_Macron" . #\u+01e1) ("Latin_Capital_Letter_Ae_With_Macron" . #\u+01e2) ("Latin_Small_Letter_Ae_With_Macron" . #\u+01e3) ("Latin_Capital_Letter_G_With_Stroke" . #\u+01e4) ("Latin_Small_Letter_G_With_Stroke" . #\u+01e5) ("Latin_Capital_Letter_G_With_Caron" . #\u+01e6) ("Latin_Small_Letter_G_With_Caron" . #\u+01e7) ("Latin_Capital_Letter_K_With_Caron" . #\u+01e8) ("Latin_Small_Letter_K_With_Caron" . #\u+01e9) ("Latin_Capital_Letter_O_With_Ogonek" . #\u+01ea) ("Latin_Small_Letter_O_With_Ogonek" . #\u+01eb) ("Latin_Capital_Letter_O_With_Ogonek_And_Macron" . #\u+01ec) ("Latin_Small_Letter_O_With_Ogonek_And_Macron" . #\u+01ed) ("Latin_Capital_Letter_Ezh_With_Caron" . #\u+01ee) ("Latin_Small_Letter_Ezh_With_Caron" . #\u+01ef) ("Latin_Small_Letter_J_With_Caron" . #\u+01f0) ("Latin_Capital_Letter_Dz" . #\u+01f1) ("Latin_Capital_Letter_D_With_Small_Letter_Z" . #\u+01f2) ("Latin_Small_Letter_Dz" . #\u+01f3) ("Latin_Capital_Letter_G_With_Acute" . #\u+01f4) ("Latin_Small_Letter_G_With_Acute" . #\u+01f5) ("Latin_Capital_Letter_Hwair" . #\u+01f6) ("Latin_Capital_Letter_Wynn" . #\u+01f7) ("Latin_Capital_Letter_N_With_Grave" . #\u+01f8) ("Latin_Small_Letter_N_With_Grave" . #\u+01f9) ("Latin_Capital_Letter_A_With_Ring_Above_And_Acute" . #\u+01fa) ("Latin_Small_Letter_A_With_Ring_Above_And_Acute" . #\u+01fb) ("Latin_Capital_Letter_Ae_With_Acute" . #\u+01fc) ("Latin_Small_Letter_Ae_With_Acute" . #\u+01fd) ("Latin_Capital_Letter_O_With_Stroke_And_Acute" . #\u+01fe) ("Latin_Small_Letter_O_With_Stroke_And_Acute" . #\u+01ff) ("Latin_Capital_Letter_A_With_Double_Grave" . #\u+0200) ("Latin_Small_Letter_A_With_Double_Grave" . #\u+0201) ("Latin_Capital_Letter_A_With_Inverted_Breve" . #\u+0202) ("Latin_Small_Letter_A_With_Inverted_Breve" . 
#\u+0203) ("Latin_Capital_Letter_E_With_Double_Grave" . #\u+0204) ("Latin_Small_Letter_E_With_Double_Grave" . #\u+0205) ("Latin_Capital_Letter_E_With_Inverted_Breve" . #\u+0206) ("Latin_Small_Letter_E_With_Inverted_Breve" . #\u+0207) ("Latin_Capital_Letter_I_With_Double_Grave" . #\u+0208) ("Latin_Small_Letter_I_With_Double_Grave" . #\u+0209) ("Latin_Capital_Letter_I_With_Inverted_Breve" . #\u+020a) ("Latin_Small_Letter_I_With_Inverted_Breve" . #\u+020b) ("Latin_Capital_Letter_O_With_Double_Grave" . #\u+020c) ("Latin_Small_Letter_O_With_Double_Grave" . #\u+020d) ("Latin_Capital_Letter_O_With_Inverted_Breve" . #\u+020e) ("Latin_Small_Letter_O_With_Inverted_Breve" . #\u+020f) ("Latin_Capital_Letter_R_With_Double_Grave" . #\u+0210) ("Latin_Small_Letter_R_With_Double_Grave" . #\u+0211) ("Latin_Capital_Letter_R_With_Inverted_Breve" . #\u+0212) ("Latin_Small_Letter_R_With_Inverted_Breve" . #\u+0213) ("Latin_Capital_Letter_U_With_Double_Grave" . #\u+0214) ("Latin_Small_Letter_U_With_Double_Grave" . #\u+0215) ("Latin_Capital_Letter_U_With_Inverted_Breve" . #\u+0216) ("Latin_Small_Letter_U_With_Inverted_Breve" . #\u+0217) ("Latin_Capital_Letter_S_With_Comma_Below" . #\u+0218) ("Latin_Small_Letter_S_With_Comma_Below" . #\u+0219) ("Latin_Capital_Letter_T_With_Comma_Below" . #\u+021a) ("Latin_Small_Letter_T_With_Comma_Below" . #\u+021b) ("Latin_Capital_Letter_Yogh" . #\u+021c) ("Latin_Small_Letter_Yogh" . #\u+021d) ("Latin_Capital_Letter_H_With_Caron" . #\u+021e) ("Latin_Small_Letter_H_With_Caron" . #\u+021f) ("Latin_Capital_Letter_N_With_Long_Right_Leg" . #\u+0220) ("Latin_Small_Letter_D_With_Curl" . #\u+0221) ("Latin_Capital_Letter_Ou" . #\u+0222) ("Latin_Small_Letter_Ou" . #\u+0223) ("Latin_Capital_Letter_Z_With_Hook" . #\u+0224) ("Latin_Small_Letter_Z_With_Hook" . #\u+0225) ("Latin_Capital_Letter_A_With_Dot_Above" . #\u+0226) ("Latin_Small_Letter_A_With_Dot_Above" . #\u+0227) ("Latin_Capital_Letter_E_With_Cedilla" . #\u+0228) ("Latin_Small_Letter_E_With_Cedilla" . 
#\u+0229) ("Latin_Capital_Letter_O_With_Diaeresis_And_Macron" . #\u+022a) ("Latin_Small_Letter_O_With_Diaeresis_And_Macron" . #\u+022b) ("Latin_Capital_Letter_O_With_Tilde_And_Macron" . #\u+022c) ("Latin_Small_Letter_O_With_Tilde_And_Macron" . #\u+022d) ("Latin_Capital_Letter_O_With_Dot_Above" . #\u+022e) ("Latin_Small_Letter_O_With_Dot_Above" . #\u+022f) ("Latin_Capital_Letter_O_With_Dot_Above_And_Macron" . #\u+0230) ("Latin_Small_Letter_O_With_Dot_Above_And_Macron" . #\u+0231) ("Latin_Capital_Letter_Y_With_Macron" . #\u+0232) ("Latin_Small_Letter_Y_With_Macron" . #\u+0233) ("Latin_Small_Letter_L_With_Curl" . #\u+0234) ("Latin_Small_Letter_N_With_Curl" . #\u+0235) ("Latin_Small_Letter_T_With_Curl" . #\u+0236) ("Latin_Small_Letter_Dotless_J" . #\u+0237) ("Latin_Small_Letter_Db_Digraph" . #\u+0238) ("Latin_Small_Letter_Qp_Digraph" . #\u+0239) ("Latin_Capital_Letter_A_With_Stroke" . #\u+023a) ("Latin_Capital_Letter_C_With_Stroke" . #\u+023b) ("Latin_Small_Letter_C_With_Stroke" . #\u+023c) ("Latin_Capital_Letter_L_With_Bar" . #\u+023d) ("Latin_Capital_Letter_T_With_Diagonal_Stroke" . #\u+023e) ("Latin_Small_Letter_S_With_Swash_Tail" . #\u+023f) ("Latin_Small_Letter_Z_With_Swash_Tail" . #\u+0240) ("Latin_Capital_Letter_Glottal_Stop" . #\u+0241) ("Latin_Small_Letter_Glottal_Stop" . #\u+0242) ("Latin_Capital_Letter_B_With_Stroke" . #\u+0243) ("Latin_Capital_Letter_U_Bar" . #\u+0244) ("Latin_Capital_Letter_Turned_V" . #\u+0245) ("Latin_Capital_Letter_E_With_Stroke" . #\u+0246) ("Latin_Small_Letter_E_With_Stroke" . #\u+0247) ("Latin_Capital_Letter_J_With_Stroke" . #\u+0248) ("Latin_Small_Letter_J_With_Stroke" . #\u+0249) ("Latin_Capital_Letter_Small_Q_With_Hook_Tail" . #\u+024a) ("Latin_Small_Letter_Q_With_Hook_Tail" . #\u+024b) ("Latin_Capital_Letter_R_With_Stroke" . #\u+024c) ("Latin_Small_Letter_R_With_Stroke" . #\u+024d) ("Latin_Capital_Letter_Y_With_Stroke" . #\u+024e) ("Latin_Small_Letter_Y_With_Stroke" . #\u+024f) ("Latin_Small_Letter_Turned_A" . 
#\u+0250) ("Latin_Small_Letter_Alpha" . #\u+0251) ("Latin_Small_Letter_Turned_Alpha" . #\u+0252) ("Latin_Small_Letter_B_With_Hook" . #\u+0253) ("Latin_Small_Letter_Open_O" . #\u+0254) ("Latin_Small_Letter_C_With_Curl" . #\u+0255) ("Latin_Small_Letter_D_With_Tail" . #\u+0256) ("Latin_Small_Letter_D_With_Hook" . #\u+0257) ("Latin_Small_Letter_Reversed_E" . #\u+0258) ("Latin_Small_Letter_Schwa" . #\u+0259) ("Latin_Small_Letter_Schwa_With_Hook" . #\u+025a) ("Latin_Small_Letter_Open_E" . #\u+025b) ("Latin_Small_Letter_Reversed_Open_E" . #\u+025c) ("Latin_Small_Letter_Reversed_Open_E_With_Hook" . #\u+025d) ("Latin_Small_Letter_Closed_Reversed_Open_E" . #\u+025e) ("Latin_Small_Letter_Dotless_J_With_Stroke" . #\u+025f) ("Latin_Small_Letter_G_With_Hook" . #\u+0260) ("Latin_Small_Letter_Script_G" . #\u+0261) ("Latin_Letter_Small_Capital_G" . #\u+0262) ("Latin_Small_Letter_Gamma" . #\u+0263) ("Latin_Small_Letter_Rams_Horn" . #\u+0264) ("Latin_Small_Letter_Turned_H" . #\u+0265) ("Latin_Small_Letter_H_With_Hook" . #\u+0266) ("Latin_Small_Letter_Heng_With_Hook" . #\u+0267) ("Latin_Small_Letter_I_With_Stroke" . #\u+0268) ("Latin_Small_Letter_Iota" . #\u+0269) ("Latin_Letter_Small_Capital_I" . #\u+026a) ("Latin_Small_Letter_L_With_Middle_Tilde" . #\u+026b) ("Latin_Small_Letter_L_With_Belt" . #\u+026c) ("Latin_Small_Letter_L_With_Retroflex_Hook" . #\u+026d) ("Latin_Small_Letter_Lezh" . #\u+026e) ("Latin_Small_Letter_Turned_M" . #\u+026f) ("Latin_Small_Letter_Turned_M_With_Long_Leg" . #\u+0270) ("Latin_Small_Letter_M_With_Hook" . #\u+0271) ("Latin_Small_Letter_N_With_Left_Hook" . #\u+0272) ("Latin_Small_Letter_N_With_Retroflex_Hook" . #\u+0273) ("Latin_Letter_Small_Capital_N" . #\u+0274) ("Latin_Small_Letter_Barred_O" . #\u+0275) ("Latin_Letter_Small_Capital_Oe" . #\u+0276) ("Latin_Small_Letter_Closed_Omega" . #\u+0277) ("Latin_Small_Letter_Phi" . #\u+0278) ("Latin_Small_Letter_Turned_R" . #\u+0279) ("Latin_Small_Letter_Turned_R_With_Long_Leg" . 
#\u+027a) ("Latin_Small_Letter_Turned_R_With_Hook" . #\u+027b) ("Latin_Small_Letter_R_With_Long_Leg" . #\u+027c) ("Latin_Small_Letter_R_With_Tail" . #\u+027d) ("Latin_Small_Letter_R_With_Fishhook" . #\u+027e) ("Latin_Small_Letter_Reversed_R_With_Fishhook" . #\u+027f) ("Latin_Letter_Small_Capital_R" . #\u+0280) ("Latin_Letter_Small_Capital_Inverted_R" . #\u+0281) ("Latin_Small_Letter_S_With_Hook" . #\u+0282) ("Latin_Small_Letter_Esh" . #\u+0283) ("Latin_Small_Letter_Dotless_J_With_Stroke_And_Hook" . #\u+0284) ("Latin_Small_Letter_Squat_Reversed_Esh" . #\u+0285) ("Latin_Small_Letter_Esh_With_Curl" . #\u+0286) ("Latin_Small_Letter_Turned_T" . #\u+0287) ("Latin_Small_Letter_T_With_Retroflex_Hook" . #\u+0288) ("Latin_Small_Letter_U_Bar" . #\u+0289) ("Latin_Small_Letter_Upsilon" . #\u+028a) ("Latin_Small_Letter_V_With_Hook" . #\u+028b) ("Latin_Small_Letter_Turned_V" . #\u+028c) ("Latin_Small_Letter_Turned_W" . #\u+028d) ("Latin_Small_Letter_Turned_Y" . #\u+028e) ("Latin_Letter_Small_Capital_Y" . #\u+028f) ("Latin_Small_Letter_Z_With_Retroflex_Hook" . #\u+0290) ("Latin_Small_Letter_Z_With_Curl" . #\u+0291) ("Latin_Small_Letter_Ezh" . #\u+0292) ("Latin_Small_Letter_Ezh_With_Curl" . #\u+0293) ("Latin_Letter_Glottal_Stop" . #\u+0294) ("Latin_Letter_Pharyngeal_Voiced_Fricative" . #\u+0295) ("Latin_Letter_Inverted_Glottal_Stop" . #\u+0296) ("Latin_Letter_Stretched_C" . #\u+0297) ("Latin_Letter_Bilabial_Click" . #\u+0298) ("Latin_Letter_Small_Capital_B" . #\u+0299) ("Latin_Small_Letter_Closed_Open_E" . #\u+029a) ("Latin_Letter_Small_Capital_G_With_Hook" . #\u+029b) ("Latin_Letter_Small_Capital_H" . #\u+029c) ("Latin_Small_Letter_J_With_Crossed-Tail" . #\u+029d) ("Latin_Small_Letter_Turned_K" . #\u+029e) ("Latin_Letter_Small_Capital_L" . #\u+029f) ("Latin_Small_Letter_Q_With_Hook" . #\u+02a0) ("Latin_Letter_Glottal_Stop_With_Stroke" . #\u+02a1) ("Latin_Letter_Reversed_Glottal_Stop_With_Stroke" . #\u+02a2) ("Latin_Small_Letter_Dz_Digraph" . 
#\u+02a3) ("Latin_Small_Letter_Dezh_Digraph" . #\u+02a4) ("Latin_Small_Letter_Dz_Digraph_With_Curl" . #\u+02a5) ("Latin_Small_Letter_Ts_Digraph" . #\u+02a6) ("Latin_Small_Letter_Tesh_Digraph" . #\u+02a7) ("Latin_Small_Letter_Tc_Digraph_With_Curl" . #\u+02a8) ("Latin_Small_Letter_Feng_Digraph" . #\u+02a9) ("Latin_Small_Letter_Ls_Digraph" . #\u+02aa) ("Latin_Small_Letter_Lz_Digraph" . #\u+02ab) ("Latin_Letter_Bilabial_Percussive" . #\u+02ac) ("Latin_Letter_Bidental_Percussive" . #\u+02ad) ("Latin_Small_Letter_Turned_H_With_Fishhook" . #\u+02ae) ("Latin_Small_Letter_Turned_H_With_Fishhook_And_Tail" . #\u+02af) ("Modifier_Letter_Small_H" . #\u+02b0) ("Modifier_Letter_Small_H_With_Hook" . #\u+02b1) ("Modifier_Letter_Small_J" . #\u+02b2) ("Modifier_Letter_Small_R" . #\u+02b3) ("Modifier_Letter_Small_Turned_R" . #\u+02b4) ("Modifier_Letter_Small_Turned_R_With_Hook" . #\u+02b5) ("Modifier_Letter_Small_Capital_Inverted_R" . #\u+02b6) ("Modifier_Letter_Small_W" . #\u+02b7) ("Modifier_Letter_Small_Y" . #\u+02b8) ("Modifier_Letter_Prime" . #\u+02b9) ("Modifier_Letter_Double_Prime" . #\u+02ba) ("Modifier_Letter_Turned_Comma" . #\u+02bb) ("Modifier_Letter_Apostrophe" . #\u+02bc) ("Modifier_Letter_Reversed_Comma" . #\u+02bd) ("Modifier_Letter_Right_Half_Ring" . #\u+02be) ("Modifier_Letter_Left_Half_Ring" . #\u+02bf) ("Modifier_Letter_Glottal_Stop" . #\u+02c0) ("Modifier_Letter_Reversed_Glottal_Stop" . #\u+02c1) ("Modifier_Letter_Left_Arrowhead" . #\u+02c2) ("Modifier_Letter_Right_Arrowhead" . #\u+02c3) ("Modifier_Letter_Up_Arrowhead" . #\u+02c4) ("Modifier_Letter_Down_Arrowhead" . #\u+02c5) ("Modifier_Letter_Circumflex_Accent" . #\u+02c6) ("Caron" . #\u+02c7) ("Modifier_Letter_Vertical_Line" . #\u+02c8) ("Modifier_Letter_Macron" . #\u+02c9) ("Modifier_Letter_Acute_Accent" . #\u+02ca) ("Modifier_Letter_Grave_Accent" . #\u+02cb) ("Modifier_Letter_Low_Vertical_Line" . #\u+02cc) ("Modifier_Letter_Low_Macron" . #\u+02cd) ("Modifier_Letter_Low_Grave_Accent" . 
#\u+02ce) ("Modifier_Letter_Low_Acute_Accent" . #\u+02cf) ("Modifier_Letter_Triangular_Colon" . #\u+02d0) ("Modifier_Letter_Half_Triangular_Colon" . #\u+02d1) ("Modifier_Letter_Centred_Right_Half_Ring" . #\u+02d2) ("Modifier_Letter_Centred_Left_Half_Ring" . #\u+02d3) ("Modifier_Letter_Up_Tack" . #\u+02d4) ("Modifier_Letter_Down_Tack" . #\u+02d5) ("Modifier_Letter_Plus_Sign" . #\u+02d6) ("Modifier_Letter_Minus_Sign" . #\u+02d7) ("Breve" . #\u+02d8) ("Dot_Above" . #\u+02d9) ("Ring_Above" . #\u+02da) ("Ogonek" . #\u+02db) ("Small_Tilde" . #\u+02dc) ("Double_Acute_Accent" . #\u+02dd) ("Modifier_Letter_Rhotic_Hook" . #\u+02de) ("Modifier_Letter_Cross_Accent" . #\u+02df) ("Modifier_Letter_Small_Gamma" . #\u+02e0) ("Modifier_Letter_Small_L" . #\u+02e1) ("Modifier_Letter_Small_S" . #\u+02e2) ("Modifier_Letter_Small_X" . #\u+02e3) ("Modifier_Letter_Small_Reversed_Glottal_Stop" . #\u+02e4) ("Modifier_Letter_Extra-High_Tone_Bar" . #\u+02e5) ("Modifier_Letter_High_Tone_Bar" . #\u+02e6) ("Modifier_Letter_Mid_Tone_Bar" . #\u+02e7) ("Modifier_Letter_Low_Tone_Bar" . #\u+02e8) ("Modifier_Letter_Extra-Low_Tone_Bar" . #\u+02e9) ("Modifier_Letter_Yin_Departing_Tone_Mark" . #\u+02ea) ("Modifier_Letter_Yang_Departing_Tone_Mark" . #\u+02eb) ("Modifier_Letter_Voicing" . #\u+02ec) ("Modifier_Letter_Unaspirated" . #\u+02ed) ("Modifier_Letter_Double_Apostrophe" . #\u+02ee) ("Modifier_Letter_Low_Down_Arrowhead" . #\u+02ef) ("Modifier_Letter_Low_Up_Arrowhead" . #\u+02f0) ("Modifier_Letter_Low_Left_Arrowhead" . #\u+02f1) ("Modifier_Letter_Low_Right_Arrowhead" . #\u+02f2) ("Modifier_Letter_Low_Ring" . #\u+02f3) ("Modifier_Letter_Middle_Grave_Accent" . #\u+02f4) ("Modifier_Letter_Middle_Double_Grave_Accent" . #\u+02f5) ("Modifier_Letter_Middle_Double_Acute_Accent" . #\u+02f6) ("Modifier_Letter_Low_Tilde" . #\u+02f7) ("Modifier_Letter_Raised_Colon" . #\u+02f8) ("Modifier_Letter_Begin_High_Tone" . #\u+02f9) ("Modifier_Letter_End_High_Tone" . #\u+02fa) ("Modifier_Letter_Begin_Low_Tone" . 
#\u+02fb) ("Modifier_Letter_End_Low_Tone" . #\u+02fc) ("Modifier_Letter_Shelf" . #\u+02fd) ("Modifier_Letter_Open_Shelf" . #\u+02fe) ("Modifier_Letter_Low_Left_Arrow" . #\u+02ff) ("Combining_Grave_Accent" . #\u+0300) ("Combining_Acute_Accent" . #\u+0301) ("Combining_Circumflex_Accent" . #\u+0302) ("Combining_Tilde" . #\u+0303) ("Combining_Macron" . #\u+0304) ("Combining_Overline" . #\u+0305) ("Combining_Breve" . #\u+0306) ("Combining_Dot_Above" . #\u+0307) ("Combining_Diaeresis" . #\u+0308) ("Combining_Hook_Above" . #\u+0309) ("Combining_Ring_Above" . #\u+030a) ("Combining_Double_Acute_Accent" . #\u+030b) ("Combining_Caron" . #\u+030c) ("Combining_Vertical_Line_Above" . #\u+030d) ("Combining_Double_Vertical_Line_Above" . #\u+030e) ("Combining_Double_Grave_Accent" . #\u+030f) ("Combining_Candrabindu" . #\u+0310) ("Combining_Inverted_Breve" . #\u+0311) ("Combining_Turned_Comma_Above" . #\u+0312) ("Combining_Comma_Above" . #\u+0313) ("Combining_Reversed_Comma_Above" . #\u+0314) ("Combining_Comma_Above_Right" . #\u+0315) ("Combining_Grave_Accent_Below" . #\u+0316) ("Combining_Acute_Accent_Below" . #\u+0317) ("Combining_Left_Tack_Below" . #\u+0318) ("Combining_Right_Tack_Below" . #\u+0319) ("Combining_Left_Angle_Above" . #\u+031a) ("Combining_Horn" . #\u+031b) ("Combining_Left_Half_Ring_Below" . #\u+031c) ("Combining_Up_Tack_Below" . #\u+031d) ("Combining_Down_Tack_Below" . #\u+031e) ("Combining_Plus_Sign_Below" . #\u+031f) ("Combining_Minus_Sign_Below" . #\u+0320) ("Combining_Palatalized_Hook_Below" . #\u+0321) ("Combining_Retroflex_Hook_Below" . #\u+0322) ("Combining_Dot_Below" . #\u+0323) ("Combining_Diaeresis_Below" . #\u+0324) ("Combining_Ring_Below" . #\u+0325) ("Combining_Comma_Below" . #\u+0326) ("Combining_Cedilla" . #\u+0327) ("Combining_Ogonek" . #\u+0328) ("Combining_Vertical_Line_Below" . #\u+0329) ("Combining_Bridge_Below" . #\u+032a) ("Combining_Inverted_Double_Arch_Below" . #\u+032b) ("Combining_Caron_Below" . 
#\u+032c) ("Combining_Circumflex_Accent_Below" . #\u+032d) ("Combining_Breve_Below" . #\u+032e) ("Combining_Inverted_Breve_Below" . #\u+032f) ("Combining_Tilde_Below" . #\u+0330) ("Combining_Macron_Below" . #\u+0331) ("Combining_Low_Line" . #\u+0332) ("Combining_Double_Low_Line" . #\u+0333) ("Combining_Tilde_Overlay" . #\u+0334) ("Combining_Short_Stroke_Overlay" . #\u+0335) ("Combining_Long_Stroke_Overlay" . #\u+0336) ("Combining_Short_Solidus_Overlay" . #\u+0337) ("Combining_Long_Solidus_Overlay" . #\u+0338) ("Combining_Right_Half_Ring_Below" . #\u+0339) ("Combining_Inverted_Bridge_Below" . #\u+033a) ("Combining_Square_Below" . #\u+033b) ("Combining_Seagull_Below" . #\u+033c) ("Combining_X_Above" . #\u+033d) ("Combining_Vertical_Tilde" . #\u+033e) ("Combining_Double_Overline" . #\u+033f) ("Combining_Grave_Tone_Mark" . #\u+0340) ("Combining_Acute_Tone_Mark" . #\u+0341) ("Combining_Greek_Perispomeni" . #\u+0342) ("Combining_Greek_Koronis" . #\u+0343) ("Combining_Greek_Dialytika_Tonos" . #\u+0344) ("Combining_Greek_Ypogegrammeni" . #\u+0345) ("Combining_Bridge_Above" . #\u+0346) ("Combining_Equals_Sign_Below" . #\u+0347) ("Combining_Double_Vertical_Line_Below" . #\u+0348) ("Combining_Left_Angle_Below" . #\u+0349) ("Combining_Not_Tilde_Above" . #\u+034a) ("Combining_Homothetic_Above" . #\u+034b) ("Combining_Almost_Equal_To_Above" . #\u+034c) ("Combining_Left_Right_Arrow_Below" . #\u+034d) ("Combining_Upwards_Arrow_Below" . #\u+034e) ("Combining_Grapheme_Joiner" . #\u+034f) ("Combining_Right_Arrowhead_Above" . #\u+0350) ("Combining_Left_Half_Ring_Above" . #\u+0351) ("Combining_Fermata" . #\u+0352) ("Combining_X_Below" . #\u+0353) ("Combining_Left_Arrowhead_Below" . #\u+0354) ("Combining_Right_Arrowhead_Below" . #\u+0355) ("Combining_Right_Arrowhead_And_Up_Arrowhead_Below" . #\u+0356) ("Combining_Right_Half_Ring_Above" . #\u+0357) ("Combining_Dot_Above_Right" . #\u+0358) ("Combining_Asterisk_Below" . #\u+0359) ("Combining_Double_Ring_Below" . 
#\u+035a) ("Combining_Zigzag_Above" . #\u+035b) ("Combining_Double_Breve_Below" . #\u+035c) ("Combining_Double_Breve" . #\u+035d) ("Combining_Double_Macron" . #\u+035e) ("Combining_Double_Macron_Below" . #\u+035f) ("Combining_Double_Tilde" . #\u+0360) ("Combining_Double_Inverted_Breve" . #\u+0361) ("Combining_Double_Rightwards_Arrow_Below" . #\u+0362) ("Combining_Latin_Small_Letter_A" . #\u+0363) ("Combining_Latin_Small_Letter_E" . #\u+0364) ("Combining_Latin_Small_Letter_I" . #\u+0365) ("Combining_Latin_Small_Letter_O" . #\u+0366) ("Combining_Latin_Small_Letter_U" . #\u+0367) ("Combining_Latin_Small_Letter_C" . #\u+0368) ("Combining_Latin_Small_Letter_D" . #\u+0369) ("Combining_Latin_Small_Letter_H" . #\u+036a) ("Combining_Latin_Small_Letter_M" . #\u+036b) ("Combining_Latin_Small_Letter_R" . #\u+036c) ("Combining_Latin_Small_Letter_T" . #\u+036d) ("Combining_Latin_Small_Letter_V" . #\u+036e) ("Combining_Latin_Small_Letter_X" . #\u+036f) ("Greek_Numeral_Sign" . #\u+0374) ("Greek_Lower_Numeral_Sign" . #\u+0375) ("Greek_Ypogegrammeni" . #\u+037a) ("Greek_Small_Reversed_Lunate_Sigma_Symbol" . #\u+037b) ("Greek_Small_Dotted_Lunate_Sigma_Symbol" . #\u+037c) ("Greek_Small_Reversed_Dotted_Lunate_Sigma_Symbol" . #\u+037d) ("Greek_Question_Mark" . #\u+037e) ("Greek_Tonos" . #\u+0384) ("Greek_Dialytika_Tonos" . #\u+0385) ("Greek_Capital_Letter_Alpha_With_Tonos" . #\u+0386) ("Greek_Ano_Teleia" . #\u+0387) ("Greek_Capital_Letter_Epsilon_With_Tonos" . #\u+0388) ("Greek_Capital_Letter_Eta_With_Tonos" . #\u+0389) ("Greek_Capital_Letter_Iota_With_Tonos" . #\u+038a) ("Greek_Capital_Letter_Omicron_With_Tonos" . #\u+038c) ("Greek_Capital_Letter_Upsilon_With_Tonos" . #\u+038e) ("Greek_Capital_Letter_Omega_With_Tonos" . #\u+038f) ("Greek_Small_Letter_Iota_With_Dialytika_And_Tonos" . #\u+0390) ("Greek_Capital_Letter_Alpha" . #\u+0391) ("Greek_Capital_Letter_Beta" . #\u+0392) ("Greek_Capital_Letter_Gamma" . #\u+0393) ("Greek_Capital_Letter_Delta" . 
#\u+0394) ("Greek_Capital_Letter_Epsilon" . #\u+0395) ("Greek_Capital_Letter_Zeta" . #\u+0396) ("Greek_Capital_Letter_Eta" . #\u+0397) ("Greek_Capital_Letter_Theta" . #\u+0398) ("Greek_Capital_Letter_Iota" . #\u+0399) ("Greek_Capital_Letter_Kappa" . #\u+039a) ("Greek_Capital_Letter_Lamda" . #\u+039b) ("Greek_Capital_Letter_Mu" . #\u+039c) ("Greek_Capital_Letter_Nu" . #\u+039d) ("Greek_Capital_Letter_Xi" . #\u+039e) ("Greek_Capital_Letter_Omicron" . #\u+039f) ("Greek_Capital_Letter_Pi" . #\u+03a0) ("Greek_Capital_Letter_Rho" . #\u+03a1) ("Greek_Capital_Letter_Sigma" . #\u+03a3) ("Greek_Capital_Letter_Tau" . #\u+03a4) ("Greek_Capital_Letter_Upsilon" . #\u+03a5) ("Greek_Capital_Letter_Phi" . #\u+03a6) ("Greek_Capital_Letter_Chi" . #\u+03a7) ("Greek_Capital_Letter_Psi" . #\u+03a8) ("Greek_Capital_Letter_Omega" . #\u+03a9) ("Greek_Capital_Letter_Iota_With_Dialytika" . #\u+03aa) ("Greek_Capital_Letter_Upsilon_With_Dialytika" . #\u+03ab) ("Greek_Small_Letter_Alpha_With_Tonos" . #\u+03ac) ("Greek_Small_Letter_Epsilon_With_Tonos" . #\u+03ad) ("Greek_Small_Letter_Eta_With_Tonos" . #\u+03ae) ("Greek_Small_Letter_Iota_With_Tonos" . #\u+03af) ("Greek_Small_Letter_Upsilon_With_Dialytika_And_Tonos" . #\u+03b0) ("Greek_Small_Letter_Alpha" . #\u+03b1) ("Greek_Small_Letter_Beta" . #\u+03b2) ("Greek_Small_Letter_Gamma" . #\u+03b3) ("Greek_Small_Letter_Delta" . #\u+03b4) ("Greek_Small_Letter_Epsilon" . #\u+03b5) ("Greek_Small_Letter_Zeta" . #\u+03b6) ("Greek_Small_Letter_Eta" . #\u+03b7) ("Greek_Small_Letter_Theta" . #\u+03b8) ("Greek_Small_Letter_Iota" . #\u+03b9) ("Greek_Small_Letter_Kappa" . #\u+03ba) ("Greek_Small_Letter_Lamda" . #\u+03bb) ("Greek_Small_Letter_Mu" . #\u+03bc) ("Greek_Small_Letter_Nu" . #\u+03bd) ("Greek_Small_Letter_Xi" . #\u+03be) ("Greek_Small_Letter_Omicron" . #\u+03bf) ("Greek_Small_Letter_Pi" . #\u+03c0) ("Greek_Small_Letter_Rho" . #\u+03c1) ("Greek_Small_Letter_Final_Sigma" . #\u+03c2) ("Greek_Small_Letter_Sigma" . #\u+03c3) ("Greek_Small_Letter_Tau" . 
#\u+03c4) ("Greek_Small_Letter_Upsilon" . #\u+03c5) ("Greek_Small_Letter_Phi" . #\u+03c6) ("Greek_Small_Letter_Chi" . #\u+03c7) ("Greek_Small_Letter_Psi" . #\u+03c8) ("Greek_Small_Letter_Omega" . #\u+03c9) ("Greek_Small_Letter_Iota_With_Dialytika" . #\u+03ca) ("Greek_Small_Letter_Upsilon_With_Dialytika" . #\u+03cb) ("Greek_Small_Letter_Omicron_With_Tonos" . #\u+03cc) ("Greek_Small_Letter_Upsilon_With_Tonos" . #\u+03cd) ("Greek_Small_Letter_Omega_With_Tonos" . #\u+03ce) ("Greek_Beta_Symbol" . #\u+03d0) ("Greek_Theta_Symbol" . #\u+03d1) ("Greek_Upsilon_With_Hook_Symbol" . #\u+03d2) ("Greek_Upsilon_With_Acute_And_Hook_Symbol" . #\u+03d3) ("Greek_Upsilon_With_Diaeresis_And_Hook_Symbol" . #\u+03d4) ("Greek_Phi_Symbol" . #\u+03d5) ("Greek_Pi_Symbol" . #\u+03d6) ("Greek_Kai_Symbol" . #\u+03d7) ("Greek_Letter_Archaic_Koppa" . #\u+03d8) ("Greek_Small_Letter_Archaic_Koppa" . #\u+03d9) ("Greek_Letter_Stigma" . #\u+03da) ("Greek_Small_Letter_Stigma" . #\u+03db) ("Greek_Letter_Digamma" . #\u+03dc) ("Greek_Small_Letter_Digamma" . #\u+03dd) ("Greek_Letter_Koppa" . #\u+03de) ("Greek_Small_Letter_Koppa" . #\u+03df) ("Greek_Letter_Sampi" . #\u+03e0) ("Greek_Small_Letter_Sampi" . #\u+03e1) ("Coptic_Capital_Letter_Shei" . #\u+03e2) ("Coptic_Small_Letter_Shei" . #\u+03e3) ("Coptic_Capital_Letter_Fei" . #\u+03e4) ("Coptic_Small_Letter_Fei" . #\u+03e5) ("Coptic_Capital_Letter_Khei" . #\u+03e6) ("Coptic_Small_Letter_Khei" . #\u+03e7) ("Coptic_Capital_Letter_Hori" . #\u+03e8) ("Coptic_Small_Letter_Hori" . #\u+03e9) ("Coptic_Capital_Letter_Gangia" . #\u+03ea) ("Coptic_Small_Letter_Gangia" . #\u+03eb) ("Coptic_Capital_Letter_Shima" . #\u+03ec) ("Coptic_Small_Letter_Shima" . #\u+03ed) ("Coptic_Capital_Letter_Dei" . #\u+03ee) ("Coptic_Small_Letter_Dei" . #\u+03ef) ("Greek_Kappa_Symbol" . #\u+03f0) ("Greek_Rho_Symbol" . #\u+03f1) ("Greek_Lunate_Sigma_Symbol" . #\u+03f2) ("Greek_Letter_Yot" . #\u+03f3) ("Greek_Capital_Theta_Symbol" . #\u+03f4) ("Greek_Lunate_Epsilon_Symbol" . 
#\u+03f5) ("Greek_Reversed_Lunate_Epsilon_Symbol" . #\u+03f6) ("Greek_Capital_Letter_Sho" . #\u+03f7) ("Greek_Small_Letter_Sho" . #\u+03f8) ("Greek_Capital_Lunate_Sigma_Symbol" . #\u+03f9) ("Greek_Capital_Letter_San" . #\u+03fa) ("Greek_Small_Letter_San" . #\u+03fb) ("Greek_Rho_With_Stroke_Symbol" . #\u+03fc) ("Greek_Capital_Reversed_Lunate_Sigma_Symbol" . #\u+03fd) ("Greek_Capital_Dotted_Lunate_Sigma_Symbol" . #\u+03fe) ("Greek_Capital_Reversed_Dotted_Lunate_Sigma_Symbol" . #\u+03ff) ("Cyrillic_Capital_Letter_Ie_With_Grave" . #\u+0400) ("Cyrillic_Capital_Letter_Io" . #\u+0401) ("Cyrillic_Capital_Letter_Dje" . #\u+0402) ("Cyrillic_Capital_Letter_Gje" . #\u+0403) ("Cyrillic_Capital_Letter_Ukrainian_Ie" . #\u+0404) ("Cyrillic_Capital_Letter_Dze" . #\u+0405) ("Cyrillic_Capital_Letter_Byelorussian-Ukrainian_I" . #\u+0406) ("Cyrillic_Capital_Letter_Yi" . #\u+0407) ("Cyrillic_Capital_Letter_Je" . #\u+0408) ("Cyrillic_Capital_Letter_Lje" . #\u+0409) ("Cyrillic_Capital_Letter_Nje" . #\u+040a) ("Cyrillic_Capital_Letter_Tshe" . #\u+040b) ("Cyrillic_Capital_Letter_Kje" . #\u+040c) ("Cyrillic_Capital_Letter_I_With_Grave" . #\u+040d) ("Cyrillic_Capital_Letter_Short_U" . #\u+040e) ("Cyrillic_Capital_Letter_Dzhe" . #\u+040f) ("Cyrillic_Capital_Letter_A" . #\u+0410) ("Cyrillic_Capital_Letter_Be" . #\u+0411) ("Cyrillic_Capital_Letter_Ve" . #\u+0412) ("Cyrillic_Capital_Letter_Ghe" . #\u+0413) ("Cyrillic_Capital_Letter_De" . #\u+0414) ("Cyrillic_Capital_Letter_Ie" . #\u+0415) ("Cyrillic_Capital_Letter_Zhe" . #\u+0416) ("Cyrillic_Capital_Letter_Ze" . #\u+0417) ("Cyrillic_Capital_Letter_I" . #\u+0418) ("Cyrillic_Capital_Letter_Short_I" . #\u+0419) ("Cyrillic_Capital_Letter_Ka" . #\u+041a) ("Cyrillic_Capital_Letter_El" . #\u+041b) ("Cyrillic_Capital_Letter_Em" . #\u+041c) ("Cyrillic_Capital_Letter_En" . #\u+041d) ("Cyrillic_Capital_Letter_O" . #\u+041e) ("Cyrillic_Capital_Letter_Pe" . #\u+041f) ("Cyrillic_Capital_Letter_Er" . #\u+0420) ("Cyrillic_Capital_Letter_Es" . 
#\u+0421) ("Cyrillic_Capital_Letter_Te" . #\u+0422) ("Cyrillic_Capital_Letter_U" . #\u+0423) ("Cyrillic_Capital_Letter_Ef" . #\u+0424) ("Cyrillic_Capital_Letter_Ha" . #\u+0425) ("Cyrillic_Capital_Letter_Tse" . #\u+0426) ("Cyrillic_Capital_Letter_Che" . #\u+0427) ("Cyrillic_Capital_Letter_Sha" . #\u+0428) ("Cyrillic_Capital_Letter_Shcha" . #\u+0429) ("Cyrillic_Capital_Letter_Hard_Sign" . #\u+042a) ("Cyrillic_Capital_Letter_Yeru" . #\u+042b) ("Cyrillic_Capital_Letter_Soft_Sign" . #\u+042c) ("Cyrillic_Capital_Letter_E" . #\u+042d) ("Cyrillic_Capital_Letter_Yu" . #\u+042e) ("Cyrillic_Capital_Letter_Ya" . #\u+042f) ("Cyrillic_Small_Letter_A" . #\u+0430) ("Cyrillic_Small_Letter_Be" . #\u+0431) ("Cyrillic_Small_Letter_Ve" . #\u+0432) ("Cyrillic_Small_Letter_Ghe" . #\u+0433) ("Cyrillic_Small_Letter_De" . #\u+0434) ("Cyrillic_Small_Letter_Ie" . #\u+0435) ("Cyrillic_Small_Letter_Zhe" . #\u+0436) ("Cyrillic_Small_Letter_Ze" . #\u+0437) ("Cyrillic_Small_Letter_I" . #\u+0438) ("Cyrillic_Small_Letter_Short_I" . #\u+0439) ("Cyrillic_Small_Letter_Ka" . #\u+043a) ("Cyrillic_Small_Letter_El" . #\u+043b) ("Cyrillic_Small_Letter_Em" . #\u+043c) ("Cyrillic_Small_Letter_En" . #\u+043d) ("Cyrillic_Small_Letter_O" . #\u+043e) ("Cyrillic_Small_Letter_Pe" . #\u+043f) ("Cyrillic_Small_Letter_Er" . #\u+0440) ("Cyrillic_Small_Letter_Es" . #\u+0441) ("Cyrillic_Small_Letter_Te" . #\u+0442) ("Cyrillic_Small_Letter_U" . #\u+0443) ("Cyrillic_Small_Letter_Ef" . #\u+0444) ("Cyrillic_Small_Letter_Ha" . #\u+0445) ("Cyrillic_Small_Letter_Tse" . #\u+0446) ("Cyrillic_Small_Letter_Che" . #\u+0447) ("Cyrillic_Small_Letter_Sha" . #\u+0448) ("Cyrillic_Small_Letter_Shcha" . #\u+0449) ("Cyrillic_Small_Letter_Hard_Sign" . #\u+044a) ("Cyrillic_Small_Letter_Yeru" . #\u+044b) ("Cyrillic_Small_Letter_Soft_Sign" . #\u+044c) ("Cyrillic_Small_Letter_E" . #\u+044d) ("Cyrillic_Small_Letter_Yu" . #\u+044e) ("Cyrillic_Small_Letter_Ya" . #\u+044f) ("Cyrillic_Small_Letter_Ie_With_Grave" . 
#\u+0450) ("Cyrillic_Small_Letter_Io" . #\u+0451) ("Cyrillic_Small_Letter_Dje" . #\u+0452) ("Cyrillic_Small_Letter_Gje" . #\u+0453) ("Cyrillic_Small_Letter_Ukrainian_Ie" . #\u+0454) ("Cyrillic_Small_Letter_Dze" . #\u+0455) ("Cyrillic_Small_Letter_Byelorussian-Ukrainian_I" . #\u+0456) ("Cyrillic_Small_Letter_Yi" . #\u+0457) ("Cyrillic_Small_Letter_Je" . #\u+0458) ("Cyrillic_Small_Letter_Lje" . #\u+0459) ("Cyrillic_Small_Letter_Nje" . #\u+045a) ("Cyrillic_Small_Letter_Tshe" . #\u+045b) ("Cyrillic_Small_Letter_Kje" . #\u+045c) ("Cyrillic_Small_Letter_I_With_Grave" . #\u+045d) ("Cyrillic_Small_Letter_Short_U" . #\u+045e) ("Cyrillic_Small_Letter_Dzhe" . #\u+045f) ("Cyrillic_Capital_Letter_Omega" . #\u+0460) ("Cyrillic_Small_Letter_Omega" . #\u+0461) ("Cyrillic_Capital_Letter_Yat" . #\u+0462) ("Cyrillic_Small_Letter_Yat" . #\u+0463) ("Cyrillic_Capital_Letter_Iotified_E" . #\u+0464) ("Cyrillic_Small_Letter_Iotified_E" . #\u+0465) ("Cyrillic_Capital_Letter_Little_Yus" . #\u+0466) ("Cyrillic_Small_Letter_Little_Yus" . #\u+0467) ("Cyrillic_Capital_Letter_Iotified_Little_Yus" . #\u+0468) ("Cyrillic_Small_Letter_Iotified_Little_Yus" . #\u+0469) ("Cyrillic_Capital_Letter_Big_Yus" . #\u+046a) ("Cyrillic_Small_Letter_Big_Yus" . #\u+046b) ("Cyrillic_Capital_Letter_Iotified_Big_Yus" . #\u+046c) ("Cyrillic_Small_Letter_Iotified_Big_Yus" . #\u+046d) ("Cyrillic_Capital_Letter_Ksi" . #\u+046e) ("Cyrillic_Small_Letter_Ksi" . #\u+046f) ("Cyrillic_Capital_Letter_Psi" . #\u+0470) ("Cyrillic_Small_Letter_Psi" . #\u+0471) ("Cyrillic_Capital_Letter_Fita" . #\u+0472) ("Cyrillic_Small_Letter_Fita" . #\u+0473) ("Cyrillic_Capital_Letter_Izhitsa" . #\u+0474) ("Cyrillic_Small_Letter_Izhitsa" . #\u+0475) ("Cyrillic_Capital_Letter_Izhitsa_With_Double_Grave_Accent" . #\u+0476) ("Cyrillic_Small_Letter_Izhitsa_With_Double_Grave_Accent" . #\u+0477) ("Cyrillic_Capital_Letter_Uk" . #\u+0478) ("Cyrillic_Small_Letter_Uk" . #\u+0479) ("Cyrillic_Capital_Letter_Round_Omega" . 
#\u+047a) ("Cyrillic_Small_Letter_Round_Omega" . #\u+047b) ("Cyrillic_Capital_Letter_Omega_With_Titlo" . #\u+047c) ("Cyrillic_Small_Letter_Omega_With_Titlo" . #\u+047d) ("Cyrillic_Capital_Letter_Ot" . #\u+047e) ("Cyrillic_Small_Letter_Ot" . #\u+047f) ("Cyrillic_Capital_Letter_Koppa" . #\u+0480) ("Cyrillic_Small_Letter_Koppa" . #\u+0481) ("Cyrillic_Thousands_Sign" . #\u+0482) ("Combining_Cyrillic_Titlo" . #\u+0483) ("Combining_Cyrillic_Palatalization" . #\u+0484) ("Combining_Cyrillic_Dasia_Pneumata" . #\u+0485) ("Combining_Cyrillic_Psili_Pneumata" . #\u+0486) ("Combining_Cyrillic_Hundred_Thousands_Sign" . #\u+0488) ("Combining_Cyrillic_Millions_Sign" . #\u+0489) ("Cyrillic_Capital_Letter_Short_I_With_Tail" . #\u+048a) ("Cyrillic_Small_Letter_Short_I_With_Tail" . #\u+048b) ("Cyrillic_Capital_Letter_Semisoft_Sign" . #\u+048c) ("Cyrillic_Small_Letter_Semisoft_Sign" . #\u+048d) ("Cyrillic_Capital_Letter_Er_With_Tick" . #\u+048e) ("Cyrillic_Small_Letter_Er_With_Tick" . #\u+048f) ("Cyrillic_Capital_Letter_Ghe_With_Upturn" . #\u+0490) ("Cyrillic_Small_Letter_Ghe_With_Upturn" . #\u+0491) ("Cyrillic_Capital_Letter_Ghe_With_Stroke" . #\u+0492) ("Cyrillic_Small_Letter_Ghe_With_Stroke" . #\u+0493) ("Cyrillic_Capital_Letter_Ghe_With_Middle_Hook" . #\u+0494) ("Cyrillic_Small_Letter_Ghe_With_Middle_Hook" . #\u+0495) ("Cyrillic_Capital_Letter_Zhe_With_Descender" . #\u+0496) ("Cyrillic_Small_Letter_Zhe_With_Descender" . #\u+0497) ("Cyrillic_Capital_Letter_Ze_With_Descender" . #\u+0498) ("Cyrillic_Small_Letter_Ze_With_Descender" . #\u+0499) ("Cyrillic_Capital_Letter_Ka_With_Descender" . #\u+049a) ("Cyrillic_Small_Letter_Ka_With_Descender" . #\u+049b) ("Cyrillic_Capital_Letter_Ka_With_Vertical_Stroke" . #\u+049c) ("Cyrillic_Small_Letter_Ka_With_Vertical_Stroke" . #\u+049d) ("Cyrillic_Capital_Letter_Ka_With_Stroke" . #\u+049e) ("Cyrillic_Small_Letter_Ka_With_Stroke" . #\u+049f) ("Cyrillic_Capital_Letter_Bashkir_Ka" . #\u+04a0) ("Cyrillic_Small_Letter_Bashkir_Ka" . 
#\u+04a1) ("Cyrillic_Capital_Letter_En_With_Descender" . #\u+04a2) ("Cyrillic_Small_Letter_En_With_Descender" . #\u+04a3) ("Cyrillic_Capital_Ligature_En_Ghe" . #\u+04a4) ("Cyrillic_Small_Ligature_En_Ghe" . #\u+04a5) ("Cyrillic_Capital_Letter_Pe_With_Middle_Hook" . #\u+04a6) ("Cyrillic_Small_Letter_Pe_With_Middle_Hook" . #\u+04a7) ("Cyrillic_Capital_Letter_Abkhasian_Ha" . #\u+04a8) ("Cyrillic_Small_Letter_Abkhasian_Ha" . #\u+04a9) ("Cyrillic_Capital_Letter_Es_With_Descender" . #\u+04aa) ("Cyrillic_Small_Letter_Es_With_Descender" . #\u+04ab) ("Cyrillic_Capital_Letter_Te_With_Descender" . #\u+04ac) ("Cyrillic_Small_Letter_Te_With_Descender" . #\u+04ad) ("Cyrillic_Capital_Letter_Straight_U" . #\u+04ae) ("Cyrillic_Small_Letter_Straight_U" . #\u+04af) ("Cyrillic_Capital_Letter_Straight_U_With_Stroke" . #\u+04b0) ("Cyrillic_Small_Letter_Straight_U_With_Stroke" . #\u+04b1) ("Cyrillic_Capital_Letter_Ha_With_Descender" . #\u+04b2) ("Cyrillic_Small_Letter_Ha_With_Descender" . #\u+04b3) ("Cyrillic_Capital_Ligature_Te_Tse" . #\u+04b4) ("Cyrillic_Small_Ligature_Te_Tse" . #\u+04b5) ("Cyrillic_Capital_Letter_Che_With_Descender" . #\u+04b6) ("Cyrillic_Small_Letter_Che_With_Descender" . #\u+04b7) ("Cyrillic_Capital_Letter_Che_With_Vertical_Stroke" . #\u+04b8) ("Cyrillic_Small_Letter_Che_With_Vertical_Stroke" . #\u+04b9) ("Cyrillic_Capital_Letter_Shha" . #\u+04ba) ("Cyrillic_Small_Letter_Shha" . #\u+04bb) ("Cyrillic_Capital_Letter_Abkhasian_Che" . #\u+04bc) ("Cyrillic_Small_Letter_Abkhasian_Che" . #\u+04bd) ("Cyrillic_Capital_Letter_Abkhasian_Che_With_Descender" . #\u+04be) ("Cyrillic_Small_Letter_Abkhasian_Che_With_Descender" . #\u+04bf) ("Cyrillic_Letter_Palochka" . #\u+04c0) ("Cyrillic_Capital_Letter_Zhe_With_Breve" . #\u+04c1) ("Cyrillic_Small_Letter_Zhe_With_Breve" . #\u+04c2) ("Cyrillic_Capital_Letter_Ka_With_Hook" . #\u+04c3) ("Cyrillic_Small_Letter_Ka_With_Hook" . #\u+04c4) ("Cyrillic_Capital_Letter_El_With_Tail" . #\u+04c5) ("Cyrillic_Small_Letter_El_With_Tail" . 
#\u+04c6) ("Cyrillic_Capital_Letter_En_With_Hook" . #\u+04c7) ("Cyrillic_Small_Letter_En_With_Hook" . #\u+04c8) ("Cyrillic_Capital_Letter_En_With_Tail" . #\u+04c9) ("Cyrillic_Small_Letter_En_With_Tail" . #\u+04ca) ("Cyrillic_Capital_Letter_Khakassian_Che" . #\u+04cb) ("Cyrillic_Small_Letter_Khakassian_Che" . #\u+04cc) ("Cyrillic_Capital_Letter_Em_With_Tail" . #\u+04cd) ("Cyrillic_Small_Letter_Em_With_Tail" . #\u+04ce) ("Cyrillic_Small_Letter_Palochka" . #\u+04cf) ("Cyrillic_Capital_Letter_A_With_Breve" . #\u+04d0) ("Cyrillic_Small_Letter_A_With_Breve" . #\u+04d1) ("Cyrillic_Capital_Letter_A_With_Diaeresis" . #\u+04d2) ("Cyrillic_Small_Letter_A_With_Diaeresis" . #\u+04d3) ("Cyrillic_Capital_Ligature_A_Ie" . #\u+04d4) ("Cyrillic_Small_Ligature_A_Ie" . #\u+04d5) ("Cyrillic_Capital_Letter_Ie_With_Breve" . #\u+04d6) ("Cyrillic_Small_Letter_Ie_With_Breve" . #\u+04d7) ("Cyrillic_Capital_Letter_Schwa" . #\u+04d8) ("Cyrillic_Small_Letter_Schwa" . #\u+04d9) ("Cyrillic_Capital_Letter_Schwa_With_Diaeresis" . #\u+04da) ("Cyrillic_Small_Letter_Schwa_With_Diaeresis" . #\u+04db) ("Cyrillic_Capital_Letter_Zhe_With_Diaeresis" . #\u+04dc) ("Cyrillic_Small_Letter_Zhe_With_Diaeresis" . #\u+04dd) ("Cyrillic_Capital_Letter_Ze_With_Diaeresis" . #\u+04de) ("Cyrillic_Small_Letter_Ze_With_Diaeresis" . #\u+04df) ("Cyrillic_Capital_Letter_Abkhasian_Dze" . #\u+04e0) ("Cyrillic_Small_Letter_Abkhasian_Dze" . #\u+04e1) ("Cyrillic_Capital_Letter_I_With_Macron" . #\u+04e2) ("Cyrillic_Small_Letter_I_With_Macron" . #\u+04e3) ("Cyrillic_Capital_Letter_I_With_Diaeresis" . #\u+04e4) ("Cyrillic_Small_Letter_I_With_Diaeresis" . #\u+04e5) ("Cyrillic_Capital_Letter_O_With_Diaeresis" . #\u+04e6) ("Cyrillic_Small_Letter_O_With_Diaeresis" . #\u+04e7) ("Cyrillic_Capital_Letter_Barred_O" . #\u+04e8) ("Cyrillic_Small_Letter_Barred_O" . #\u+04e9) ("Cyrillic_Capital_Letter_Barred_O_With_Diaeresis" . #\u+04ea) ("Cyrillic_Small_Letter_Barred_O_With_Diaeresis" . #\u+04eb) ("Cyrillic_Capital_Letter_E_With_Diaeresis" . 
#\u+04ec) ("Cyrillic_Small_Letter_E_With_Diaeresis" . #\u+04ed) ("Cyrillic_Capital_Letter_U_With_Macron" . #\u+04ee) ("Cyrillic_Small_Letter_U_With_Macron" . #\u+04ef) ("Cyrillic_Capital_Letter_U_With_Diaeresis" . #\u+04f0) ("Cyrillic_Small_Letter_U_With_Diaeresis" . #\u+04f1) ("Cyrillic_Capital_Letter_U_With_Double_Acute" . #\u+04f2) ("Cyrillic_Small_Letter_U_With_Double_Acute" . #\u+04f3) ("Cyrillic_Capital_Letter_Che_With_Diaeresis" . #\u+04f4) ("Cyrillic_Small_Letter_Che_With_Diaeresis" . #\u+04f5) ("Cyrillic_Capital_Letter_Ghe_With_Descender" . #\u+04f6) ("Cyrillic_Small_Letter_Ghe_With_Descender" . #\u+04f7) ("Cyrillic_Capital_Letter_Yeru_With_Diaeresis" . #\u+04f8) ("Cyrillic_Small_Letter_Yeru_With_Diaeresis" . #\u+04f9) ("Cyrillic_Capital_Letter_Ghe_With_Stroke_And_Hook" . #\u+04fa) ("Cyrillic_Small_Letter_Ghe_With_Stroke_And_Hook" . #\u+04fb) ("Cyrillic_Capital_Letter_Ha_With_Hook" . #\u+04fc) ("Cyrillic_Small_Letter_Ha_With_Hook" . #\u+04fd) ("Cyrillic_Capital_Letter_Ha_With_Stroke" . #\u+04fe) ("Cyrillic_Small_Letter_Ha_With_Stroke" . #\u+04ff) ("Cyrillic_Capital_Letter_Komi_De" . #\u+0500) ("Cyrillic_Small_Letter_Komi_De" . #\u+0501) ("Cyrillic_Capital_Letter_Komi_Dje" . #\u+0502) ("Cyrillic_Small_Letter_Komi_Dje" . #\u+0503) ("Cyrillic_Capital_Letter_Komi_Zje" . #\u+0504) ("Cyrillic_Small_Letter_Komi_Zje" . #\u+0505) ("Cyrillic_Capital_Letter_Komi_Dzje" . #\u+0506) ("Cyrillic_Small_Letter_Komi_Dzje" . #\u+0507) ("Cyrillic_Capital_Letter_Komi_Lje" . #\u+0508) ("Cyrillic_Small_Letter_Komi_Lje" . #\u+0509) ("Cyrillic_Capital_Letter_Komi_Nje" . #\u+050a) ("Cyrillic_Small_Letter_Komi_Nje" . #\u+050b) ("Cyrillic_Capital_Letter_Komi_Sje" . #\u+050c) ("Cyrillic_Small_Letter_Komi_Sje" . #\u+050d) ("Cyrillic_Capital_Letter_Komi_Tje" . #\u+050e) ("Cyrillic_Small_Letter_Komi_Tje" . #\u+050f) ("Cyrillic_Capital_Letter_Reversed_Ze" . #\u+0510) ("Cyrillic_Small_Letter_Reversed_Ze" . #\u+0511) ("Cyrillic_Capital_Letter_El_With_Hook" . 
#\u+0512) ("Cyrillic_Small_Letter_El_With_Hook" . #\u+0513) ("Armenian_Capital_Letter_Ayb" . #\u+0531) ("Armenian_Capital_Letter_Ben" . #\u+0532) ("Armenian_Capital_Letter_Gim" . #\u+0533) ("Armenian_Capital_Letter_Da" . #\u+0534) ("Armenian_Capital_Letter_Ech" . #\u+0535) ("Armenian_Capital_Letter_Za" . #\u+0536) ("Armenian_Capital_Letter_Eh" . #\u+0537) ("Armenian_Capital_Letter_Et" . #\u+0538) ("Armenian_Capital_Letter_To" . #\u+0539) ("Armenian_Capital_Letter_Zhe" . #\u+053a) ("Armenian_Capital_Letter_Ini" . #\u+053b) ("Armenian_Capital_Letter_Liwn" . #\u+053c) ("Armenian_Capital_Letter_Xeh" . #\u+053d) ("Armenian_Capital_Letter_Ca" . #\u+053e) ("Armenian_Capital_Letter_Ken" . #\u+053f) ("Armenian_Capital_Letter_Ho" . #\u+0540) ("Armenian_Capital_Letter_Ja" . #\u+0541) ("Armenian_Capital_Letter_Ghad" . #\u+0542) ("Armenian_Capital_Letter_Cheh" . #\u+0543) ("Armenian_Capital_Letter_Men" . #\u+0544) ("Armenian_Capital_Letter_Yi" . #\u+0545) ("Armenian_Capital_Letter_Now" . #\u+0546) ("Armenian_Capital_Letter_Sha" . #\u+0547) ("Armenian_Capital_Letter_Vo" . #\u+0548) ("Armenian_Capital_Letter_Cha" . #\u+0549) ("Armenian_Capital_Letter_Peh" . #\u+054a) ("Armenian_Capital_Letter_Jheh" . #\u+054b) ("Armenian_Capital_Letter_Ra" . #\u+054c) ("Armenian_Capital_Letter_Seh" . #\u+054d) ("Armenian_Capital_Letter_Vew" . #\u+054e) ("Armenian_Capital_Letter_Tiwn" . #\u+054f) ("Armenian_Capital_Letter_Reh" . #\u+0550) ("Armenian_Capital_Letter_Co" . #\u+0551) ("Armenian_Capital_Letter_Yiwn" . #\u+0552) ("Armenian_Capital_Letter_Piwr" . #\u+0553) ("Armenian_Capital_Letter_Keh" . #\u+0554) ("Armenian_Capital_Letter_Oh" . #\u+0555) ("Armenian_Capital_Letter_Feh" . #\u+0556) ("Armenian_Modifier_Letter_Left_Half_Ring" . #\u+0559) ("Armenian_Apostrophe" . #\u+055a) ("Armenian_Emphasis_Mark" . #\u+055b) ("Armenian_Exclamation_Mark" . #\u+055c) ("Armenian_Comma" . #\u+055d) ("Armenian_Question_Mark" . #\u+055e) ("Armenian_Abbreviation_Mark" . #\u+055f) ("Armenian_Small_Letter_Ayb" . 
#\u+0561) ("Armenian_Small_Letter_Ben" . #\u+0562) ("Armenian_Small_Letter_Gim" . #\u+0563) ("Armenian_Small_Letter_Da" . #\u+0564) ("Armenian_Small_Letter_Ech" . #\u+0565) ("Armenian_Small_Letter_Za" . #\u+0566) ("Armenian_Small_Letter_Eh" . #\u+0567) ("Armenian_Small_Letter_Et" . #\u+0568) ("Armenian_Small_Letter_To" . #\u+0569) ("Armenian_Small_Letter_Zhe" . #\u+056a) ("Armenian_Small_Letter_Ini" . #\u+056b) ("Armenian_Small_Letter_Liwn" . #\u+056c) ("Armenian_Small_Letter_Xeh" . #\u+056d) ("Armenian_Small_Letter_Ca" . #\u+056e) ("Armenian_Small_Letter_Ken" . #\u+056f) ("Armenian_Small_Letter_Ho" . #\u+0570) ("Armenian_Small_Letter_Ja" . #\u+0571) ("Armenian_Small_Letter_Ghad" . #\u+0572) ("Armenian_Small_Letter_Cheh" . #\u+0573) ("Armenian_Small_Letter_Men" . #\u+0574) ("Armenian_Small_Letter_Yi" . #\u+0575) ("Armenian_Small_Letter_Now" . #\u+0576) ("Armenian_Small_Letter_Sha" . #\u+0577) ("Armenian_Small_Letter_Vo" . #\u+0578) ("Armenian_Small_Letter_Cha" . #\u+0579) ("Armenian_Small_Letter_Peh" . #\u+057a) ("Armenian_Small_Letter_Jheh" . #\u+057b) ("Armenian_Small_Letter_Ra" . #\u+057c) ("Armenian_Small_Letter_Seh" . #\u+057d) ("Armenian_Small_Letter_Vew" . #\u+057e) ("Armenian_Small_Letter_Tiwn" . #\u+057f) ("Armenian_Small_Letter_Reh" . #\u+0580) ("Armenian_Small_Letter_Co" . #\u+0581) ("Armenian_Small_Letter_Yiwn" . #\u+0582) ("Armenian_Small_Letter_Piwr" . #\u+0583) ("Armenian_Small_Letter_Keh" . #\u+0584) ("Armenian_Small_Letter_Oh" . #\u+0585) ("Armenian_Small_Letter_Feh" . #\u+0586) ("Armenian_Small_Ligature_Ech_Yiwn" . #\u+0587) ("Armenian_Full_Stop" . #\u+0589) ("Armenian_Hyphen" . #\u+058a) ("Hebrew_Accent_Etnahta" . #\u+0591) ("Hebrew_Accent_Segol" . #\u+0592) ("Hebrew_Accent_Shalshelet" . #\u+0593) ("Hebrew_Accent_Zaqef_Qatan" . #\u+0594) ("Hebrew_Accent_Zaqef_Gadol" . #\u+0595) ("Hebrew_Accent_Tipeha" . #\u+0596) ("Hebrew_Accent_Revia" . #\u+0597) ("Hebrew_Accent_Zarqa" . #\u+0598) ("Hebrew_Accent_Pashta" . #\u+0599) ("Hebrew_Accent_Yetiv" . 
#\u+059a) ("Hebrew_Accent_Tevir" . #\u+059b) ("Hebrew_Accent_Geresh" . #\u+059c) ("Hebrew_Accent_Geresh_Muqdam" . #\u+059d) ("Hebrew_Accent_Gershayim" . #\u+059e) ("Hebrew_Accent_Qarney_Para" . #\u+059f) ("Hebrew_Accent_Telisha_Gedola" . #\u+05a0) ("Hebrew_Accent_Pazer" . #\u+05a1) ("Hebrew_Accent_Atnah_Hafukh" . #\u+05a2) ("Hebrew_Accent_Munah" . #\u+05a3) ("Hebrew_Accent_Mahapakh" . #\u+05a4) ("Hebrew_Accent_Merkha" . #\u+05a5) ("Hebrew_Accent_Merkha_Kefula" . #\u+05a6) ("Hebrew_Accent_Darga" . #\u+05a7) ("Hebrew_Accent_Qadma" . #\u+05a8) ("Hebrew_Accent_Telisha_Qetana" . #\u+05a9) ("Hebrew_Accent_Yerah_Ben_Yomo" . #\u+05aa) ("Hebrew_Accent_Ole" . #\u+05ab) ("Hebrew_Accent_Iluy" . #\u+05ac) ("Hebrew_Accent_Dehi" . #\u+05ad) ("Hebrew_Accent_Zinor" . #\u+05ae) ("Hebrew_Mark_Masora_Circle" . #\u+05af) ("Hebrew_Point_Sheva" . #\u+05b0) ("Hebrew_Point_Hataf_Segol" . #\u+05b1) ("Hebrew_Point_Hataf_Patah" . #\u+05b2) ("Hebrew_Point_Hataf_Qamats" . #\u+05b3) ("Hebrew_Point_Hiriq" . #\u+05b4) ("Hebrew_Point_Tsere" . #\u+05b5) ("Hebrew_Point_Segol" . #\u+05b6) ("Hebrew_Point_Patah" . #\u+05b7) ("Hebrew_Point_Qamats" . #\u+05b8) ("Hebrew_Point_Holam" . #\u+05b9) ("Hebrew_Point_Holam_Haser_For_Vav" . #\u+05ba) ("Hebrew_Point_Qubuts" . #\u+05bb) ("Hebrew_Point_Dagesh_Or_Mapiq" . #\u+05bc) ("Hebrew_Point_Meteg" . #\u+05bd) ("Hebrew_Punctuation_Maqaf" . #\u+05be) ("Hebrew_Point_Rafe" . #\u+05bf) ("Hebrew_Punctuation_Paseq" . #\u+05c0) ("Hebrew_Point_Shin_Dot" . #\u+05c1) ("Hebrew_Point_Sin_Dot" . #\u+05c2) ("Hebrew_Punctuation_Sof_Pasuq" . #\u+05c3) ("Hebrew_Mark_Upper_Dot" . #\u+05c4) ("Hebrew_Mark_Lower_Dot" . #\u+05c5) ("Hebrew_Punctuation_Nun_Hafukha" . #\u+05c6) ("Hebrew_Point_Qamats_Qatan" . #\u+05c7) ("Hebrew_Letter_Alef" . #\u+05d0) ("Hebrew_Letter_Bet" . #\u+05d1) ("Hebrew_Letter_Gimel" . #\u+05d2) ("Hebrew_Letter_Dalet" . #\u+05d3) ("Hebrew_Letter_He" . #\u+05d4) ("Hebrew_Letter_Vav" . #\u+05d5) ("Hebrew_Letter_Zayin" . #\u+05d6) ("Hebrew_Letter_Het" . 
#\u+05d7) ("Hebrew_Letter_Tet" . #\u+05d8) ("Hebrew_Letter_Yod" . #\u+05d9) ("Hebrew_Letter_Final_Kaf" . #\u+05da) ("Hebrew_Letter_Kaf" . #\u+05db) ("Hebrew_Letter_Lamed" . #\u+05dc) ("Hebrew_Letter_Final_Mem" . #\u+05dd) ("Hebrew_Letter_Mem" . #\u+05de) ("Hebrew_Letter_Final_Nun" . #\u+05df) ("Hebrew_Letter_Nun" . #\u+05e0) ("Hebrew_Letter_Samekh" . #\u+05e1) ("Hebrew_Letter_Ayin" . #\u+05e2) ("Hebrew_Letter_Final_Pe" . #\u+05e3) ("Hebrew_Letter_Pe" . #\u+05e4) ("Hebrew_Letter_Final_Tsadi" . #\u+05e5) ("Hebrew_Letter_Tsadi" . #\u+05e6) ("Hebrew_Letter_Qof" . #\u+05e7) ("Hebrew_Letter_Resh" . #\u+05e8) ("Hebrew_Letter_Shin" . #\u+05e9) ("Hebrew_Letter_Tav" . #\u+05ea) ("Hebrew_Ligature_Yiddish_Double_Vav" . #\u+05f0) ("Hebrew_Ligature_Yiddish_Vav_Yod" . #\u+05f1) ("Hebrew_Ligature_Yiddish_Double_Yod" . #\u+05f2) ("Hebrew_Punctuation_Geresh" . #\u+05f3) ("Hebrew_Punctuation_Gershayim" . #\u+05f4) ("Arabic_Number_Sign" . #\u+0600) ("Arabic_Sign_Sanah" . #\u+0601) ("Arabic_Footnote_Marker" . #\u+0602) ("Arabic_Sign_Safha" . #\u+0603) ("Afghani_Sign" . #\u+060b) ("Arabic_Comma" . #\u+060c) ("Arabic_Date_Separator" . #\u+060d) ("Arabic_Poetic_Verse_Sign" . #\u+060e) ("Arabic_Sign_Misra" . #\u+060f) ("Arabic_Sign_Sallallahou_Alayhe_Wassallam" . #\u+0610) ("Arabic_Sign_Alayhe_Assallam" . #\u+0611) ("Arabic_Sign_Rahmatullah_Alayhe" . #\u+0612) ("Arabic_Sign_Radi_Allahou_Anhu" . #\u+0613) ("Arabic_Sign_Takhallus" . #\u+0614) ("Arabic_Small_High_Tah" . #\u+0615) ("Arabic_Semicolon" . #\u+061b) ("Arabic_Triple_Dot_Punctuation_Mark" . #\u+061e) ("Arabic_Question_Mark" . #\u+061f) ("Arabic_Letter_Hamza" . #\u+0621) ("Arabic_Letter_Alef_With_Madda_Above" . #\u+0622) ("Arabic_Letter_Alef_With_Hamza_Above" . #\u+0623) ("Arabic_Letter_Waw_With_Hamza_Above" . #\u+0624) ("Arabic_Letter_Alef_With_Hamza_Below" . #\u+0625) ("Arabic_Letter_Yeh_With_Hamza_Above" . #\u+0626) ("Arabic_Letter_Alef" . #\u+0627) ("Arabic_Letter_Beh" . #\u+0628) ("Arabic_Letter_Teh_Marbuta" . 
#\u+0629) ("Arabic_Letter_Teh" . #\u+062a) ("Arabic_Letter_Theh" . #\u+062b) ("Arabic_Letter_Jeem" . #\u+062c) ("Arabic_Letter_Hah" . #\u+062d) ("Arabic_Letter_Khah" . #\u+062e) ("Arabic_Letter_Dal" . #\u+062f) ("Arabic_Letter_Thal" . #\u+0630) ("Arabic_Letter_Reh" . #\u+0631) ("Arabic_Letter_Zain" . #\u+0632) ("Arabic_Letter_Seen" . #\u+0633) ("Arabic_Letter_Sheen" . #\u+0634) ("Arabic_Letter_Sad" . #\u+0635) ("Arabic_Letter_Dad" . #\u+0636) ("Arabic_Letter_Tah" . #\u+0637) ("Arabic_Letter_Zah" . #\u+0638) ("Arabic_Letter_Ain" . #\u+0639) ("Arabic_Letter_Ghain" . #\u+063a) ("Arabic_Tatweel" . #\u+0640) ("Arabic_Letter_Feh" . #\u+0641) ("Arabic_Letter_Qaf" . #\u+0642) ("Arabic_Letter_Kaf" . #\u+0643) ("Arabic_Letter_Lam" . #\u+0644) ("Arabic_Letter_Meem" . #\u+0645) ("Arabic_Letter_Noon" . #\u+0646) ("Arabic_Letter_Heh" . #\u+0647) ("Arabic_Letter_Waw" . #\u+0648) ("Arabic_Letter_Alef_Maksura" . #\u+0649) ("Arabic_Letter_Yeh" . #\u+064a) ("Arabic_Fathatan" . #\u+064b) ("Arabic_Dammatan" . #\u+064c) ("Arabic_Kasratan" . #\u+064d) ("Arabic_Fatha" . #\u+064e) ("Arabic_Damma" . #\u+064f) ("Arabic_Kasra" . #\u+0650) ("Arabic_Shadda" . #\u+0651) ("Arabic_Sukun" . #\u+0652) ("Arabic_Maddah_Above" . #\u+0653) ("Arabic_Hamza_Above" . #\u+0654) ("Arabic_Hamza_Below" . #\u+0655) ("Arabic_Subscript_Alef" . #\u+0656) ("Arabic_Inverted_Damma" . #\u+0657) ("Arabic_Mark_Noon_Ghunna" . #\u+0658) ("Arabic_Zwarakay" . #\u+0659) ("Arabic_Vowel_Sign_Small_V_Above" . #\u+065a) ("Arabic_Vowel_Sign_Inverted_Small_V_Above" . #\u+065b) ("Arabic_Vowel_Sign_Dot_Below" . #\u+065c) ("Arabic_Reversed_Damma" . #\u+065d) ("Arabic_Fatha_With_Two_Dots" . #\u+065e) ("Arabic-Indic_Digit_Zero" . #\u+0660) ("Arabic-Indic_Digit_One" . #\u+0661) ("Arabic-Indic_Digit_Two" . #\u+0662) ("Arabic-Indic_Digit_Three" . #\u+0663) ("Arabic-Indic_Digit_Four" . #\u+0664) ("Arabic-Indic_Digit_Five" . #\u+0665) ("Arabic-Indic_Digit_Six" . #\u+0666) ("Arabic-Indic_Digit_Seven" . #\u+0667) ("Arabic-Indic_Digit_Eight" . 
#\u+0668) ("Arabic-Indic_Digit_Nine" . #\u+0669) ("Arabic_Percent_Sign" . #\u+066a) ("Arabic_Decimal_Separator" . #\u+066b) ("Arabic_Thousands_Separator" . #\u+066c) ("Arabic_Five_Pointed_Star" . #\u+066d) ("Arabic_Letter_Dotless_Beh" . #\u+066e) ("Arabic_Letter_Dotless_Qaf" . #\u+066f) ("Arabic_Letter_Superscript_Alef" . #\u+0670) ("Arabic_Letter_Alef_Wasla" . #\u+0671) ("Arabic_Letter_Alef_With_Wavy_Hamza_Above" . #\u+0672) ("Arabic_Letter_Alef_With_Wavy_Hamza_Below" . #\u+0673) ("Arabic_Letter_High_Hamza" . #\u+0674) ("Arabic_Letter_High_Hamza_Alef" . #\u+0675) ("Arabic_Letter_High_Hamza_Waw" . #\u+0676) ("Arabic_Letter_U_With_Hamza_Above" . #\u+0677) ("Arabic_Letter_High_Hamza_Yeh" . #\u+0678) ("Arabic_Letter_Tteh" . #\u+0679) ("Arabic_Letter_Tteheh" . #\u+067a) ("Arabic_Letter_Beeh" . #\u+067b) ("Arabic_Letter_Teh_With_Ring" . #\u+067c) ("Arabic_Letter_Teh_With_Three_Dots_Above_Downwards" . #\u+067d) ("Arabic_Letter_Peh" . #\u+067e) ("Arabic_Letter_Teheh" . #\u+067f) ("Arabic_Letter_Beheh" . #\u+0680) ("Arabic_Letter_Hah_With_Hamza_Above" . #\u+0681) ("Arabic_Letter_Hah_With_Two_Dots_Vertical_Above" . #\u+0682) ("Arabic_Letter_Nyeh" . #\u+0683) ("Arabic_Letter_Dyeh" . #\u+0684) ("Arabic_Letter_Hah_With_Three_Dots_Above" . #\u+0685) ("Arabic_Letter_Tcheh" . #\u+0686) ("Arabic_Letter_Tcheheh" . #\u+0687) ("Arabic_Letter_Ddal" . #\u+0688) ("Arabic_Letter_Dal_With_Ring" . #\u+0689) ("Arabic_Letter_Dal_With_Dot_Below" . #\u+068a) ("Arabic_Letter_Dal_With_Dot_Below_And_Small_Tah" . #\u+068b) ("Arabic_Letter_Dahal" . #\u+068c) ("Arabic_Letter_Ddahal" . #\u+068d) ("Arabic_Letter_Dul" . #\u+068e) ("Arabic_Letter_Dal_With_Three_Dots_Above_Downwards" . #\u+068f) ("Arabic_Letter_Dal_With_Four_Dots_Above" . #\u+0690) ("Arabic_Letter_Rreh" . #\u+0691) ("Arabic_Letter_Reh_With_Small_V" . #\u+0692) ("Arabic_Letter_Reh_With_Ring" . #\u+0693) ("Arabic_Letter_Reh_With_Dot_Below" . #\u+0694) ("Arabic_Letter_Reh_With_Small_V_Below" . 
#\u+0695) ("Arabic_Letter_Reh_With_Dot_Below_And_Dot_Above" . #\u+0696) ("Arabic_Letter_Reh_With_Two_Dots_Above" . #\u+0697) ("Arabic_Letter_Jeh" . #\u+0698) ("Arabic_Letter_Reh_With_Four_Dots_Above" . #\u+0699) ("Arabic_Letter_Seen_With_Dot_Below_And_Dot_Above" . #\u+069a) ("Arabic_Letter_Seen_With_Three_Dots_Below" . #\u+069b) ("Arabic_Letter_Seen_With_Three_Dots_Below_And_Three_Dots_Above" . #\u+069c) ("Arabic_Letter_Sad_With_Two_Dots_Below" . #\u+069d) ("Arabic_Letter_Sad_With_Three_Dots_Above" . #\u+069e) ("Arabic_Letter_Tah_With_Three_Dots_Above" . #\u+069f) ("Arabic_Letter_Ain_With_Three_Dots_Above" . #\u+06a0) ("Arabic_Letter_Dotless_Feh" . #\u+06a1) ("Arabic_Letter_Feh_With_Dot_Moved_Below" . #\u+06a2) ("Arabic_Letter_Feh_With_Dot_Below" . #\u+06a3) ("Arabic_Letter_Veh" . #\u+06a4) ("Arabic_Letter_Feh_With_Three_Dots_Below" . #\u+06a5) ("Arabic_Letter_Peheh" . #\u+06a6) ("Arabic_Letter_Qaf_With_Dot_Above" . #\u+06a7) ("Arabic_Letter_Qaf_With_Three_Dots_Above" . #\u+06a8) ("Arabic_Letter_Keheh" . #\u+06a9) ("Arabic_Letter_Swash_Kaf" . #\u+06aa) ("Arabic_Letter_Kaf_With_Ring" . #\u+06ab) ("Arabic_Letter_Kaf_With_Dot_Above" . #\u+06ac) ("Arabic_Letter_Ng" . #\u+06ad) ("Arabic_Letter_Kaf_With_Three_Dots_Below" . #\u+06ae) ("Arabic_Letter_Gaf" . #\u+06af) ("Arabic_Letter_Gaf_With_Ring" . #\u+06b0) ("Arabic_Letter_Ngoeh" . #\u+06b1) ("Arabic_Letter_Gaf_With_Two_Dots_Below" . #\u+06b2) ("Arabic_Letter_Gueh" . #\u+06b3) ("Arabic_Letter_Gaf_With_Three_Dots_Above" . #\u+06b4) ("Arabic_Letter_Lam_With_Small_V" . #\u+06b5) ("Arabic_Letter_Lam_With_Dot_Above" . #\u+06b6) ("Arabic_Letter_Lam_With_Three_Dots_Above" . #\u+06b7) ("Arabic_Letter_Lam_With_Three_Dots_Below" . #\u+06b8) ("Arabic_Letter_Noon_With_Dot_Below" . #\u+06b9) ("Arabic_Letter_Noon_Ghunna" . #\u+06ba) ("Arabic_Letter_Rnoon" . #\u+06bb) ("Arabic_Letter_Noon_With_Ring" . #\u+06bc) ("Arabic_Letter_Noon_With_Three_Dots_Above" . #\u+06bd) ("Arabic_Letter_Heh_Doachashmee" . 
#\u+06be) ("Arabic_Letter_Tcheh_With_Dot_Above" . #\u+06bf) ("Arabic_Letter_Heh_With_Yeh_Above" . #\u+06c0) ("Arabic_Letter_Heh_Goal" . #\u+06c1) ("Arabic_Letter_Heh_Goal_With_Hamza_Above" . #\u+06c2) ("Arabic_Letter_Teh_Marbuta_Goal" . #\u+06c3) ("Arabic_Letter_Waw_With_Ring" . #\u+06c4) ("Arabic_Letter_Kirghiz_Oe" . #\u+06c5) ("Arabic_Letter_Oe" . #\u+06c6) ("Arabic_Letter_U" . #\u+06c7) ("Arabic_Letter_Yu" . #\u+06c8) ("Arabic_Letter_Kirghiz_Yu" . #\u+06c9) ("Arabic_Letter_Waw_With_Two_Dots_Above" . #\u+06ca) ("Arabic_Letter_Ve" . #\u+06cb) ("Arabic_Letter_Farsi_Yeh" . #\u+06cc) ("Arabic_Letter_Yeh_With_Tail" . #\u+06cd) ("Arabic_Letter_Yeh_With_Small_V" . #\u+06ce) ("Arabic_Letter_Waw_With_Dot_Above" . #\u+06cf) ("Arabic_Letter_E" . #\u+06d0) ("Arabic_Letter_Yeh_With_Three_Dots_Below" . #\u+06d1) ("Arabic_Letter_Yeh_Barree" . #\u+06d2) ("Arabic_Letter_Yeh_Barree_With_Hamza_Above" . #\u+06d3) ("Arabic_Full_Stop" . #\u+06d4) ("Arabic_Letter_Ae" . #\u+06d5) ("Arabic_Small_High_Ligature_Sad_With_Lam_With_Alef_Maksura" . #\u+06d6) ("Arabic_Small_High_Ligature_Qaf_With_Lam_With_Alef_Maksura" . #\u+06d7) ("Arabic_Small_High_Meem_Initial_Form" . #\u+06d8) ("Arabic_Small_High_Lam_Alef" . #\u+06d9) ("Arabic_Small_High_Jeem" . #\u+06da) ("Arabic_Small_High_Three_Dots" . #\u+06db) ("Arabic_Small_High_Seen" . #\u+06dc) ("Arabic_End_Of_Ayah" . #\u+06dd) ("Arabic_Start_Of_Rub_El_Hizb" . #\u+06de) ("Arabic_Small_High_Rounded_Zero" . #\u+06df) ("Arabic_Small_High_Upright_Rectangular_Zero" . #\u+06e0) ("Arabic_Small_High_Dotless_Head_Of_Khah" . #\u+06e1) ("Arabic_Small_High_Meem_Isolated_Form" . #\u+06e2) ("Arabic_Small_Low_Seen" . #\u+06e3) ("Arabic_Small_High_Madda" . #\u+06e4) ("Arabic_Small_Waw" . #\u+06e5) ("Arabic_Small_Yeh" . #\u+06e6) ("Arabic_Small_High_Yeh" . #\u+06e7) ("Arabic_Small_High_Noon" . #\u+06e8) ("Arabic_Place_Of_Sajdah" . #\u+06e9) ("Arabic_Empty_Centre_Low_Stop" . #\u+06ea) ("Arabic_Empty_Centre_High_Stop" . 
#\u+06eb) ("Arabic_Rounded_High_Stop_With_Filled_Centre" . #\u+06ec) ("Arabic_Small_Low_Meem" . #\u+06ed) ("Arabic_Letter_Dal_With_Inverted_V" . #\u+06ee) ("Arabic_Letter_Reh_With_Inverted_V" . #\u+06ef) ("Extended_Arabic-Indic_Digit_Zero" . #\u+06f0) ("Extended_Arabic-Indic_Digit_One" . #\u+06f1) ("Extended_Arabic-Indic_Digit_Two" . #\u+06f2) ("Extended_Arabic-Indic_Digit_Three" . #\u+06f3) ("Extended_Arabic-Indic_Digit_Four" . #\u+06f4) ("Extended_Arabic-Indic_Digit_Five" . #\u+06f5) ("Extended_Arabic-Indic_Digit_Six" . #\u+06f6) ("Extended_Arabic-Indic_Digit_Seven" . #\u+06f7) ("Extended_Arabic-Indic_Digit_Eight" . #\u+06f8) ("Extended_Arabic-Indic_Digit_Nine" . #\u+06f9) ("Arabic_Letter_Sheen_With_Dot_Below" . #\u+06fa) ("Arabic_Letter_Dad_With_Dot_Below" . #\u+06fb) ("Arabic_Letter_Ghain_With_Dot_Below" . #\u+06fc) ("Arabic_Sign_Sindhi_Ampersand" . #\u+06fd) ("Arabic_Sign_Sindhi_Postposition_Men" . #\u+06fe) ("Arabic_Letter_Heh_With_Inverted_V" . #\u+06ff) ("Syriac_End_Of_Paragraph" . #\u+0700) ("Syriac_Supralinear_Full_Stop" . #\u+0701) ("Syriac_Sublinear_Full_Stop" . #\u+0702) ("Syriac_Supralinear_Colon" . #\u+0703) ("Syriac_Sublinear_Colon" . #\u+0704) ("Syriac_Horizontal_Colon" . #\u+0705) ("Syriac_Colon_Skewed_Left" . #\u+0706) ("Syriac_Colon_Skewed_Right" . #\u+0707) ("Syriac_Supralinear_Colon_Skewed_Left" . #\u+0708) ("Syriac_Sublinear_Colon_Skewed_Right" . #\u+0709) ("Syriac_Contraction" . #\u+070a) ("Syriac_Harklean_Obelus" . #\u+070b) ("Syriac_Harklean_Metobelus" . #\u+070c) ("Syriac_Harklean_Asteriscus" . #\u+070d) ("Syriac_Abbreviation_Mark" . #\u+070f) ("Syriac_Letter_Alaph" . #\u+0710) ("Syriac_Letter_Superscript_Alaph" . #\u+0711) ("Syriac_Letter_Beth" . #\u+0712) ("Syriac_Letter_Gamal" . #\u+0713) ("Syriac_Letter_Gamal_Garshuni" . #\u+0714) ("Syriac_Letter_Dalath" . #\u+0715) ("Syriac_Letter_Dotless_Dalath_Rish" . #\u+0716) ("Syriac_Letter_He" . #\u+0717) ("Syriac_Letter_Waw" . #\u+0718) ("Syriac_Letter_Zain" . #\u+0719) ("Syriac_Letter_Heth" . 
#\u+071a) ("Syriac_Letter_Teth" . #\u+071b) ("Syriac_Letter_Teth_Garshuni" . #\u+071c) ("Syriac_Letter_Yudh" . #\u+071d) ("Syriac_Letter_Yudh_He" . #\u+071e) ("Syriac_Letter_Kaph" . #\u+071f) ("Syriac_Letter_Lamadh" . #\u+0720) ("Syriac_Letter_Mim" . #\u+0721) ("Syriac_Letter_Nun" . #\u+0722) ("Syriac_Letter_Semkath" . #\u+0723) ("Syriac_Letter_Final_Semkath" . #\u+0724) ("Syriac_Letter_E" . #\u+0725) ("Syriac_Letter_Pe" . #\u+0726) ("Syriac_Letter_Reversed_Pe" . #\u+0727) ("Syriac_Letter_Sadhe" . #\u+0728) ("Syriac_Letter_Qaph" . #\u+0729) ("Syriac_Letter_Rish" . #\u+072a) ("Syriac_Letter_Shin" . #\u+072b) ("Syriac_Letter_Taw" . #\u+072c) ("Syriac_Letter_Persian_Bheth" . #\u+072d) ("Syriac_Letter_Persian_Ghamal" . #\u+072e) ("Syriac_Letter_Persian_Dhalath" . #\u+072f) ("Syriac_Pthaha_Above" . #\u+0730) ("Syriac_Pthaha_Below" . #\u+0731) ("Syriac_Pthaha_Dotted" . #\u+0732) ("Syriac_Zqapha_Above" . #\u+0733) ("Syriac_Zqapha_Below" . #\u+0734) ("Syriac_Zqapha_Dotted" . #\u+0735) ("Syriac_Rbasa_Above" . #\u+0736) ("Syriac_Rbasa_Below" . #\u+0737) ("Syriac_Dotted_Zlama_Horizontal" . #\u+0738) ("Syriac_Dotted_Zlama_Angular" . #\u+0739) ("Syriac_Hbasa_Above" . #\u+073a) ("Syriac_Hbasa_Below" . #\u+073b) ("Syriac_Hbasa-Esasa_Dotted" . #\u+073c) ("Syriac_Esasa_Above" . #\u+073d) ("Syriac_Esasa_Below" . #\u+073e) ("Syriac_Rwaha" . #\u+073f) ("Syriac_Feminine_Dot" . #\u+0740) ("Syriac_Qushshaya" . #\u+0741) ("Syriac_Rukkakha" . #\u+0742) ("Syriac_Two_Vertical_Dots_Above" . #\u+0743) ("Syriac_Two_Vertical_Dots_Below" . #\u+0744) ("Syriac_Three_Dots_Above" . #\u+0745) ("Syriac_Three_Dots_Below" . #\u+0746) ("Syriac_Oblique_Line_Above" . #\u+0747) ("Syriac_Oblique_Line_Below" . #\u+0748) ("Syriac_Music" . #\u+0749) ("Syriac_Barrekh" . #\u+074a) ("Syriac_Letter_Sogdian_Zhain" . #\u+074d) ("Syriac_Letter_Sogdian_Khaph" . #\u+074e) ("Syriac_Letter_Sogdian_Fe" . #\u+074f) ("Arabic_Letter_Beh_With_Three_Dots_Horizontally_Below" . 
#\u+0750) ("Arabic_Letter_Beh_With_Dot_Below_And_Three_Dots_Above" . #\u+0751) ("Arabic_Letter_Beh_With_Three_Dots_Pointing_Upwards_Below" . #\u+0752) ("Arabic_Letter_Beh_With_Three_Dots_Pointing_Upwards_Below_And_Two_Dots_Above" . #\u+0753) ("Arabic_Letter_Beh_With_Two_Dots_Below_And_Dot_Above" . #\u+0754) ("Arabic_Letter_Beh_With_Inverted_Small_V_Below" . #\u+0755) ("Arabic_Letter_Beh_With_Small_V" . #\u+0756) ("Arabic_Letter_Hah_With_Two_Dots_Above" . #\u+0757) ("Arabic_Letter_Hah_With_Three_Dots_Pointing_Upwards_Below" . #\u+0758) ("Arabic_Letter_Dal_With_Two_Dots_Vertically_Below_And_Small_Tah" . #\u+0759) ("Arabic_Letter_Dal_With_Inverted_Small_V_Below" . #\u+075a) ("Arabic_Letter_Reh_With_Stroke" . #\u+075b) ("Arabic_Letter_Seen_With_Four_Dots_Above" . #\u+075c) ("Arabic_Letter_Ain_With_Two_Dots_Above" . #\u+075d) ("Arabic_Letter_Ain_With_Three_Dots_Pointing_Downwards_Above" . #\u+075e) ("Arabic_Letter_Ain_With_Two_Dots_Vertically_Above" . #\u+075f) ("Arabic_Letter_Feh_With_Two_Dots_Below" . #\u+0760) ("Arabic_Letter_Feh_With_Three_Dots_Pointing_Upwards_Below" . #\u+0761) ("Arabic_Letter_Keheh_With_Dot_Above" . #\u+0762) ("Arabic_Letter_Keheh_With_Three_Dots_Above" . #\u+0763) ("Arabic_Letter_Keheh_With_Three_Dots_Pointing_Upwards_Below" . #\u+0764) ("Arabic_Letter_Meem_With_Dot_Above" . #\u+0765) ("Arabic_Letter_Meem_With_Dot_Below" . #\u+0766) ("Arabic_Letter_Noon_With_Two_Dots_Below" . #\u+0767) ("Arabic_Letter_Noon_With_Small_Tah" . #\u+0768) ("Arabic_Letter_Noon_With_Small_V" . #\u+0769) ("Arabic_Letter_Lam_With_Bar" . #\u+076a) ("Arabic_Letter_Reh_With_Two_Dots_Vertically_Above" . #\u+076b) ("Arabic_Letter_Reh_With_Hamza_Above" . #\u+076c) ("Arabic_Letter_Seen_With_Two_Dots_Vertically_Above" . #\u+076d) ("Thaana_Letter_Haa" . #\u+0780) ("Thaana_Letter_Shaviyani" . #\u+0781) ("Thaana_Letter_Noonu" . #\u+0782) ("Thaana_Letter_Raa" . #\u+0783) ("Thaana_Letter_Baa" . #\u+0784) ("Thaana_Letter_Lhaviyani" . #\u+0785) ("Thaana_Letter_Kaafu" . 
#\u+0786) ("Thaana_Letter_Alifu" . #\u+0787) ("Thaana_Letter_Vaavu" . #\u+0788) ("Thaana_Letter_Meemu" . #\u+0789) ("Thaana_Letter_Faafu" . #\u+078a) ("Thaana_Letter_Dhaalu" . #\u+078b) ("Thaana_Letter_Thaa" . #\u+078c) ("Thaana_Letter_Laamu" . #\u+078d) ("Thaana_Letter_Gaafu" . #\u+078e) ("Thaana_Letter_Gnaviyani" . #\u+078f) ("Thaana_Letter_Seenu" . #\u+0790) ("Thaana_Letter_Daviyani" . #\u+0791) ("Thaana_Letter_Zaviyani" . #\u+0792) ("Thaana_Letter_Taviyani" . #\u+0793) ("Thaana_Letter_Yaa" . #\u+0794) ("Thaana_Letter_Paviyani" . #\u+0795) ("Thaana_Letter_Javiyani" . #\u+0796) ("Thaana_Letter_Chaviyani" . #\u+0797) ("Thaana_Letter_Ttaa" . #\u+0798) ("Thaana_Letter_Hhaa" . #\u+0799) ("Thaana_Letter_Khaa" . #\u+079a) ("Thaana_Letter_Thaalu" . #\u+079b) ("Thaana_Letter_Zaa" . #\u+079c) ("Thaana_Letter_Sheenu" . #\u+079d) ("Thaana_Letter_Saadhu" . #\u+079e) ("Thaana_Letter_Daadhu" . #\u+079f) ("Thaana_Letter_To" . #\u+07a0) ("Thaana_Letter_Zo" . #\u+07a1) ("Thaana_Letter_Ainu" . #\u+07a2) ("Thaana_Letter_Ghainu" . #\u+07a3) ("Thaana_Letter_Qaafu" . #\u+07a4) ("Thaana_Letter_Waavu" . #\u+07a5) ("Thaana_Abafili" . #\u+07a6) ("Thaana_Aabaafili" . #\u+07a7) ("Thaana_Ibifili" . #\u+07a8) ("Thaana_Eebeefili" . #\u+07a9) ("Thaana_Ubufili" . #\u+07aa) ("Thaana_Ooboofili" . #\u+07ab) ("Thaana_Ebefili" . #\u+07ac) ("Thaana_Eybeyfili" . #\u+07ad) ("Thaana_Obofili" . #\u+07ae) ("Thaana_Oaboafili" . #\u+07af) ("Thaana_Sukun" . #\u+07b0) ("Thaana_Letter_Naa" . #\u+07b1) ("Nko_Digit_Zero" . #\u+07c0) ("Nko_Digit_One" . #\u+07c1) ("Nko_Digit_Two" . #\u+07c2) ("Nko_Digit_Three" . #\u+07c3) ("Nko_Digit_Four" . #\u+07c4) ("Nko_Digit_Five" . #\u+07c5) ("Nko_Digit_Six" . #\u+07c6) ("Nko_Digit_Seven" . #\u+07c7) ("Nko_Digit_Eight" . #\u+07c8) ("Nko_Digit_Nine" . #\u+07c9) ("Nko_Letter_A" . #\u+07ca) ("Nko_Letter_Ee" . #\u+07cb) ("Nko_Letter_I" . #\u+07cc) ("Nko_Letter_E" . #\u+07cd) ("Nko_Letter_U" . #\u+07ce) ("Nko_Letter_Oo" . #\u+07cf) ("Nko_Letter_O" . 
#\u+07d0) ("Nko_Letter_Dagbasinna" . #\u+07d1) ("Nko_Letter_N" . #\u+07d2) ("Nko_Letter_Ba" . #\u+07d3) ("Nko_Letter_Pa" . #\u+07d4) ("Nko_Letter_Ta" . #\u+07d5) ("Nko_Letter_Ja" . #\u+07d6) ("Nko_Letter_Cha" . #\u+07d7) ("Nko_Letter_Da" . #\u+07d8) ("Nko_Letter_Ra" . #\u+07d9) ("Nko_Letter_Rra" . #\u+07da) ("Nko_Letter_Sa" . #\u+07db) ("Nko_Letter_Gba" . #\u+07dc) ("Nko_Letter_Fa" . #\u+07dd) ("Nko_Letter_Ka" . #\u+07de) ("Nko_Letter_La" . #\u+07df) ("Nko_Letter_Na_Woloso" . #\u+07e0) ("Nko_Letter_Ma" . #\u+07e1) ("Nko_Letter_Nya" . #\u+07e2) ("Nko_Letter_Na" . #\u+07e3) ("Nko_Letter_Ha" . #\u+07e4) ("Nko_Letter_Wa" . #\u+07e5) ("Nko_Letter_Ya" . #\u+07e6) ("Nko_Letter_Nya_Woloso" . #\u+07e7) ("Nko_Letter_Jona_Ja" . #\u+07e8) ("Nko_Letter_Jona_Cha" . #\u+07e9) ("Nko_Letter_Jona_Ra" . #\u+07ea) ("Nko_Combining_Short_High_Tone" . #\u+07eb) ("Nko_Combining_Short_Low_Tone" . #\u+07ec) ("Nko_Combining_Short_Rising_Tone" . #\u+07ed) ("Nko_Combining_Long_Descending_Tone" . #\u+07ee) ("Nko_Combining_Long_High_Tone" . #\u+07ef) ("Nko_Combining_Long_Low_Tone" . #\u+07f0) ("Nko_Combining_Long_Rising_Tone" . #\u+07f1) ("Nko_Combining_Nasalization_Mark" . #\u+07f2) ("Nko_Combining_Double_Dot_Above" . #\u+07f3) ("Nko_High_Tone_Apostrophe" . #\u+07f4) ("Nko_Low_Tone_Apostrophe" . #\u+07f5) ("Nko_Symbol_Oo_Dennen" . #\u+07f6) ("Nko_Symbol_Gbakurunen" . #\u+07f7) ("Nko_Comma" . #\u+07f8) ("Nko_Exclamation_Mark" . #\u+07f9) ("Nko_Lajanyalan" . #\u+07fa) ("Figure_Space" . #\u+2007) ("Zero_Width_Space" . #\u+200b) ("Line_Separator" . #\u+2028) ("Paragraph_Separator" . #\u+2029) ("Replacement_Character" . #\u+fffd) ("Skull_And_Crossbones" . #\u+2620))) (destructuring-bind (name . char) pair (register-character-name name char))) ;;;(NAME-CHAR name) ;;;If name has an entry in the *NAME->CHAR*, return first such entry. ;;;Otherwise, if it consists of one char, return it. 
;;;Otherwise, if it consists of two chars, the first of which is ^,
;;; return %code-char(c xor 64), where c is the uppercased second char.
;;;Otherwise, if it starts with the prefix "u+" or "U+" followed by
;;; hex digits, the number denoted by those hex digits is interpreted as the
;;; unicode code of the character; if this value is less than
;;; CHAR-CODE-LIMIT, CODE-CHAR of that value is returned.
;;;Otherwise, if it consists of octal digits, the number denoted by
;;; those octal digits is interpreted as per the U+ case above.
;;;Otherwise return NIL.
;;;The empty string names no character, so NIL is returned for it.

(defun name-char (name)
  "Given an argument acceptable to STRING, NAME-CHAR returns a character whose
  name is that string, if one exists. Otherwise, NIL is returned."
  (if (characterp name)
    name
    (let* ((name (string name))
           (namelen (length name)))
      (declare (fixnum namelen))
      (cond
        ;; A registered name always wins.
        ((gethash name *name->char*))
        ;; Without this clause the octal loop below would run zero times
        ;; and return (CODE-CHAR 0) for the empty string.
        ((= namelen 0) nil)
        ;; A one-character name denotes that character.
        ((= namelen 1) (char name 0))
        ;; "^X" control-character notation.
        ((and (= namelen 2) (eq (char name 0) #\^))
         (let* ((c1 (char-code (char-upcase (char name 1)))))
           (if (and (>= c1 64) (< c1 96))
             (code-char (the fixnum (logxor (the fixnum c1) #x40))))))
        (t
         ;; NAMELEN is at least 2 here.
         (let* ((n 0)
                (start 1))
           (declare (fixnum start))
           (or
            ;; Hex: a leading U/u, optionally followed by +, then hex digits.
            (if (and (or (eql (char name 0) #\U)
                         (eql (char name 0) #\u))
                     (or (= namelen 2)
                         (progn
                           (when (eql (char name 1) #\+)
                             (incf start))
                           t)))
              (do* ((i start (1+ i)))
                   ((= i namelen) (if (< n char-code-limit)
                                    (code-char n)))
                (declare (fixnum i))
                (let* ((pos (position (char-upcase (char name i))
                                      "0123456789ABCDEF")))
                  (if pos
                    (setq n (logior (ash n 4) pos))
                    (progn
                      ;; Not hex after all: reset the accumulator so the
                      ;; octal attempt below starts from zero.
                      (setq n 0)
                      (return))))))
            ;; Octal: every character must be a digit 0-7.  N is accumulated
            ;; with generic arithmetic; a long digit string can exceed the
            ;; fixnum range, so no fixnum assertion is made about it.
            (dotimes (i namelen (if (< n char-code-limit)
                                  (code-char n)))
              (let* ((code (the fixnum (- (the fixnum (char-code (char name i)))
                                          (char-code #\0)))))
                (declare (fixnum code))
                (if (and (>= code 0)
                         (<= code 7))
                  (setq n (logior code (ash n 3)))
                  (return)))))))))))

;;; Constant strings of whitespace characters, built at read time.
;;; WSP holds Space, Tab (^I), Page (^L), Null (^@), Linefeed (^J) and
;;; the character with code #xA0; WSP&CR additionally holds Return (^M).
(eval-when (:compile-toplevel :load-toplevel :execute)
(defconstant wsp #.(let ((str (make-string 6 :element-type 'base-char)))
                      (set-schar str 0 #\Space)
                      (set-schar str 1 #\^I)
                      (set-schar str 2 #\^L)
                      (set-schar str 3 #\^@)
                      (set-schar str 4 #\^J)
                      (set-schar str 5 (code-char #xa0))
                      str))

(defconstant wsp&cr #.(let ((str (make-string 7 :element-type 'base-char)))
                        (set-schar str 0 #\Space)
                        (set-schar str 1 #\^M)
                        (set-schar str 2 #\^I)
                        (set-schar str 3 #\^L)
                        (set-schar str 4 #\^@)
                        (set-schar str 5 #\^J)
                        (set-schar str 6 (code-char #xa0))
                        str))
)

;;; True if CHAR has the whitespace attribute in the type table of the
;;; current readtable.
(defun whitespacep (char)
  (eql $cht_wsp (%character-attribute char (rdtab.ttab *readtable*))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                            Readtables                                ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;; Readtable = istructure with data [1] type-table, [2] macro-char-alist
;;; and [3] case.
;;; Type-table is a sparse vector, indexed by character code, holding a
;;; type byte for each char.
;;; macro-char-alist is a list of (char . defn).  The defn is either a
;;; cons of (#'read-dispatch . macro-char-alist) for
;;; dispatch macros, or it is a function or a symbol to call for simple macros.
;;; True if OBJECT is a readtable istructure.
(defun readtablep (object) (istruct-typep object 'readtable))

;;; Readtable designator: NIL means the current readtable; anything else
;;; must be a readtable or a bad-argument error is reported.
(defun readtable-arg (object)
  (if (null object) (setq object *readtable*))
  (unless (istruct-typep object 'readtable)
    (report-bad-arg object 'readtable))
  object)

(eval-when (:compile-toplevel :execute)
;;; A token buffer is a simple vector; these name its first four slots.
(def-accessors %svref
  token.string                          ; the backing string
  token.ipos                            ; next position to read from
  token.opos                            ; next position to write to
  token.len                             ; length of the backing string
)

;;; Bind NAME to a stack-allocated token buffer backed by a 16-character
;;; pooled string for the duration of BODY.  Whatever string the buffer
;;; holds at exit is handed back to the pool, even on non-local exit.
(defmacro with-token-buffer ((name) &body body &environment env)
  (multiple-value-bind (body decls) (parse-body body env nil)
    `(let* ((,name (vector (%get-token-string 16) 0 0 16 nil)))
       (declare (dynamic-extent ,name))
       (unwind-protect
         (locally ,@decls ,@body)
         (%return-token-string ,name)))))
)

;;; Reader macro function for dispatching macro characters.  Collects an
;;; optional run of decimal digits (the numeric argument), upcases the
;;; following sub-character, and calls the function registered for that
;;; sub-character under CHAR in the current readtable.
(defun read-dispatch (stream char)
  (let* ((info (cdr (assq char (rdtab.alist *readtable*)))))
    (with-token-buffer (tb)
      (let* ((subchar nil)
             (numarg nil))
        (loop
          (if (digit-char-p (setq subchar (%read-char-no-eof stream)))
            (%add-char-to-token subchar tb)
            (return (setq subchar (char-upcase subchar)
                          numarg (%token-to-number tb 10)))))
        (let* ((dispfun (cdr (assq subchar (cdr info))))) ; <== WAS char
          (if dispfun
            (funcall dispfun stream subchar numarg)
            (signal-reader-error stream "Undefined character ~S in a ~S dispatch macro." subchar char)))))))

;;; This -really- gets initialized later in the file
;;; Bootstrap contents: char codes 0 through Space and #xA0 are whitespace,
;;; \ is the single escape, | the multiple escape, # a non-terminating
;;; (dispatching) macro character, Backspace and Rubout are illegal, and
;;; everything else is a constituent.
(defvar %standard-readtable%
  (let* ((ttab (make-sparse-vector char-code-limit '(unsigned-byte 8) $cht_cnst))
         (macs `((#\# . (,#'read-dispatch))))
         (case :upcase))
    (dotimes (ch (1+ (char-code #\Space)))
      (setf (sparse-vector-ref ttab ch) $cht_wsp))
    (setf (sparse-vector-ref ttab #xa0) $cht_wsp)
    (setf (sparse-vector-ref ttab (char-code #\\)) $cht_sesc)
    (setf (sparse-vector-ref ttab (char-code #\|)) $cht_mesc)
    (setf (sparse-vector-ref ttab (char-code #\#)) $cht_ntmac)
    (setf (sparse-vector-ref ttab (char-code #\Backspace)) $cht_ill)
    (setf (sparse-vector-ref ttab (char-code #\Rubout)) $cht_ill)
    (%istruct 'readtable ttab macs case)))

(defvar %initial-readtable%)
(setq *readtable* %standard-readtable%)
(def-standard-initial-binding *readtable* )
(queue-fixup (setq %initial-readtable% (copy-readtable *readtable*)))

(defun copy-readtable (&optional (from *readtable*) to)
  "Copy the readtable FROM and return the result.  FROM defaults to the
  current readtable when not supplied and denotes the standard readtable
  when NIL.  When TO is supplied it must be a readtable; it is modified
  to be a copy of FROM and returned.  Otherwise a new readtable is made."
  (setq from (if from (readtable-arg from) %standard-readtable%))
  ;; Validate TO before doing any copying work.
  (when to (setq to (readtable-arg to)))
  (let* ((new-ttab (copy-sparse-vector (rdtab.ttab from))))
    (if to
      ;; An existing TO must also receive FROM's syntax types; copying
      ;; only the macro alist and case would leave its type table stale.
      (setf (rdtab.ttab to) new-ttab)
      (setq to (%istruct 'readtable new-ttab nil (rdtab.case from))))
    ;; COPY-TREE so that later changes to one readtable's macro
    ;; definitions are not visible through the other.
    (setf (rdtab.alist to) (copy-tree (rdtab.alist from)))
    (setf (rdtab.case to) (rdtab.case from))
    to))

(declaim (inline %character-attribute))

;;; Return the syntax-type byte for CHAR from the type table ATTRTAB.
;;; Compiled with safety 0: CHAR is trusted to be a character.
(defun %character-attribute (char attrtab)
  (declare (character char)
           (optimize (speed 3) (safety 0)))
  (let* ((code (char-code char)))
    (declare (fixnum code))
    (sparse-vector-ref attrtab code)))

;;; Store ATTR as the syntax-type byte for CHAR in READTABLE's type table.
(defun %set-character-attribute (char readtable attr)
  (let* ((code (char-code char))
         (attrtab (rdtab.ttab readtable)))
    (declare (type (mod #x110000) code))
    (setf (sparse-vector-ref attrtab code) attr)))

;;; returns: (values attrib aux-info), where
;;;   aux-info = (char . fn), if terminating macro
;;;            = (char . (fn .
;;;                 dispatch-alist)), if dispatching macro
;;;            = nil otherwise
;;; The second value is only looked up when the attribute has the macro
;;; bit set.

(defun %get-readtable-char (char &optional (readtable *readtable*))
  (setq char (require-type char 'character))
  (let* ((attr (%character-attribute char (rdtab.ttab readtable))))
    (declare (fixnum attr))
    (values attr (if (logbitp $cht_macbit attr) (assoc char (rdtab.alist readtable))))))

(defun set-syntax-from-char (to-char from-char &optional to-readtable from-readtable)
  "Causes the syntax of TO-CHAR to be the same as FROM-CHAR in the optional
  readtable (defaults to the current readtable). The FROM-TABLE defaults to the
  standard Lisp readtable when NIL."
  (setq to-char (require-type to-char 'base-char))
  (setq from-char (require-type from-char 'base-char))
  (setq to-readtable (readtable-arg to-readtable))
  (setq from-readtable (readtable-arg (or from-readtable %initial-readtable%)))
  (multiple-value-bind (from-attr from-info) (%get-readtable-char from-char from-readtable)
    ;; Copy the macro definition so the two readtables share no structure.
    (let* ((new-tree (copy-tree (cdr from-info)))
           (old-to-info (nth-value 1 (%get-readtable-char to-char to-readtable))))
      (without-interrupts
       (if from-info
         ;; FROM-CHAR is a macro character: install its definition.
         (if old-to-info
           (setf (cdr old-to-info) new-tree)
           (push (cons to-char new-tree) (rdtab.alist to-readtable)))
         ;; FROM-CHAR is not a macro character: drop any old definition.
         (if old-to-info
           (setf (rdtab.alist to-readtable) (delq old-to-info (rdtab.alist to-readtable)))))
       ;; Copying constituent syntax onto one of the characters listed
       ;; below marks it illegal rather than constituent.
       (%set-character-attribute to-char
                                 to-readtable
                                 (if (and (= from-attr $cht_cnst)
                                          (member to-char '(#\Newline #\Linefeed #\Page #\Return
                                                            #\Space #\Tab #\Backspace #\Rubout)))
                                   $cht_ill
                                   from-attr)))
      t)))

(defun get-macro-character (char &optional readtable)
  "Return the function associated with the specified CHAR which is a macro
  character, or NIL if there is no such function. As a second value, return
  T if CHAR is a macro character which is non-terminating, i.e. which can
  be embedded in a symbol name."
  (setq readtable (readtable-arg readtable))
  (multiple-value-bind (attr info) (%get-readtable-char char readtable)
    (declare (fixnum attr) (list info))
    (let* ((def (cdr info)))
      ;; A dispatching definition is (fn . dispatch-alist); report its fn.
      (values (if (consp def) (car def) def)
              (= attr $cht_ntmac)))))

(defun set-macro-character (char fn &optional non-terminating-p readtable)
  "Causes CHAR to be a macro character which invokes FUNCTION when seen
   by the reader. The NON-TERMINATINGP flag can be used to make the macro
   character non-terminating, i.e. embeddable in a symbol name."
  (setq char (require-type char 'base-char))
  (setq readtable (readtable-arg readtable))
  (when fn
    (unless (or (symbolp fn) (functionp fn))
      (setq fn (require-type fn '(or symbol function)))))
  (let* ((info (nth-value 1 (%get-readtable-char char readtable))))
    (declare (list info))
    (without-interrupts
     ;; A null FN turns CHAR back into an ordinary constituent.
     (%set-character-attribute char readtable
                               (if (null fn) $cht_cnst (if non-terminating-p $cht_ntmac $cht_tmac)))
     (cond ((null fn)
            ;; Only remove an existing definition; never push a
            ;; (char . nil) entry, which would just accumulate.
            (when info
              (setf (rdtab.alist readtable)
                    (delete info (rdtab.alist readtable) :test #'eq))))
           ((null info)
            (push (cons char fn) (rdtab.alist readtable)))
           (t
            (let* ((def (cdr info)))
              (if (atom def)
                (setf (cdr info) fn)    ; Non-dispatching
                (setf (car def) fn))))))  ; Dispatching
    t))

;;; Return the case-conversion mode of READTABLE as a keyword.  A stored
;;; value that is not a symbol is mapped back to its keyword through the
;;; READTABLE-CASE-KEYWORDS table.
(defun readtable-case (readtable)
  (unless (istruct-typep readtable 'readtable)
    (report-bad-arg readtable 'readtable))
  (let* ((case (rdtab.case (readtable-arg readtable))))
    (if (symbolp case)
      case
      (%car (rassoc case (readtable-case-keywords) :test #'eq)))))

;;; SETF function for READTABLE-CASE.  CASE must be one of :UPCASE,
;;; :DOWNCASE, :PRESERVE or :INVERT.
(defun %set-readtable-case (readtable case)
  (unless (istruct-typep readtable 'readtable)
    (report-bad-arg readtable 'readtable))
  (check-type case (member :upcase :downcase :preserve :invert))
  (setf (rdtab.case (readtable-arg readtable)) case))

(defsetf readtable-case %set-readtable-case)

(defun make-dispatch-macro-character (char &optional non-terminating-p readtable)
  "Cause CHAR to become a dispatching macro character in readtable (which
   defaults to the current readtable). If NON-TERMINATING-P, the char will
   be non-terminating."
  (setq readtable (readtable-arg readtable))
  (setq char (require-type char 'base-char))
  (let* ((info (nth-value 1 (%get-readtable-char char readtable))))
    (declare (list info))
    (without-interrupts
     (%set-character-attribute char readtable
                               (if non-terminating-p $cht_ntmac $cht_tmac))
     (if info
       ;; CHAR is already a macro character.  Its definition may be a
       ;; plain function or symbol (simple macro), so it cannot be
       ;; RPLACD'd; install a fresh, empty dispatch definition instead.
       ;; This also resets the dispatch function to READ-DISPATCH.
       (setf (cdr info) (cons #'read-dispatch nil))
       (push (cons char (cons #'read-dispatch nil)) (rdtab.alist readtable)))))
  t)

(defun get-dispatch-macro-character (disp-ch sub-ch &optional (readtable *readtable*))
  "Return the macro character function for SUB-CHAR under DISP-CHAR
   or NIL if there is no associated function."
  (setq readtable (readtable-arg (or readtable %initial-readtable%)))
  (setq disp-ch (require-type disp-ch 'base-char))
  (setq sub-ch (char-upcase (require-type sub-ch 'base-char)))
  ;; Decimal digits never have a dispatch function; answer NIL for them.
  (unless (digit-char-p sub-ch 10)
    (let* ((def (cdr (nth-value 1 (%get-readtable-char disp-ch readtable)))))
      (if (consp def)
        (cdr (assq sub-ch (cdr def)))
        (error "~A is not a dispatching macro character in ~s ." disp-ch readtable)))))

(defun set-dispatch-macro-character (disp-ch sub-ch fn &optional readtable)
  "Cause FUNCTION to be called whenever the reader reads DISP-CHAR
   followed by SUB-CHAR."
  (setq readtable (readtable-arg readtable))
  (setq disp-ch (require-type disp-ch 'base-char))
  (setq sub-ch (char-upcase (require-type sub-ch 'base-char)))
  (when (digit-char-p sub-ch 10)
    (error "subchar can't be a decimal digit - ~a ." sub-ch))
  (let* ((info (nth-value 1 (%get-readtable-char disp-ch readtable)))
         (def (cdr info)))
    (declare (list info))
    (unless (consp def)
      (error "~A is not a dispatching macro character in ~s ." disp-ch readtable))
    ;; Replace an existing entry for SUB-CH, or add a new one.
    (let* ((alist (cdr def))
           (pair (assq sub-ch alist)))
      (if pair
        (setf (cdr pair) fn)
        (push (cons sub-ch fn) (cdr def))))
    t))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                              Reader                                  ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(def-standard-initial-binding *read-eval* t "When nil, #. signals an error.")
(def-standard-initial-binding *read-default-float-format* 'single-float)

(def-standard-initial-binding *read-suppress* nil
  "Suppress most interpreting in the reader when T.")

(def-standard-initial-binding *read-base* 10.
  "the radix that Lisp reads numbers in")

(defvar %read-objects% nil)
(defvar %keep-whitespace% nil)

(def-standard-initial-binding %token-strings% (%cons-pool nil))

;;; Detach TOKEN's backing string and push it onto the pool of reusable
;;; token strings.
(defun %return-token-string (token)
  (let* ((str (token.string token))
         (pool %token-strings%))
    (setf (token.string token) nil)
    (without-interrupts
     (setf (pool.data pool)
           (cheap-cons str (pool.data pool))))))

;;;Look for an exact match, else create a simple-string.
;;; Return a string of exactly LEN characters for use as a token buffer.
;;; The first pooled string whose length equals LEN is unlinked from the
;;; pool and returned; if there is none, a fresh string is allocated.
(defun %get-token-string (len)
  (declare (fixnum len))
  (without-interrupts
   ;; HEAD is a stack-allocated dummy cell in front of the pool list, so
   ;; the first real entry can be unlinked like any other.
   (do* ((pool %token-strings%)
         (head (cons nil (pool.data pool)))
         (prev head next)
         (next (cdr prev) (cdr next)))
        ((null next)
         (make-string len :element-type 'base-char))
     (declare (dynamic-extent head)
              (list head prev next))
     (let* ((s (car next)))
       (when (= len (length s))
         (rplacd prev (cdr next))
         (setf (pool.data pool) (cdr head))
         (free-cons next)
         (return s))))))

;;; Double the capacity of TOKEN: obtain a string twice as long, copy the
;;; old contents into it, return the old string to the pool and install
;;; the new string and length.  Returns TOKEN.
(defun %extend-token-string (token)
  (let* ((old-string (token.string token))
         (old-length (token.len token)))
    (declare (fixnum old-length))
    (let* ((new-length (the fixnum (ash old-length 1)))
           (new-string (%get-token-string new-length)))
      (dotimes (i old-length)
        (setf (%schar new-string i)
              (%schar old-string i)))
      (%return-token-string token)
      (setf (token.string token) new-string
            (token.len token) new-length)
      token)))

;;; Append CHAR at TOKEN's output position, growing the backing string
;;; first if it is full.
(defun %add-char-to-token (char token)
  (let* ((len (token.len token))
         (opos (token.opos token)))
    (declare (fixnum len opos))
    (when (= opos len)
      (%extend-token-string token))
    (setf (token.opos token) (the fixnum (1+ opos))
          (%schar (token.string token) opos) char)))

;;; Return a fresh string holding the characters of TOKEN between its
;;; input position (inclusive) and its output position (exclusive).
(defun %string-from-token (token)
  (let* ((opos (token.opos token))
         (ipos (token.ipos token))
         (tstr (token.string token))
         (len (the fixnum (- opos ipos)))
         (string (make-string len :element-type 'base-char)))
    (do* ((k 0 (1+ k))
          (i ipos (1+ i)))
         ((= i opos) string)
      (declare (fixnum i k))
      (setf (%schar string k) (%schar tstr i)))))

;;; Return the character at TOKEN's input position and advance that
;;; position, or return NIL if the input position has reached the
;;; output position.
(defun %next-token-char (token)
  (let* ((ipos (token.ipos token)))
    (declare (fixnum ipos))
    (when (< ipos (the fixnum (token.opos token)))
      (setf (token.ipos token) (the fixnum (1+ ipos)))
      (%schar (token.string token) ipos))))

;;; Input stream designator: NIL means *STANDARD-INPUT*, T means
;;; *TERMINAL-IO*; anything else is returned unchanged and unchecked.
(defun input-stream-arg (stream)
  (cond ((null stream) *standard-input*)
        ((eq stream t) *terminal-io*)
        ;Otherwise, let ASK complain...
        (t stream)))

;;; Read one character from STREAM; READ-CHAR's default EOF handling
;;; applies.
(defun %read-char-no-eof (stream)
  (read-char stream))

;;; Return the next character of STREAM and its attribute in ATTRTAB,
;;; or (values nil nil) at end of file.
(defun %next-char-and-attr (stream &optional (attrtab (rdtab.ttab *readtable*)))
  (let* ((ch (read-char stream nil :eof)))
    (if (eq ch :eof)
      (values nil nil)
      (values ch (%character-attribute ch attrtab)))))

;;; Skip characters with the whitespace attribute, then return the next
;;; character and its attribute, or (values nil nil) at end of file.
(defun %next-non-whitespace-char-and-attr (stream)
  (let* ((attrtab (rdtab.ttab *readtable*)))
    (loop
      (multiple-value-bind (ch attr) (%next-char-and-attr stream attrtab)
        (if (null ch)
          (return (values nil nil))
          (unless (eql attr $cht_wsp)
            (return (values ch attr))))))))

;;; Like %NEXT-CHAR-AND-ATTR, but reads with %READ-CHAR-NO-EOF, so end of
;;; file is not reported through the return values.
(defun %next-char-and-attr-no-eof (stream &optional (attrtab (rdtab.ttab *readtable*)))
  (let* ((ch (%read-char-no-eof stream)))
    (values ch (%character-attribute ch attrtab))))

;;; Like %NEXT-NON-WHITESPACE-CHAR-AND-ATTR, but reads with
;;; %NEXT-CHAR-AND-ATTR-NO-EOF.
(defun %next-non-whitespace-char-and-attr-no-eof (stream)
  (let* ((attrtab (rdtab.ttab *readtable*)))
    (loop
      (multiple-value-bind (ch attr) (%next-char-and-attr-no-eof stream attrtab)
        (declare (fixnum attr))
        (unless (= attr $cht_wsp)
          (return (values ch attr)))))))

;;; "escapes" is a list of escaped character positions, in reverse order
;;; Apply the current readtable's case conversion, in place, to the first
;;; (token.opos token) characters of TOKEN's string.  Characters at the
;;; positions listed in ESCAPES are left untouched.  For :INVERT, the
;;; unescaped characters are scanned first: if only upper case was seen
;;; the token is downcased, if only lower case was seen it is upcased,
;;; and otherwise it is left alone.
(defun %casify-token (token escapes)
  (let* ((case (readtable-case *readtable*))
         (opos (token.opos token))
         (string (token.string token)))
    (declare (fixnum opos))
    (if (and (null escapes) (eq case :upcase)) ; Most common case, pardon the pun
      ; %strup is faster - boot probs tho
      (dotimes (i opos)
        (setf (%schar string i) (char-upcase (%schar string i))))
      (unless (eq case :preserve)
        (when (eq case :invert)
          (let* ((lower-seen nil)
                 (upper-seen nil))
            ;; Walk backwards so positions meet the reversed escape list
            ;; in order; ESCLIST is a private cursor, ESCAPES is kept
            ;; intact for the conversion pass below.
            (do* ((i (1- opos) (1- i))
                  (esclist escapes)
                  (nextesc (if esclist (pop esclist) -1)))
                 ((< i 0) (if upper-seen (unless lower-seen (setq case :downcase))
                            (when lower-seen (setq case :upcase))))
              (declare (fixnum i nextesc))
              (if (= nextesc i)
                (setq nextesc (if esclist (pop esclist) -1))
                (let* ((ch (%schar string i)))
                  (if (upper-case-p ch)
                    (setq upper-seen t)
                    (if (lower-case-p ch)
                      (setq lower-seen t))))))))
        (if (eq case :upcase)
          (do* ((i (1- opos) (1- i))
                (nextesc (if escapes (pop escapes) -1)))
               ((< i 0))
            (declare (fixnum i nextesc))
            (if (= nextesc i)
              (setq nextesc (if escapes (pop escapes) -1))
              (setf (%schar string i) (char-upcase (%schar string i)))))
          (if (eq case :downcase)
            (do* ((i (1- opos) (1- i))
                  (nextesc (if escapes (pop escapes) -1)))
                 ((< i 0))
              (declare (fixnum i nextesc))
              (if (= nextesc i)
                (setq nextesc (if escapes (pop escapes) -1))
                (setf (%schar string i) (char-downcase (%schar string i)))))))))))

;;; MCL's reader has historically treated ||:foo as a reference to the
;;; symbol FOO in the package which has the null string as its name.
;;; Some other implementations treat it as a keyword.  This takes an
;;; argument indicating whether or not something was "seen" before the
;;; first colon was read, even if that thing caused no characters to
;;; be added to the token.
;;; With a null COLONPOS the result is *PACKAGE*.  Otherwise it is the
;;; keyword package (colon at position 0 with nothing seen before it),
;;; the result of %FIND-PKG on the prefix, or, failing that, the prefix
;;; itself as a fresh string.

(defun %token-package (token colonpos seenbeforecolon stream)
  (declare (ignorable stream))
  (if colonpos
    (if (and (eql colonpos 0) (not seenbeforecolon))
      *keyword-package*
      (let* ((string (token.string token)))
        (or (%find-pkg string colonpos)
            (subseq string 0 colonpos)
            #+nomore
            (signal-reader-error stream "Reference to unknown package ~s." (subseq string 0 colonpos)))))
    *package*))

;;; Returns 4 values: reversed list of escaped character positions,
;;; explicit package (if unescaped ":" or "::") or nil, t iff any
;;; non-dot, non-escaped chars in token, and t if either no explicit
;;; package or "::"
;;; The first value is T rather than a list when a multiple-escape pair
;;; was seen but no escaped positions are recorded at the end.
;;; EXPLICIT-PACKAGE starts out as *READ-SUPPRESS*, so while suppressing,
;;; colons are simply added to the token like any other character.

(defun %collect-xtoken (token stream 1stchar)
  (let* ((escapes ())
         (nondots nil)
         (explicit-package *read-suppress*)
         (double-colon t)
         (multi-escaped nil))
    (do* ((attrtab (rdtab.ttab *readtable*))
          (char 1stchar (read-char stream nil :eof )))
         ((eq char :eof))
      ;; Record the position about to be written as escaped, add CHAR to
      ;; the token, and return the extended escape list.
      (flet ((add-note-escape-pos (char token escapes)
               (push (token.opos token) escapes)
               (%add-char-to-token char token)
               escapes))
        (let* ((attr (%character-attribute char attrtab)))
          (declare (fixnum attr))
          ;; A terminating macro character or whitespace ends the token.
          ;; The terminator is pushed back unless it is whitespace and
          ;; %KEEP-WHITESPACE% is false.
          (when (or (= attr $cht_tmac)
                    (= attr $cht_wsp))
            (when (or (not (= attr $cht_wsp)) %keep-whitespace%)
              (unread-char char stream))
            (return ))
          (if (= attr $cht_ill)
            (signal-reader-error stream "Illegal character ~S." char)
            (if (= attr $cht_sesc)
              ;; Single escape: the next character is taken literally.
              (setq nondots t
                    escapes (add-note-escape-pos (%read-char-no-eof stream) token escapes))
              (if (= attr $cht_mesc)
                ;; Multiple escape: everything up to the closing
                ;; multiple-escape character is taken literally.
                (progn
                  (setq nondots t)
                  (loop
                    (multiple-value-bind (nextchar nextattr) (%next-char-and-attr-no-eof stream attrtab)
                      (declare (fixnum nextattr))
                      (if (= nextattr $cht_mesc)
                        (return (setq multi-escaped t))
                        (if (= nextattr $cht_sesc)
                          (setq escapes (add-note-escape-pos (%read-char-no-eof stream) token escapes))
                          (setq escapes (add-note-escape-pos nextchar token escapes)))))))
                (let* ((opos (token.opos token))) ; Add char to token, note 1st colonpos
                  (declare (fixnum opos))
                  (if (and (eq char #\:) ; (package-delimiter-p char ?)
                           (not explicit-package))
                    ;; First unescaped colon: what has been collected so
                    ;; far is the package prefix.  Case-convert it,
                    ;; resolve it, then restart the token for the
                    ;; symbol name.
                    (let* ((nextch (%read-char-no-eof stream)))
                      (if (eq nextch #\:)
                        (setq double-colon t)
                        (progn
                          (unread-char nextch stream)
                          (setq double-colon nil)))
                      (%casify-token token escapes)
                      (setq explicit-package (%token-package token opos nondots stream)
                            nondots t
                            escapes nil)
                      (setf (token.opos token) 0))
                    (progn
                      (unless (eq char #\.)
                        (setq nondots t))
                      (%add-char-to-token char token))))))))))
    (values (or escapes multi-escaped) (if *read-suppress* nil explicit-package) nondots double-colon)))

;;; Return RADIX if it is an integer between 2 and 36 inclusive;
;;; otherwise CHECK-TYPE signals a correctable error.
(defun %validate-radix (radix)
  (if (and (typep radix 'fixnum)
           (>= (the fixnum radix) 2)
           (<= (the fixnum radix) 36))
    radix
    (progn
      (check-type radix (integer 2 36))
      radix)))

;;; Hand the characters of TOKEN between its input and output positions
;;; to NEW-NUMTOKEN, together with RADIX and NO-RAT.
(defun %token-to-number (token radix &optional no-rat)
  (new-numtoken (token.string token) (token.ipos token) (token.opos token) radix no-rat))

;;; If we're allowed to have a single "." in this context, DOT-OK is some distinguished
;;; value that's returned to the caller when exactly one dot is present.
;;; Collects a token starting with FIRSTCHAR and returns the number or
;;; symbol it denotes.  Nothing is interpreted (and NIL is returned) when
;;; *READ-SUPPRESS* is true.
(defun %parse-token (stream firstchar dot-ok)
  (with-token-buffer (tb)
    (multiple-value-bind (escapes explicit-package nondots double-colon) (%collect-xtoken tb stream firstchar)
      (unless *read-suppress*
        (let* ((string (token.string tb))
               (len (token.opos tb)))
          (declare (fixnum len))
          (if (not nondots)
            (if (= len 1)
              (or dot-ok
                  (signal-reader-error stream "Dot context error in ~s." (%string-from-token tb)))
              (signal-reader-error stream "Illegal symbol syntax in ~s." (%string-from-token tb)))
            ;; Something other than a buffer full of dots.  Thank god.
            ;; A token containing any escape is never tried as a number.
            (let* ((num (if (null escapes)
                          (handler-case
                              (%token-to-number tb (%validate-radix *read-base*))
                            (arithmetic-error (c)
                              (error 'impossible-number
                                     :stream stream
                                     :token (%string-from-token tb)
                                     :condition c))))))
              (if (and num (not explicit-package))
                num
                (if (and (zerop len) (null escapes))
                  (%err-disp $XBADSYM)
                  (progn                ; Muck with readtable case of extended token.
                    ;; ESCAPES may be T (multiple escape only); pass a
                    ;; position list or NIL.
                    (%casify-token tb (unless (atom escapes) escapes))
                    (let* ((pkg (if explicit-package (pkg-arg explicit-package) *package*)))
                      (if (or double-colon (eq pkg *keyword-package*))
                        ;; No package prefix, "pkg::name", or a keyword:
                        ;; find the symbol or add it to PKG.
                        (with-package-lock (pkg)
                          (multiple-value-bind (symbol access internal-offset external-offset)
                              (%find-symbol string len pkg)
                            (if access
                              symbol
                              (%add-symbol (%string-from-token tb) pkg internal-offset external-offset))))
                        ;; "pkg:name": looked up in PKG's etab (presumably
                        ;; its external-symbol table).  On a miss an
                        ;; error is reported under a CONTINUE restart;
                        ;; continuing uses the symbol FIND-SYMBOL found,
                        ;; or interns a new one.
                        (multiple-value-bind (found symbol) (%get-htab-symbol string len (pkg.etab pkg))
                          (if found
                            symbol
                            (let* ((token (%string-from-token tb))
                                   (symbol (find-symbol token pkg)))
                              (with-simple-restart (continue
                                                    "~:[Create and use the internal symbol ~a::~a~;Use the internal symbol ~:*~s~]"
                                                    symbol (package-name pkg) token)
                                (%err-disp $XNOESYM token pkg))
                              (or symbol (intern token pkg)))))))))))))))))

#|
(defun %parse-token-test (string &key dot-ok (case (readtable-case *readtable*)))
  (let* ((stream (make-string-input-stream string))
         (oldcase (readtable-case *readtable*)))
    (unwind-protect
      (progn
        (setf (readtable-case *readtable*) case)
        (%parse-token stream (read-char stream t) dot-ok))
      (setf (readtable-case *readtable*) oldcase))))

(%parse-token-test "ABC")
(%parse-token-test "TRAPS::_DEBUGGER")
(%parse-token-test "3.14159")
(ignore-errors (%parse-token-test "BAD-PACKAGE:WORSE-SYMBOL"))
(ignore-errors (%parse-token-test "CCL::"))
(%parse-token-test "TRAPS::_debugger" :case :preserve)
(%parse-token-test ":foo")
|#

;;; firstchar must not be whitespace.
;;; People who think that there's so much overhead in all of
;;; this (multiple-value-list, etc.) should probably consider
;;; rewriting those parts of the CLOS and I/O code that make
;;; using things like READ-CHAR impractical...

;;; mb: the reason multiple-value-list is used here is that we need to distinguish between the
;;; recursive parse call returning (values nil) and (values).
;;; Parse one expression from STREAM.  FIRSTCHAR is the already-read first
;;; character of the expression (it must not be whitespace); DOT-OK is
;;; passed through to %PARSE-TOKEN.
;;; Returns (values nil nil) when the parse produced no values (e.g. a
;;; reader macro that returned (values)); otherwise returns
;;; (values form t source-note-or-nil).
(defun %parse-expression (stream firstchar dot-ok)
  (let* ((readtable *readtable*)
         (attrtab (rdtab.ttab readtable))
         (attr (%character-attribute firstchar attrtab))
         (start-pos (stream-position stream)))
    (declare (fixnum attr))
    (when (eql attr $cht_ill)
      (signal-reader-error stream "Illegal character ~S." firstchar))
    (let* ((vals (multiple-value-list
                  (if (not (logbitp $cht_macbit attr))
                    ;; Not a macro character: parse a token.
                    (%parse-token stream firstchar dot-ok)
                    ;; Macro character: look up its definition in the
                    ;; readtable's alist and call it.
                    (let* ((def (cdr (assq firstchar (rdtab.alist readtable)))))
                      (cond ((null def)) ; no body: COND yields the test value, T
                            ((atom def)
                             (funcall def stream firstchar))
                            #+no ; include if %initial-readtable% broken (see above)
                            ((and (consp (car def))
                                  (eq (caar def) 'function))
                             (funcall (cadar def) stream firstchar))
                            ((functionp (car def))
                             (funcall (car def) stream firstchar))
                            (t (error "Bogus default dispatch fn: ~S" (car def)) nil))))))
           (end-pos (and start-pos (stream-position stream))))
      (declare (dynamic-extent vals)
               (list vals))
      (if (null vals)
        (values nil nil)
        (destructuring-bind (form &optional nested-source-notes)
            vals
          ;; Can't really trust random reader macros to return source notes...
          (unless (and (consp nested-source-notes)
                       (source-note-p (car nested-source-notes)))
            (setq nested-source-notes nil))
          (values form
                  t
                  (and start-pos
                       ;; FIRSTCHAR was consumed before START-POS was
                       ;; sampled, hence the 1-.
                       (record-source-note :form form
                                           :stream stream
                                           :start-pos (1- start-pos)
                                           :end-pos end-pos
                                           :subform-notes nested-source-notes))))))))

#|
(defun %parse-expression-test (string)
  (let* ((stream (make-string-input-stream string)))
    (%parse-expression stream (read-char stream t) nil)))

(%parse-expression-test ";hello")
(%parse-expression-test "#'cdr")
(%parse-expression-test "#+foo 1 2")
|#

;;; Read the next element of a list from STREAM, skipping anything that
;;; parses to no values.  Returns (values nil nil nil) when the terminating
;;; character TERMCH is seen first, else (values form t source-note).
(defun %read-list-expression (stream dot-ok &optional (termch #\)))
  (loop
    (let* ((firstch (%next-non-whitespace-char-and-attr-no-eof stream)))
      (if (eq firstch termch)
        (return (values nil nil nil))
        (multiple-value-bind (val val-p source-info)
            (%parse-expression stream firstch dot-ok)
          (if val-p
            (return (values val t source-info))))))))

;;; Read list elements from STREAM up to TERMCH.  Unless NODOTS is true, a
;;; single dot followed by exactly one form supplies the tail of the list.
;;; Returns (values list source-notes), where SOURCE-NOTES holds the source
;;; notes of the elements in reverse order of reading.
(defun read-list (stream &optional nodots (termch #\)))
  (let* ((dot-ok (cons nil nil))        ; unique marker returned for a lone "."
         (head (cons nil nil))
         (tail head)
         (source-note-list nil))
    (declare (dynamic-extent dot-ok head)
             (list head tail))
    (if nodots (setq dot-ok nil))
    (multiple-value-bind (firstform firstform-p firstform-source-note)
        (%read-list-expression stream dot-ok termch)
      (when firstform-source-note
        (push firstform-source-note source-note-list))
      (when firstform-p
        (if (and dot-ok (eq firstform dot-ok)) ; just read a dot
          (signal-reader-error stream "Dot context error."))
        (rplacd tail (setq tail (cons firstform nil)))
        (loop
          (multiple-value-bind (nextform nextform-p nextform-source-note)
              (%read-list-expression stream dot-ok termch)
            (when nextform-source-note
              (push nextform-source-note source-note-list))
            (if (not nextform-p) (return))
            (if (and dot-ok (eq nextform dot-ok)) ; just read a dot
              ;; After the dot: exactly one more form, then TERMCH.
              (if (multiple-value-bind (lastform lastform-p lastform-source-note)
                      (%read-list-expression stream nil termch)
                    (when lastform-source-note
                      (push lastform-source-note source-note-list))
                    (and lastform-p
                         (progn (rplacd tail lastform)
                                (not (nth-value 1 (%read-list-expression stream nil termch))))))
                (return)
                (signal-reader-error stream "Dot context error."))
              (rplacd tail (setq tail (cons nextform nil))))))))
    (values (cdr head) source-note-list)))

#|
(defun read-list-test (string &optional nodots)
  (read-list (make-string-input-stream string) nodots))

(read-list-test ")")
(read-list-test "a b c)" t)
(read-list-test "a b ;hello c)" t)
|#

;;; "(" reads a list terminated by ")".
(set-macro-character
 #\(
 #'(lambda (stream ignore)
     (declare (ignore ignore))
     (read-list stream nil #\))))

;;; "'" reads the next form and wraps it in QUOTE; the second value is a
;;; list holding the inner form's source note, if there was one.
(set-macro-character
 #\'
 (nfunction |'-reader|
            (lambda (stream ignore)
              (declare (ignore ignore))
              (multiple-value-bind (form source-note)
                  (read-internal stream t nil t)
                (values `(quote ,form) (and source-note (list source-note)))))))

(defparameter *alternate-line-terminator*
  #+darwin-target #\Return
  #-darwin-target nil
  "This variable is currently only used by the standard reader macro
function for #\; (single-line comments); that function reads successive
characters until EOF, a #\NewLine is read, or a character EQL to the value
of *alternate-line-terminator* is read.
In Clozure CL for Darwin, the value of this variable is initially
#\Return ; in Clozure CL for other OSes, it's initially NIL.")

;;; ";" discards characters through end of line (or EOF) and returns no
;;; values.
(set-macro-character
 #\;
 (nfunction |;-reader|
            (lambda (stream ignore)
              (declare (ignore ignore))
              (let* ((ch nil))
                (loop
                  (if (or (eq :eof (setq ch (read-char stream nil :eof)))
                          (eq ch #\NewLine)
                          (eq ch *alternate-line-terminator*))
                    (return (values)))))))))

;;; Read characters up to (not including) TERMCH and return them as a
;;; string.  A single-escape character causes the character after it to be
;;; taken literally.
(defun read-string (stream termch)
  (with-token-buffer (tb)
    (do* ((attrs (rdtab.ttab *readtable*))
          (ch (%read-char-no-eof stream)
              (%read-char-no-eof stream)))
         ((eq ch termch)
          (%string-from-token tb))
      (if (= (the fixnum (%character-attribute ch attrs)) $CHT_SESC)
        (setq ch (%read-char-no-eof stream)))
      (%add-char-to-token ch tb))))

(set-macro-character #\" #'read-string)

;;; When true, an unmatched ")" produces a warning and is skipped instead
;;; of signalling a reader error.
(defparameter *ignore-extra-close-parenthesis* nil)

(set-macro-character
 #\)
 #'(lambda (stream ch)
     (let* ((pos (if (typep stream 'file-stream)
                   (file-position stream))))
       (if *ignore-extra-close-parenthesis*
         (progn
           (warn "Ignoring extra \"~c\" ~@[near position ~d~] on ~s ." ch pos stream)
           ;; WARN returns NIL; returning that value would make the reader
           ;; treat the stray paren as the object NIL.  Return no values so
           ;; the character really is ignored.
           (values))
         (signal-reader-error stream "Unmatched ')' ~@[near position ~d~]." pos)))))

(eval-when (:load-toplevel)             ; But not when mousing around!
  (make-dispatch-macro-character #\# t))

;;; #( and #n( : simple vectors.  Without a numeric argument (or under
;;; *READ-SUPPRESS*) the vector is as long as the list read.  With one,
;;; the vector has NUMARG elements and the last form read is replicated to
;;; fill any remaining slots.
(set-dispatch-macro-character
 #\#
 #\(
 (nfunction
  |#(-reader|
  (lambda (stream subchar numarg)
    (declare (ignore subchar))
    (if (or (null numarg) *read-suppress*)
      (multiple-value-bind (lst notes) (read-list stream t)
        (let* ((len (length lst))
               (vec (make-array len)))
          (declare (list lst) (fixnum len) (simple-vector vec))
          (dotimes (i len)
            (setf (svref vec i) (pop lst)))
          (values vec notes)))
      (locally
          (declare (fixnum numarg))
        (do* ((vec (make-array numarg))
              (notes ())
              (lastform)
              (i 0 (1+ i)))
             ;; The end test does the reading: it is true once the closing
             ;; paren has been seen.
             ((multiple-value-bind (form form-p source-info)
                  (%read-list-expression stream nil)
                (if form-p
                  (progn
                    ;; More elements than the declared length: report it
                    ;; here rather than storing past the end of VEC.
                    (when (>= i numarg)
                      (signal-reader-error stream
                                           "Too many elements for vector of length ~d."
                                           numarg))
                    (setq lastform form)
                    (when source-info (push source-info notes)))
                  (unless (= i numarg)
                    (if (= i 0)
                      ;; Nonzero length requested but no elements given.
                      (%err-disp $XARROOB -1 vec)
                      (do* ((j i (1+ j)))
                           ((= j numarg))
                        (declare (fixnum j))
                        (setf (svref vec j) lastform)))))
                (not form-p))
              (values vec notes))
          (declare (fixnum i))
          (setf (svref vec i) lastform)))))))

;;; Read a token from STREAM and convert it to a number in RADIX.  Under
;;; *READ-SUPPRESS* returns NIL; otherwise the token must be unescaped,
;;; carry no package prefix and denote a RATIONAL.
(defun %read-rational (stream subchar radix)
  (declare (ignore subchar))
  (with-token-buffer (tb)
    (multiple-value-bind (escapes xpackage)
        (%collect-xtoken tb stream (%next-non-whitespace-char-and-attr-no-eof stream))
      (unless *read-suppress*
        (let* ((val (%token-to-number tb radix)))
          (or (and (null escapes)
                   (null xpackage)
                   (typep val 'rational)
                   val)
              (%err-disp $xbadnum)))))))

;;; Signal an error unless NUMARG was supplied or *READ-SUPPRESS* is true.
(defun require-numarg (subchar numarg)
  (or numarg *read-suppress*
      (error "Numeric argument required for #~A reader macro ." subchar)))

;;; Signal an error if NUMARG was supplied and *READ-SUPPRESS* is false.
(defun require-no-numarg (subchar numarg)
  (if (and numarg (not *read-suppress*))
    (error "Spurious numeric argument in #~D~A reader macro ." numarg subchar)))

;;; #. : read a form and, unless *READ-SUPPRESS* is true, evaluate it.
;;; Signals a reader error when *READ-EVAL* is false.
(defun read-eval (stream subchar numarg)
  (require-no-numarg subchar numarg)
  (if *read-eval*
    (let* ((exp (%read-list-expression stream nil)))
      (unless *read-suppress*
        (eval exp)))
    (signal-reader-error stream "#. reader macro invoked when ~S is false ."
                         '*read-eval*)))

;;; #C : read a form and apply COMPLEX to it.
(set-dispatch-macro-character
 #\#
 #\C
 #'(lambda (stream char arg)
     (require-no-numarg char arg)
     (multiple-value-bind (form note) (read-internal stream t nil t)
       (values (unless *read-suppress* (apply #'complex form)) (and note (list note))))))

(set-dispatch-macro-character
 #\#
 #\.
 #'read-eval)

;;; Read a valid, non-numeric token string from stream; *READ-SUPPRESS*
;;; is known to be false.
(defun read-symbol-token (stream)
  (multiple-value-bind (firstch attr) (%next-non-whitespace-char-and-attr-no-eof stream)
    (declare (fixnum attr))
    (with-token-buffer (tb)
      ;; Reject an illegal or macro first character, a token with a package
      ;; prefix, and an unescaped token that parses as a number.
      (if (or (= attr $CHT_ILL)
              (logbitp $cht_macbit attr)
              (multiple-value-bind (escapes explicit-package nondots)
                  (%collect-xtoken tb stream firstch)
                (declare (ignore nondots))
                (%casify-token tb (unless (atom escapes) escapes))
                (or explicit-package
                    (and (not escapes)
                         (%token-to-number tb (%validate-radix *read-base*))))))
        (%err-disp $XBADSYM)
        (%string-from-token tb)))))

;;; #: : uninterned symbol.
(set-dispatch-macro-character
 #\#
 #\:
 #'(lambda (stream subchar numarg)
     (require-no-numarg subchar numarg)
     (if (not *read-suppress*)
       (make-symbol (read-symbol-token stream))
       (progn
         (%read-list-expression stream nil)
         nil))))

;;; #b, #o, #x : rationals in radix 2, 8 and 16.
(set-dispatch-macro-character
 #\#
 #\b
 #'(lambda (stream subchar numarg)
     (require-no-numarg subchar numarg)
     (%read-rational stream subchar 2)))

(set-dispatch-macro-character
 #\#
 #\o
 #'(lambda (stream subchar numarg)
     (require-no-numarg subchar numarg)
     (%read-rational stream subchar 8)))

(set-dispatch-macro-character
 #\#
 #\x
 #'(lambda (stream subchar numarg)
     (require-no-numarg subchar numarg)
     (%read-rational stream subchar 16)))

;;; #nr : rational in radix n, which must be an integer from 2 to 36.
(set-dispatch-macro-character
 #\#
 #\r
 #'(lambda (stream subchar numarg)
     (unless *read-suppress*
       (require-numarg subchar numarg)
       (check-type numarg (integer 2 36)))
     (%read-rational stream subchar numarg)))

;;; #' : read the next form and wrap it in FUNCTION.
(set-dispatch-macro-character
 #\#
 #\'
 (nfunction |#'-reader|
            (lambda (stream subchar numarg)
              (require-no-numarg subchar numarg)
              (multiple-value-bind (form note) (read-internal stream t nil t)
                (values `(function ,form) (and note (list note)))))))

;;; #| ... |# : nestable block comment.  Returns no values.
(set-dispatch-macro-character
 #\#
 #\|
 (nfunction |#\|-reader|
            (lambda (stream subchar numarg)
              (require-no-numarg subchar numarg)
              (do* ((lastch nil ch)
                    (ch)
                    (level 1))
                   ((= level 0) (values))
                (declare (fixnum level))
                (setq ch (%read-char-no-eof stream))
                ;; CH is cleared after each delimiter so that its second
                ;; character cannot also start another delimiter.
                (if (and (eq ch #\|)
                         (eq lastch #\#))
                  (progn
                    (setq ch nil)
                    (incf level))
                  (if (and (eq ch #\#)
                           (eq lastch #\|))
                    (progn
                      (setq ch nil)
                      (decf level))))))))

;;; Signal a reader error reporting that DESCRIPTION was encountered.
(defun %unreadable (stream description)
  (signal-reader-error stream "~S encountered." description))

;;; #< , # followed by one of the characters listed below, and #) are all
;;; reader errors.
(set-dispatch-macro-character
 #\#
 #\<
 #'(lambda (stream &rest ignore)
     (declare (ignore ignore))
     (%unreadable stream "#<")))

(dolist (ch '(#\null #\tab #\linefeed #\page #\return #\space #\312))
  (set-dispatch-macro-character
   #\#
   ch
   #'(lambda (stream &rest ignore)
       (declare (ignore ignore))
       (%unreadable stream "#"))))

(set-dispatch-macro-character
 #\#
 #\)
 #'(lambda (stream &rest ignore)
     (declare (ignore ignore))
     (%unreadable stream "#)")))

;;; #\ : character object.  The token is collected with a backslash as its
;;; first character and the resulting string is looked up with NAME-CHAR.
(set-dispatch-macro-character
 #\#
 #\\
 #'(lambda (stream subchar numarg)
     (require-no-numarg subchar numarg)
     (with-token-buffer (tb)
       (%collect-xtoken tb stream #\\)
       (unless *read-suppress*
         (let* ((str (%string-from-token tb)))
           (or (name-char str)
               (error "Unknown character name - \"~a\" ." str)))))))

;;;Since some built-in read macros used to use internal reader entry points
;;;for efficiency, we couldn't reliably offer a protocol for stream-dependent
;;;recursive reading.  So recursive reads always get done via tyi's, and streams
;;;only get to intercept toplevel reads.
;;; Standard READ: returns only the form, dropping the source note that
;;; READ-INTERNAL may return as a second value.
(defun read (&optional stream (eof-error-p t) eof-value recursive-p)
  (declare (resident))
  ;; just return the first value of read-internal
  (values (read-internal stream eof-error-p eof-value recursive-p)))

;;; Like READ, but passes through every value of %READ-FORM (the form and
;;; its source note).  A non-recursive call rebinds %READ-OBJECTS% and
;;; %KEEP-WHITESPACE%; a recursive call leaves the bindings alone.
;;; At EOF with EOF-ERROR-P false, EOF-VALUE is returned in both cases.
(defun read-internal (stream eof-error-p eof-value recursive-p)
  (setq stream (input-stream-arg stream))
  (if recursive-p
    ;; Pass EOF-VALUE along; it is only consulted when EOF-ERROR-P is false.
    (%read-form stream (if eof-error-p 0) eof-value)
    (let ((%read-objects% nil) (%keep-whitespace% nil))
      (%read-form stream (if eof-error-p 0) eof-value))))

(defun read-preserving-whitespace (&optional stream (eof-error-p t) eof-value recursive-p)
  "Read from STREAM and return the value read, preserving any whitespace
   that followed the object."
  (setq stream (input-stream-arg stream))
  (values
   (if recursive-p
     ;; Honor EOF-ERROR-P and EOF-VALUE here too, as READ-INTERNAL does.
     (%read-form stream (if eof-error-p 0) eof-value)
     (let ((%read-objects% nil) (%keep-whitespace% t))
       (%read-form stream (if eof-error-p 0) eof-value)))))

(defun read-delimited-list (char &optional stream recursive-p)
  "Read Lisp values from INPUT-STREAM until the next character after a
   value's representation is CHAR, and return the objects as a list."
  (setq char (require-type char 'character))
  (setq stream (input-stream-arg stream))
  (values
   (let ((%keep-whitespace% nil))
     (if recursive-p
       (%read-form stream char nil)
       (let ((%read-objects% nil))
         (%read-form stream char nil))))))

;;; Reader for #+ and #- .  SUBCHAR is #\+ or #\-; READ-FEATURE returns
;;; #\+ when the feature expression holds and #\- otherwise, so the next
;;; form is kept exactly when the two match.  When they do not match, the
;;; form is read under *READ-SUPPRESS* and no values are returned.
(defun read-conditional (stream subchar int)
  (declare (ignore int))
  (cond ((eq subchar (read-feature stream))
         (multiple-value-bind (form note) (read-internal stream t nil t)
           (values form (and note (list note)))))
        (t (let* ((*read-suppress* t))
             (read stream t nil t)
             (values)))))

;;; Read a feature expression with *PACKAGE* bound to the keyword package
;;; and evaluate it against *FEATURES*.  Atoms are looked up with MEMBER;
;;; (:not x), (:and ...) and (:or ...) combine subexpressions; any other
;;; list is an error.  Returns #\+ if the expression is true, else #\-.
(defun read-feature (stream)
  (let* ((f (let* ((*package* *keyword-package*))
              (read stream t nil t))))
    (labels ((eval-feature (form)
               (cond ((atom form)
                      (member form *features*))
                     ((eq (car form) :not)
                      (not (eval-feature (cadr form))))
                     ((eq (car form) :and)
                      (dolist (subform (cdr form) t)
                        (unless (eval-feature subform) (return))))
                     ((eq (car form) :or)
                      (dolist (subform (cdr form) nil)
                        (when (eval-feature subform) (return t))))
                     (t (%err-disp $XRDFEATURE form)))))
      (if (eval-feature f) #\+ #\-))))

(set-dispatch-macro-character #\# #\+ #'read-conditional)
(set-dispatch-macro-character #\# #\- #'read-conditional)

(defun %read-form (stream arg eof-val)
  "Read a lisp form from STREAM

arg=0      : read form, error if eof
arg=nil    : read form, eof-val if eof.
arg=char   : read delimited list"
  (declare (resident))
  (check-type *readtable* readtable)
  (check-type *package* package)
  (if (and arg (not (eq arg 0)))
    (read-list stream nil arg)
    (loop
      ;; Keep going until something parses to a value; comments and other
      ;; reader macros that return no values are skipped.
      (let* ((ch (%next-non-whitespace-char-and-attr stream)))
        (if (null ch)
          (if arg
            (error 'end-of-file :stream stream)
            (return eof-val))
          (multiple-value-bind (form form-p source-note)
              (%parse-expression stream ch nil)
            (when form-p
              (return
                (values (if *read-suppress* nil form)
                        source-note)))))))))

;;;Until load backquote...
;;; Placeholder definitions: backquote and comma signal an error.
(set-macro-character #\`
                     #'(lambda (stream char)
                         (declare (ignore stream))
                         (%err-disp $xbadmac char)))
(set-macro-character #\, (get-macro-character #\`))

;;; #P and #nP : read a string and convert it to a pathname.  The numeric
;;; argument is a flag word from 0 to 4: bit 0 sets the pathname type to
;;; :UNSPECIFIC and bit 1 sets the name to "", each only when that
;;; component is not already present.
(set-dispatch-macro-character #\# #\P
 (qlfun |#P-reader| (stream char flags &aux (invalid-string "Invalid flags (~S) for pathname ~S"))
   (declare (ignore char))
   (when (null flags) (setq flags 0))
   (unless (memq flags '(0 1 2 3 4))
     (unless *read-suppress* (report-bad-arg flags '(integer 0 4))))
   (multiple-value-bind (path note) (read-internal stream t nil t)
     (unless *read-suppress*
       (unless (stringp path) (report-bad-arg path 'string))
       (setq path (pathname path))
       (when (%ilogbitp 0 flags)
         (when (%pathname-type path) (error invalid-string flags path))
         (setf (%pathname-type path) :unspecific))
       (when (%ilogbitp 1 flags)
         (when (%pathname-name path) (error invalid-string flags path))
         (setf (%pathname-name path) ""))
       (values path (and note (list note)))))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defstruct (source-note (:conc-name "SOURCE-NOTE.")
                        (:constructor %make-source-note))
  ;; For an inner source form, the source-note of the outer source form.
  ;; For outer source note, octets
  source
  filename
  ;; start and end file positions (NOT characters positions)
  file-range)

;;; Public constructor: stores START-POS and END-POS in their encoded form
;;; (see ENCODE-FILE-RANGE).
(defun make-source-note (&key filename start-pos end-pos source)
  (%make-source-note :filename filename
                     :file-range (encode-file-range start-pos end-pos)
                     :source source))

(defmethod print-object ((sn source-note) stream)
  (print-unreadable-object (sn stream :type t :identity nil)
    (print-source-note sn stream)))

;;; Print the file name, position range and up to 120 characters of the
;;; source text of SN, or "Interactive" plus the text when SN has no file.
(defun print-source-note (sn stream)
  (let* ((file (source-note-filename sn))
         (text (ignore-errors (source-note-text sn))))
    (when file
      ;; Should fix this when record the name.
      (when (eq (pathname-version file) :newest)
        (setq file (namestring (make-pathname :version nil :defaults file)))))
    (when text
      (setq text (string-sans-most-whitespace text 121))
      (when (> (length text) 120)
        (setq text (concatenate 'string (subseq text 0 120) "..."))))
    (if file
      (format stream "~s:~s-~s ~s" file
              (source-note-start-pos sn) (source-note-end-pos sn)
              text)
      (format stream "Interactive ~s" text))))

(defun source-note-filename (source)
  (if (source-note-p source)
    (source-note.filename source)
    ;;  else null or a pathname, as in record-source-file
    source))

(defun (setf source-note-filename) (filename source-note)
  (setf (source-note.filename (require-type source-note 'source-note)) filename))

;; Since source notes are optional, it simplifies a lot of code
;; to have these accessors allow NIL.

(defun source-note-source (source-note)
  (when source-note
    (source-note.source (require-type source-note 'source-note))))

(defun source-note-file-range (source-note)
  (when source-note
    (source-note.file-range (require-type source-note 'source-note))))

;;; A file range is either a cons (start . end) or a single integer holding
;;; the start position shifted left 14 bits plus the length in the low 14
;;; bits.
(defun source-note-start-pos (source-note)
  (let ((range (source-note-file-range source-note)))
    (when range
      (if (consp range) (car range) (ash range -14)))))

(defun source-note-end-pos (source-note)
  (let ((range (source-note-file-range source-note)))
    (when range
      (if (consp range) (cdr range) (+ (ash range -14) (logand range #x3FFF))))))

;;; Encode START-POS and END-POS as a file range.  The packed integer form
;;; is used only when the length fits in 14 bits and is not negative; a
;;; negative length added into the packed form would decode to the wrong
;;; positions, so such a range is kept as a cons.
(defun encode-file-range (start-pos end-pos)
  (let ((len (- end-pos start-pos)))
    (if (< -1 len (ash 1 14))
      (+ (ash start-pos 14) len)
      (cons start-pos end-pos))))

;;; Inverse of ENCODE-FILE-RANGE: returns (values start-pos end-pos), or
;;; NIL when RANGE is NIL.
(defun decode-file-range (range)
  (when range
    (if (consp range)
      (values (car range) (cdr range))
      (let ((start-pos (ash range -14)))
        (values start-pos (+ start-pos (logand range #x3FFF)))))))

;;; Return the source text covered by SOURCE-NOTE, or by the positions
;;; START and END when given.  A note whose source is another note
;;; delegates to that note; a note whose source is an octet vector decodes
;;; the selected octets as UTF-8; a note with no source yields NIL.
(defun source-note-text (source-note &optional start end)
  (when source-note
    (let* ((source (source-note-source source-note))
           (start-pos (source-note-start-pos source-note))
           (end-pos (source-note-end-pos source-note))
           (start (or start start-pos))
           (end (or end end-pos)))
      (etypecase source
        (source-note
         (assert (<= (source-note-start-pos source) start end (source-note-end-pos source)))
         (source-note-text source start end))
        ((simple-array (unsigned-byte 8) (*))
         ;; Convert positions to indices into this note's octet vector.
         (decf start start-pos)
         (decf end start-pos)
         (assert (and (<= 0 start end (length source))))
         (decode-string-from-octets source :start start :end end :external-format :utf-8))
        (null source)))))

;;; Follow the chain of SOURCE slots until reaching a note whose source is
;;; not itself a source note, and return that note.
(defun source-note-toplevel-note (source-note)
  (when source-note
    (loop for source = (source-note-source source-note)
          while (source-note-p source)
          do (setq source-note source))
    source-note))

;;; Alist of (stream map file-name stream-offset) entries for the streams
;;; whose reads are currently being recorded.
(defvar *recording-source-streams* ())

;;; If STREAM is being recorded and *READ-SUPPRESS* is false, make a source
;;; note for FORM, add it to the stream's map, make it the source of each
;;; note in SUBFORM-NOTES, and return it.  Otherwise returns NIL.
(defun record-source-note (&key form stream start-pos end-pos subform-notes)
  (let ((recording (assq stream *recording-source-streams*)))
    (when (and recording (not *read-suppress*))
      (destructuring-bind (map file-name stream-offset) (cdr recording)
        (let* ((prev (gethash form map))
               (note (make-source-note :filename file-name
                                       :start-pos (+ stream-offset start-pos)
                                       :end-pos (+ stream-offset end-pos))))
          ;; A form seen more than once maps to a list of all its notes.
          (setf (gethash form map)
                (cond ((null prev) note)
                      ((consp prev) (cons note prev))
                      (t (list note prev))))
          (loop for subnote in subform-notes
                do (when (source-note-source subnote)
                     (error "Subnote ~s already owned?" subnote))
                do (setf (source-note.source subnote) note))
          note)))))

(defun read-recording-source (stream &key eofval file-name start-offset map save-source-text)
  "Read a top-level form, perhaps recording source locations.
If MAP is NIL, just reads a form as if by READ.
If MAP is non-NIL, returns a second value of a source-note object describing the form.
In addition, if MAP is a hash table, it gets filled with source-note's for all
non-atomic nested subforms."
  (when (null start-offset) (setq start-offset 0))
  (typecase map
    (null (values (read-internal stream nil eofval nil) nil))
    (hash-table
     (let* ((stream (recording-input-stream stream))
            (recording (list stream map file-name start-offset))
            (*recording-source-streams* (cons recording *recording-source-streams*)))
       (declare (dynamic-extent recording *recording-source-streams*))
       (multiple-value-bind (form source-note) (read-internal stream nil eofval nil)
         (when (and source-note (not (eq form eofval)))
           (assert (null (source-note-source source-note)))
           (loop for form being the hash-key using (hash-value note) of map
                 do (cond ((eq note source-note) nil)
                          ;; Remove entries with multiple source notes, which can happen
                          ;; for atoms.  If we can't tell which instance we mean, then we
                          ;; don't have useful source info.
                          ((listp note) (remhash form map))
                          ((loop for p = note then (source-note-source p)
                                 while (source-note-p p)
                                 thereis (eq p source-note))
                           ;; Flatten the backpointers so each subnote points directly
                           ;; to the toplevel note.
                           (setf (source-note.source note) source-note))))
           (when save-source-text
             (setf (source-note.source source-note)
                   (fetch-octets-from-stream stream
                                             (- (source-note-start-pos source-note)
                                                start-offset)
                                             (- (source-note-end-pos source-note)
                                                start-offset)))))
         (values form source-note))))
    (T ;; not clear if this is ever useful
     (let* ((start-pos (stream-position stream))
            (form (read-internal stream nil eofval nil))
            (end-pos (and start-pos (neq form eofval) (stream-position stream)))
            (source-note (and end-pos
                              (make-source-note :filename file-name
                                                :start-pos (+ start-offset start-pos)
                                                :end-pos (+ start-offset end-pos)))))
       (when (and source-note save-source-text)
         (setf (source-note.source source-note)
               (fetch-octets-from-stream stream start-pos end-pos)))
       (values form source-note)))))

(defmethod fetch-octets-from-stream ((stream input-stream) start-offset end-offset)
  ;; We basically want to read the bytes between two positions, but there is no
  ;; direct interface for that.  So we let the stream decode and then we re-encode.
  ;; (Just as well, since otherwise we'd have to remember the file's encoding).
  ;; Returns a vector of END-OFFSET - START-OFFSET octets of UTF-8, or NIL when
  ;; the range is empty.  The stream position is restored before returning.
  (declare (fixnum start-offset))
  (when (< start-offset end-offset)
    (let* ((cur-pos (stream-position stream))
           (noctets (- end-offset start-offset))
           (vec (make-array noctets :element-type '(unsigned-byte 8)))
           (index 0)
           (crlfp (eq :crlf
                      (cdr (assoc (external-format-line-termination
                                   (stream-external-format stream))
                                  *canonical-line-termination-conventions*)))))
      (declare (type fixnum end-offset noctets index)
               (type (simple-array (unsigned-byte 8) (*)) vec))
      ;; OUT stores one octet and leaves the LOOP as soon as VEC is full.
      (macrolet ((out (code)
                   `(progn
                      (setf (aref vec index) ,code)
                      (when (eql (incf index) noctets) (return)))))
        ;; Put the stream back where it was even if reading signals
        ;; (for example on an unexpected end of file).
        (unwind-protect
             (progn
               (stream-position stream start-offset)
               (loop
                 (let ((code (char-code (read-char stream))))
                   (declare (fixnum code))
                   (cond ((< code #x80)
                          ;; With CRLF line termination a newline stands for
                          ;; two octets in the file.
                          (when (and crlfp (= code (char-code #\NewLine)))
                            (out (char-code #\Return)))
                          (out code))
                         ((< code #x800)
                          (out (logior #xc0 (ldb (byte 5 6) code)))
                          (out (logior #x80 (ldb (byte 6 0) code))))
                         ((< code #x10000)
                          (out (logior #xe0 (ldb (byte 4 12) code)))
                          (out (logior #x80 (ldb (byte 6 6) code)))
                          (out (logior #x80 (ldb (byte 6 0) code))))
                         (t
                          ;; Four-octet form: every octet after the lead
                          ;; octet is a continuation octet (10xxxxxx).
                          (out (logior #xf0 (ldb (byte 3 18) code)))
                          (out (logior #x80 (ldb (byte 6 12) code)))
                          (out (logior #x80 (ldb (byte 6 6) code)))
                          (out (logior #x80 (ldb (byte 6 0) code))))))))
          (stream-position stream cur-pos)))
      vec)))

(defun ensure-source-note-text (source-note &key (if-does-not-exist nil))
  "Fetch source text from file if don't have it.
IF-DOES-NOT-EXIST is :ERROR or NIL; with NIL, failures return NIL instead
of signalling."
  (setq if-does-not-exist (require-type if-does-not-exist '(member :error nil)))
  (if source-note
    (let ((source (source-note-source source-note))
          (filename (source-note-filename source-note)))
      (etypecase source
        (null
         (if filename
           (with-open-file (stream filename :if-does-not-exist if-does-not-exist)
             (when stream
               (let ((start (source-note-start-pos source-note))
                     (end (source-note-end-pos source-note))
                     (len (file-length stream)))
                 ;; A file shorter than the recorded end position must have
                 ;; changed since the note was made.
                 (if (<= end len)
                   (setf (source-note.source source-note)
                         (fetch-octets-from-stream stream start end))
                   (when if-does-not-exist
                     (error 'simple-file-error :pathname filename
                            :error-type "File ~s changed since source info recorded"))))))
           (when if-does-not-exist
             (error "Missing source text in interactive source note"))))
        (source-note
         ;; Pass the caller's error policy on to the enclosing note.
         (ensure-source-note-text source :if-does-not-exist if-does-not-exist))
        ((simple-array (unsigned-byte 8) (*))
         source)))
    (when if-does-not-exist
      (error "Missing source note"))))

;; This can be called explicitly by macros that do more complicated transforms
(defun note-source-transformation (original new)
  (nx-note-source-transformation original new))

;;; Wrapper stream for recording source of non-random-access streams.
(defclass recording-character-input-stream (fundamental-stream character-input-stream)
  ((input-stream :initarg :input-stream)
   ;; Every character read through the wrapper since the last reset.
   (string :initform (make-array 1024 :element-type 'character :fill-pointer 0 :adjustable t))))

(defmethod stream-element-type ((s recording-character-input-stream))
  (with-slots (input-stream) s
    (stream-element-type input-stream)))

(defmethod stream-read-char ((s recording-character-input-stream))
  (with-slots (input-stream string) s
    (let ((char (stream-read-char input-stream)))
      (when (and char (neq char :eof))
        (vector-push-extend char string))
      char)))

(defmethod stream-read-char-no-hang ((s recording-character-input-stream))
  (with-slots (input-stream string) s
    (let ((char (stream-read-char-no-hang input-stream)))
      (when (and char (neq char :eof))
        (vector-push-extend char string))
      char)))

(defmethod stream-peek-char ((s recording-character-input-stream))
  (with-slots (input-stream) s
    (stream-peek-char input-stream)))

(defmethod stream-listen ((s recording-character-input-stream))
  (with-slots (input-stream) s
    (stream-listen input-stream)))

(defmethod stream-read-line ((s recording-character-input-stream))
  (generic-read-line s))

(defmethod stream-read-list ((s recording-character-input-stream) list count)
  (generic-character-read-list s list count))

(defmethod stream-read-vector ((s recording-character-input-stream) vector start end)
  (generic-character-read-vector s vector start end))

(defmethod stream-unread-char ((s recording-character-input-stream) char)
  (with-slots (input-stream string) s
    (vector-pop string)    ;; Error if no characters read since last reset.
    (stream-unread-char input-stream char)))

(defmethod stream-eofp ((s recording-character-input-stream))
  (with-slots (input-stream) s
    (stream-eofp input-stream)))

(defmethod stream-clear-input ((s recording-character-input-stream))
  (with-slots (input-stream) s
    (stream-clear-input input-stream)))

;;; The position of a recording stream is the number of characters
;;; recorded since the last reset.  Setting the position is not supported:
;;; with NEWPOS supplied this returns NIL.
(defmethod stream-position ((s recording-character-input-stream) &optional newpos)
  (with-slots (string) s
    (unless newpos
      (fill-pointer string))))

;;; Return STREAM itself if it reports a position and accepts having that
;;; position set; otherwise wrap it in a recording stream.
(defun recording-input-stream (stream)
  (let ((pos (stream-position stream)))
    (if (and pos (stream-position stream pos))
      stream
      (make-instance 'recording-character-input-stream :input-stream stream))))

;;; UTF-8 encode the recorded characters from START-OFFSET below END-OFFSET
;;; and return the octets, or NIL when the range is empty.  The recording
;;; buffer is reset afterwards.
(defmethod fetch-octets-from-stream ((s recording-character-input-stream) start-offset end-offset)
  (declare (fixnum start-offset end-offset))
  (with-slots (string) s
    (when (< start-offset end-offset)
      (let* ((sstring (array-data-and-offset string))
             ;; First pass: count the octets needed.
             (noctets (loop for i fixnum from start-offset below end-offset
                            as code fixnum = (%char-code (%schar sstring i))
                            sum (cond ((< code #x80) 1)
                                      ((< code #x800) 2)
                                      ((< code #x10000) 3)
                                      (t 4))
                            of-type fixnum))
             (vec (make-array noctets :element-type '(unsigned-byte 8)))
             (index 0))
        (declare (type fixnum noctets index)
                 (type simple-base-string sstring)
                 (type (simple-array (unsigned-byte 8) (*)) vec))
        (macrolet ((out (octet)
                     `(progn
                        (setf (aref vec index) ,octet)
                        (incf index))))
          ;; Second pass: emit the octets.
          (loop for i fixnum from start-offset below end-offset
                as code fixnum = (%char-code (%schar sstring i))
                do (cond ((< code #x80)
                          (out code))
                         ((< code #x800)
                          (out (logior #xc0 (ldb (byte 5 6) code)))
                          (out (logior #x80 (ldb (byte 6 0) code))))
                         ((< code #x10000)
                          (out (logior #xe0 (ldb (byte 4 12) code)))
                          (out (logior #x80 (ldb (byte 6 6) code)))
                          (out (logior #x80 (ldb (byte 6 0) code))))
                         (t
                          ;; Four-octet form: every octet after the lead
                          ;; octet is a continuation octet (10xxxxxx).
                          (out (logior #xf0 (ldb (byte 3 18) code)))
                          (out (logior #x80 (ldb (byte 6 12) code)))
                          (out (logior #x80 (ldb (byte 6 6) code)))
                          (out (logior #x80 (ldb (byte 6 0) code)))))))
        (setf (fill-pointer string) 0) ;; reset
        vec))))

; end