module

UnicodeNormalize

ruby latest stable

Define the UnicodeNormalize module here so that we don’t have to look it up.


Automatically generated by template/unicode_norm_gen.tmpl

Constants

ACCENTS = accents

CLASS_TABLE = class_table.freeze

COMPOSITION_TABLE = {\n"A\\u0300"=>"\\u00C0", "A\\u0301"=>"\\u00C1", "A\\u0302"=>"\\u00C2", "A\\u0303"=>"\\u00C3", "A\\u0308"=>"\\u00C4", "A\\u030A"=>"\\u00C5", "C\\u0327"=>"\\u00C7", "E\\u0300"=>"\\u00C8",\n"E\\u0301"=>"\\u00C9", "E\\u0302"=>"\\u00CA", "E\\u0308"=>"\\u00CB", "I\\u0300"=>"\\u00CC", "I\\u0301"=>"\\u00CD", "I\\u0302"=>"\\u00CE", "I\\u0308"=>"\\u00CF", "N\\u0303"=>"\\u00D1",\n"O\\u0300"=>"\\u00D2", "O\\u0301"=>"\\u00D3", "O\\u0302"=>"\\u00D4", "O\\u0303"=>"\\u00D5", "O\\u0308"=>"\\u00D6", "U\\u0300"=>"\\u00D9", "U\\u0301"=>"\\u00DA", "U\\u0302"=>"\\u00DB",\n"U\\u0308"=>"\\u00DC", "Y\\u0301"=>"\\u00DD", "a\\u0300"=>"\\u00E0", "a\\u0301"=>"\\u00E1", "a\\u0302"=>"\\u00E2", "a\\u0303"=>"\\u00E3", "a\\u0308"=>"\\u00E4", "a\\u030A"=>"\\u00E5",\n"c\\u0327"=>"\\u00E7", "e\\u0300"=>"\\u00E8", "e\\u0301"=>"\\u00E9", "e\\u0302"=>"\\u00EA", "e\\u0308"=>"\\u00EB", "i\\u0300"=>"\\u00EC", "i\\u0301"=>"\\u00ED", "i\\u0302"=>"\\u00EE",\n"i\\u0308"=>"\\u00EF", "n\\u0303"=>"\\u00F1", "o\\u0300"=>"\\u00F2", "o\\u0301"=>"\\u00F3", "o\\u0302"=>"\\u00F4", "o\\u0303"=>"\\u00F5", "o\\u0308"=>"\\u00F6", "u\\u0300"=>"\\u00F9",\n"u\\u0301"=>"\\u00FA", "u\\u0302"=>"\\u00FB", "u\\u0308"=>"\\u00FC", "y\\u0301"=>"\\u00FD", "y\\u0308"=>"\\u00FF", "A\\u0304"=>"\\u0100", "a\\u0304"=>"\\u0101", "A\\u0306"=>"\\u0102",\n"a\\u0306"=>"\\u0103", "A\\u0328"=>"\\u0104", "a\\u0328"=>"\\u0105", "C\\u0301"=>"\\u0106", "c\\u0301"=>"\\u0107", "C\\u0302"=>"\\u0108", "c\\u0302"=>"\\u0109", "C\\u0307"=>"\\u010A",\n"c\\u0307"=>"\\u010B", "C\\u030C"=>"\\u010C", "c\\u030C"=>"\\u010D", "D\\u030C"=>"\\u010E", "d\\u030C"=>"\\u010F", "E\\u0304"=>"\\u0112", "e\\u0304"=>"\\u0113", "E\\u0306"=>"\\u0114",\n"e\\u0306"=>"\\u0115", "E\\u0307"=>"\\u0116", "e\\u0307"=>"\\u0117", "E\\u0328"=>"\\u0118", "e\\u0328"=>"\\u0119", "E\\u030C"=>"\\u011A", "e\\u030C"=>"\\u011B", "G\\u0302"=>"\\u011C",\n"g\\u0302"=>"\\u011D", "G\\u0306"=>"\\u011E", "g\\u0306"=>"\\u011F", "G\\u0307"=>"\\u0120", "g\\u0307"=>"\\u0121", 
"G\\u0327"=>"\\u0122", "g\\u0327"=>"\\u0123", "H\\u0302"=>"\\u0124",\n"h\\u0302"=>"\\u0125", "I\\u0303"=>"\\u0128", "i\\u0303"=>"\\u0129", "I\\u0304"=>"\\u012A", "i\\u0304"=>"\\u012B", "I\\u0306"=>"\\u012C", "i\\u0306"=>"\\u012D", "I\\u0328"=>"\\u012E",\n"i\\u0328"=>"\\u012F", "I\\u0307"=>"\\u0130", "J\\u0302"=>"\\u0134", "j\\u0302"=>"\\u0135", "K\\u0327"=>"\\u0136", "k\\u0327"=>"\\u0137", "L\\u0301"=>"\\u0139", "l\\u0301"=>"\\u013A",\n"L\\u0327"=>"\\u013B", "l\\u0327"=>"\\u013C", "L\\u030C"=>"\\u013D", "l\\u030C"=>"\\u013E", "N\\u0301"=>"\\u0143", "n\\u0301"=>"\\u0144", "N\\u0327"=>"\\u0145", "n\\u0327"=>"\\u0146",\n"N\\u030C"=>"\\u0147", "n\\u030C"=>"\\u0148", "O\\u0304"=>"\\u014C", "o\\u0304"=>"\\u014D", "O\\u0306"=>"\\u014E", "o\\u0306"=>"\\u014F", "O\\u030B"=>"\\u0150", "o\\u030B"=>"\\u0151",\n"R\\u0301"=>"\\u0154", "r\\u0301"=>"\\u0155", "R\\u0327"=>"\\u0156", "r\\u0327"=>"\\u0157", "R\\u030C"=>"\\u0158", "r\\u030C"=>"\\u0159", "S\\u0301"=>"\\u015A", "s\\u0301"=>"\\u015B",\n"S\\u0302"=>"\\u015C", "s\\u0302"=>"\\u015D", "S\\u0327"=>"\\u015E", "s\\u0327"=>"\\u015F", "S\\u030C"=>"\\u0160", "s\\u030C"=>"\\u0161", "T\\u0327"=>"\\u0162", "t\\u0327"=>"\\u0163",\n"T\\u030C"=>"\\u0164", "t\\u030C"=>"\\u0165", "U\\u0303"=>"\\u0168", "u\\u0303"=>"\\u0169", "U\\u0304"=>"\\u016A", "u\\u0304"=>"\\u016B", "U\\u0306"=>"\\u016C", "u\\u0306"=>"\\u016D",\n"U\\u030A"=>"\\u016E", "u\\u030A"=>"\\u016F", "U\\u030B"=>"\\u0170", "u\\u030B"=>"\\u0171", "U\\u0328"=>"\\u0172", "u\\u0328"=>"\\u0173", "W\\u0302"=>"\\u0174", "w\\u0302"=>"\\u0175",\n"Y\\u0302"=>"\\u0176", "y\\u0302"=>"\\u0177", "Y\\u0308"=>"\\u0178", "Z\\u0301"=>"\\u0179", "z\\u0301"=>"\\u017A", "Z\\u0307"=>"\\u017B", "z\\u0307"=>"\\u017C", "Z\\u030C"=>"\\u017D",\n"z\\u030C"=>"\\u017E", "O\\u031B"=>"\\u01A0", "o\\u031B"=>"\\u01A1", "U\\u031B"=>"\\u01AF", "u\\u031B"=>"\\u01B0", "A\\u030C"=>"\\u01CD", "a\\u030C"=>"\\u01CE", "I\\u030C"=>"\\u01CF",\n"i\\u030C"=>"\\u01D0", "O\\u030C"=>"\\u01D1", "o\\u030C"=>"\\u01D2", 
"U\\u030C"=>"\\u01D3", "u\\u030C"=>"\\u01D4", "\\u00DC\\u0304"=>"\\u01D5", "\\u00FC\\u0304"=>"\\u01D6", "\\u00DC\\u0301"=>"\\u01D7",\n"\\u00FC\\u0301"=>"\\u01D8", "\\u00DC\\u030C"=>"\\u01D9", "\\u00FC\\u030C"=>"\\u01DA", "\\u00DC\\u0300"=>"\\u01DB", "\\u00FC\\u0300"=>"\\u01DC", "\\u00C4\\u0304"=>"\\u01DE", "\\u00E4\\u0304"=>"\\u01DF", "\\u0226\\u0304"=>"\\u01E0",\n"\\u0227\\u0304"=>"\\u01E1", "\\u00C6\\u0304"=>"\\u01E2", "\\u00E6\\u0304"=>"\\u01E3", "G\\u030C"=>"\\u01E6", "g\\u030C"=>"\\u01E7", "K\\u030C"=>"\\u01E8", "k\\u030C"=>"\\u01E9", "O\\u0328"=>"\\u01EA",\n"o\\u0328"=>"\\u01EB", "\\u01EA\\u0304"=>"\\u01EC", "\\u01EB\\u0304"=>"\\u01ED", "\\u01B7\\u030C"=>"\\u01EE", "\\u0292\\u030C"=>"\\u01EF", "j\\u030C"=>"\\u01F0", "G\\u0301"=>"\\u01F4", "g\\u0301"=>"\\u01F5",\n"N\\u0300"=>"\\u01F8", "n\\u0300"=>"\\u01F9", "\\u00C5\\u0301"=>"\\u01FA", "\\u00E5\\u0301"=>"\\u01FB", "\\u00C6\\u0301"=>"\\u01FC", "\\u00E6\\u0301"=>"\\u01FD", "\\u00D8\\u0301"=>"\\u01FE", "\\u00F8\\u0301"=>"\\u01FF",\n"A\\u030F"=>"\\u0200", "a\\u030F"=>"\\u0201", "A\\u0311"=>"\\u0202", "a\\u0311"=>"\\u0203", "E\\u030F"=>"\\u0204", "e\\u030F"=>"\\u0205", "E\\u0311"=>"\\u0206", "e\\u0311"=>"\\u0207",\n"I\\u030F"=>"\\u0208", "i\\u030F"=>"\\u0209", "I\\u0311"=>"\\u020A", "i\\u0311"=>"\\u020B", "O\\u030F"=>"\\u020C", "o\\u030F"=>"\\u020D", "O\\u0311"=>"\\u020E", "o\\u0311"=>"\\u020F",\n"R\\u030F"=>"\\u0210", "r\\u030F"=>"\\u0211", "R\\u0311"=>"\\u0212", "r\\u0311"=>"\\u0213", "U\\u030F"=>"\\u0214", "u\\u030F"=>"\\u0215", "U\\u0311"=>"\\u0216", "u\\u0311"=>"\\u0217",\n"S\\u03

DECOMPOSITION_TABLE = {\n"\\u00C0"=>"A\\u0300", "\\u00C1"=>"A\\u0301", "\\u00C2"=>"A\\u0302", "\\u00C3"=>"A\\u0303", "\\u00C4"=>"A\\u0308", "\\u00C5"=>"A\\u030A", "\\u00C7"=>"C\\u0327", "\\u00C8"=>"E\\u0300",\n"\\u00C9"=>"E\\u0301", "\\u00CA"=>"E\\u0302", "\\u00CB"=>"E\\u0308", "\\u00CC"=>"I\\u0300", "\\u00CD"=>"I\\u0301", "\\u00CE"=>"I\\u0302", "\\u00CF"=>"I\\u0308", "\\u00D1"=>"N\\u0303",\n"\\u00D2"=>"O\\u0300", "\\u00D3"=>"O\\u0301", "\\u00D4"=>"O\\u0302", "\\u00D5"=>"O\\u0303", "\\u00D6"=>"O\\u0308", "\\u00D9"=>"U\\u0300", "\\u00DA"=>"U\\u0301", "\\u00DB"=>"U\\u0302",\n"\\u00DC"=>"U\\u0308", "\\u00DD"=>"Y\\u0301", "\\u00E0"=>"a\\u0300", "\\u00E1"=>"a\\u0301", "\\u00E2"=>"a\\u0302", "\\u00E3"=>"a\\u0303", "\\u00E4"=>"a\\u0308", "\\u00E5"=>"a\\u030A",\n"\\u00E7"=>"c\\u0327", "\\u00E8"=>"e\\u0300", "\\u00E9"=>"e\\u0301", "\\u00EA"=>"e\\u0302", "\\u00EB"=>"e\\u0308", "\\u00EC"=>"i\\u0300", "\\u00ED"=>"i\\u0301", "\\u00EE"=>"i\\u0302",\n"\\u00EF"=>"i\\u0308", "\\u00F1"=>"n\\u0303", "\\u00F2"=>"o\\u0300", "\\u00F3"=>"o\\u0301", "\\u00F4"=>"o\\u0302", "\\u00F5"=>"o\\u0303", "\\u00F6"=>"o\\u0308", "\\u00F9"=>"u\\u0300",\n"\\u00FA"=>"u\\u0301", "\\u00FB"=>"u\\u0302", "\\u00FC"=>"u\\u0308", "\\u00FD"=>"y\\u0301", "\\u00FF"=>"y\\u0308", "\\u0100"=>"A\\u0304", "\\u0101"=>"a\\u0304", "\\u0102"=>"A\\u0306",\n"\\u0103"=>"a\\u0306", "\\u0104"=>"A\\u0328", "\\u0105"=>"a\\u0328", "\\u0106"=>"C\\u0301", "\\u0107"=>"c\\u0301", "\\u0108"=>"C\\u0302", "\\u0109"=>"c\\u0302", "\\u010A"=>"C\\u0307",\n"\\u010B"=>"c\\u0307", "\\u010C"=>"C\\u030C", "\\u010D"=>"c\\u030C", "\\u010E"=>"D\\u030C", "\\u010F"=>"d\\u030C", "\\u0112"=>"E\\u0304", "\\u0113"=>"e\\u0304", "\\u0114"=>"E\\u0306",\n"\\u0115"=>"e\\u0306", "\\u0116"=>"E\\u0307", "\\u0117"=>"e\\u0307", "\\u0118"=>"E\\u0328", "\\u0119"=>"e\\u0328", "\\u011A"=>"E\\u030C", "\\u011B"=>"e\\u030C", "\\u011C"=>"G\\u0302",\n"\\u011D"=>"g\\u0302", "\\u011E"=>"G\\u0306", "\\u011F"=>"g\\u0306", "\\u0120"=>"G\\u0307", "\\u0121"=>"g\\u0307", 
"\\u0122"=>"G\\u0327", "\\u0123"=>"g\\u0327", "\\u0124"=>"H\\u0302",\n"\\u0125"=>"h\\u0302", "\\u0128"=>"I\\u0303", "\\u0129"=>"i\\u0303", "\\u012A"=>"I\\u0304", "\\u012B"=>"i\\u0304", "\\u012C"=>"I\\u0306", "\\u012D"=>"i\\u0306", "\\u012E"=>"I\\u0328",\n"\\u012F"=>"i\\u0328", "\\u0130"=>"I\\u0307", "\\u0134"=>"J\\u0302", "\\u0135"=>"j\\u0302", "\\u0136"=>"K\\u0327", "\\u0137"=>"k\\u0327", "\\u0139"=>"L\\u0301", "\\u013A"=>"l\\u0301",\n"\\u013B"=>"L\\u0327", "\\u013C"=>"l\\u0327", "\\u013D"=>"L\\u030C", "\\u013E"=>"l\\u030C", "\\u0143"=>"N\\u0301", "\\u0144"=>"n\\u0301", "\\u0145"=>"N\\u0327", "\\u0146"=>"n\\u0327",\n"\\u0147"=>"N\\u030C", "\\u0148"=>"n\\u030C", "\\u014C"=>"O\\u0304", "\\u014D"=>"o\\u0304", "\\u014E"=>"O\\u0306", "\\u014F"=>"o\\u0306", "\\u0150"=>"O\\u030B", "\\u0151"=>"o\\u030B",\n"\\u0154"=>"R\\u0301", "\\u0155"=>"r\\u0301", "\\u0156"=>"R\\u0327", "\\u0157"=>"r\\u0327", "\\u0158"=>"R\\u030C", "\\u0159"=>"r\\u030C", "\\u015A"=>"S\\u0301", "\\u015B"=>"s\\u0301",\n"\\u015C"=>"S\\u0302", "\\u015D"=>"s\\u0302", "\\u015E"=>"S\\u0327", "\\u015F"=>"s\\u0327", "\\u0160"=>"S\\u030C", "\\u0161"=>"s\\u030C", "\\u0162"=>"T\\u0327", "\\u0163"=>"t\\u0327",\n"\\u0164"=>"T\\u030C", "\\u0165"=>"t\\u030C", "\\u0168"=>"U\\u0303", "\\u0169"=>"u\\u0303", "\\u016A"=>"U\\u0304", "\\u016B"=>"u\\u0304", "\\u016C"=>"U\\u0306", "\\u016D"=>"u\\u0306",\n"\\u016E"=>"U\\u030A", "\\u016F"=>"u\\u030A", "\\u0170"=>"U\\u030B", "\\u0171"=>"u\\u030B", "\\u0172"=>"U\\u0328", "\\u0173"=>"u\\u0328", "\\u0174"=>"W\\u0302", "\\u0175"=>"w\\u0302",\n"\\u0176"=>"Y\\u0302", "\\u0177"=>"y\\u0302", "\\u0178"=>"Y\\u0308", "\\u0179"=>"Z\\u0301", "\\u017A"=>"z\\u0301", "\\u017B"=>"Z\\u0307", "\\u017C"=>"z\\u0307", "\\u017D"=>"Z\\u030C",\n"\\u017E"=>"z\\u030C", "\\u01A0"=>"O\\u031B", "\\u01A1"=>"o\\u031B", "\\u01AF"=>"U\\u031B", "\\u01B0"=>"u\\u031B", "\\u01CD"=>"A\\u030C", "\\u01CE"=>"a\\u030C", "\\u01CF"=>"I\\u030C",\n"\\u01D0"=>"i\\u030C", "\\u01D1"=>"O\\u030C", "\\u01D2"=>"o\\u030C", 
"\\u01D3"=>"U\\u030C", "\\u01D4"=>"u\\u030C", "\\u01D5"=>"U\\u0308\\u0304", "\\u01D6"=>"u\\u0308\\u0304", "\\u01D7"=>"U\\u0308\\u0301",\n"\\u01D8"=>"u\\u0308\\u0301", "\\u01D9"=>"U\\u0308\\u030C", "\\u01DA"=>"u\\u0308\\u030C", "\\u01DB"=>"U\\u0308\\u0300", "\\u01DC"=>"u\\u0308\\u0300", "\\u01DE"=>"A\\u0308\\u0304", "\\u01DF"=>"a\\u0308\\u0304", "\\u01E0"=>"A\\u0307\\u0304",\n"\\u01E1"=>"a\\u0307\\u0304", "\\u01E2"=>"\\u00C6\\u0304", "\\u01E3"=>"\\u00E6\\u0304", "\\u01E6"=>"G\\u030C", "\\u01E7"=>"g\\u030C", "\\u01E8"=>"K\\u030C", "\\u01E9"=>"k\\u030C", "\\u01EA"=>"O\\u0328",\n"\\u01EB"=>"o\\u0328", "\\u01EC"=>"O\\u0328\\u0304", "\\u01ED"=>"o\\u0328\\u0304", "\\u01EE"=>"\\u01B7\\u030C", "\\u01EF"=>"\\u0292\\u030C", "\\u01F0"=>"j\\u030C", "\\u01F4"=>"G\\u0301", "\\u01F5"=>"g\\u0301",\n"\\u01F8"=>"N\\u0300", "\\u01F9"=>"n\\u0300", "\\u01FA"=>"A\\u030A\\u0301", "\\u01FB"=>"a\\u030A\\u0301", "\\u01FC"=>"\\u00C6\\u0301", "\\u01FD"=>"\\u00E6\\u0301", "\\u01FE"=>"\\u00D8\\u0301", "\\u01FF"=>"\\u00F8\\u0301",\n"\\u0200"=>"A\\u030F", "\\u0201"=>"a\\u030F", "\\u0202"=>"A\\u0311", "\\u0203"=>"a\\u0311", "\\u0204"=>"E\\u030F", "\\u0205"=>"e\\u030F", "\\u0206"=>"E\\u0311", "\\u0207"=>"e\\u0311",\n"\\u0208"=>"I\\u030F", "\\u0209"=>"i\\u030F", "\\u020A"=>"I\\u0311", "\\u020B"=>"i\\u0311", "\\u020C"=>"O\\u030F", "\\u020D"=>"o\\u030F", "\\u020E"=>"O\\u0311", "\\u020F"=>"o\\u0311",\n"\\u0210"=>"R\\u030F", "\\u0211"=>"r\\u030F", "\\u0212"=>"R\\u0311", "\\u0213"=>"r\\u0311", "\\u0214"=>"U\\u030F", "\\u0215"=>"u\\u030F", "\\u0216"=>"U\\u0311", "\\u0217"=>"

KOMPATIBLE_TABLE = {\n"\\u00A0"=>" ", "\\u00A8"=>" \\u0308", "\\u00AA"=>"a", "\\u00AF"=>" \\u0304", "\\u00B2"=>"2", "\\u00B3"=>"3", "\\u00B4"=>" \\u0301", "\\u00B5"=>"\\u03BC",\n"\\u00B8"=>" \\u0327", "\\u00B9"=>"1", "\\u00BA"=>"o", "\\u00BC"=>"1\\u20444", "\\u00BD"=>"1\\u20442", "\\u00BE"=>"3\\u20444", "\\u0132"=>"IJ", "\\u0133"=>"ij",\n"\\u013F"=>"L\\u00B7", "\\u0140"=>"l\\u00B7", "\\u0149"=>"\\u02BCn", "\\u017F"=>"s", "\\u01C4"=>"D\\u017D", "\\u01C5"=>"D\\u017E", "\\u01C6"=>"d\\u017E", "\\u01C7"=>"LJ",\n"\\u01C8"=>"Lj", "\\u01C9"=>"lj", "\\u01CA"=>"NJ", "\\u01CB"=>"Nj", "\\u01CC"=>"nj", "\\u01F1"=>"DZ", "\\u01F2"=>"Dz", "\\u01F3"=>"dz",\n"\\u02B0"=>"h", "\\u02B1"=>"\\u0266", "\\u02B2"=>"j", "\\u02B3"=>"r", "\\u02B4"=>"\\u0279", "\\u02B5"=>"\\u027B", "\\u02B6"=>"\\u0281", "\\u02B7"=>"w",\n"\\u02B8"=>"y", "\\u02D8"=>" \\u0306", "\\u02D9"=>" \\u0307", "\\u02DA"=>" \\u030A", "\\u02DB"=>" \\u0328", "\\u02DC"=>" \\u0303", "\\u02DD"=>" \\u030B", "\\u02E0"=>"\\u0263",\n"\\u02E1"=>"l", "\\u02E2"=>"s", "\\u02E3"=>"x", "\\u02E4"=>"\\u0295", "\\u037A"=>" \\u0345", "\\u0384"=>" \\u0301", "\\u03D0"=>"\\u03B2", "\\u03D1"=>"\\u03B8",\n"\\u03D2"=>"\\u03A5", "\\u03D5"=>"\\u03C6", "\\u03D6"=>"\\u03C0", "\\u03F0"=>"\\u03BA", "\\u03F1"=>"\\u03C1", "\\u03F2"=>"\\u03C2", "\\u03F4"=>"\\u0398", "\\u03F5"=>"\\u03B5",\n"\\u03F9"=>"\\u03A3", "\\u0587"=>"\\u0565\\u0582", "\\u0675"=>"\\u0627\\u0674", "\\u0676"=>"\\u0648\\u0674", "\\u0677"=>"\\u06C7\\u0674", "\\u0678"=>"\\u064A\\u0674", "\\u0E33"=>"\\u0E4D\\u0E32", "\\u0EB3"=>"\\u0ECD\\u0EB2",\n"\\u0EDC"=>"\\u0EAB\\u0E99", "\\u0EDD"=>"\\u0EAB\\u0EA1", "\\u0F0C"=>"\\u0F0B", "\\u0F77"=>"\\u0FB2\\u0F81", "\\u0F79"=>"\\u0FB3\\u0F81", "\\u10FC"=>"\\u10DC", "\\u1D2C"=>"A", "\\u1D2D"=>"\\u00C6",\n"\\u1D2E"=>"B", "\\u1D30"=>"D", "\\u1D31"=>"E", "\\u1D32"=>"\\u018E", "\\u1D33"=>"G", "\\u1D34"=>"H", "\\u1D35"=>"I", "\\u1D36"=>"J",\n"\\u1D37"=>"K", "\\u1D38"=>"L", "\\u1D39"=>"M", "\\u1D3A"=>"N", "\\u1D3C"=>"O", "\\u1D3D"=>"\\u0222", "\\u1D3E"=>"P", 
"\\u1D3F"=>"R",\n"\\u1D40"=>"T", "\\u1D41"=>"U", "\\u1D42"=>"W", "\\u1D43"=>"a", "\\u1D44"=>"\\u0250", "\\u1D45"=>"\\u0251", "\\u1D46"=>"\\u1D02", "\\u1D47"=>"b",\n"\\u1D48"=>"d", "\\u1D49"=>"e", "\\u1D4A"=>"\\u0259", "\\u1D4B"=>"\\u025B", "\\u1D4C"=>"\\u025C", "\\u1D4D"=>"g", "\\u1D4F"=>"k", "\\u1D50"=>"m",\n"\\u1D51"=>"\\u014B", "\\u1D52"=>"o", "\\u1D53"=>"\\u0254", "\\u1D54"=>"\\u1D16", "\\u1D55"=>"\\u1D17", "\\u1D56"=>"p", "\\u1D57"=>"t", "\\u1D58"=>"u",\n"\\u1D59"=>"\\u1D1D", "\\u1D5A"=>"\\u026F", "\\u1D5B"=>"v", "\\u1D5C"=>"\\u1D25", "\\u1D5D"=>"\\u03B2", "\\u1D5E"=>"\\u03B3", "\\u1D5F"=>"\\u03B4", "\\u1D60"=>"\\u03C6",\n"\\u1D61"=>"\\u03C7", "\\u1D62"=>"i", "\\u1D63"=>"r", "\\u1D64"=>"u", "\\u1D65"=>"v", "\\u1D66"=>"\\u03B2", "\\u1D67"=>"\\u03B3", "\\u1D68"=>"\\u03C1",\n"\\u1D69"=>"\\u03C6", "\\u1D6A"=>"\\u03C7", "\\u1D78"=>"\\u043D", "\\u1D9B"=>"\\u0252", "\\u1D9C"=>"c", "\\u1D9D"=>"\\u0255", "\\u1D9E"=>"\\u00F0", "\\u1D9F"=>"\\u025C",\n"\\u1DA0"=>"f", "\\u1DA1"=>"\\u025F", "\\u1DA2"=>"\\u0261", "\\u1DA3"=>"\\u0265", "\\u1DA4"=>"\\u0268", "\\u1DA5"=>"\\u0269", "\\u1DA6"=>"\\u026A", "\\u1DA7"=>"\\u1D7B",\n"\\u1DA8"=>"\\u029D", "\\u1DA9"=>"\\u026D", "\\u1DAA"=>"\\u1D85", "\\u1DAB"=>"\\u029F", "\\u1DAC"=>"\\u0271", "\\u1DAD"=>"\\u0270", "\\u1DAE"=>"\\u0272", "\\u1DAF"=>"\\u0273",\n"\\u1DB0"=>"\\u0274", "\\u1DB1"=>"\\u0275", "\\u1DB2"=>"\\u0278", "\\u1DB3"=>"\\u0282", "\\u1DB4"=>"\\u0283", "\\u1DB5"=>"\\u01AB", "\\u1DB6"=>"\\u0289", "\\u1DB7"=>"\\u028A",\n"\\u1DB8"=>"\\u1D1C", "\\u1DB9"=>"\\u028B", "\\u1DBA"=>"\\u028C", "\\u1DBB"=>"z", "\\u1DBC"=>"\\u0290", "\\u1DBD"=>"\\u0291", "\\u1DBE"=>"\\u0292", "\\u1DBF"=>"\\u03B8",\n"\\u1E9A"=>"a\\u02BE", "\\u1FBD"=>" \\u0313", "\\u1FBF"=>" \\u0313", "\\u1FC0"=>" \\u0342", "\\u1FFE"=>" \\u0314", "\\u2002"=>" ", "\\u2003"=>" ", "\\u2004"=>" ",\n"\\u2005"=>" ", "\\u2006"=>" ", "\\u2007"=>" ", "\\u2008"=>" ", "\\u2009"=>" ", "\\u200A"=>" ", "\\u2011"=>"\\u2010", "\\u2017"=>" \\u0333",\n"\\u2024"=>".", "\\u2025"=>"..", 
"\\u2026"=>"...", "\\u202F"=>" ", "\\u2033"=>"\\u2032\\u2032", "\\u2034"=>"\\u2032\\u2032\\u2032", "\\u2036"=>"\\u2035\\u2035", "\\u2037"=>"\\u2035\\u2035\\u2035",\n"\\u203C"=>"!!", "\\u203E"=>" \\u0305", "\\u2047"=>"??", "\\u2048"=>"?!", "\\u2049"=>"!?", "\\u2057"=>"\\u2032\\u2032\\u2032\\u2032", "\\u205F"=>" ", "\\u2070"=>"0",\n"\\u2071"=>"i", "\\u2074"=>"4", "\\u2075"=>"5", "\\u2076"=>"6", "\\u2077"=>"7", "\\u2078"=>"8", "\\u2079"=>"9", "\\u207A"=>"+",\n"\\u207B"=>"\\u2212", "\\u207C"=>"=", "\\u207D"=>"(", "\\u207E"=>")", "\\u207F"=>"n", "\\u2080"=>"0", "\\u2081"=>"1", "\\u2082"=>"2",\n"\\u2083"=>"3", "\\u2084"=>"4", "\\u2085"=>"5", "\\u2086"=>"6", "\\u2087"=>"7", "\\u2088"=>"8", "\\u2089"=>"9", "\\u208A"=>"+",\n"\\u208B"=>"\\u2212", "\\u208C"=>"=", "\\u208D"=>"(", "\\u208E"=>")", "\\u2090"=>"a", "\\u2091"=>"e", "\\u2092"=>"o", "\\u2093"=>"x",\n"\\u2094"=>"\\u0259", "\\u2095"=>"h", "\\u2096"=>"k", "\\u2097"=>"l", "\\u2098"=>"m", "\\u2099"=>"n", "\\u209A"=>"p", "\\u209B"=>"s",\n"\\u209C"=>"t", "\\u20A8"=>"Rs", "\\u2100"=>"a/c", "\\u2101"=>"a/s", "\\u2102"=>"C", "\\u2103"=>"\\u00B0C", "\\u2105"=>"c/o", "\\u2106"=>"c/u",\n"\\u2107"=>"\\u0190", "\\u2109"=>"\\u00B0F", "\\u210A"=>"g", "\\u210B"=>"H", "\\u210C"=>"H", "\\u210D"=>"H", "\\u210E"=>"h", "\\u210F"=>"\\u0127",\n"\\u2110"=>"I", "\\u2111"=>"I", "\\u2112"=>"L", "\\u2113"=>"l", "\\u2115"=>"N", "\\u2116"=>"No", "\\u2119"=>"P", "\\u211A"=>"Q",\n"\\u211B"=>"R", "\\u211C"=>"R", "\\u211D"=>"R", "\\u2120"=>"SM", "\\u2121"=>"

LBASE = 0x1100

LCOUNT = 19

MAX_HASH_LENGTH = 18000

NCOUNT = VCOUNT * TCOUNT

NF_HASH_C = Hash.new do |hash, key|
  hash.shift if hash.length>MAX_HASH_LENGTH # prevent DoS attack
  hash[key] = nfc_one(key)
end

NF_HASH_D = Hash.new do |hash, key|
  hash.shift if hash.length>MAX_HASH_LENGTH # prevent DoS attack
  hash[key] = nfd_one(key)
end

REGEXP_C = Regexp.compile(REGEXP_C_STRING, Regexp::EXTENDED)

REGEXP_C_STRING = "#{'' # composition exclusions\n}" \\\n"[\\u0340\\u0341\\u0343\\u0344\\u0374\\u037E\\u0387\\u0958-\\u095F\\u09DC\\u09DD\\u09DF" \\\n"\\u0A33\\u0A36\\u0A59-\\u0A5B\\u0A5E\\u0B5C\\u0B5D\\u0F43\\u0F4D\\u0F52" \\\n"\\u0F57\\u0F5C\\u0F69\\u0F73\\u0F75\\u0F76\\u0F78\\u0F81\\u0F93" \\\n"\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9\\u1F71\\u1F73\\u1F75" \\\n"\\u1F77\\u1F79\\u1F7B\\u1F7D\\u1FBB\\u1FBE\\u1FC9\\u1FCB" \\\n"\\u1FD3\\u1FDB\\u1FE3\\u1FEB\\u1FEE\\u1FEF\\u1FF9\\u1FFB\\u1FFD" \\\n"\\u2000\\u2001\\u2126\\u212A\\u212B\\u2329\\u232A\\u2ADC\\uF900-\\uFA0D\\uFA10\\uFA12" \\\n"\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA6D\\uFA70-\\uFAD9\\uFB1D\\uFB1F" \\\n"\\uFB2A-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFB4E\\u{1D15E}-\\u{1D164}\\u{1D1BB}-\\u{1D1C0}" \\\n"\\u{2F800}-\\u{2FA1D}" \\\n"]#{accents}*" \\\n"|#{'' # composition starters and characters that can be the result of a composition\n}" \\\n"[<->A-PR-Za-pr-z\\u00A8\\u00C0-\\u00CF\\u00D1-\\u00D6" \\\n"\\u00D8-\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137" \\\n"\\u0139-\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165\\u0168-\\u017F\\u01A0\\u01A1\\u01AF\\u01B0\\u01B7" \\\n"\\u01CD-\\u01DC\\u01DE-\\u01E3\\u01E6-\\u01F0\\u01F4\\u01F5\\u01F8-\\u021B\\u021E\\u021F\\u0226-\\u0233\\u0292" \\\n"\\u0385\\u0386\\u0388-\\u038A\\u038C\\u038E-\\u0391\\u0395\\u0397\\u0399\\u039F" \\\n"\\u03A1\\u03A5\\u03A9-\\u03B1\\u03B5\\u03B7\\u03B9\\u03BF\\u03C1" \\\n"\\u03C5\\u03C9-\\u03CE\\u03D2-\\u03D4\\u0400\\u0401\\u0403\\u0406\\u0407\\u040C-\\u040E\\u0410" \\\n"\\u0413\\u0415-\\u041A\\u041E\\u0423\\u0427\\u042B\\u042D\\u0430" \\\n"\\u0433\\u0435-\\u043A\\u043E\\u0443\\u0447\\u044B\\u044D\\u0450\\u0451" \\\n"\\u0453\\u0456\\u0457\\u045C-\\u045E\\u0474-\\u0477\\u04C1\\u04C2\\u04D0-\\u04D3\\u04D6-\\u04DF\\u04E2-\\u04F5" \\\n"\\u04F8\\u04F9\\u0622-\\u0627\\u0648\\u064A\\u06C0-\\u06C2\\u06D2\\u06D3\\u06D5\\u0928\\u0929" 
\\\n"\\u0930\\u0931\\u0933\\u0934\\u09C7\\u09CB\\u09CC\\u0B47\\u0B48\\u0B4B\\u0B4C\\u0B92\\u0B94" \\\n"\\u0BC6\\u0BC7\\u0BCA-\\u0BCC\\u0C46\\u0C48\\u0CBF\\u0CC0\\u0CC6-\\u0CC8\\u0CCA\\u0CCB\\u0D46\\u0D47" \\\n"\\u0D4A-\\u0D4C\\u0DD9\\u0DDA\\u0DDC-\\u0DDE\\u1025\\u1026\\u1B05-\\u1B0E\\u1B11\\u1B12\\u1B3A-\\u1B43\\u1E00-\\u1E99" \\\n"\\u1E9B\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59" \\\n"\\u1F5B\\u1F5D\\u1F5F-\\u1F70\\u1F72\\u1F74\\u1F76\\u1F78\\u1F7A" \\\n"\\u1F7C\\u1F80-\\u1FB4\\u1FB6-\\u1FBA\\u1FBC\\u1FBF\\u1FC1-\\u1FC4\\u1FC6-\\u1FC8\\u1FCA" \\\n"\\u1FCC-\\u1FD2\\u1FD6-\\u1FDA\\u1FDD-\\u1FE2\\u1FE4-\\u1FEA\\u1FEC\\u1FED\\u1FF2-\\u1FF4\\u1FF6-\\u1FF8\\u1FFA" \\\n"\\u1FFC\\u1FFE\\u2190\\u2192\\u2194\\u219A\\u219B\\u21AE\\u21CD-\\u21D0" \\\n"\\u21D2\\u21D4\\u2203\\u2204\\u2208\\u2209\\u220B\\u220C\\u2223-\\u2226\\u223C\\u2241" \\\n"\\u2243-\\u2245\\u2247-\\u2249\\u224D\\u2260-\\u2262\\u2264\\u2265\\u226D-\\u227D\\u2280-\\u2289\\u2291\\u2292" \\\n"\\u22A2\\u22A8\\u22A9\\u22AB-\\u22AF\\u22B2-\\u22B5\\u22E0-\\u22E3\\u22EA-\\u22ED\\u3046\\u304B-\\u3062" \\\n"\\u3064-\\u3069\\u306F-\\u307D\\u3094\\u309D\\u309E\\u30A6\\u30AB-\\u30C2\\u30C4-\\u30C9\\u30CF-\\u30DD" \\\n"\\u30EF-\\u30F2\\u30F4\\u30F7-\\u30FA\\u30FD\\u30FE\\u{11099}-\\u{1109C}\\u{110A5}\\u{110AB}\\u{1112E}\\u{1112F}" \\\n"\\u{11131}\\u{11132}\\u{11347}\\u{1134B}\\u{1134C}\\u{114B9}\\u{114BB}\\u{114BC}\\u{114BE}\\u{115B8}-\\u{115BB}" \\\n"]?#{accents}+" \\\n"|#{'' # Hangul syllables with separate trailer\n}" \\\n"[\\uAC00\\uAC1C\\uAC38\\uAC54\\uAC70\\uAC8C\\uACA8\\uACC4" \\\n"\\uACE0\\uACFC\\uAD18\\uAD34\\uAD50\\uAD6C\\uAD88\\uADA4" \\\n"\\uADC0\\uADDC\\uADF8\\uAE14\\uAE30\\uAE4C\\uAE68\\uAE84" \\\n"\\uAEA0\\uAEBC\\uAED8\\uAEF4\\uAF10\\uAF2C\\uAF48\\uAF64" \\\n"\\uAF80\\uAF9C\\uAFB8\\uAFD4\\uAFF0\\uB00C\\uB028\\uB044" \\\n"\\uB060\\uB07C\\uB098\\uB0B4\\uB0D0\\uB0EC\\uB108\\uB124" \\\n"\\uB140\\uB15C\\uB178\\uB194\\uB1B0\\uB1CC\\uB1E8\\uB204" 
\\\n"\\uB220\\uB23C\\uB258\\uB274\\uB290\\uB2AC\\uB2C8\\uB2E4" \\\n"\\uB300\\uB31C\\uB338\\uB354\\uB370\\uB38C\\uB3A8\\uB3C4" \\\n"\\uB3E0\\uB3FC\\uB418\\uB434\\uB450\\uB46C\\uB488\\uB4A4" \\\n"\\uB4C0\\uB4DC\\uB4F8\\uB514\\uB530\\uB54C\\uB568\\uB584" \\\n"\\uB5A0\\uB5BC\\uB5D8\\uB5F4\\uB610\\uB62C\\uB648\\uB664" \\\n"\\uB680\\uB69C\\uB6B8\\uB6D4\\uB6F0\\uB70C\\uB728\\uB744" \\\n"\\uB760\\uB77C\\uB798\\uB7B4\\uB7D0\\uB7EC\\uB808\\uB824" \\\n"\\uB840\\uB85C\\uB878\\uB894\\uB8B0\\uB8CC\\uB8E8\\uB904" \\\n"\\uB920\\uB93C\\uB958\\uB974\\uB990\\uB9AC\\uB9C8\\uB9E4" \\\n"\\uBA00\\uBA1C\\uBA38\\uBA54\\uBA70\\uBA8C\\uBAA8\\uBAC4" \\\n"\\uBAE0\\uBAFC\\uBB18\\uBB34\\uBB50\\uBB6C\\uBB88\\uBBA4" \\\n"\\uBBC0\\uBBDC\\uBBF8\\uBC14\\uBC30\\uBC4C\\uBC68\\uBC84" \\\n"\\uBCA0\\uBCBC\\uBCD8\\uBCF4\\uBD10\\uBD2C\\uBD48\\uBD64" \\\n"\\uBD80\\uBD9C\\uBDB8\\uBDD4\\uBDF0\\uBE0C\\uBE28\\uBE44" \\\n"\\uBE60\\uBE7C\\uBE98\\uBEB4\\uBED0\\uBEEC\\uBF08\\uBF24" \\\n"\\uBF40\\uBF5C\\uBF78\\uBF94\\uBFB0\\uBFCC\\uBFE8\\uC004" \\\n"\\uC020\\uC03C\\uC058\\uC074\\uC090\\uC0AC\\uC0C8\\uC0E4" \\\n"\\uC100\\uC11C\\uC138\\uC154\\uC170\\uC18C\\uC1A8\\uC1C4" \\\n"\\uC1E0\\uC1FC\\uC218\\uC234\\uC250\\uC26C\\uC288\\uC2A4" \\\n"\\uC2C0\\uC2DC\\uC2F8\\uC314\\uC330\\uC34C\\uC368\\uC384" \\\n"\\uC3A0\\uC3BC\\uC3D8\\uC3F4\\uC410\\uC42C\\uC448\\uC464" \\\n"\\uC480\\uC49C\\uC4B8\\uC4D4\\uC4F0\\uC50C\\uC528\\uC544" \\\n"\\uC560\\uC57C\\uC598\\uC5B4\\uC5D0\\uC5EC\\uC608\\uC624" \\\n"\\uC640\\uC65C\\uC678\\uC694\\uC6B0\\uC6CC\\uC6E8\\uC704" \\\n"\\uC720\\uC73C\\uC758\\uC774\\uC790\\uC7AC\\uC7C8\\uC7E4" \\\n"\\uC800\\uC81C\\uC838\\uC854\\uC870\\uC88C\\uC8A8\\uC8C4" \\\n"\\uC8E0\\uC8FC\\uC918\\uC934\\uC950\\uC96C\\uC988\\uC9A4" \\\n"\\uC9C0\\uC9DC\\uC9F8\\uCA14\\uCA30\\uCA4C\\uCA68\\uCA84" \\\n"\\uCAA0\\uCABC\\uCAD8\\uCAF4\\uCB10\\uCB2C\\uCB48\\uCB64" \\\n"\\uCB80\\uCB9C\\uCBB8\\uCBD4\\uCBF0\\uCC0C\\uCC28\\uCC44" \\\n"\\uCC60\\

REGEXP_D = Regexp.compile(REGEXP_D_STRING, Regexp::EXTENDED)

REGEXP_D_STRING = "#{'' # composition starters and composition exclusions\n}" \\\n"[\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6\\u00D9-\\u00DD\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD" \\\n"\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137\\u0139-\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165" \\\n"\\u0168-\\u017E\\u01A0\\u01A1\\u01AF\\u01B0\\u01CD-\\u01DC\\u01DE-\\u01E3\\u01E6-\\u01F0\\u01F4\\u01F5\\u01F8-\\u021B" \\\n"\\u021E\\u021F\\u0226-\\u0233\\u0340\\u0341\\u0343\\u0344\\u0374\\u037E\\u0385-\\u038A\\u038C" \\\n"\\u038E-\\u0390\\u03AA-\\u03B0\\u03CA-\\u03CE\\u03D3\\u03D4\\u0400\\u0401\\u0403\\u0407\\u040C-\\u040E" \\\n"\\u0419\\u0439\\u0450\\u0451\\u0453\\u0457\\u045C-\\u045E\\u0476\\u0477\\u04C1\\u04C2" \\\n"\\u04D0-\\u04D3\\u04D6\\u04D7\\u04DA-\\u04DF\\u04E2-\\u04E7\\u04EA-\\u04F5\\u04F8\\u04F9\\u0622-\\u0626\\u06C0" \\\n"\\u06C2\\u06D3\\u0929\\u0931\\u0934\\u0958-\\u095F\\u09CB\\u09CC\\u09DC\\u09DD" \\\n"\\u09DF\\u0A33\\u0A36\\u0A59-\\u0A5B\\u0A5E\\u0B48\\u0B4B\\u0B4C\\u0B5C\\u0B5D" \\\n"\\u0B94\\u0BCA-\\u0BCC\\u0C48\\u0CC0\\u0CC7\\u0CC8\\u0CCA\\u0CCB\\u0D4A-\\u0D4C\\u0DDA" \\\n"\\u0DDC-\\u0DDE\\u0F43\\u0F4D\\u0F52\\u0F57\\u0F5C\\u0F69\\u0F73" \\\n"\\u0F75\\u0F76\\u0F78\\u0F81\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC" \\\n"\\u0FB9\\u1026\\u1B06\\u1B08\\u1B0A\\u1B0C\\u1B0E\\u1B12" \\\n"\\u1B3B\\u1B3D\\u1B40\\u1B41\\u1B43\\u1E00-\\u1E99\\u1E9B\\u1EA0-\\u1EF9\\u1F00-\\u1F15" \\\n"\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D" \\\n"\\u1F80-\\u1FB4\\u1FB6-\\u1FBC\\u1FBE\\u1FC1-\\u1FC4\\u1FC6-\\u1FD3\\u1FD6-\\u1FDB\\u1FDD-\\u1FEF\\u1FF2-\\u1FF4" \\\n"\\u1FF6-\\u1FFD\\u2000\\u2001\\u2126\\u212A\\u212B\\u219A\\u219B\\u21AE\\u21CD-\\u21CF\\u2204" \\\n"\\u2209\\u220C\\u2224\\u2226\\u2241\\u2244\\u2247\\u2249" \\\n"\\u2260\\u2262\\u226D-\\u2271\\u2274\\u2275\\u2278\\u2279\\u2280\\u2281\\u2284\\u2285\\u2288\\u2289" 
\\\n"\\u22AC-\\u22AF\\u22E0-\\u22E3\\u22EA-\\u22ED\\u2329\\u232A\\u2ADC\\u304C\\u304E\\u3050" \\\n"\\u3052\\u3054\\u3056\\u3058\\u305A\\u305C\\u305E\\u3060" \\\n"\\u3062\\u3065\\u3067\\u3069\\u3070\\u3071\\u3073\\u3074\\u3076\\u3077\\u3079\\u307A" \\\n"\\u307C\\u307D\\u3094\\u309E\\u30AC\\u30AE\\u30B0\\u30B2\\u30B4" \\\n"\\u30B6\\u30B8\\u30BA\\u30BC\\u30BE\\u30C0\\u30C2\\u30C5" \\\n"\\u30C7\\u30C9\\u30D0\\u30D1\\u30D3\\u30D4\\u30D6\\u30D7\\u30D9\\u30DA\\u30DC\\u30DD\\u30F4" \\\n"\\u30F7-\\u30FA\\u30FE\\uF900-\\uFA0D\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20\\uFA22" \\\n"\\uFA25\\uFA26\\uFA2A-\\uFA6D\\uFA70-\\uFAD9\\uFB1D\\uFB1F\\uFB2A-\\uFB36\\uFB38-\\uFB3C\\uFB3E" \\\n"\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFB4E\\u{1109A}\\u{1109C}\\u{110AB}\\u{1112E}\\u{1112F}\\u{1134B}\\u{1134C}" \\\n"\\u{114BB}\\u{114BC}\\u{114BE}\\u{115BA}\\u{115BB}\\u{1D15E}-\\u{1D164}\\u{1D1BB}-\\u{1D1C0}\\u{2F800}-\\u{2FA1D}" \\\n"]#{accents}*" \\\n"|#{'' # characters that can be the result of a composition, except composition starters\n}" \\\n"[<->A-PR-Za-pr-z\\u00A8\\u00C6\\u00D8" \\\n"\\u00E6\\u00F8\\u017F\\u01B7\\u0292\\u0391\\u0395\\u0397" \\\n"\\u0399\\u039F\\u03A1\\u03A5\\u03A9\\u03B1\\u03B5\\u03B7" \\\n"\\u03B9\\u03BF\\u03C1\\u03C5\\u03C9\\u03D2\\u0406\\u0410" \\\n"\\u0413\\u0415-\\u0418\\u041A\\u041E\\u0423\\u0427\\u042B\\u042D" \\\n"\\u0430\\u0433\\u0435-\\u0438\\u043A\\u043E\\u0443\\u0447\\u044B" \\\n"\\u044D\\u0456\\u0474\\u0475\\u04D8\\u04D9\\u04E8\\u04E9\\u0627\\u0648\\u064A" \\\n"\\u06C1\\u06D2\\u06D5\\u0928\\u0930\\u0933\\u09C7\\u0B47" \\\n"\\u0B92\\u0BC6\\u0BC7\\u0C46\\u0CBF\\u0CC6\\u0D46\\u0D47\\u0DD9\\u1025" \\\n"\\u1B05\\u1B07\\u1B09\\u1B0B\\u1B0D\\u1B11\\u1B3A\\u1B3C" \\\n"\\u1B3E\\u1B3F\\u1B42\\u1FBF\\u1FFE\\u2190\\u2192\\u2194\\u21D0" \\\n"\\u21D2\\u21D4\\u2203\\u2208\\u220B\\u2223\\u2225\\u223C" \\\n"\\u2243\\u2245\\u2248\\u224D\\u2261\\u2264\\u2265\\u2272\\u2273\\u2276\\u2277" 
\\\n"\\u227A-\\u227D\\u2282\\u2283\\u2286\\u2287\\u2291\\u2292\\u22A2\\u22A8\\u22A9\\u22AB\\u22B2-\\u22B5" \\\n"\\u3046\\u304B\\u304D\\u304F\\u3051\\u3053\\u3055\\u3057" \\\n"\\u3059\\u305B\\u305D\\u305F\\u3061\\u3064\\u3066\\u3068" \\\n"\\u306F\\u3072\\u3075\\u3078\\u307B\\u309D\\u30A6\\u30AB" \\\n"\\u30AD\\u30AF\\u30B1\\u30B3\\u30B5\\u30B7\\u30B9\\u30BB" \\\n"\\u30BD\\u30BF\\u30C1\\u30C4\\u30C6\\u30C8\\u30CF\\u30D2" \\\n"\\u30D5\\u30D8\\u30DB\\u30EF-\\u30F2\\u30FD\\u{11099}\\u{1109B}\\u{110A5}" \\\n"\\u{11131}\\u{11132}\\u{11347}\\u{114B9}\\u{115B8}\\u{115B9}" \\\n"]?#{accents}+" \\\n"|#{'' # precomposed Hangul syllables\n}" \\\n"[\\u{AC00}-\\u{D7A4}]"

REGEXP_K = Regexp.compile(REGEXP_K_STRING, Regexp::EXTENDED)

REGEXP_K_STRING = "" \\\n"[\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5\\u00B8-\\u00BA\\u00BC-\\u00BE\\u0132\\u0133" \\\n"\\u013F\\u0140\\u0149\\u017F\\u01C4-\\u01CC\\u01F1-\\u01F3\\u02B0-\\u02B8\\u02D8-\\u02DD\\u02E0-\\u02E4" \\\n"\\u037A\\u0384\\u0385\\u03D0-\\u03D6\\u03F0-\\u03F2\\u03F4\\u03F5\\u03F9\\u0587\\u0675-\\u0678" \\\n"\\u0E33\\u0EB3\\u0EDC\\u0EDD\\u0F0C\\u0F77\\u0F79\\u10FC\\u1D2C-\\u1D2E" \\\n"\\u1D30-\\u1D3A\\u1D3C-\\u1D4D\\u1D4F-\\u1D6A\\u1D78\\u1D9B-\\u1DBF\\u1E9A\\u1E9B\\u1FBD\\u1FBF-\\u1FC1" \\\n"\\u1FCD-\\u1FCF\\u1FDD-\\u1FDF\\u1FED\\u1FEE\\u1FFD\\u1FFE\\u2000-\\u200A\\u2011\\u2017\\u2024-\\u2026" \\\n"\\u202F\\u2033\\u2034\\u2036\\u2037\\u203C\\u203E\\u2047-\\u2049\\u2057\\u205F" \\\n"\\u2070\\u2071\\u2074-\\u208E\\u2090-\\u209C\\u20A8\\u2100-\\u2103\\u2105-\\u2107\\u2109-\\u2113\\u2115\\u2116" \\\n"\\u2119-\\u211D\\u2120-\\u2122\\u2124\\u2128\\u212C\\u212D\\u212F-\\u2131\\u2133-\\u2139\\u213B-\\u2140" \\\n"\\u2145-\\u2149\\u2150-\\u217F\\u2189\\u222C\\u222D\\u222F\\u2230\\u2460-\\u24EA\\u2A0C\\u2A74-\\u2A76" \\\n"\\u2C7C\\u2C7D\\u2D6F\\u2E9F\\u2EF3\\u2F00-\\u2FD5\\u3000\\u3036\\u3038-\\u303A" \\\n"\\u309B\\u309C\\u309F\\u30FF\\u3131-\\u318E\\u3192-\\u319F\\u3200-\\u321E\\u3220-\\u3247\\u3250-\\u327E" \\\n"\\u3280-\\u32FE\\u3300-\\u33FF\\uA69C\\uA69D\\uA770\\uA7F8\\uA7F9\\uAB5C-\\uAB5F\\uFB00-\\uFB06\\uFB13-\\uFB17" \\\n"\\uFB20-\\uFB29\\uFB4F-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFC\\uFE10-\\uFE19\\uFE30-\\uFE44" \\\n"\\uFE47-\\uFE52\\uFE54-\\uFE66\\uFE68-\\uFE6B\\uFE70-\\uFE72\\uFE74\\uFE76-\\uFEFC\\uFF01-\\uFFBE\\uFFC2-\\uFFC7" \\\n"\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7\\uFFDA-\\uFFDC\\uFFE0-\\uFFE6\\uFFE8-\\uFFEE\\u{1D400}-\\u{1D454}\\u{1D456}-\\u{1D49C}\\u{1D49E}\\u{1D49F}" \\\n"\\u{1D4A2}\\u{1D4A5}\\u{1D4A6}\\u{1D4A9}-\\u{1D4AC}\\u{1D4AE}-\\u{1D4B9}\\u{1D4BB}\\u{1D4BD}-\\u{1D4C3}\\u{1D4C5}-\\u{1D505}\\u{1D507}-\\u{1D50A}" 
\\\n"\\u{1D50D}-\\u{1D514}\\u{1D516}-\\u{1D51C}\\u{1D51E}-\\u{1D539}\\u{1D53B}-\\u{1D53E}\\u{1D540}-\\u{1D544}\\u{1D546}\\u{1D54A}-\\u{1D550}\\u{1D552}-\\u{1D6A5}" \\\n"\\u{1D6A8}-\\u{1D7CB}\\u{1D7CE}-\\u{1D7FF}\\u{1EE00}-\\u{1EE03}\\u{1EE05}-\\u{1EE1F}\\u{1EE21}\\u{1EE22}\\u{1EE24}\\u{1EE27}\\u{1EE29}-\\u{1EE32}" \\\n"\\u{1EE34}-\\u{1EE37}\\u{1EE39}\\u{1EE3B}\\u{1EE42}\\u{1EE47}\\u{1EE49}\\u{1EE4B}\\u{1EE4D}-\\u{1EE4F}" \\\n"\\u{1EE51}\\u{1EE52}\\u{1EE54}\\u{1EE57}\\u{1EE59}\\u{1EE5B}\\u{1EE5D}\\u{1EE5F}\\u{1EE61}\\u{1EE62}" \\\n"\\u{1EE64}\\u{1EE67}-\\u{1EE6A}\\u{1EE6C}-\\u{1EE72}\\u{1EE74}-\\u{1EE77}\\u{1EE79}-\\u{1EE7C}\\u{1EE7E}\\u{1EE80}-\\u{1EE89}\\u{1EE8B}-\\u{1EE9B}" \\\n"\\u{1EEA1}-\\u{1EEA3}\\u{1EEA5}-\\u{1EEA9}\\u{1EEAB}-\\u{1EEBB}\\u{1F100}-\\u{1F10A}\\u{1F110}-\\u{1F12E}\\u{1F130}-\\u{1F14F}\\u{1F16A}\\u{1F16B}\\u{1F190}" \\\n"\\u{1F200}-\\u{1F202}\\u{1F210}-\\u{1F23B}\\u{1F240}-\\u{1F248}\\u{1F250}\\u{1F251}" \\\n"]"

SBASE = 0xAC00

SCOUNT = LCOUNT * NCOUNT

TBASE = 0x11A7

TCOUNT = 28

UNICODE_ENCODINGS = [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE,
                     Encoding::GB18030, Encoding::UCS_2BE, Encoding::UCS_4BE]

VBASE = 0x1161

VCOUNT = 21

Files

  • lib/unicode_normalize/normalize.rb
  • lib/unicode_normalize/tables.rb
  • string.c