Files
picard/test/test_textencoding.py
Sophist 110441f91e Address comments by 96187 ...
... whose local knowledge of how characters are actually used identified
more appropriate characters to map to.

soft hyphen -> nothing (since it is invisible)
multiplication -> x rather than * (visually closer)
remove duplicate oe
2014-04-14 11:57:34 +01:00

234 lines
17 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
import os.path
import unittest
from picard import util
#from picard.util import textencoding
# Set the value to true below to show the coverage of Latin characters
show_latin2ascii_coverage = False
compatibility_from = (
u"\u0132\u0133\u017F\u01C7\u01C8\u01C9\u01CA\u01CB\u01CC\u01F1" # IJijſLJLjljNJNjnjDZ
u"\u01F2\u01F3\uFB00\uFB01\uFB02\uFB03\uFB04\uFB05\uFB06\uFF21" # DzdzfffiflffifflſtstA
u"\uFF22\uFF23\uFF24\uFF25\uFF26\uFF27\uFF28\uFF29\uFF2A\uFF2B" #
u"\uFF2C\uFF2D\uFF2E\uFF2F\uFF30\uFF31\uFF32\uFF33\uFF34\uFF35" #
u"\uFF36\uFF37\uFF38\uFF39\uFF3A\uFF41\uFF42\uFF43\uFF44\uFF45" #
u"\uFF46\uFF47\uFF48\uFF49\uFF4A\uFF4B\uFF4C\uFF4D\uFF4E\uFF4F" #
u"\uFF50\uFF51\uFF52\uFF53\uFF54\uFF55\uFF56\uFF57\uFF58\uFF59" #
u"\uFF5A\u2100\u2101\u2102\u2105\u2106\u210A\u210B\u210C\u210D" # z℀℁ℂ℅℆ℊℋℌℍ
u"\u210E\u2110\u2111\u2112\u2113\u2115\u2116\u2119\u211A\u211B" # ℎℐℑℒℓℕ№ℙℚℛ
u"\u211C\u211D\u2121\u2124\u2128\u212C\u212D\u212F\u2130\u2131" # ℜℝ℡ℤℨℬℭℯℰℱ
u"\u2133\u2134\u2139\u213B\u2145\u2146\u2147\u2148\u2149\u3371" # ℳℴℹ℻ⅅⅆⅇⅈⅉ㍱
u"\u3372\u3373\u3374\u3375\u3376\u3377\u337A\u3380\u3381\u3383" # ㍲㍳㍴㍵㍶㍷㍺㎀㎁㎃
u"\u3384\u3385\u3386\u3387\u3388\u3389\u338A\u338B\u338E\u338F" # ㎄㎅㎆㎇㎈㎉㎊㎋㎎㎏
u"\u3390\u3391\u3392\u3393\u3394\u3399\u339A\u339C\u339D\u339E" # ㎐㎑㎒㎓㎔㎙㎚㎜㎝㎞
u"\u33A9\u33AA\u33AB\u33AC\u33AD\u33B0\u33B1\u33B3\u33B4\u33B5" # ㎩㎪㎫㎬㎭㎰㎱㎳㎴㎵
u"\u33B7\u33B8\u33B9\u33BA\u33BB\u33BD\u33BE\u33BF\u33C2\u33C3" # ㎷㎸㎹㎺㎻㎽㎾㎿㏂㏃
u"\u33C4\u33C5\u33C7\u33C8\u33C9\u33CA\u33CB\u33CC\u33CD\u33CE" # ㏄㏅㏇㏈㏉㏊㏋㏌㏍㏎
u"\u33CF\u33D0\u33D1\u33D2\u33D3\u33D4\u33D5\u33D6\u33D7\u33D8" # ㏏㏐㏑㏒㏓㏔㏕㏖㏗㏘
u"\u33D9\u33DA\u33DB\u33DC\u33DD\u249C\u249D\u249E\u249F\u24A0" # ㏙㏚㏛㏜㏝⒜⒝⒞⒟⒠
u"\u24A1\u24A2\u24A3\u24A4\u24A5\u24A6\u24A7\u24A8\u24A9\u24AA" # ⒡⒢⒣⒤⒥⒦⒧⒨⒩⒪
u"\u24AB\u24AC\u24AD\u24AE\u24AF\u24B0\u24B1\u24B2\u24B3\u24B4" # ⒫⒬⒭⒮⒯⒰⒱⒲⒳⒴
u"\u24B5\u2160\u2161\u2162\u2163\u2164\u2165\u2166\u2167\u2168" # ⒵ⅠⅡⅢⅣⅤⅥⅦⅧⅨ
u"\u2169\u216A\u216B\u216C\u216D\u216E\u216F\u2170\u2171\u2172" # ⅩⅪⅫⅬⅭⅮⅯⅰⅱⅲ
u"\u2173\u2174\u2175\u2176\u2177\u2178\u2179\u217A\u217B\u217C" # ⅳⅴⅵⅶⅷⅸⅹⅺⅻⅼ
u"\u217D\u217E\u217F\u2474\u2475\u2476\u2477\u2478\u2479\u247A" # ⅽⅾⅿ⑴⑵⑶⑷⑸⑹⑺
u"\u247B\u247C\u247D\u247E\u247F\u2480\u2481\u2482\u2483\u2484" # ⑻⑼⑽⑾⑿⒀⒁⒂⒃⒄
u"\u2485\u2486\u2487\u2488\u2489\u248A\u248B\u248C\u248D\u248E" # ⒅⒆⒇⒈⒉⒊⒋⒌⒍⒎
u"\u248F\u2490\u2491\u2492\u2493\u2494\u2495\u2496\u2497\u2498" # ⒏⒐⒑⒒⒓⒔⒕⒖⒗⒘
u"\u2499\u249A\u249B\uFF10\uFF11\uFF12\uFF13\uFF14\uFF15\uFF16" # ⒙⒚⒛0123456
u"\uFF17\uFF18\uFF19\u2002\u2003\u2004\u2005\u2006\u2007\u2008" # \u2002\u2003\u2004\u2005\u2006\u2007\u2008
u"\u2009\u200A\u205F\uFF02\uFF07\uFE63\uFF0D\u2024\u2025\u2026" # \u2009\u200A\u205F‥…
u"\u203C\u2047\u2048\u2049\uFE10\uFE13\uFE14\uFE15\uFE16\uFE19" # ‼⁇⁈⁉︐︓︔︕︖︙
u"\uFE30\uFE35\uFE36\uFE37\uFE38\uFE47\uFE48\uFE50\uFE52\uFE54" # ︰︵︶︷︸﹇﹈﹐﹒﹔
u"\uFE55\uFE56\uFE57\uFE59\uFE5A\uFE5B\uFE5C\uFE5F\uFE60\uFE61" # ﹕﹖﹗﹙﹚﹛﹜﹟﹠﹡
u"\uFE62\uFE64\uFE65\uFE66\uFE68\uFE69\uFE6A\uFE6B\uFF01\uFF03" # ﹢﹤﹥﹦﹨﹩﹪﹫!#
u"\uFF04\uFF05\uFF06\uFF08\uFF09\uFF0A\uFF0B\uFF0C\uFF0E\uFF0F" #
u"\uFF1A\uFF1B\uFF1C\uFF1D\uFF1E\uFF1F\uFF20\uFF3B\uFF3C\uFF3D" #
u"\uFF3E\uFF3F\uFF40\uFF5B\uFF5C\uFF5D\uFF5E\u2A74\u2A75\u2A76" # ^_`{|}~⩴⩵⩶
)
compatibility_to = (
u"IJijsLJLjljNJNjnjDZDzdzfffiflffifflststABCDEFGHIJKLMNOPQRSTU"
u"VWXYZabcdefghijklmnopqrstuvwxyza/ca/sCc/oc/ugHHH"
u"hIILlNNoPQRRRTELZZBCeEFMoiFAXDdeijhPadaAUbaroVpcdmIUpAnAmA"
u"kAKBMBGBcalkcalpFnFmgkgHzkHzMHzGHzTHzfmnmmmcmkmPakPaMPaGParadpsnsmspVnVmVkVMVpWnWmWkWMWa.m.Bq"
u"cccdCo.dBGyhaHPinKKKMktlmlnloglxmbmilmolPHp.m.PPMPRsrSvWb(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)(m)(n)(o)"
u"(p)(q)(r)(s)(t)(u)(v)(w)(x)(y)(z)IIIIIIIVVVIVIIVIIIIXXXIXIILCDMiiiiiiivvviviiviiiixxxixiil"
u"cdm(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)(12)(13)(14)(15)(16)(17)(18)(19)(20)1.2.3.4.5.6.7.8.9.10.11.12.13.14.15.16.17."
u"18.19.20.0123456789 \"'--......!!???!!?,:;!?..."
u"..(){}[],.;:?!(){}#&*+<>=\\$%@!#$%&()*+,./"
u":;<=>?@[\\]^_`{|}~::======"
)
compatibility_from += (
u"\u1D00\u1D04\u1D05\u1D07\u1D0A\u1D0B\u1D0D\u1D0F\u1D18\u1D1B" # ᴀᴄᴅᴇᴊᴋᴍᴏᴘᴛ
u"\u1D1C\u1D20\u1D21\u1D22\u3007\u00A0\u3000" # \u00A0\u3000
)
compatibility_to += u"ACDEJKMOPTUVWZ0 "
punctuation_from = (
u"\u2018\u2019\u201A\u201B\u201C\u201D\u201E\u201F\u2032\u301D" # ‘’‚‛“”„‟′〝
u"\u301E\u00AB\u00BB\u2039\u203A\u00AD\u2010\u2012\u2013\u2014" # 〞«»‹›\u00AD
u"\u2015\u2016\u2044\u2045\u2046\u204E\u3001\u3002\u3008\u3009" # ―‖⁄⁅⁆⁎、。〈〉
u"\u300A\u300B\u3014\u3015\u3018\u3019\u301A\u301B\u00D7\u00F7" # 《》〔〕〘〙〚〛×÷
u"\u2212\u2215\u2216\u2223\u2225\u226A\u226B\u2985\u2986\u00B7" # −∕∖∣∥≪≫⦅⦆·
)
punctuation_to = u"'','\"\",,\"'\"\"<<>><>-----||/[]*,.<><<>>[][][]x/-/\\|||<<>>(())."
combinations_from = (
u"\u00C6\u00D0\u00D8\u00DE\u00DF\u00E6\u00F0\u00F8\u00FE\u0110" # ÆÐØÞßæðøþĐ
u"\u0111\u0126\u0127\u0131\u0138\u0141\u0142\u014A\u014B\u0152" # đĦħıĸŁłŊŋŒ
u"\u0153\u0166\u0167\u0180\u0181\u0182\u0183\u0187\u0188\u0189" # œŦŧƀƁƂƃƇƈƉ
u"\u018A\u018B\u018C\u0190\u0191\u0192\u0193\u0195\u0196\u0197" # ƊƋƌƐƑƒƓƕƖƗ
u"\u0198\u0199\u019A\u019D\u019E\u01A2\u01A3\u01A4\u01A5\u01AB" # ƘƙƚƝƞƢƣƤƥƫ
u"\u01AC\u01AD\u01AE\u01B2\u01B3\u01B4\u01B5\u01B6\u01E4\u01E5" # ƬƭƮƲƳƴƵƶǤǥ
u"\u0221\u0224\u0225\u0234\u0235\u0236\u0237\u0238\u0239\u023A" # ȡȤȥȴȵȶȷȸȹȺ
u"\u023B\u023C\u023D\u023E\u023F\u0240\u0243\u0244\u0246\u0247" # ȻȼȽȾȿɀɃɄɆɇ
u"\u0248\u0249\u024C\u024D\u024E\u024F\u0253\u0255\u0256\u0257" # ɈɉɌɍɎɏɓɕɖɗ
u"\u025B\u025F\u0260\u0261\u0262\u0266\u0267\u0268\u026A\u026B" # ɛɟɠɡɢɦɧɨɪɫ
u"\u026C\u026D\u0271\u0272\u0273\u0274\u027C\u027D\u027E\u0280" # ɬɭɱɲɳɴɼɽɾʀ
u"\u0282\u0288\u0289\u028B\u028F\u0290\u0291\u0299\u029B\u029C" # ʂʈʉʋʏʐʑʙʛʜ
u"\u029D\u029F\u02A0\u02A3\u02A5\u02A6\u02AA\u02AB\u1D03\u1D06" # ʝʟʠʣʥʦʪʫᴃᴆ
u"\u1D0C\u1D6B\u1D6C\u1D6D\u1D6E\u1D6F\u1D70\u1D71\u1D72\u1D73" # ᴌᵫᵬᵭᵮᵯᵰᵱᵲᵳ
u"\u1D74\u1D75\u1D76\u1D7A\u1D7B\u1D7D\u1D7E\u1D80\u1D81\u1D82" # ᵴᵵᵶᵺᵻᵽᵾᶀᶁᶂ
u"\u1D83\u1D84\u1D85\u1D86\u1D87\u1D88\u1D89\u1D8A\u1D8C\u1D8D" # ᶃᶄᶅᶆᶇᶈᶉᶊᶌᶍ
u"\u1D8E\u1D8F\u1D91\u1D92\u1D93\u1D96\u1D99\u1E9C\u1E9D\u1E9E" # ᶎᶏᶑᶒᶓᶖᶙẜẝẞ
u"\u1EFA\u1EFB\u1EFC\u1EFD\u1EFE\u1EFF\u00A9\u00AE\u20A0\u20A2" # ỺỻỼỽỾỿ©®₠₢
u"\u20A3\u20A4\u20A7\u20BA\u20B9\u211E" # ₣₤₧₺₹℞
)
combinations_to = (
u"AEDOETHssaedoethDdHhiqLlNnOEoeTtbBBbCcDDDdEFfGhvII"
U"KklNnOIoiPptTtTVYyZzGgdZzlntjdbqpACcLTszBUEe"
u"JjRrYybcddejggGhhiIlllmnnNrrrRstuvYzzBGH"
u"jLqdzdztslslzBDLuebdfmnprrstzthIpUbdfgklmnprsvx"
u"zadeeiussSSLLllVvYy(C)(R)CECrFr.L.PtsTLRsRx"
)
ascii = u" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
class CompatibilityTest(unittest.TestCase):
def test_correct(self):
self.maxDiff = None
self.assertEqual(util.textencoding.unicode_simplify_compatibility(compatibility_from), compatibility_to)
self.assertEqual(util.textencoding.unicode_simplify_compatibility(punctuation_from), punctuation_from)
self.assertEqual(util.textencoding.unicode_simplify_compatibility(combinations_from), combinations_from)
self.assertEqual(util.textencoding.unicode_simplify_compatibility(ascii), ascii)
def test_incorrect(self):
pass
class PunctuationTest(unittest.TestCase):
def test_correct(self):
self.maxDiff = None
self.assertEqual(util.textencoding.unicode_simplify_punctuation(compatibility_from), compatibility_from)
self.assertEqual(util.textencoding.unicode_simplify_punctuation(punctuation_from), punctuation_to)
self.assertEqual(util.textencoding.unicode_simplify_punctuation(combinations_from), combinations_from)
self.assertEqual(util.textencoding.unicode_simplify_punctuation(ascii), ascii)
def test_incorrect(self):
pass
class CombinationsTest(unittest.TestCase):
def test_correct(self):
self.maxDiff = None
self.assertEqual(util.textencoding.unicode_simplify_combinations(combinations_from), combinations_to)
self.assertEqual(util.textencoding.unicode_simplify_combinations(compatibility_from), compatibility_from)
self.assertEqual(util.textencoding.unicode_simplify_combinations(punctuation_from), punctuation_from)
self.assertEqual(util.textencoding.unicode_simplify_combinations(ascii), ascii)
def test_incorrect(self):
pass
class AsciiPunctTest(unittest.TestCase):
def test_correct(self):
self.assertEqual(util.textencoding.asciipunct(u"Test"), u"'Test'") # Quotations
self.assertEqual(util.textencoding.asciipunct(u"“Test”"), u"\"Test\"") # Quotations
self.assertEqual(util.textencoding.asciipunct(u"16″"), u"1'6\"") # Quotations
self.assertEqual(util.textencoding.asciipunct(u""), u"...") # Ellipses
def test_incorrect(self):
pass
class UnaccentTest(unittest.TestCase):
def test_correct(self):
self.assertEqual(util.textencoding.unaccent(u"Lukáš"), u"Lukas")
self.assertEqual(util.textencoding.unaccent(u"Björk"), u"Bjork")
self.assertEqual(util.textencoding.unaccent(u"小室哲哉"), u"小室哲哉")
def test_incorrect(self):
self.assertNotEqual(util.textencoding.unaccent(u"Björk"), u"Björk")
self.assertNotEqual(util.textencoding.unaccent(u"小室哲哉"), u"Tetsuya Komuro")
self.assertNotEqual(util.textencoding.unaccent(u"Trentemøller"), u"Trentemoller")
self.assertNotEqual(util.textencoding.unaccent(u"Ænima"), u"AEnima")
self.assertNotEqual(util.textencoding.unaccent(u"ænima"), u"aenima")
class ReplaceNonAsciiTest(unittest.TestCase):
def test_correct(self):
self.assertEqual(util.textencoding.replace_non_ascii(u"Lukáš"), u"Lukas")
self.assertEqual(util.textencoding.replace_non_ascii(u"Björk"), u"Bjork")
self.assertEqual(util.textencoding.replace_non_ascii(u"Trentemøller"), u"Trentemoeller")
self.assertEqual(util.textencoding.replace_non_ascii(u"Ænima"), u"AEnima")
self.assertEqual(util.textencoding.replace_non_ascii(u"ænima"), u"aenima")
self.assertEqual(util.textencoding.replace_non_ascii(u"小室哲哉"), u"____")
self.assertEqual(util.textencoding.replace_non_ascii(u"ᴀᴄᴇ"), u"ACE") # Latin Letter Small
self.assertEqual(util.textencoding.replace_non_ascii(u""), u"Abc") # Fullwidth Latin
self.assertEqual(util.textencoding.replace_non_ascii(u"500㎏,2㎓"), u"500kg,2GHz") # Technical
self.assertEqual(util.textencoding.replace_non_ascii(u"⒜⒝⒞"), u"(a)(b)(c)") # Parenthesised Latin
self.assertEqual(util.textencoding.replace_non_ascii(u"ⅯⅯⅩⅣ"), u"MMXIV") # Roman numerals
self.assertEqual(util.textencoding.replace_non_ascii(u"ⅿⅿⅹⅳ"), u"mmxiv") # Roman numerals small
self.assertEqual(util.textencoding.replace_non_ascii(u"⑴⑵⑶"), u"(1)(2)(3)") # Parenthesised numbers
self.assertEqual(util.textencoding.replace_non_ascii(u"⒈ ⒉ ⒊"), u"1. 2. 3.") # Digit full stop
self.assertEqual(util.textencoding.replace_non_ascii(u""), u"123") # Fullwidth digits
def test_incorrect(self):
self.assertNotEqual(util.textencoding.replace_non_ascii(u"Lukáš"), u"Lukáš")
self.assertNotEqual(util.textencoding.replace_non_ascii(u"Lukáš"), u"Luk____")
if show_latin2ascii_coverage:
# The following code set blocks are taken from:
# http://en.wikipedia.org/wiki/Latin_script_in_Unicode
latin_1 = u"ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
latin_a = u"ĀāĂ㥹ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵĶķĸĹĺĻļĽľĿ" \
u"ŀŁłŃńŅņŇňʼnŊŋŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽž"
latin_b = u"ƀƁƂƃƄƅƆƇƈƉƊƋƌƍƎƏƐƑƒƓƔƕƖƗƘƙƚƛƜƝƞƟƠơƢƣƤƥƦƧƨƩƪƫƬƭƮƯưƱƲƳƴƵƶƷƸƹƺƻƼƽƾƿ" \
u"ǀǁǂǃDŽDždžLJLjljNJNjnjǍǎǏǐǑǒǓǔǕǖǗǘǙǚǛǜǝǞǟǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸǹǺǻǼǽǾǿ" \
u"ȀȁȂȃȄȅȆȇȈȉȊȋȌȍȎȏȐȑȒȓȔȕȖȗȘșȚțȜȝȞȟȠȡȢȣȤȥȦȧȨȩȪȫȬȭȮȯȰȱȲȳȴȵȶȷȸȹȺȻȼȽȾȿ" \
u"ɀɁɂɃɄɅɆɇɈɉɊɋɌɍɎɏ"
ipa_ext = u"ɐɑɒɓɔɕɖɗɘəɚɛɜɝɞɟɠɡɢɣɤɥɦɧɨɩɪɫɬɭɮɯɰɱɲɳɴɵɶɷɸɹɺɻɼɽɾɿʀʁʂʃʄʅʆʇʈʉʊʋʌʍʎʏ" \
u"ʐʑʒʓʔʕʖʗʘʙʚʛʜʝʞʟʠʡʢʣʤʥʦʧʨʩʪʫʬʭʮʯ"
phonetic = u"ᴀᴁᴂᴃᴄᴅᴆᴇᴈᴉᴊᴋᴌᴍᴎᴏᴐᴑᴒᴓᴔᴕᴖᴗᴘᴙᴚᴛᴜᴝᴞᴟᴠᴡᴢᴣᴤᴥᴦᴧᴨᴩᴪᴫᴬᴭᴮᴯᴰᴱᴲᴳᴴᴵᴶᴷᴸᴹᴺᴻᴼᴽᴾᴿ" \
u"ᵀᵁᵂᵃᵄᵅᵆᵇᵈᵉᵊᵋᵌᵍᵎᵏᵐᵑᵒᵓᵔᵕᵖᵗᵘᵙᵚᵛᵜᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᵫᵬᵭᵮᵯᵰᵱᵲᵳᵴᵵᵶᵷᵸᵹᵺᵻᵼᵽᵾᵿ" \
u"ᶀᶁᶂᶃᶄᶅᶆᶇᶈᶉᶊᶋᶌᶍᶎᶏᶐᶑᶒᶓᶔᶕᶖᶗᶘᶙᶚᶛᶜᶝᶞᶟᶠᶡᶢᶣᶤᶥᶦᶧᶨᶩᶪᶫᶬᶭᶮᶯᶰᶱᶲᶳᶴᶵᶶᶷᶸᶹᶺᶻᶼᶽᶾᶿ"
latin_ext_add = u"ḀḁḂḃḄḅḆḇḈḉḊḋḌḍḎḏḐḑḒḓḔḕḖḗḘḙḚḛḜḝḞḟḠḡḢḣḤḤḦḧḨḩḪḫḬḭḮḯḰḱḲḳḴḵḶḷḸḹḺḻḼḽḾḿ" \
u"ṀṁṂṃṄṅṆṇṈṉṊṋṌṍṎṏṐṑṒṓṔṕṖṗṘṙṚṛṜṝṞṟṠṡṢṣṤṥṦṧṨṩṪṫṬṭṮṯṰṱṲṳṴṵṶṷṸṹṺṻṼṽṾṿ" \
u"ẀẁẂẃẄẅẆẇẈẉẊẋẌẍẎẏẐẑẒẓẔẕẖẗẘẙẚẛẜẝẞẟẠạẢảẤấẦầẨẩẪẫẬậẮắẰằẲẳẴẵẶặẸẹẺẻẼẽẾế" \
u"ỀềỂểỄễỆệỈỉỊịỌọỎỏỐốỒồỔổỖỗỘộỚớỜờỞởỠỡỢợỤụỦủỨứỪừỬửỮữỰựỲỳỴỵỶỷỸỹỺỻỼỽỾỿ"
letter_like = u"℀℁ℂ℃℄℅℆ℇ℈℉ℊℋℌℍℎℏℐℑℒℓ℔ℕ№℗℘ℙℚℛℜℝ℞℟℠℡™℣ℤ℥Ω℧ℨ℩KÅℬℭ℮ℯℰℱℲℳℴℵℶℷℸℹ℺℻ℼℽℾℿ" \
u"⅀⅁⅂⅃⅄ⅅⅆⅇⅈⅉ⅊⅋⅌⅍ⅎ⅏"
enclosed = u"⒐⒑⒒⒓⒔⒕⒖⒗⒘⒙⒚⒛⒜⒝⒞⒟⒠⒡⒢⒣⒤⒥⒦⒧⒨⒩⒪⒫⒬⒭⒮⒯⒰⒱⒲⒳⒴⒵ⒶⒷⒸⒹⒺⒻⒼⒽⒾⒿⓀⓁⓂⓃⓄⓅⓆⓇⓈⓉⓊⓋⓌⓍⓎⓏ" \
u"ⓐⓑⓒⓓⓔⓕⓖⓗⓘⓙⓚⓛⓜⓝⓞⓟⓠⓡⓢⓣⓤⓥⓦⓧⓨⓩ⓪⓫⓬⓭⓮⓯"
print "The following lines show the coverage of Latin characters conversion to ascii."
print "Underscores are characters which currently do not have an ASCII representation."
print
print "latin-1: ",util.textencoding.replace_non_ascii(latin_1)
print "latin-1: ",util.textencoding.replace_non_ascii(latin_1)
print "latin-a: ",util.textencoding.replace_non_ascii(latin_a)
print "latin-b: ",util.textencoding.replace_non_ascii(latin_b)
print "ipa-ext: ",util.textencoding.replace_non_ascii(ipa_ext)
print "phonetic: ",util.textencoding.replace_non_ascii(phonetic)
print "latin-ext-add: ",util.textencoding.replace_non_ascii(latin_ext_add)
print "letter-like: ",util.textencoding.replace_non_ascii(letter_like)
print "enclosed: ",util.textencoding.replace_non_ascii(enclosed)
print