| 1 | #!/usr/bin/env python |
|---|
| 2 | # -*- coding: utf-8 -*- |
|---|
| 3 | """ |
|---|
| 4 | Build Translit-Tab |
|---|
| 5 | ~~~~~~~~~~~~~~~~~~ |
|---|
| 6 | |
|---|
| 7 | Generates a conversion of a transtab map. Used by `zine.utils.text`. |
|---|
| 8 | You will need a version of transtab which you can get for example |
|---|
| 9 | here: http://www.bitbucket.org/jek/translitcodec/ |
|---|
| 10 | |
|---|
| 11 | :copyright: (c) 2010 by the Zine Team, see AUTHORS for more details. |
|---|
| 12 | :license: BSD, see LICENSE for more details. |
|---|
| 13 | """ |
|---|
| 14 | import os |
|---|
| 15 | import sys |
|---|
| 16 | import csv |
|---|
| 17 | from optparse import OptionParser |
|---|
| 18 | |
|---|
| 19 | |
|---|
| 20 | import _init_zine |
|---|
| 21 | import zine |
|---|
# Append the directory containing this script to the module search path.
sys.path.append(os.path.dirname(__file__))
# Register the ';'-delimited CSV dialect that read_table() parses the
# replacement column with.
csv.register_dialect('transtab', delimiter=';')
|---|
| 24 | |
|---|
| 25 | |
|---|
def read_table(path):
    """Parse a transtab file into three transliteration tables.

    Only lines starting with ``<`` are considered.  The text before the
    first space is the source spec; its first two characters and its last
    character are stripped and the remainder is read as a hexadecimal code
    point.  The text after the first space is split on ``;`` using the
    ``'transtab'`` csv dialect (which must already be registered): the
    first field is the long replacement, the second field -- when there is
    one -- is the short replacement, otherwise the long one is reused.

    :param path: filename of the transtab file.
    :return: a ``(long, short, single)`` tuple of dicts keyed by the
             integer code point.  ``single`` only holds the entries whose
             short replacement is exactly one character long.
    :raises ValueError: if a ``<`` line has no space separated replacement
                        or a code point is not valid hexadecimal.
    """
    long_table, short_table, single_table = {}, {}, {}

    table_file = open(path)
    try:
        # Iterate the file lazily instead of loading every line up front.
        for line in table_file:
            if not line.startswith('<'):
                continue
            from_spec, raw_to = line.strip().split(' ', 1)
            from_ord = int(from_spec[2:-1], 16)

            # One line goes in, so the first row is the only one we need.
            # ``list(...)[0]`` avoids the Python 2-only ``.next()`` method.
            raw = list(csv.reader([raw_to], 'transtab'))[0]
            long_char = _unpack_uchrs(raw[0])
            if len(raw) < 2:
                short_char = long_char
            else:
                short_char = _unpack_uchrs(raw[1])

            long_table[from_ord] = long_char
            short_table[from_ord] = short_char
            if len(short_char) == 1:
                single_table[from_ord] = short_char
    finally:
        # The original never closed the handle.
        table_file.close()
    return long_table, short_table, single_table
|---|
| 48 | |
|---|
| 49 | |
|---|
def _unpack_uchrs(packed):
    """Decode a run of ``<Uxxxx>`` specs into the string they spell.

    Every ``<U`` marker starts a new chunk; the trailing character of each
    chunk (the closing ``>`` in well-formed input) is dropped and the rest
    is read as a hexadecimal code point.

    :param packed: zero or more specs, e.g. ``'<U0041><U0042>'``.
    :return: the decoded characters joined together, ``''`` if *packed*
             contains no specs.
    :raises ValueError: if a chunk is not valid hexadecimal.
    """
    try:
        to_char = unichr
    except NameError:
        # ``unichr`` only exists on Python 2; on Python 3 ``chr`` does the
        # same job.
        to_char = chr
    # ``split()`` without arguments already ignores surrounding whitespace.
    chunks = packed.replace('<U', ' ').split()
    return ''.join(to_char(int(spec[:-1], 16)) for spec in chunks)
|---|
| 53 | |
|---|
| 54 | |
|---|
def update_mapping(long, short, single, path):
    """Rewrite the generated tables in the module stored at *path*.

    The start of the existing file -- up to the second occurrence of a
    triple double-quote, plus the one character following it -- is kept as
    the preamble.  Everything after it is replaced by freshly dumped
    ``LONG_TABLE``, ``SHORT_TABLE`` and ``SINGLE_TABLE`` dicts.

    :param long: mapping written out as ``LONG_TABLE``.
    :param short: mapping written out as ``SHORT_TABLE``.
    :param single: mapping written out as ``SINGLE_TABLE``.
    :param path: filename of the module to rewrite in place.
    """
    quotes = '"""'

    reader = open(path)
    try:
        contents = reader.read()
    finally:
        reader.close()

    # Step past the opening delimiter, then past the closing one.  A failed
    # search yields -1, so the offset falls back to 0 exactly as before.
    after_open = contents.find(quotes, 0) + 1
    after_close = contents.find(quotes, after_open) + 1
    header = contents[:after_close + 3]

    tables = (
        ('LONG_TABLE', long),
        ('SHORT_TABLE', short),
        ('SINGLE_TABLE', single),
    )
    writer = open(path, 'wb')
    try:
        writer.write(header)
        for table_name, table in tables:
            _dump_dict(writer, table_name, table)
    finally:
        writer.close()
|---|
| 74 | |
|---|
| 75 | |
|---|
def _dump_dict(fh, name, data):
    """Write *data* to *fh* as a dict literal assigned to *name*.

    The assignment is preceded by a blank line and holds one
    ``repr(key): repr(value),`` entry per line, ordered by key.

    :param fh: object with a ``write`` method.
    :param name: identifier the dict literal is assigned to.
    :param data: mapping to dump; its keys must be sortable.
    """
    emit = fh.write
    emit('\n%s = {\n' % name)
    # Keys are unique, so ordering by key matches ordering the items.
    for key in sorted(data):
        emit(' %r: %r,\n' % (key, data[key]))
    emit('}\n')
|---|
| 81 | |
|---|
| 82 | |
|---|
def main():
    """Command line entry point: rebuild zine's ``translit_tab`` module.

    Expects exactly one positional argument, the directory that contains
    the ``transtab`` file; any other argument count is reported through
    ``parser.error``.  The parsed tables are written to
    ``_dynamic/translit_tab.py`` inside the directory of the imported
    ``zine`` package.
    """
    # The parser is deliberately published as a module-level name.
    global parser
    parser = OptionParser(usage='%prog [path/to/transtab]')
    _options, positional = parser.parse_args()
    if len(positional) != 1:
        parser.error('incorrect number of arguments')
    transtab_dir = positional[0]

    package_dir = os.path.dirname(zine.__file__)
    mapping_file = os.path.join(package_dir, '_dynamic', 'translit_tab.py')

    transtab_file = os.path.join(transtab_dir, 'transtab')
    long_table, short_table, single_table = read_table(transtab_file)
    update_mapping(long_table, short_table, single_table, path=mapping_file)

    # Parenthesised form prints the same text under the Python 2 statement.
    print('All done.')
|---|
| 95 | |
|---|
| 96 | |
|---|
# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    main()
|---|