/jni/harfbuzz-ng/test/shaping/hb_test_tools.py
Python | 519 lines | 444 code | 62 blank | 13 comment | 84 complexity | b0647c649fc2455c1462fb58ae1b8e25 MD5 | raw file
- #!/usr/bin/python
- import sys, os, re, difflib, unicodedata, errno, cgi
- from itertools import *
- diff_symbols = "-+=*&^%$#@!~/"
- diff_colors = ['red', 'green', 'blue']
- class ColorFormatter:
- class Null:
- @staticmethod
- def start_color (c): return ''
- @staticmethod
- def end_color (): return ''
- @staticmethod
- def escape (s): return s
- @staticmethod
- def newline (): return '\n'
- class ANSI:
- @staticmethod
- def start_color (c):
- return {
- 'red': '\033[41;37;1m',
- 'green': '\033[42;37;1m',
- 'blue': '\033[44;37;1m',
- }[c]
- @staticmethod
- def end_color ():
- return '\033[m'
- @staticmethod
- def escape (s): return s
- @staticmethod
- def newline (): return '\n'
- class HTML:
- @staticmethod
- def start_color (c):
- return '<span style="background:%s">' % c
- @staticmethod
- def end_color ():
- return '</span>'
- @staticmethod
- def escape (s): return cgi.escape (s)
- @staticmethod
- def newline (): return '<br/>\n'
- @staticmethod
- def Auto (argv = [], out = sys.stdout):
- format = ColorFormatter.ANSI
- if "--format" in argv:
- argv.remove ("--format")
- format = ColorFormatter.ANSI
- if "--format=ansi" in argv:
- argv.remove ("--format=ansi")
- format = ColorFormatter.ANSI
- if "--format=html" in argv:
- argv.remove ("--format=html")
- format = ColorFormatter.HTML
- if "--no-format" in argv:
- argv.remove ("--no-format")
- format = ColorFormatter.Null
- return format
- class DiffColorizer:
- diff_regex = re.compile ('([a-za-z0-9_]*)([^a-za-z0-9_]?)')
- def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
- self.formatter = formatter
- self.colors = colors
- self.symbols = symbols
- def colorize_lines (self, lines):
- lines = (l if l else '' for l in lines)
- ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
- oo = ["",""]
- st = [False, False]
- for l in difflib.Differ().compare (*ss):
- if l[0] == '?':
- continue
- if l[0] == ' ':
- for i in range(2):
- if st[i]:
- oo[i] += self.formatter.end_color ()
- st[i] = False
- oo = [o + self.formatter.escape (l[2:]) for o in oo]
- continue
- if l[0] in self.symbols:
- i = self.symbols.index (l[0])
- if not st[i]:
- oo[i] += self.formatter.start_color (self.colors[i])
- st[i] = True
- oo[i] += self.formatter.escape (l[2:])
- continue
- for i in range(2):
- if st[i]:
- oo[i] += self.formatter.end_color ()
- st[i] = False
- oo = [o.replace ('\n', '') for o in oo]
- return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]
- def colorize_diff (self, f):
- lines = [None, None]
- for l in f:
- if l[0] not in self.symbols:
- yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
- continue
- i = self.symbols.index (l[0])
- if lines[i]:
- # Flush
- for line in self.colorize_lines (lines):
- yield line
- lines = [None, None]
- lines[i] = l[1:]
- if (all (lines)):
- # Flush
- for line in self.colorize_lines (lines):
- yield line
- lines = [None, None]
- if (any (lines)):
- # Flush
- for line in self.colorize_lines (lines):
- yield line
- class ZipDiffer:
- @staticmethod
- def diff_files (files, symbols=diff_symbols):
- files = tuple (files) # in case it's a generator, copy it
- try:
- for lines in izip_longest (*files):
- if all (lines[0] == line for line in lines[1:]):
- sys.stdout.writelines ([" ", lines[0]])
- continue
- for i, l in enumerate (lines):
- if l:
- sys.stdout.writelines ([symbols[i], l])
- except IOError as e:
- if e.errno != errno.EPIPE:
- print >> sys.stderr, "%s: %s: %s" % (sys.argv[0], e.filename, e.strerror)
- sys.exit (1)
- class DiffFilters:
- @staticmethod
- def filter_failures (f):
- for key, lines in DiffHelpers.separate_test_cases (f):
- lines = list (lines)
- if not DiffHelpers.test_passed (lines):
- for l in lines: yield l
- class Stat:
- def __init__ (self):
- self.count = 0
- self.freq = 0
- def add (self, test):
- self.count += 1
- self.freq += test.freq
- class Stats:
- def __init__ (self):
- self.passed = Stat ()
- self.failed = Stat ()
- self.total = Stat ()
- def add (self, test):
- self.total.add (test)
- if test.passed:
- self.passed.add (test)
- else:
- self.failed.add (test)
- def mean (self):
- return float (self.passed.count) / self.total.count
- def variance (self):
- return (float (self.passed.count) / self.total.count) * \
- (float (self.failed.count) / self.total.count)
- def stddev (self):
- return self.variance () ** .5
- def zscore (self, population):
- """Calculate the standard score.
- Population is the Stats for population.
- Self is Stats for sample.
- Returns larger absolute value if sample is highly unlikely to be random.
- Anything outside of -3..+3 is very unlikely to be random.
- See: http://en.wikipedia.org/wiki/Standard_score"""
- return (self.mean () - population.mean ()) / population.stddev ()
- class DiffSinks:
- @staticmethod
- def print_stat (f):
- passed = 0
- failed = 0
- # XXX port to Stats, but that would really slow us down here
- for key, lines in DiffHelpers.separate_test_cases (f):
- if DiffHelpers.test_passed (lines):
- passed += 1
- else:
- failed += 1
- total = passed + failed
- print "%d out of %d tests passed. %d failed (%g%%)" % (passed, total, failed, 100. * failed / total)
- @staticmethod
- def print_ngrams (f, ns=(1,2,3)):
- gens = tuple (Ngram.generator (n) for n in ns)
- allstats = Stats ()
- allgrams = {}
- for key, lines in DiffHelpers.separate_test_cases (f):
- test = Test (lines)
- allstats.add (test)
- for gen in gens:
- for ngram in gen (test.unicodes):
- if ngram not in allgrams:
- allgrams[ngram] = Stats ()
- allgrams[ngram].add (test)
- importantgrams = {}
- for ngram, stats in allgrams.iteritems ():
- if stats.failed.count >= 30: # for statistical reasons
- importantgrams[ngram] = stats
- allgrams = importantgrams
- del importantgrams
- for ngram, stats in allgrams.iteritems ():
- print "zscore: %9f failed: %6d passed: %6d ngram: <%s>" % (stats.zscore (allstats), stats.failed.count, stats.passed.count, ','.join ("U+%04X" % u for u in ngram))
- class Test:
- def __init__ (self, lines):
- self.freq = 1
- self.passed = True
- self.identifier = None
- self.text = None
- self.unicodes = None
- self.glyphs = None
- for l in lines:
- symbol = l[0]
- if symbol != ' ':
- self.passed = False
- i = 1
- if ':' in l:
- i = l.index (':')
- if not self.identifier:
- self.identifier = l[1:i]
- i = i + 2 # Skip colon and space
- j = -1
- if l[j] == '\n':
- j -= 1
- brackets = l[i] + l[j]
- l = l[i+1:-2]
- if brackets == '()':
- self.text = l
- elif brackets == '<>':
- self.unicodes = Unicode.parse (l)
- elif brackets == '[]':
- # XXX we don't handle failed tests here
- self.glyphs = l
- class DiffHelpers:
- @staticmethod
- def separate_test_cases (f):
- '''Reads lines from f, and if the lines have identifiers, ie.
- have a colon character, groups them by identifier,
- yielding lists of all lines with the same identifier.'''
- def identifier (l):
- if ':' in l[1:]:
- return l[1:l.index (':')]
- return l
- return groupby (f, key=identifier)
- @staticmethod
- def test_passed (lines):
- lines = list (lines)
- # XXX This is a hack, but does the job for now.
- if any (l.find("space+0|space+0") >= 0 for l in lines if l[0] == '+'): return True
- if any (l.find("uni25CC") >= 0 for l in lines if l[0] == '+'): return True
- if any (l.find("dottedcircle") >= 0 for l in lines if l[0] == '+'): return True
- if any (l.find("glyph0") >= 0 for l in lines if l[0] == '+'): return True
- if any (l.find("gid0") >= 0 for l in lines if l[0] == '+'): return True
- if any (l.find("notdef") >= 0 for l in lines if l[0] == '+'): return True
- return all (l[0] == ' ' for l in lines)
- class FilterHelpers:
- @staticmethod
- def filter_printer_function (filter_callback):
- def printer (f):
- for line in filter_callback (f):
- print line
- return printer
- @staticmethod
- def filter_printer_function_no_newline (filter_callback):
- def printer (f):
- for line in filter_callback (f):
- sys.stdout.writelines ([line])
- return printer
- class Ngram:
- @staticmethod
- def generator (n):
- def gen (f):
- l = []
- for x in f:
- l.append (x)
- if len (l) == n:
- yield tuple (l)
- l[:1] = []
- gen.n = n
- return gen
- class UtilMains:
- @staticmethod
- def process_multiple_files (callback, mnemonic = "FILE"):
- if "--help" in sys.argv:
- print "Usage: %s %s..." % (sys.argv[0], mnemonic)
- sys.exit (1)
- try:
- files = sys.argv[1:] if len (sys.argv) > 1 else ['-']
- for s in files:
- callback (FileHelpers.open_file_or_stdin (s))
- except IOError as e:
- if e.errno != errno.EPIPE:
- print >> sys.stderr, "%s: %s: %s" % (sys.argv[0], e.filename, e.strerror)
- sys.exit (1)
- @staticmethod
- def process_multiple_args (callback, mnemonic):
- if len (sys.argv) == 1 or "--help" in sys.argv:
- print "Usage: %s %s..." % (sys.argv[0], mnemonic)
- sys.exit (1)
- try:
- for s in sys.argv[1:]:
- callback (s)
- except IOError as e:
- if e.errno != errno.EPIPE:
- print >> sys.stderr, "%s: %s: %s" % (sys.argv[0], e.filename, e.strerror)
- sys.exit (1)
- @staticmethod
- def filter_multiple_strings_or_stdin (callback, mnemonic, \
- separator = " ", \
- concat_separator = False):
- if "--help" in sys.argv:
- print "Usage:\n %s %s...\nor:\n %s\n\nWhen called with no arguments, input is read from standard input." \
- % (sys.argv[0], mnemonic, sys.argv[0])
- sys.exit (1)
- try:
- if len (sys.argv) == 1:
- while (1):
- line = sys.stdin.readline ()
- if not len (line):
- break
- if line[-1] == '\n':
- line = line[:-1]
- print callback (line)
- else:
- args = sys.argv[1:]
- if concat_separator != False:
- args = [concat_separator.join (args)]
- print separator.join (callback (x) for x in (args))
- except IOError as e:
- if e.errno != errno.EPIPE:
- print >> sys.stderr, "%s: %s: %s" % (sys.argv[0], e.filename, e.strerror)
- sys.exit (1)
- class Unicode:
- @staticmethod
- def decode (s):
- return u','.join ("U+%04X" % ord (u) for u in unicode (s, 'utf-8')).encode ('utf-8')
- @staticmethod
- def parse (s):
- s = re.sub (r"0[xX]", " ", s)
- s = re.sub (r"[<+>,;&#\\xXuU\n ]", " ", s)
- return [int (x, 16) for x in s.split (' ') if len (x)]
- @staticmethod
- def encode (s):
- return u''.join (unichr (x) for x in Unicode.parse (s)).encode ('utf-8')
- shorthands = {
- "ZERO WIDTH NON-JOINER": "ZWNJ",
- "ZERO WIDTH JOINER": "ZWJ",
- "NARROW NO-BREAK SPACE": "NNBSP",
- "COMBINING GRAPHEME JOINER": "CGJ",
- "LEFT-TO-RIGHT MARK": "LRM",
- "RIGHT-TO-LEFT MARK": "RLM",
- "LEFT-TO-RIGHT EMBEDDING": "LRE",
- "RIGHT-TO-LEFT EMBEDDING": "RLE",
- "POP DIRECTIONAL FORMATTING": "PDF",
- "LEFT-TO-RIGHT OVERRIDE": "LRO",
- "RIGHT-TO-LEFT OVERRIDE": "RLO",
- }
- @staticmethod
- def pretty_name (u):
- try:
- s = unicodedata.name (u)
- except ValueError:
- return "XXX"
- s = re.sub (".* LETTER ", "", s)
- s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
- s = re.sub (".* SIGN ", "", s)
- s = re.sub (".* COMBINING ", "", s)
- if re.match (".* VIRAMA", s):
- s = "HALANT"
- if s in Unicode.shorthands:
- s = Unicode.shorthands[s]
- return s
- @staticmethod
- def pretty_names (s):
- s = re.sub (r"[<+>\\uU]", " ", s)
- s = re.sub (r"0[xX]", " ", s)
- s = [unichr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
- return u' + '.join (Unicode.pretty_name (x) for x in s).encode ('utf-8')
- class FileHelpers:
- @staticmethod
- def open_file_or_stdin (f):
- if f == '-':
- return sys.stdin
- return file (f)
- class Manifest:
- @staticmethod
- def read (s, strict = True):
- if not os.path.exists (s):
- if strict:
- print >> sys.stderr, "%s: %s does not exist" % (sys.argv[0], s)
- sys.exit (1)
- return
- s = os.path.normpath (s)
- if os.path.isdir (s):
- try:
- m = file (os.path.join (s, "MANIFEST"))
- items = [x.strip () for x in m.readlines ()]
- for f in items:
- for p in Manifest.read (os.path.join (s, f)):
- yield p
- except IOError:
- if strict:
- print >> sys.stderr, "%s: %s does not exist" % (sys.argv[0], os.path.join (s, "MANIFEST"))
- sys.exit (1)
- return
- else:
- yield s
- @staticmethod
- def update_recursive (s):
- for dirpath, dirnames, filenames in os.walk (s, followlinks=True):
- for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
- if f in dirnames:
- dirnames.remove (f)
- if f in filenames:
- filenames.remove (f)
- dirnames.sort ()
- filenames.sort ()
- ms = os.path.join (dirpath, "MANIFEST")
- print " GEN %s" % ms
- m = open (ms, "w")
- for f in filenames:
- print >> m, f
- for f in dirnames:
- print >> m, f
- for f in dirnames:
- Manifest.update_recursive (os.path.join (dirpath, f))
- if __name__ == '__main__':
- pass