tegaki-python-0.3.1/0000755000175000017500000000000011352066723014155 5ustar mathieumathieutegaki-python-0.3.1/tests/0000755000175000017500000000000011352066723015317 5ustar mathieumathieutegaki-python-0.3.1/tests/runtests.py0000755000175000017500000000312511342122457017560 0ustar mathieumathieu#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2008 The Tegaki project contributors # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# Contributors to this file: # - Mathieu Blondel import glob import os import sys import unittest import doctest import tegaki currdir = os.path.dirname(os.path.abspath(__file__)) parentdir = os.path.join(currdir, "..") os.chdir(currdir) sys.path = sys.path + [parentdir] def gettestnames(): return [name[:-3] for name in glob.glob('test_*.py')] # Run doctests for attr in dir(tegaki): attr = getattr(tegaki, attr) if type(attr) == type(tegaki): if hasattr(attr, "__doctest__") and attr.__doctest__: doctest.testmod(attr) # Run unittests suite = unittest.TestSuite() loader = unittest.TestLoader() for name in gettestnames(): suite.addTest(loader.loadTestsFromName(name)) testRunner = unittest.TextTestRunner(verbosity=1) testRunner.run(suite) tegaki-python-0.3.1/tests/myfile0000644000175000017500000000020611342122457016521 0ustar mathieumathieu 1000 1000 tegaki-python-0.3.1/tests/test_arrayutils.py0000644000175000017500000000524211342122457021126 0ustar mathieumathieu# -*- coding: utf-8 -*- # Copyright (C) 2008 The Tegaki project contributors # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# Contributors to this file: # - Mathieu Blondel import unittest from tegaki.arrayutils import * class ArrayTest(unittest.TestCase): def testArrayFlatten(self): for arr, expected in ( ([[1,2], [3,4]], [1, 2, 3, 4]), ([[]], []) ): self.assertEquals(array_flatten(arr), expected) def testArrayReshape(self): for arr, expected, n in ( ([1, 2, 3, 4], [[1,2], [3,4]], 2), ([], [], 2), ([1, 2, 3], [[1, 2]], 2) # expected 4 values ): self.assertEquals(array_reshape(arr, n), expected) def testArraySplit(self): arr = [[1,2], [3,4], [5,6], [7,8], [9, 10], [11, 12]] expected = [ [[1,2],[3,4]], [[5,6],[7, 8]], [[9,10],[11,12]] ] self.assertEquals(array_split(arr, 3), expected) def testArrayMean(self): arr = [1, 2, 3, 4] expected = 2.5 self.assertEquals(array_mean(arr), expected) def testArrayVariance(self): arr = [1, 2, 3, 4] expected = 1.25 self.assertEquals(array_variance(arr), expected) def testArrayMeanVector(self): arr = [ [1,2], [3,4] ] expected = [2, 3] self.assertEquals(array_mean_vector(arr), expected) def testArrayVarianceVector(self): arr = [ [1,2], [3,4] ] expected = [1.0, 1.0] self.assertEquals(array_variance_vector(arr), expected) def testArrayAdd(self): arr1 = [1,2] arr2 = [3,4] expected = [4, 6] self.assertEquals(array_add(arr1, arr2), expected) def testArrayMul(self): arr1 = [1,2] arr2 = [3,4] expected = [3, 8] self.assertEquals(array_mul(arr1, arr2), expected) tegaki-python-0.3.1/tests/test_charcol.py0000644000175000017500000003004611342122457020342 0ustar mathieumathieu# -*- coding: utf-8 -*- # Copyright (C) 2009 The Tegaki project contributors # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # Contributors to this file: # - Mathieu Blondel import unittest import os import sys import StringIO from tegaki.character import Point, Stroke, Writing, Character from tegaki.charcol import CharacterCollection class CharacterCollectionTest(unittest.TestCase): def setUp(self): self.currdir = os.path.dirname(os.path.abspath(__file__)) path = os.path.join(self.currdir, "data", "collection", "test.charcol") self.cc = CharacterCollection() self.cc.read(path) f = os.path.join(self.currdir, "data", "character.xml") self.c = Character() self.c.read(f) def testValidate(self): path = os.path.join(self.currdir, "data", "collection", "test.charcol") f = open(path) buf = f.read() f.close() invalid = \ """ """ malformed = \ """ """ try: self.assertTrue(CharacterCollection.validate(buf)) self.assertFalse(CharacterCollection.validate(invalid)) self.assertFalse(CharacterCollection.validate(malformed)) except NotImplementedError: sys.stderr.write("lxml missing!\n") pass def _testReadXML(self, charcol): self.assertEquals(charcol.get_set_list(), ["一", "三", "二", "四"]) c = {} for k in ["19968_1", "19968_2", "19968_3", "19977_1", "19977_2", "20108_1"]: c[k] = Character() c[k].read(os.path.join(self.currdir, "data", "collection", k + ".xml")) self.assertEquals(charcol.get_characters("一"), [c["19968_1"], c["19968_2"], c["19968_3"]]) self.assertEquals(charcol.get_characters("三"), [c["19977_1"], c["19977_2"]]) self.assertEquals(charcol.get_characters("二"), [c["20108_1"]]) self.assertEquals(charcol.get_characters("四"), []) 
self.assertEquals(charcol.get_all_characters(), [c["19968_1"], c["19968_2"], c["19968_3"], c["19977_1"], c["19977_2"], c["20108_1"]]) def testReadXMLFile(self): self._testReadXML(self.cc) def testToXML(self): charcol2 = CharacterCollection() charcol2.read_string(self.cc.to_xml()) self.assertEquals(self.cc.get_set_list(), charcol2.get_set_list()) self.assertEquals(self.cc.get_all_characters(), charcol2.get_all_characters()) def testWriteGzipString(self): charcol2 = CharacterCollection() charcol2.read_string(self.cc.write_string(gzip=True), gzip=True) self.assertEquals(self.cc.get_set_list(), charcol2.get_set_list()) self.assertEquals(self.cc.get_all_characters(), charcol2.get_all_characters()) def testWriteBz2String(self): charcol2 = CharacterCollection() charcol2.read_string(self.cc.write_string(bz2=True), bz2=True) self.assertEquals(self.cc.get_set_list(), charcol2.get_set_list()) self.assertEquals(self.cc.get_all_characters(), charcol2.get_all_characters()) def testAddSame(self): path = os.path.join(self.currdir, "data", "collection", "test.charcol") charcol = CharacterCollection() charcol.read(path) charcol2 = CharacterCollection() charcol2.read(path) charcol3 = charcol.concatenate(charcol2, check_duplicate=True) self.assertEquals(charcol3.get_set_list(), ["一", "三", "二", "四"]) self.assertEquals(len(charcol3.get_characters("一")), 3) self.assertEquals(len(charcol3.get_characters("三")), 2) self.assertEquals(len(charcol3.get_characters("二")), 1) self.assertEquals(len(charcol3.get_characters("四")), 0) def testGetChars(self): all_ = self.cc.get_characters("一") self.assertEquals(self.cc.get_characters("一", limit=2), all_[0:2]) self.assertEquals(self.cc.get_characters("一", offset=2), all_[2:]) self.assertEquals(self.cc.get_characters("一", limit=1, offset=1), all_[1:2]) def testAdd(self): path = os.path.join(self.currdir, "data", "collection", "test.charcol") charcol = CharacterCollection() charcol.read(path) path2 = os.path.join(self.currdir, "data", "collection", 
"test2.charcol") charcol2 = CharacterCollection() charcol2.read(path2) charcol3 = charcol + charcol2 self.assertEquals(charcol3.get_set_list(), ["一", "三", "二", "四", "a", "b", "c", "d"]) self.assertEquals(len(charcol3.get_characters("一")), 3) self.assertEquals(len(charcol3.get_characters("三")), 2) self.assertEquals(len(charcol3.get_characters("二")), 1) self.assertEquals(len(charcol3.get_characters("四")), 0) self.assertEquals(len(charcol3.get_characters("a")), 3) self.assertEquals(len(charcol3.get_characters("b")), 2) self.assertEquals(len(charcol3.get_characters("c")), 1) self.assertEquals(len(charcol3.get_characters("d")), 0) def testFromCharDirRecursive(self): directory = os.path.join(self.currdir, "data") charcol = CharacterCollection.from_character_directory(directory, check_duplicate=True) self.assertEquals(charcol.get_set_list(), ["防", "三", "一", "二"]) self.assertEquals(len(charcol.get_characters("一")), 3) self.assertEquals(len(charcol.get_characters("三")), 2) self.assertEquals(len(charcol.get_characters("二")), 1) self.assertEquals(len(charcol.get_characters("防")), 1) def testFromCharDirNotRecursive(self): directory = os.path.join(self.currdir, "data") charcol = CharacterCollection.from_character_directory(directory, recursive=False, check_duplicate=True) self.assertEquals(charcol.get_set_list(), ["防"]) self.assertEquals(len(charcol.get_characters("防")), 1) def testIncludeChars(self): self.cc.include_characters_from_text("一三") self.assertEquals(self.cc.get_set_list(), ["一", "三"]) def testExcludeChars(self): self.cc.exclude_characters_from_text("三") self.assertEquals(self.cc.get_set_list(), ["一", "二"]) def testProxy(self): char = self.cc.get_all_characters()[0] writing = char.get_writing() writing.normalize() strokes = writing.get_strokes(full=True) stroke = strokes[0] stroke.smooth() p = stroke[0] p.x = 10 char2 = self.cc.get_all_characters()[0] self.assertEquals(char, char2) def testNoProxy(self): self.cc.WRITE_BACK = False char = 
self.cc.get_all_characters()[0] writing = char.get_writing() writing.normalize() strokes = writing.get_strokes(full=True) stroke = strokes[0] stroke.smooth() p = stroke[0] p.x = 10 char2 = self.cc.get_all_characters()[0] self.assertNotEqual(char, char2) # manually update the object self.cc.update_character_object(char) char2 = self.cc.get_all_characters()[0] self.assertEquals(char, char2) def testAddSet(self): self.cc.add_set("toto") self.assertEquals(self.cc.get_set_list()[-1], "toto") def testRemoveSet(self): before = self.cc.get_set_list() self.cc.remove_set(before[-1]) after = self.cc.get_set_list() self.assertEquals(len(before)-1, len(after)) self.assertEquals(before[0:-1], after) def testGetNSets(self): self.assertEquals(len(self.cc.get_set_list()), self.cc.get_n_sets()) self.assertEquals(4, self.cc.get_n_sets()) def testGetTotalNCharacters(self): self.assertEquals(len(self.cc.get_all_characters()), self.cc.get_total_n_characters()) self.assertEquals(6, self.cc.get_total_n_characters()) def testGetNCharacters(self): for set_name in self.cc.get_set_list(): self.assertEquals(len(self.cc.get_characters(set_name)), self.cc.get_n_characters(set_name)) self.assertEquals(self.cc.get_n_characters("一"), 3) self.assertEquals(self.cc.get_n_characters("三"), 2) self.assertEquals(self.cc.get_n_characters("二"), 1) def testSetCharacters(self): before = self.cc.get_characters("一")[0:2] self.cc.set_characters("一", before) after = self.cc.get_characters("一") self.assertEquals(before, after) def testAppendCharacter(self): len_before = len(self.cc.get_characters("一")) self.cc.append_character("一", self.c) len_after = len(self.cc.get_characters("一")) self.assertEquals(len_before + 1, len_after) def testInsertCharacter(self): before = self.cc.get_characters("一")[0] len_before = len(self.cc.get_characters("一")) self.cc.insert_character("一", 0, self.c) after = self.cc.get_characters("一")[0] self.assertNotEqual(before, after) len_after = len(self.cc.get_characters("一")) 
self.assertEqual(len_before+1, len_after) def testReplaceCharacter(self): before = self.cc.get_characters("一")[0] len_before = len(self.cc.get_characters("一")) self.cc.replace_character("一", 0, self.c) after = self.cc.get_characters("一")[0] self.assertNotEqual(before, after) len_after = len(self.cc.get_characters("一")) self.assertEqual(len_before, len_after) def testRemoveCharacter(self): before = self.cc.get_characters("一")[0] len_before = len(self.cc.get_characters("一")) self.cc.remove_character("一", 0) after = self.cc.get_characters("一")[0] self.assertNotEqual(before, after) len_after = len(self.cc.get_characters("一")) self.assertEqual(len_before-1, len_after) def testRemoveLastCharacter(self): before = self.cc.get_characters("一")[-1] len_before = len(self.cc.get_characters("一")) self.cc.remove_last_character("一") after = self.cc.get_characters("一")[-1] self.assertNotEqual(before, after) len_after = len(self.cc.get_characters("一")) self.assertEqual(len_before-1, len_after) def testRemoveSamples(self): self.cc.remove_samples(keep_at_most=2) self.assertEquals(self.cc.get_n_characters("一"), 2) self.assertEquals(self.cc.get_n_characters("三"), 2) self.assertEquals(self.cc.get_n_characters("二"), 1) self.cc.remove_samples(keep_at_most=1) self.assertEquals(self.cc.get_n_characters("一"), 1) self.assertEquals(self.cc.get_n_characters("三"), 1) self.assertEquals(self.cc.get_n_characters("二"), 1) def testRemoveEmptySets(self): self.cc.remove_empty_sets() self.assertEquals(self.cc.get_set_list(), ["一", "三", "二"]) tegaki-python-0.3.1/tests/minjson.py0000644000175000017500000003471611342122457017355 0ustar mathieumathieu############################################################################## # # Copyright (c) 2006 Zope Corporation and Contributors. # All Rights Reserved. # # This software is subject to the provisions of the Zope Public License, # Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. 
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS # FOR A PARTICULAR PURPOSE. # ############################################################################## # minjson.py # reads minimal javascript objects. # str's objects and fixes the text to write javascript. #UNICODE USAGE: Minjson tries hard to accommodate naive usage in a #"Do what I mean" manner. Real applications should handle unicode separately. # The "right" way to use minjson in an application is to provide minjson a # python unicode string for reading and accept a unicode output from minjson's # writing. That way, the assumptions for unicode are yours and not minjson's. # That said, the minjson code has some (optional) unicode handling that you # may look at as a model for the unicode handling your application may need. # Thanks to Patrick Logan for starting the json-py project and making so many # good test cases. # Additional thanks to Balazs Ree for replacing the writing module. # Jim Washington 6 Dec 2006. # 2006-12-06 Thanks to Koen van de Sande, now handles the case where someone # might want e.g., a literal "\n" in text not a new-line. # 2005-12-30 writing now traverses the object tree instead of relying on # str() or unicode() # 2005-10-10 on reading, looks for \\uxxxx and replaces with u'\uxxxx' # 2005-10-09 now tries hard to make all strings unicode when reading. # 2005-10-07 got rid of eval() completely, makes object as found by the # tokenizer. # 2005-09-06 imported parsing constants from tokenize; they changed a bit from # python2.3 to 2.4 # 2005-08-22 replaced the read sanity code # 2005-08-21 Search for exploits on eval() yielded more default bad operators. 
# 2005-08-18 Added optional code from Koen van de Sande to escape # outgoing unicode chars above 128 from re import compile, sub, search, DOTALL from token import ENDMARKER, NAME, NUMBER, STRING, OP, ERRORTOKEN from tokenize import tokenize, TokenError, NL #Usually, utf-8 will work, set this to utf-16 if you dare. emergencyEncoding = 'utf-8' class ReadException(Exception): pass class WriteException(Exception): pass ################################# # read JSON object # ################################# slashstarcomment = compile(r'/\*.*?\*/',DOTALL) doubleslashcomment = compile(r'//.*\n') unichrRE = compile(r"\\u[0-9a-fA-F]{4,4}") def unichrReplace(match): return unichr(int(match.group()[2:],16)) escapeStrs = (('\n',r'\n'),('\b',r'\b'), ('\f',r'\f'),('\t',r'\t'),('\r',r'\r'), ('"',r'\"') ) class DictToken: __slots__=[] pass class ListToken: __slots__=[] pass class ColonToken: __slots__=[] pass class CommaToken: __slots__=[] pass class JSONReader(object): """raise SyntaxError if it is not JSON, and make the object available""" def __init__(self,data): self.stop = False #make an iterator of data so that next() works in tokenize. 
self._data = iter([data]) self.lastOp = None self.objects = [] self.tokenize() def tokenize(self): try: tokenize(self._data.next,self.readTokens) except TokenError: raise SyntaxError def resolveList(self): #check for empty list if isinstance(self.objects[-1],ListToken): self.objects[-1] = [] return theList = [] commaCount = 0 try: item = self.objects.pop() except IndexError: raise SyntaxError while not isinstance(item,ListToken): if isinstance(item,CommaToken): commaCount += 1 else: theList.append(item) try: item = self.objects.pop() except IndexError: raise SyntaxError if not commaCount == (len(theList) -1): raise SyntaxError theList.reverse() item = theList self.objects.append(item) def resolveDict(self): theList = [] #check for empty dict if isinstance(self.objects[-1], DictToken): self.objects[-1] = {} return #not empty; must have at least three values try: #value (we're going backwards!) value = self.objects.pop() except IndexError: raise SyntaxError try: #colon colon = self.objects.pop() if not isinstance(colon, ColonToken): raise SyntaxError except IndexError: raise SyntaxError try: #key key = self.objects.pop() if not isinstance(key,basestring): raise SyntaxError except IndexError: raise SyntaxError #salt the while comma = value while not isinstance(comma,DictToken): # store the value theList.append((key,value)) #do it again... 
try: #might be a comma comma = self.objects.pop() except IndexError: raise SyntaxError if isinstance(comma,CommaToken): #if it's a comma, get the values try: value = self.objects.pop() except IndexError: #print self.objects raise SyntaxError try: colon = self.objects.pop() if not isinstance(colon, ColonToken): raise SyntaxError except IndexError: raise SyntaxError try: key = self.objects.pop() if not isinstance(key,basestring): raise SyntaxError except IndexError: raise SyntaxError theDict = {} for k in theList: theDict[k[0]] = k[1] self.objects.append(theDict) def readTokens(self,type, token, (srow, scol), (erow, ecol), line): # UPPERCASE consts from tokens.py or tokenize.py if type == OP: if token not in "[{}],:-": raise SyntaxError else: self.lastOp = token if token == '[': self.objects.append(ListToken()) elif token == '{': self.objects.append(DictToken()) elif token == ']': self.resolveList() elif token == '}': self.resolveDict() elif token == ':': self.objects.append(ColonToken()) elif token == ',': self.objects.append(CommaToken()) elif type == STRING: tok = token[1:-1] parts = tok.split("\\\\") for k in escapeStrs: if k[1] in tok: parts = [part.replace(k[1],k[0]) for part in parts] self.objects.append("\\".join(parts)) elif type == NUMBER: if self.lastOp == '-': factor = -1 else: factor = 1 try: self.objects.append(factor * int(token)) except ValueError: self.objects.append(factor * float(token)) elif type == NAME: try: self.objects.append({'true':True, 'false':False,'null':None}[token]) except KeyError: raise SyntaxError elif type == ENDMARKER: pass elif type == NL: pass elif type == ERRORTOKEN: if ecol == len(line): #it's a char at the end of the line. (mostly) harmless. 
pass else: raise SyntaxError else: raise SyntaxError def output(self): try: assert len(self.objects) == 1 except AssertionError: raise SyntaxError return self.objects[0] def safeRead(aString, encoding=None): """read the js, first sanitizing a bit and removing any c-style comments If the input is a unicode string, great. That's preferred. If the input is a byte string, strings in the object will be produced as unicode anyway. """ # get rid of trailing null. Konqueror appends this. CHR0 = chr(0) while aString.endswith(CHR0): aString = aString[:-1] # strip leading and trailing whitespace aString = aString.strip() # zap /* ... */ comments aString = slashstarcomment.sub('',aString) # zap // comments aString = doubleslashcomment.sub('',aString) # detect and handle \\u unicode characters. Note: This has the side effect # of converting the entire string to unicode. This is probably OK. unicodechars = unichrRE.search(aString) if unicodechars: aString = unichrRE.sub(unichrReplace, aString) #if it's already unicode, we won't try to decode it if isinstance(aString, unicode): s = aString else: if encoding: # note: no "try" here. the encoding provided must work for the # incoming byte string. UnicodeDecode error will be raised # in that case. Often, it will be best not to provide the encoding # and allow the default s = unicode(aString, encoding) #print "decoded %s from %s" % (s,encoding) else: # let's try to decode to unicode in system default encoding try: s = unicode(aString) #import sys #print "decoded %s from %s" % (s,sys.getdefaultencoding()) except UnicodeDecodeError: # last choice: handle as emergencyEncoding enc = emergencyEncoding s = unicode(aString, enc) #print "%s decoded from %s" % (s, enc) # parse and get the object. 
try: data = JSONReader(s).output() except SyntaxError: raise ReadException, 'Unacceptable JSON expression: %s' % aString return data read = safeRead ################################# # write object as JSON # ################################# import re, codecs from cStringIO import StringIO ### Codec error handler def jsonreplace_handler(exc): '''Error handler for json If encoding fails, \\uxxxx must be emitted. This is similar to the "backshashreplace" handler, only that we never emit \\xnn since this is not legal according to the JSON syntax specs. ''' if isinstance(exc, UnicodeEncodeError): part = exc.object[exc.start] # repr(part) will convert u'\unnnn' to u'u\\nnnn' return u'\\u%04x' % ord(part), exc.start+1 else: raise exc # register the error handler codecs.register_error('jsonreplace', jsonreplace_handler) ### Writer def write(input, encoding='utf-8', outputEncoding=None): writer = JsonWriter(input_encoding=encoding, output_encoding=outputEncoding) writer.write(input) return writer.getvalue() re_strmangle = re.compile('"|\b|\f|\n|\r|\t|\\\\') def func_strmangle(match): return { '"': '\\"', '\b': '\\b', '\f': '\\f', '\n': '\\n', '\r': '\\r', '\t': '\\t', '\\': '\\\\', }[match.group(0)] def strmangle(text): return re_strmangle.sub(func_strmangle, text) class JsonStream(object): def __init__(self): self.buf = [] def write(self, text): self.buf.append(text) def getvalue(self): return ''.join(self.buf) class JsonWriter(object): def __init__(self, stream=None, input_encoding='utf-8', output_encoding=None): ''' - stream is optional, if specified must also give output_encoding - The input strings can be unicode or in input_encoding - output_encoding is optional, if omitted, result will be unicode ''' if stream is not None: if output_encoding is None: raise WriteException, 'If a stream is given, output encoding must also be provided' else: stream = JsonStream() self.stream = stream self.input_encoding = input_encoding self.output_encoding = output_encoding def 
write(self, obj): if isinstance(obj, (list, tuple)): self.stream.write('[') first = True for elem in obj: if first: first = False else: self.stream.write(',') self.write(elem) self.stream.write(']'), elif isinstance(obj, dict): self.stream.write('{') first = True for key, value in obj.iteritems(): if first: first = False else: self.stream.write(',') self.write(key) self.stream.write(':') self.write(value) self.stream.write('}') elif obj is True: self.stream.write('true') elif obj is False: self.stream.write('false') elif obj is None: self.stream.write('null') elif not isinstance(obj, basestring): # if we are not baseobj, convert to it try: obj = str(obj) except Exception, exc: raise WriteException, 'Cannot write object (%s: %s)' % (exc.__class__, exc) self.stream.write(obj) else: # convert to unicode first if not isinstance(obj, unicode): try: obj = unicode(obj, self.input_encoding) except (UnicodeDecodeError, UnicodeTranslateError): obj = unicode(obj, 'utf-8', 'replace') # do the mangling obj = strmangle(obj) # make the encoding if self.output_encoding is not None: obj = obj.encode(self.output_encoding, 'jsonreplace') self.stream.write('"') self.stream.write(obj) self.stream.write('"') def getvalue(self): return self.stream.getvalue() tegaki-python-0.3.1/tests/test_arrayutils.pyc0000644000175000017500000000663411342122457021277 0ustar mathieumathieu gJc@s3ddkZddkTdeifdYZdS(iN(t*t ArrayTestcBsYeZdZdZdZdZdZdZdZdZ dZ RS( cCsdx]ddgddggddddgfgggffD]"\}}|it||q:WdS(Niiii(t assertEqualst array_flatten(tselftarrtexpected((sI/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_arrayutils.pyttestArrayFlattens ' cCsxddddgddgddggdfggdfdddgddggdffD](\}}}|it|||q[WdS(Niiii(Rt array_reshape(RRRtn((sI/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_arrayutils.pyttestArrayReshape%s * "cCsddgddgddgddgd d gd d gg}ddgddggddgddggd d gd d ggg}|it|d|dS( Niiiiiiiii i i i (Rt 
array_split(RRR((sI/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_arrayutils.pyttestArraySplit/s<EcCs2ddddg}d}|it||dS(Niiiig@(Rt array_mean(RRR((sI/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_arrayutils.pyt testArrayMean5scCs2ddddg}d}|it||dS(Niiiig?(Rtarray_variance(RRR((sI/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_arrayutils.pyttestArrayVariance;scCs>ddgddgg}ddg}|it||dS(Niiii(Rtarray_mean_vector(RRR((sI/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_arrayutils.pyttestArrayMeanVectorAs cCs>ddgddgg}ddg}|it||dS(Niiiig?(Rtarray_variance_vector(RRR((sI/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_arrayutils.pyttestArrayVarianceVectorGs cCsAddg}ddg}ddg}|it|||dS(Niiiii(Rt array_add(Rtarr1tarr2R((sI/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_arrayutils.pyt testArrayAddMs   cCsAddg}ddg}ddg}|it|||dS(Niiiii(Rt array_mul(RRRR((sI/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_arrayutils.pyt testArrayMulTs   ( t__name__t __module__RR R RRRRRR(((sI/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_arrayutils.pyRs       (tunittestttegaki.arrayutilstTestCaseR(((sI/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_arrayutils.pyss  tegaki-python-0.3.1/tests/mynewfile.xml.bz20000644000175000017500000000020511342122457020525 0ustar mathieumathieuBZh91AY&SY~߀PG. 
u T#@ JPѴBz#L!mhFC2gd?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{d|d}d~dddddddddddddddddddddddddddddddddddddddddddddddddddddddddgyddddddddddddddddddgdddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddg=ddd d d d d ddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYgSdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{d|d}d~ddddddddddddddddddddddddddddddddddddddddddddddddddddgYg|_dS(Niiiiiiiiiiiiiiiiiiii%i"i(i,i/i2i6i9i<i@iCiFiJiMiTiZi^idihiniuixi|iiiii iiiiiiiiiiiiiPiiiiiiiiiiiiiiiiiiiiiii iiiiii"i)i,i0i3i6i=i@iDiGiJiiNiQiTiXi[iiiiWiaikiriiiiiiii:i^ieihiliriyiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii i&ibi|iiiiiivioii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(i%i"(i(i"(i,i"(i/i"(i2i"(i6i"(i9i"(i<i%(i@i%(i@i((i@i,(i<i/(i<i2(i<i6(i<i9(i9i<(i9i@(i6iC(i6iF(i2iJ(i2iM(i/iT(i,iZ(i,i^(i,id(i(ih(i%in(i"in(iiu(iix(ii|(ii(ii(ii(ii(ii(i i(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(i"i(i(i(i,i(i2i(i6i(i9i(i<i(i<i(i@i(iCi(iFi(iFi(iJi(iMi(iMi(iPi(iPi(iPi(iPi(iPi(iPi(iPi(iPi(iPi(iPi(iPi(iPi(iMi(iMi(iMi(iJi(iJi(iFi(iFi(iFi(iCi(iCi(i@i(i<i(i<i (i9i(i6i(i2i(i2i(i/i(i,i(i,i(i(i"(i%i"(i"i)(ii,(ii,(ii0(ii3(ii6(i i6(ii=(ii=(ii@(ii@(iiD(iiG(iiG(iiJ(iiJ(iiN(iiN(iiQ(iiT(iiT(iiT(iiX(iiX(iiX(ii[(ii[(ii[(ii(ii(ii(ii(ii"(ii%(ii((ii,(ii,(ii2(ii6(ii<(ii@(iiC(iiF(iiM(iiP(iiT(iiW(iiZ(iia(iid(iih(iik(iik(iir(iiu(iix(ii|(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii"(ii)(ii,(ii0(ii6(ii:(ii=(ii@(iiD(iiG(iiN(iiQ(iiT(ii[(ii^(iie(iih(iil(iir(iiy(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii (ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii (ii(ii(iri(iui(ixi(i|i(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(i 
i(ii(ii(ii(ii(i"i(i&i(iGi(iJi(iQi(iTi(iXi(i^i(ibi(ihi(ili(iri(iyi(i|i(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(iir(iiu(iiu(i"iu(i&iu(i:i|(i=i|(iDi|(iXi(i[i(iei(ihi(iri(ivi(i|i(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii"(ii)(ii,(ii3(ii6(ii:(ii=(ii@(iiG(iiJ(iiN(iiQ(iiT(iiX(i|i[(i|i^(iyi^(iyib(ivib(irib(ioib(ioie(ilie(ihie(ieie(ibie(i^ie(ii%(ii((ii,(ii/(ii2(ii6(ii<(ii@(iiC(iiJ(iiP(iiW(iiZ(iia(iik(iin(iiu(iix(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii&(ii)(ii0(ii3(ii6(ii@(iiD(iiJ(iiN(ii[(ii^(iie(iil(iir(ii|(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(ii(tostpathtdirnametabspatht__file__tcurrdirtstrokes(tself((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pytsetUp!s! cCs9|i|id|i|i|iidS(Ns防(t assertEqualstget_utf8R t get_writingt get_strokes(R tchar((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyt _testReadXMLusc CsGtii|idd}x ||d|ddfD]}t|}|o|i|ntti d}g}|D]\}}||q}~}x|D]}z-|i |t|} |i || Wdti |Xz*| i t|} |i || Wdti |XqWq6Wt}|i t|i dS( Ntdatas character.xmls.gzips.bz2s.xmls.xml.gzs.xml.bz2(s.xmls.xml.gzs.xml.bz2(RRtjoinR tNoneRRtmapttempfiletmkstemptsaveRtunlinkt assertRaisest ValueError( R tfile_tfRtfilest_[1]tfdRt output_pathstnewchartnewchar2((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestConstructorAndSave{s, '     cCsBtii|idd}t}|i||i|dS(NRs character.xml(RRRR RtreadR(R tfileR((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestReadXMLFiles  cCsHtii|idd}t}|i|dt|i|dS(NRscharacter.xml.gziptgzip(RRRR RR'tTrueR(R R(R((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestReadXMLGzipFiles cCsHtii|idd}t}|i|dt|i|dS(NRscharacter.xml.bz2tbz2(RRRR RR'R+R(R R(R((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestReadXMLBZ2Files 
cCsdtii|idd}t|}|i}|it}|i||i |dS(NRs character.xml( RRRR topenR'tcloseRt read_stringR(R R(RtbufR((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestReadXMLStrings     cCsjtii|idd}t|}|i}|it}|i|dt |i |dS(NRscharacter.xml.gzipR*( RRRR R/R'R0RR1R+R(R R(tstringR((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestReadXMLGzipStrings    cCsjtii|idd}t|}|i}|it}|i|dt |i |dS(NRscharacter.xml.bz2R-( RRRR R/R'R0RR1R+R(R R(R4R((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestReadXMLBZ2Strings    cCs(t}d|_d|_d|_|S(Niii(Rtxtyt timestamp(R tpoint((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyt _getPoints     cCs&|i}|i|iddS(Ns#(R;Rtto_xml(R R:((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestPointToXMLs cCsJ|i}|iti|ihdd<dd<dd<dS(Niuyiu timestampiux(R;RtminjsonR'tto_json(R R:((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestPointToJSONs cCsot}d|_d|_d|_t}d|_d|_d|_t}|i||i||S(Niiiiig?(RR7R8R9tpressureRt append_point(R R:tpoint2tstroke((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyt _getStrokes           cCs,|i}d}|i||idS(Ns_ (RERR<(R RDtexpected((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestStrokeToXMLs cCsz|i}hhdd<dd<ddR'R?(R RDRF((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestStrokeToJSONs /cCst}d|_d|_d|_t}d|_d|_d|_t}|i||i|t}|i||S(Niiiiig?( RR7R8R9RARRBRt append_stroke(R R:RCRDtwriting((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyt _getWritings             cCs,|i}d}|i||idS(Ns1000 1000 (RKRR<(R RJRF((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestWritingToXMLs c Cs|i}hdd<dd<hhdd<dd<dd R'R?(R RJRF((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestWritingToJSON)s OcCs3|i}t}|i||id|S(NtA(RKRt set_writingtset_utf8(R 
RJR((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyt _getCharacter1s     cCsU|i}ti}|i|t}|i|i|i||dS(N(RQtStringIOtwriteRR1tgetvalueR(R Rtiotnew_char((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestWriteXMLFile:s     c Cs|i}hdd<hdd<dd<hhdd<dd <d d R'R?(R RRF((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestCharacterToJSONEs  6cCst}|idd|idd|idd|idd|idd|idd|idd|idd|id d |i}d d d d gddgdddgg}|i||dS(Niiiiiiiii(ii(ii(ii(ii(ii(ii(ii(ii(ii(Rtmove_totline_toRR(R RJR RF((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestNewWritingOs   cCs)t}d|_d|_d|_t}d|_d|_d|_t}|i||i|t}d|_d|_d|_t}d|_d|_d|_t}|i||i||i|idt}|i ||i ||i|iddS( Niiiiiii i( RR7R8R9RRBRt get_durationRRI(R R:RCRDtpoint3tpoint4tstroke2RJ((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyt testDurationes6                         cCsitdddd}tdddd}tdddd}|i||j|i||jdS(NR7iR8ii(Rt assertTruet assertFalse(R tp1tp2tp3((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestPointEqualitys cCs?tdddd}|i|dj|i|djdS(NR7iR8i(RRbRRa(R Rc((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestPointEqualityNonescCs8tdddd}|i}|i||jdS(NR7iR8i(RtcopyRa(R RcRd((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyt testPointCopys cCst}|itdddd|itddddt}|itdddd|itddddt}|itdddd|itdddd|i||j|i||jdS(NR7iR8iii(RRBRRaRb(R ts1ts2ts3((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestStrokeEqualitys   cCskt}|itdddd|itdddd|i|dj|i|djdS(NR7iR8ii(RRBRRbRRa(R Rj((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestStrokeEqualityNones  cCsdt}|itdddd|itdddd|i}|i||jdS(NR7iR8ii(RRBRRhRa(R RjRk((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestStrokeCopys   
cCst}|itdddd|itddddt}|itdddd|itddddt}|i||i|t}|itdddd|itddddt}|itdddd|itddddt}|i||i|t}|itdddd|itddddt}|itdddd|itddddt}|i||i||i|||i||dS(NR7iR8iii(RRBRRRIRtassertNotEqual(R RjRktw1tw2tw3((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestWritingEqualitys:               cCst}|itdddd|itddddt}|itdddd|itddddt}|i||i||i|dj|i|djdS(NR7iR8ii(RRBRRRIRaRRb(R RjRkRq((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestWritingEqualityNones     cCs3t}|i|dj|i|djdS(N(RRaRRb(R tc((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestCharacterEqualityNones cCst}|itdddd|itddddt}|itdddd|itddddt}|i||i||i}|i||jdS(NR7iR8ii(RRBRRRIRhRa(R RjRkRqRr((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestWritingCopys      cCs&|i}|i|iddS(Ni(RKRt get_n_points(R RJ((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestGetNPoints s cCst}|itdddd|itddddt}|itdddd|itddddt}|i||i||id|i|iddggdS( NR7iR8iii(ii(ii(RRBRRRIt remove_strokeRR(R RjRktw((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestRemoveStrokes      cCs+t}|itdddd|itddddt}|itdddd|itddddt}|i||i|t}|itdddd|itdddd|id ||i|id d gd d gddggdS(NR7iR8iiii!i,i(ii(ii(ii!(i!i,(ii(ii(RRBRRRIt insert_strokeRR(R RjRkR|Rl((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestInsertStroke!s      !cCs"t}|itdddd|itddddt}|itdddd|itddddt}|i||i|t}|itdddd|itdddd|id ||i|id d gd d ggdS(NR7iR8iiii!i,i(ii(ii(ii!(i!i,(RRBRRRItreplace_strokeRR(R RjRkR|Rl((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestReplaceStroke7s      cCset}|itdddd|itdddd|i|it|ddS(NR7iR8iii(RRBRtclearRtlen(R Rj((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyttestClearStrokeKs   cCstii|idd}t|}|i}|id}d}yF|iti ||i ti ||i ti |Wn#t j ot i idnXdS(NRs character.xmls s slxml missing! 
(RRRR R/R'R0RaRtvalidateRbtNotImplementedErrortsyststderrRS(R RRR2tinvalidt malformed((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyt testValidateSs    cCstii|idd}t}|i|ttii|idd}|ii}|i|i |i |dS(NRs character.xmlscharacter.sexp( RRRR RR'R/tstripR0Rtto_sexp(R RRtsexp((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyt testToSexpxs  ! (*t__name__t __module__R RR&R)R,R.R3R5R6R;R=R@RERGRHRKRLRMRQRWRXR[R`RfRgRiRmRnRoRtRuRwRxRzR}RRRRR(((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyRsP T  "             %      (         %( tunittestRRRRR>Rttegaki.characterRRRRtTestCaseR(((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_character.pyss      "tegaki-python-0.3.1/tests/test_mathutils.pyc0000644000175000017500000000167211342122457021107 0ustar mathieumathieu gJc@s3ddkZddkTdeifdYZdS(iN(t*tMathTestcBseZdZRS(cCs[xTd d dfd d dffD]4\}}}tt||}|i||qWdS( Nii i iig@ig(ii i (ii i(ii(ii(troundteuclidean_distancet assertEquals(tselftv1tv2texpectedtres((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_mathutils.pyttestEuclideanDistances (t__name__t __module__R (((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_mathutils.pyRs(tunittestttegaki.mathutilstTestCaseR(((sH/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_mathutils.pyss  tegaki-python-0.3.1/tests/charcol.chardb0000644000175000017500000001000011342122457020062 0ustar mathieumathieuSQLite format 3@  ::b7!{indexcharacter_setid_indexcharactersCREATE INDEX character_setid_index ON characters(setid)e!!tablecharacterscharactersCREATE TABLE characters( charid INTEGER PRIMARY KEY, setid INTEGER REFERENCES character_sets, utf8 TEXT, n_strokes INTEGER, data BLOB, -- gz xml sha1 TEXT )x))+tablecharacter_setscharacter_setsCREATE TABLE character_sets( setid INTEGER PRIMARY KEY, name TEXT )   tegaki-python-0.3.1/tests/test_charcol.pyc0000644000175000017500000003221111342122457020501 0ustar mathieumathieu Kc 
@sddkZddkZddkZddkZddklZlZlZlZddk l Z dei fdYZ dS(iN(tPointtStroketWritingt Character(tCharacterCollectiontCharacterCollectionTestcBs eZdZdZdZdZdZdZdZdZ dZ d Z d Z d Z d Zd ZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZRS(cCstiitiit|_tii|iddd}t|_|ii |tii|idd}t |_ |i i |dS(Ntdatat collections test.charcols character.xml( tostpathtdirnametabspatht__file__tcurrdirtjoinRtcctreadRtc(tselfR tf((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pytsetUp s!  cCstii|iddd}t|}|i}|id}d}yF|iti ||i ti ||i ti |Wn#t j ot i idnXdS(NRRs test.charcols s6 slxml missing! (RR RR topenRtcloset assertTrueRtvalidatet assertFalsetNotImplementedErrortsyststderrtwrite(RR Rtbuftinvalidt malformed((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyt testValidate)s   c CsW|i|iddddgh}xZddddd d gD]@}t||<||itii|id d |d qAW|i|id|d|d|dg|i|id|d|d g|i|id|d g|i|idg|i|i |d|d|d|d|d |d gdS(Ns一s三s二s四t19968_1t19968_2t19968_3t19977_1t19977_2t20108_1RRs.xml( t assertEqualst get_set_listRRRR RR tget_characterstget_all_characters(RtcharcolRtk((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyt _testReadXMLIs$" cCs|i|idS(N(R.R(R((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyttestReadXMLFile^scCsat}|i|ii|i|ii|i|i|ii|idS(N(Rt read_stringRtto_xmlR(R)R+(Rtcharcol2((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyt testToXMLas  cCsmt}|i|iidtdt|i|ii|i|i|ii|idS(Ntgzip(RR0Rt write_stringtTrueR(R)R+(RR2((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyttestWriteGzipStringhs  "cCsmt}|i|iidtdt|i|ii|i|i|ii|idS(Ntbz2(RR0RR5R6R(R)R+(RR2((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyttestWriteBz2Stringos  "cCstii|iddd}t}|i|t}|i||i|dt}|i|i ddddg|it |i dd |it |i dd |it |i dd |it |i dd dS( NRRs test.charcoltcheck_duplicates一s三s二s四iiii( RR RR RRt concatenateR6R(R)tlenR*(RR R,R2tcharcol3((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyt testAddSamevs    "c 
Cs|iid}|i|iiddd|dd!|i|iiddd|d|i|iiddddd|dd!dS(Ns一tlimitiitoffseti(RR*R((Rtall_((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyt testGetCharss )&!c Cstii|iddd}t}|i|tii|iddd}t}|i|||}|i|iddddd d d d g|it|i dd |it|i dd|it|i dd|it|i dd|it|i d d |it|i d d|it|i d d|it|i d ddS(NRRs test.charcols test2.charcols一s三s二s四tatbRtdiiii( RR RR RRR(R)R<R*(RR R,tpath2R2R=((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyttestAdds$      cCstii|id}ti|dt}|i|iddddg|it |i dd|it |i dd|it |i dd |it |i dd dS( NRR:s防s三s一s二iii( RR RR Rtfrom_character_directoryR6R(R)R<R*(Rt directoryR,((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyttestFromCharDirRecursives  "cCsotii|id}ti|dtdt}|i|i dg|it |i dddS(NRt recursiveR:s防i( RR RR RRHtFalseR6R(R)R<R*(RRIR,((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyttestFromCharDirNotRecursives  cCs3|iid|i|iiddgdS(Ns一三s一s三(Rtinclude_characters_from_textR(R)(R((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyttestIncludeCharsscCs3|iid|i|iiddgdS(Ns三s一s二(Rtexclude_characters_from_textR(R)(R((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyttestExcludeCharsscCs|iid}|i}|i|idt}|d}|i|d}d|_|iid}|i||dS(Nitfulli ( RR+t get_writingt normalizet get_strokesR6tsmoothtxR((Rtchartwritingtstrokeststroketptchar2((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyt testProxys      cCst|i_|iid}|i}|i|idt}|d}|i|d}d|_ |iid}|i |||ii ||iid}|i ||dS(NiRRi ( RLRt WRITE_BACKR+RSRTRUR6RVRWtassertNotEqualtupdate_character_objectR((RRXRYRZR[R\R]((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyt testNoProxys       cCs1|iid|i|iidddS(Nttotoi(Rtadd_setR(R)(R((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyt testAddSetscCsm|ii}|ii|d|ii}|it|dt||i|dd!|dS(Niii(RR)t remove_setR(R<(Rtbeforetafter((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyt testRemoveSets  
cCsE|it|ii|ii|id|iidS(Ni(R(R<RR)t get_n_sets(R((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyt testGetNSetss(cCsE|it|ii|ii|id|iidS(Ni(R(R<RR+tget_total_n_characters(R((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyttestGetTotalNCharactersscCsxE|iiD]4}|it|ii||ii|qW|i|iidd|i|iidd|i|iidddS(Ns一is三is二i(RR)R(R<R*tget_n_characters(Rtset_name((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyttestGetNCharactersscCsR|iiddd!}|iid||iid}|i||dS(Ns一ii(RR*tset_charactersR((RRgRh((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyttestSetCharactersscCs^t|iid}|iid|it|iid}|i|d|dS(Ns一i(R<RR*tappend_characterRR((Rt len_beforet len_after((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyttestAppendCharacterscCs|iidd}t|iid}|iidd|i|iidd}|i||t|iid}|i|d|dS(Ns一ii(RR*R<tinsert_characterRR`t assertEqual(RRgRtRhRu((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyttestInsertCharacterscCs|iidd}t|iid}|iidd|i|iidd}|i||t|iid}|i||dS(Ns一i(RR*R<treplace_characterRR`Rx(RRgRtRhRu((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyttestReplaceCharacter scCs|iidd}t|iid}|iidd|iidd}|i||t|iid}|i|d|dS(Ns一ii(RR*R<tremove_characterR`Rx(RRgRtRhRu((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyttestRemoveCharacterscCs|iidd}t|iid}|iid|iidd}|i||t|iid}|i|d|dS(Ns一ii(RR*R<tremove_last_characterR`Rx(RRgRtRhRu((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyttestRemoveLastCharacter#scCs|iidd|i|iidd|i|iidd|i|iidd|iidd|i|iidd|i|iidd|i|iidddS(Nt keep_at_mostis一s三s二i(Rtremove_samplesR(Rn(R((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyttestRemoveSamples.scCs3|ii|i|iidddgdS(Ns一s三s二(Rtremove_empty_setsR(R)(R((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyttestRemoveEmptySets9s (t__name__t 
__module__RR!R.R/R3R7R9R>RBRGRJRMRORQR^RbReRiRkRmRpRrRvRyR{R}RRR(((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyRs:                  ( tunittestRRtStringIOttegaki.characterRRRRttegaki.charcolRtTestCaseR(((sF/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/test_charcol.pyss    "tegaki-python-0.3.1/tests/test_character.py0000644000175000017500000005076011342122457020670 0ustar mathieumathieu# -*- coding: utf-8 -*- # Copyright (C) 2008 The Tegaki project contributors # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# Contributors to this file: # - Mathieu Blondel import unittest import os import sys import StringIO import minjson import tempfile from tegaki.character import Point, Stroke, Writing, Character class CharacterTest(unittest.TestCase): def setUp(self): self.currdir = os.path.dirname(os.path.abspath(__file__)) self.strokes = [[(210, 280), (213, 280), (216, 280), (220, 280), (223, 280), (226, 280), (230, 280), (233, 280), (236, 280), (243, 280), (246, 280), (253, 280), (260, 280), (263, 280), (270, 283), (276, 283), (283, 286), (286, 286), (293, 290), (296, 290), (300, 290), (303, 290), (306, 290), (310, 290), (313, 290), (316, 293), (320, 293), (320, 296), (320, 300), (316, 303), (316, 306), (316, 310), (316, 313), (313, 316), (313, 320), (310, 323), (310, 326), (306, 330), (306, 333), (303, 340), (300, 346), (300, 350), (300, 356), (296, 360), (293, 366), (290, 366), (286, 373), (283, 376), (280, 380), (276, 386), (273, 386), (273, 390), (270, 390), (270, 396), (266, 396), (263, 400), (260, 403), (256, 406), (253, 406), (253, 410), (250, 410), (250, 413), (246, 413), (250, 413), (253, 413), (256, 413), (260, 413), (263, 413), (286, 423), (290, 423), (296, 423), (300, 423), (306, 426), (310, 426), (313, 426), (316, 426), (316, 430), (320, 430), (323, 430), (326, 430), (326, 433), (330, 433), (333, 433), (333, 436), (336, 436), (336, 440), (336, 443), (336, 446), (336, 450), (336, 453), (336, 456), (336, 460), (336, 463), (336, 466), (336, 470), (336, 476), (333, 480), (333, 483), (333, 486), (330, 490), (330, 496), (326, 496), (326, 500), (326, 503), (323, 506), (323, 510), (320, 516), (316, 520), (316, 523), (313, 526), (310, 526), (306, 530), (306, 533), (303, 536), (300, 536), (300, 540), (296, 546), (293, 546), (290, 553), (286, 556), (283, 556), (283, 560), (276, 563), (270, 566), (266, 566), (263, 573), (260, 573), (256, 576), (253, 576), (250, 580), (250, 583), (246, 583), (243, 586), (240, 586), (240, 590), (236, 590), (233, 593), (230, 596), (226, 596), 
(220, 596), (220, 600), (216, 600), (213, 600), (210, 603), (206, 603), (203, 603)], [(200, 276), (200, 280), (200, 283), (200, 286), (200, 290), (203, 293), (203, 296), (203, 300), (206, 300), (206, 306), (206, 310), (206, 316), (206, 320), (210, 323), (210, 326), (210, 333), (210, 336), (210, 340), (210, 343), (210, 346), (213, 353), (213, 356), (213, 360), (213, 363), (216, 363), (216, 370), (216, 373), (216, 376), (220, 380), (220, 386), (220, 393), (220, 396), (220, 400), (220, 403), (220, 410), (220, 416), (220, 420), (220, 423), (220, 426), (220, 433), (220, 436), (220, 443), (220, 446), (220, 450), (220, 453), (220, 460), (220, 466), (220, 470), (220, 473), (220, 476), (220, 483), (220, 486), (220, 490), (220, 493), (220, 496), (220, 500), (220, 503), (220, 506), (220, 510), (220, 513), (220, 516), (220, 520), (220, 526), (220, 530), (223, 536), (223, 540), (223, 543), (223, 546), (223, 553), (223, 556), (223, 560), (223, 566), (223, 570), (223, 573), (223, 576), (223, 580), (223, 583), (223, 590), (223, 593), (223, 596), (223, 603), (223, 606), (223, 613), (223, 616), (223, 620), (223, 626), (223, 633), (223, 640), (223, 643), (223, 650), (223, 653), (223, 660), (223, 663), (223, 666), (223, 673), (223, 676), (223, 683), (223, 686), (223, 690), (223, 693), (223, 696), (223, 700), (223, 706), (223, 710), (223, 713), (223, 720), (223, 723), (223, 726), (223, 730), (223, 736), (223, 740), (223, 746), (223, 750), (223, 753), (223, 756), (223, 760), (223, 763), (223, 766), (223, 773), (223, 776), (223, 780)], [(493, 216), (493, 220), (496, 223), (496, 226), (496, 230), (500, 233), (500, 236), (500, 240), (500, 243), (503, 246), (503, 250), (506, 253), (506, 256), (506, 260), (510, 263), (510, 266), (510, 270), (510, 273)], [(370, 283), (373, 283), (376, 283), (380, 283), (386, 283), (390, 283), (400, 283), (403, 283), (413, 283), (423, 283), (426, 283), (436, 283), (443, 283), (450, 283), (456, 283), (466, 283), (470, 283), (476, 283), (486, 283), (493, 283), 
(500, 283), (503, 283), (513, 283), (516, 283), (523, 283), (526, 283), (533, 283), (536, 283), (540, 283), (546, 283), (550, 283), (583, 283), (586, 283), (593, 283), (596, 283), (600, 283), (606, 283), (610, 283), (616, 283), (620, 283), (626, 283), (633, 283), (636, 283), (643, 283), (646, 283), (650, 283), (653, 283), (656, 283), (663, 283), (666, 283), (670, 283), (673, 283), (676, 283), (680, 283), (683, 283), (686, 283), (690, 283), (693, 283), (696, 283), (700, 283), (703, 286)], [(530, 370), (536, 373), (540, 373), (546, 373), (550, 373), (570, 380), (573, 380), (580, 380), (600, 386), (603, 386), (613, 386), (616, 386), (626, 386), (630, 386), (636, 386), (640, 386), (646, 386), (650, 386), (653, 386), (656, 386), (656, 390), (656, 393), (660, 396), (660, 400), (660, 403), (660, 406), (663, 410), (663, 413), (663, 416), (663, 420), (663, 423), (663, 426), (663, 430), (663, 433), (663, 436), (663, 440), (663, 446), (663, 450), (663, 456), (663, 460), (663, 463), (663, 470), (663, 473), (663, 480), (663, 483), (663, 490), (660, 496), (660, 500), (660, 506), (660, 510), (660, 516), (656, 520), (656, 526), (656, 530), (656, 536), (656, 543), (656, 546), (656, 553), (653, 556), (653, 563), (653, 566), (653, 570), (650, 573), (650, 576), (646, 583), (646, 586), (643, 590), (643, 593), (640, 596), (640, 600), (636, 603), (636, 606), (633, 606), (633, 610), (630, 610), (626, 610), (623, 610), (623, 613), (620, 613), (616, 613), (613, 613), (610, 613), (606, 613)], [(490, 293), (490, 296), (490, 300), (490, 303), (490, 306), (490, 310), (490, 316), (490, 320), (493, 323), (493, 330), (493, 336), (493, 343), (493, 346), (493, 353), (493, 363), (493, 366), (493, 373), (493, 376), (493, 386), (493, 390), (493, 396), (493, 403), (493, 406), (493, 413), (493, 416), (493, 423), (493, 426), (493, 433), (493, 436), (493, 443), (493, 453), (493, 456), (493, 463), (493, 470), (486, 490), (486, 520), (483, 530), (483, 530), (483, 540), (480, 543), (480, 550), (480, 553), 
(476, 560), (476, 563), (476, 566), (476, 576), (476, 580), (473, 586), (473, 590), (460, 603), (460, 606), (460, 613), (460, 620), (456, 626), (456, 636), (456, 640), (453, 646), (453, 650), (453, 656), (453, 660), (450, 666), (450, 673), (446, 676), (443, 680), (443, 683), (443, 686), (440, 690), (440, 696), (436, 696), (436, 703), (433, 706), (433, 713), (430, 716), (430, 720), (426, 723), (426, 726), (423, 730), (420, 736), (420, 740), (420, 743), (420, 746), (416, 746), (416, 750), (416, 753), (413, 756), (413, 760), (410, 760), (410, 763), (406, 763)]] def _testReadXML(self, char): self.assertEquals(char.get_utf8(), "防") self.assertEquals(self.strokes, char.get_writing().get_strokes()) def testConstructorAndSave(self): file_ = os.path.join(self.currdir, "data", "character.xml") for f in (file_, file_ + ".gzip", file_ + ".bz2", None): char = Character(f) if f: self._testReadXML(char) # check that it is correctly loaded files = map(tempfile.mkstemp, (".xml", ".xml.gz", ".xml.bz2")) output_paths = [path for fd,path in files] for path in output_paths: try: # check that save with a path argument works char.save(path) newchar = Character(path) self.assertEquals(char, newchar) finally: os.unlink(path) try: # check that save with a path argument works newchar.save() newchar2 = Character(path) self.assertEquals(char, newchar2) finally: os.unlink(path) char = Character() self.assertRaises(ValueError, char.save) def testReadXMLFile(self): file = os.path.join(self.currdir, "data", "character.xml") char = Character() char.read(file) self._testReadXML(char) def testReadXMLGzipFile(self): file = os.path.join(self.currdir, "data", "character.xml.gzip") char = Character() char.read(file, gzip=True) self._testReadXML(char) def testReadXMLBZ2File(self): file = os.path.join(self.currdir, "data", "character.xml.bz2") char = Character() char.read(file, bz2=True) self._testReadXML(char) def testReadXMLString(self): file = os.path.join(self.currdir, "data", "character.xml") f = 
open(file) buf = f.read() f.close() char = Character() char.read_string(buf) self._testReadXML(char) def testReadXMLGzipString(self): file = os.path.join(self.currdir, "data", "character.xml.gzip") file = open(file) string = file.read() file.close() char = Character() char.read_string(string, gzip=True) self._testReadXML(char) def testReadXMLBZ2String(self): file = os.path.join(self.currdir, "data", "character.xml.bz2") file = open(file) string = file.read() file.close() char = Character() char.read_string(string, bz2=True) self._testReadXML(char) def _getPoint(self): point = Point() point.x = 1 point.y = 2 point.timestamp = 3 return point def testPointToXML(self): point = self._getPoint() self.assertEquals(point.to_xml(), '') def testPointToJSON(self): point = self._getPoint() self.assertEquals(minjson.read(point.to_json()), {u'y': 2, u'timestamp': 3, u'x': 1}) def _getStroke(self): point = Point() point.x = 1 point.y = 2 point.timestamp = 3 point2 = Point() point2.x = 4 point2.y = 5 point2.pressure = 0.1 stroke = Stroke() stroke.append_point(point) stroke.append_point(point2) return stroke def testStrokeToXML(self): stroke = self._getStroke() expected = """ """ self.assertEquals(expected, stroke.to_xml()) def testStrokeToJSON(self): stroke = self._getStroke() expected = {u'points': [{u'y': 2, u'timestamp': 3, u'x': 1}, {u'y': 5, u'pressure': 0, u'x': 4}]} self.assertEquals(minjson.read(stroke.to_json()), expected) def _getWriting(self): point = Point() point.x = 1 point.y = 2 point.timestamp = 3 point2 = Point() point2.x = 4 point2.y = 5 point2.pressure = 0.1 stroke = Stroke() stroke.append_point(point) stroke.append_point(point2) writing = Writing() writing.append_stroke(stroke) return writing def testWritingToXML(self): writing = self._getWriting() expected = """1000 1000 """ self.assertEquals(expected, writing.to_xml()) def testWritingToJSON(self): writing = self._getWriting() expected = {u'width': 1000, u'height': 1000, u'strokes': [{u'points': [{u'y': 2, 
u'timestamp': 3, u'x': 1}, {u'y': 5, u'pressure': 0, u'x': 4}]}]} self.assertEquals(minjson.read(writing.to_json()), expected) def _getCharacter(self): writing = self._getWriting() char = Character() char.set_writing(writing) char.set_utf8("A") return char def testWriteXMLFile(self): char = self._getCharacter() io = StringIO.StringIO() char.write(io) new_char = Character() new_char.read_string(io.getvalue()) self.assertEquals(char, new_char) def testCharacterToJSON(self): char = self._getCharacter() expected = {u'utf8': u'A', u'writing': {u'width' : 1000, u'height': 1000, 'strokes': [{u'points': [{u'y': 2, u'timestamp': 3, u'x': 1}, {u'y': 5, u'pressure': 0, u'x': 4}]}]}} self.assertEquals(minjson.read(char.to_json()), expected) def testNewWriting(self): writing = Writing() writing.move_to(0,0) writing.line_to(1,1) writing.line_to(2,2) writing.line_to(3,3) writing.move_to(4,4) writing.line_to(5,5) writing.move_to(6,6) writing.line_to(7,7) writing.line_to(8,8) strokes = writing.get_strokes() expected = [ [(0, 0), (1,1), (2,2), (3,3)], [(4,4), (5,5)], [(6,6), (7,7), (8,8)] ] self.assertEquals(strokes, expected) def testDuration(self): point = Point() point.x = 1 point.y = 2 point.timestamp = 0 point2 = Point() point2.x = 4 point2.y = 5 point2.timestamp = 5 stroke = Stroke() stroke.append_point(point) stroke.append_point(point2) point3 = Point() point3.x = 1 point3.y = 2 point3.timestamp = 7 point4 = Point() point4.x = 4 point4.y = 5 point4.timestamp = 10 stroke2 = Stroke() stroke2.append_point(point3) stroke2.append_point(point4) self.assertEquals(stroke2.get_duration(), 3) writing = Writing() writing.append_stroke(stroke) writing.append_stroke(stroke2) self.assertEquals(writing.get_duration(), 10) def testPointEquality(self): p1 = Point(x=2, y=3) p2 = Point(x=2, y=3) p3 = Point(x=2, y=4) self.assertTrue(p1 == p2) self.assertFalse(p1 == p3) def testPointEqualityNone(self): p1 = Point(x=2, y=3) self.assertFalse(p1 == None) self.assertTrue(p1 != None) def 
testPointCopy(self): p1 = Point(x=2, y=3) p2 = p1.copy() self.assertTrue(p1 == p2) def testStrokeEquality(self): s1 = Stroke() s1.append_point(Point(x=2, y=3)) s1.append_point(Point(x=3, y=4)) s2 = Stroke() s2.append_point(Point(x=2, y=3)) s2.append_point(Point(x=3, y=4)) s3 = Stroke() s3.append_point(Point(x=2, y=3)) s3.append_point(Point(x=4, y=5)) self.assertTrue(s1 == s2) self.assertFalse(s1 == s3) def testStrokeEqualityNone(self): s1 = Stroke() s1.append_point(Point(x=2, y=3)) s1.append_point(Point(x=3, y=4)) self.assertFalse(s1 == None) self.assertTrue(s1 != None) def testStrokeCopy(self): s1 = Stroke() s1.append_point(Point(x=2, y=3)) s1.append_point(Point(x=3, y=4)) s2 = s1.copy() self.assertTrue(s1 == s2) def testWritingEquality(self): s1 = Stroke() s1.append_point(Point(x=2, y=3)) s1.append_point(Point(x=3, y=4)) s2 = Stroke() s2.append_point(Point(x=2, y=3)) s2.append_point(Point(x=3, y=4)) w1 = Writing() w1.append_stroke(s1) w1.append_stroke(s2) s1 = Stroke() s1.append_point(Point(x=2, y=3)) s1.append_point(Point(x=3, y=4)) s2 = Stroke() s2.append_point(Point(x=2, y=3)) s2.append_point(Point(x=3, y=4)) w2 = Writing() w2.append_stroke(s1) w2.append_stroke(s2) s1 = Stroke() s1.append_point(Point(x=2, y=3)) s1.append_point(Point(x=3, y=4)) s2 = Stroke() s2.append_point(Point(x=2, y=3)) s2.append_point(Point(x=3, y=5)) w3 = Writing() w3.append_stroke(s1) w3.append_stroke(s2) self.assertEquals(w1, w2) self.assertNotEqual(w1, w3) def testWritingEqualityNone(self): s1 = Stroke() s1.append_point(Point(x=2, y=3)) s1.append_point(Point(x=3, y=4)) s2 = Stroke() s2.append_point(Point(x=2, y=3)) s2.append_point(Point(x=3, y=4)) w1 = Writing() w1.append_stroke(s1) w1.append_stroke(s2) self.assertTrue(w1 != None) self.assertFalse(w1 == None) def testCharacterEqualityNone(self): c = Character() self.assertTrue(c != None) self.assertFalse(c == None) def testWritingCopy(self): s1 = Stroke() s1.append_point(Point(x=2, y=3)) s1.append_point(Point(x=3, y=4)) s2 = Stroke() 
s2.append_point(Point(x=2, y=3)) s2.append_point(Point(x=3, y=4)) w1 = Writing() w1.append_stroke(s1) w1.append_stroke(s2) w2 = w1.copy() self.assertTrue(w1 == w2) def testGetNPoints(self): writing = self._getWriting() self.assertEquals(writing.get_n_points(), 2) def testRemoveStroke(self): s1 = Stroke() s1.append_point(Point(x=2, y=3)) s1.append_point(Point(x=3, y=4)) s2 = Stroke() s2.append_point(Point(x=2, y=3)) s2.append_point(Point(x=3, y=4)) w = Writing() w.append_stroke(s1) w.append_stroke(s2) w.remove_stroke(1) self.assertEquals(w.get_strokes(), [[(2,3),(3,4)]]) def testInsertStroke(self): s1 = Stroke() s1.append_point(Point(x=2, y=3)) s1.append_point(Point(x=3, y=4)) s2 = Stroke() s2.append_point(Point(x=2, y=3)) s2.append_point(Point(x=3, y=4)) w = Writing() w.append_stroke(s1) w.append_stroke(s2) s3 = Stroke() s3.append_point(Point(x=22, y=33)) s3.append_point(Point(x=33, y=44)) w.insert_stroke(1, s3) self.assertEquals(w.get_strokes(), [[(2,3),(3,4)], [(22,33),(33,44)], [(2,3),(3,4)]]) def testReplaceStroke(self): s1 = Stroke() s1.append_point(Point(x=2, y=3)) s1.append_point(Point(x=3, y=4)) s2 = Stroke() s2.append_point(Point(x=2, y=3)) s2.append_point(Point(x=3, y=4)) w = Writing() w.append_stroke(s1) w.append_stroke(s2) s3 = Stroke() s3.append_point(Point(x=22, y=33)) s3.append_point(Point(x=33, y=44)) w.replace_stroke(1, s3) self.assertEquals(w.get_strokes(), [[(2,3),(3,4)],[(22,33),(33,44)]]) def testClearStroke(self): s1 = Stroke() s1.append_point(Point(x=2, y=3)) s1.append_point(Point(x=3, y=4)) s1.clear() self.assertEquals(len(s1), 0) def testValidate(self): path = os.path.join(self.currdir, "data", "character.xml") f = open(path) buf = f.read() f.close() invalid = \ """ """ malformed = \ """ """ try: self.assertTrue(Character.validate(buf)) self.assertFalse(Character.validate(invalid)) self.assertFalse(Character.validate(malformed)) except NotImplementedError: sys.stderr.write("lxml missing!\n") pass def testToSexp(self): f = 
os.path.join(self.currdir, "data", "character.xml") char = Character() char.read(f) f = open(os.path.join(self.currdir, "data", "character.sexp")) sexp = f.read().strip() f.close() self.assertEquals(char.to_sexp(), sexp) tegaki-python-0.3.1/tests/data/0000755000175000017500000000000011352066723016230 5ustar mathieumathieutegaki-python-0.3.1/tests/data/character.xml0000755000175000017500000004176611342122457020723 0ustar mathieumathieu tegaki-python-0.3.1/tests/data/character.xml.bz20000644000175000017500000000255311342122457021403 0ustar mathieumathieuBZh91AY&SY%qy_TP/`@ ` _p`p* @xqAU* @z LQ@4041h0 4h4zѵhɦH I Q^ԍݶ|Vfsd+9V,QJP@ eUVV Uj,Y*gV2̪$U-]FjZM"ЉԵaXQZ-[XMkX[]i6Uт$U[hʕr[%'5YHTHQJ U"U()lҵY\֋imj!hc0ZjdҨhdŵh(VJH hڵsEP֌kZֵ)JҔMQ S~/7y\TYJ$DI,& Dv#$A$"1df  Z2 M.Ik}ގ$8ĢTޫ}̉{fff!2w{-mtx+C]5zܟ9t<49}m sk/9 0KZuP$ʽlݜCbCh+׵^* ؅+}i9f$ Qn[ޓY$#m/3$ܙ2܋$шDI $ Io{ZI$uv$Hf9bl0En5먂fomC"janS֗.Owndq$#Ice)U-Yotazo[EɨgA%+d&׺xBP3nI%&Ϭׯ%'7=[M{$9ʧeHDL>ޖMz$DMVNd#x <]i$l$@]ۆd>8Ɋ' !n6 Q0\giuxwuicoxOVM4:8Yho3"l6[in9Oh#dfHm gw}wCjm@m=5< Oؚ6%/weeK Z&goCmkWy}T9&nYqSNRU`*516;}YhKM gMYc_Oj _ڋ[ѣQFe6)LlK3Lѩff&#c*RS53)&4fSLRRK&L̕)26 4%%Fj1IISi4QRX5&$" ]ckmZV5+kϕL68\.7fH.Ґ :+--%4$;BLH4H~.p 7JJtegaki-python-0.3.1/tests/data/collection/0000755000175000017500000000000011352066723020363 5ustar mathieumathieutegaki-python-0.3.1/tests/data/collection/19968_2.xml0000755000175000017500000000324411342122457022030 0ustar mathieumathieu tegaki-python-0.3.1/tests/data/collection/19968_3.xml0000755000175000017500000000540611342122457022033 0ustar mathieumathieu tegaki-python-0.3.1/tests/data/collection/19968_1.xml0000755000175000017500000000301611342122457022024 0ustar mathieumathieu 800 900 tegaki-python-0.3.1/tests/data/collection/19977_2.xml0000755000175000017500000001150211342122457022024 0ustar mathieumathieu tegaki-python-0.3.1/tests/data/collection/test2.charcol0000644000175000017500000000574011342122457022763 0ustar mathieumathieu a 800 900 a a b b c 
tegaki-python-0.3.1/tests/data/collection/test.charcol0000644000175000017500000004600311342122457022676 0ustar mathieumathieu 800 900 tegaki-python-0.3.1/tests/data/collection/20108_1.xml0000755000175000017500000000554711342122457022011 0ustar mathieumathieu tegaki-python-0.3.1/tests/data/collection/19977_1.xml0000755000175000017500000001272611342122457022034 0ustar mathieumathieu tegaki-python-0.3.1/tests/data/character.xml.gzip0000644000175000017500000000341311342122457021653 0ustar mathieumathieu`HQr\5EY7~*'c1l1T `W;VJΜ _ɸNHݭիwz7w/w,g7w?}˯ή~}?~<.o~y>~݋Qf@7IMD3fYXB7 a"–!!%rXA^q-"ؙ$/a&[0 <1<͇$$v(Z=J$@c[h$GU|8t1p g 2"޶I - NACN0,w@nY`a^0 洁5L6aN~aNg'd9EN6HK%~\o%b,.6b0}ͶJ?w@4Sσ= Dp@1G J 3!S@30l,fa5 ,tٳHU& yjj$ zJHScXL0 :@  &`7H'`0G`0pi-.qdf^62icf@w0 ty_. @+,OfGd_Ђ\L%'l}6&;}q16wFyp0 {#ŀILb]f=$z wH`GQT0 | <0e E4M,P,j*T JaP XrsfjfY0F yVryeVXw2@EzpnS]}d'@fT4jp5C:y'@!{^0Ķ0@րaffصP&QnbB>:LDm kQ,7 '5>9 iGm  kkXGm L$FMN2`dOS 5cCs6td}x|i|o|d }qW|i}tid|}tid|}ti|}|otit|}nt |t o |}nV|ot ||}n<yt |}Wn(t j ot }t ||}nXyt |i}Wn tj otd|nX|S(sread the js, first sanitizing a bit and removing any c-style comments If the input is a unicode string, great. That's preferred. If the input is a byte string, strings in the object will be produced as unicode anyway. iits Unacceptable JSON expression: %s(tchrtendswithtstriptslashstarcommentRtdoubleslashcommenttunichrRERRR*tunicodetUnicodeDecodeErrortemergencyEncodingRR]R)R (taStringtencodingtCHR0t unicodecharststencR%((sA/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/minjson.pytsafeReads0   N(tStringIOcCsIt|to/|i|i}dt||idfSn|dS(sError handler for json If encoding fails, \uxxxx must be emitted. This is similar to the "backshashreplace" handler, only that we never emit \xnn since this is not legal according to the JSON syntax specs. 
u\u%04xiN(R*tUnicodeEncodeErrortobjecttstarttord(texcRZ((sA/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/minjson.pytjsonreplace_handler=st jsonreplacecCs,td|d|}|i||iS(Ntinput_encodingtoutput_encoding(t JsonWritertwritetgetvalue(tinputRjtoutputEncodingtwriter((sA/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/minjson.pyR{Qs s"|| | | | |\\cCsPhdd<dd<dd<dd<d d <d d <d d<|idS(Ns\"Rs\bss\fs s\ns s\rs s\ts s\\s\i(R(R((sA/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/minjson.pytfunc_strmangleXs      cCstit|S(N(t re_strmangleRR(ttext((sA/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/minjson.pyt strmanglecst JsonStreamcBs#eZdZdZdZRS(cCs g|_dS(N(tbuf(R$((sA/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/minjson.pyR&hscCs|ii|dS(N(RR-(R$R((sA/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/minjson.pyR{kscCsdi|iS(NR_(RIR(R$((sA/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/minjson.pyR|ns(RRR&R{R|(((sA/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/minjson.pyRfs  RzcBs,eZddddZdZdZRS(sutf-8cCsS|dj o|djo tdq4n t}||_||_||_dS(s - stream is optional, if specified must also give output_encoding - The input strings can be unicode or in input_encoding - output_encoding is optional, if omitted, result will be unicode s;If a stream is given, output encoding must also be providedN(R!RRtstreamRxRy(R$RRxRy((sA/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/minjson.pyR&ss     cCst|ttfol|iidt}x<|D]4}|o t}n|iid|i|q3W|iidfn%t|to|iidt}xe|iD]W\}}|o t}n|iid|i||iid|i|qW|iidn|tjo|iidnb|tjo|iidnA|djo|iid n t|t pRyt |}Wn+t j o}t d |i|fnX|ii|nt|tpHyt||i}WqAttfj ot|d d }qAXnt|}|idj o|i|id }n|iid|ii||iiddS(NR<RAR>R=R@R?RCRDREsCannot write object (%s: %s)sutf-8RHRwR(R*tlistttupleRR{RLRtdictt iteritemsR!R4tstrt ExceptionRt __class__RfRxRgtUnicodeTranslateErrorRRytencode(R$tobjtfirsttelemR7R5Ru((sA/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/minjson.pyR{sZ        cCs 
|iiS(N(RR|(R$((sA/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/minjson.pyR|sN(RRR!R&R{R|(((sA/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/minjson.pyRzqs 4(s s\n(ss\b(s s\f(s s\t(s s\r(Rs\"(((((,treRRRRRORRRRRR R R R RhRR RRcRdReRRGRRRRRrRR!Rotreadtcodecst cStringIORpRvtregister_errorR{RRRRRz(((sA/home/mathieu/Desktop/projects/hwr/tegaki-python/tests/minjson.pys3s8".    1   tegaki-python-0.3.1/tests/test_mathutils.py0000644000175000017500000000230111342122457020732 0ustar mathieumathieu# -*- coding: utf-8 -*- # Copyright (C) 2008 The Tegaki project contributors # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # Contributors to this file: # - Mathieu Blondel import unittest from tegaki.mathutils import * class MathTest(unittest.TestCase): def testEuclideanDistance(self): for v1, v2, expected in ( ( (2, 10, 12), (3, 10, 7), 5.0 ), ( (5, 5), (5, 5), 0.0) ): res = round(euclidean_distance(v1, v2)) self.assertEquals(res, expected) tegaki-python-0.3.1/AUTHORS0000644000175000017500000000005711342122456015222 0ustar mathieumathieuMathieu Blondel tegaki-python-0.3.1/COPYING0000644000175000017500000004326211342122457015213 0ustar mathieumathieu GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. 
Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. 
You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. 
If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. 
(This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. 
Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. 
Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. 
Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. 
Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. 
If this is what you want to do, use the GNU Library General Public License instead of this License. tegaki-python-0.3.1/PKG-INFO0000644000175000017500000000036111352066723015252 0ustar mathieumathieuMetadata-Version: 1.0 Name: tegaki-python Version: 0.3.1 Summary: Tegaki Python library Home-page: http://www.tegaki.org Author: Mathieu Blondel Author-email: mathieu ÂT mblondel DÔT org License: GPL Description: UNKNOWN Platform: UNKNOWN tegaki-python-0.3.1/tegaki/0000755000175000017500000000000011352066723015421 5ustar mathieumathieutegaki-python-0.3.1/tegaki/charcol.py0000644000175000017500000010325211342122456017404 0ustar mathieumathieu# -*- coding: utf-8 -*- # Copyright (C) 2009 The Tegaki project contributors # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # Contributors to this file: # - Mathieu Blondel import sqlite3 import base64 import tempfile import re import os from tegaki.dictutils import SortedDict from tegaki.character import _XmlBase, Point, Stroke, Writing, Character def _dict_factory(cursor, row): d = {} for idx, col in enumerate(cursor.description): d[col[0]] = d[idx] = row[idx] return d class ObjectProxy(object): """ An object that forwards all attribute and method calls to another object. Object proxies are used to automatically reflect back in the db changes that are made to objects. 
For example: >>> char = charcol.get_all_characters()[0] >>> char.set_utf8(newvalue) # will be automatically changed in the db """ WRITE_METHODS = [] READ_METHODS = [] WRITE_ATTRIBUTES = [] def __init__(self, charpool, obj, charobj=None): self._charpool = charpool # the object to redirect attributes and method calls to self._obj = obj # the original character object self._charobj = obj if charobj is None else charobj def __getattr__(self, attr_): attr = getattr(self._obj, attr_) write = False if attr_ in self.WRITE_METHODS: write = True elif not attr_ in self.READ_METHODS: return attr def wrapper(*args, **kw): if write: self._charpool.add_char(self._charobj) return _apply_proxy(self._charpool, attr(*args, **kw), self._charobj) return wrapper def __setattr__(self, attr, value): if attr in self.WRITE_ATTRIBUTES: self._charpool.add_char(self._charobj) setattr(self._obj, attr, value) self.__dict__[attr] = value def __eq__(self, othr): if othr.__class__.__name__.endswith("Proxy"): othr = othr._obj return self._obj == othr def __ne__(self, othr): return not(self == othr) class PointProxy(ObjectProxy): """ Proxy to Point. """ WRITE_METHODS = ["resize", "move_rel", "copy_from"] WRITE_ATTRIBUTES = Point.KEYS def __getitem__(self, x): return self._obj[x] class StrokeProxy(ObjectProxy): """ Proxy to Stroke. """ WRITE_METHODS = ["append_point", "insert", "smooth", "clear", "downsample", "downsample_threshold", "upsample", "upsample_threshod"] READ_METHODS = [] def __getitem__(self, i): return _apply_proxy(self._charpool, self._obj[i], self._charobj) def __len__(self): return len(self._obj) class WritingProxy(ObjectProxy): """ Proxy to Writing. """ # Note: Some method calls need not be mentioned below # because they automatically update the db thanks to # Point and Stroke methods that are being used in their implementation. 
WRITE_METHODS = ["clear", "move_to_point", "line_to_point", "set_width", "set_height", "remove_stroke"] READ_METHODS = ["get_strokes"] class CharacterProxy(ObjectProxy): """ Proxy to Writing. """ WRITE_METHODS = ["set_utf8", "set_unicode", "set_writing", "read", "read_string"] READ_METHODS = ["get_writing"] def __repr__(self): return "" % (str(self.get_utf8()), id(self)) OBJ_PROXY = {Character: CharacterProxy, Writing : WritingProxy, Stroke : StrokeProxy, Point : PointProxy} def _apply_proxy(charpool, obj, charobj): return _apply_proxy_rec(charpool, obj, charobj) def _apply_proxy_rec(charpool, obj, charobj, reclevel=0): try: return OBJ_PROXY[obj.__class__](charpool, obj, charobj) except KeyError: if (isinstance(obj, tuple) or isinstance(obj, list)) and reclevel <= 1: return [_apply_proxy_rec(charpool, ele, charobj, reclevel+1) \ for ele in obj] else: return obj class _CharPool(dict): """ Holds characters that need be updated. We don't want changes to be immediately reflected back to the db for performance reasons. The _CharPool keeps track of what objects need be updated. """ def __init__(self, cursor): self._c = cursor def add_char(self, char): self[char.charid] = char def _update_character(self, char): self._c.execute("""UPDATE characters SET utf8=?, n_strokes=?, data=?, sha1=? 
WHERE charid=?""", (char.get_utf8(), char.get_writing().get_n_strokes(), _adapt_character(char), char.hash(), char.charid)) def clear_pool_threshold(self, threshold=100): if len(self) > threshold: self.clear_pool() def clear_pool(self): for charid, char in self.items(): self._update_character(char) self.clear() def _convert_character(data): # converts a BLOB into an object char = Character() char.read_string(base64.b64decode(data), gzip=True) return char def _adapt_character(char): # converts an object into a BLOB return base64.b64encode(char.write_string(gzip=True)) def _gzipbz2(path): return (True if path.endswith(".gz") or path.endswith(".gzip") else False, True if path.endswith(".bz2") or path.endswith(".bzip2") else False) class CharacterCollection(_XmlBase): """ A collection of L{Characters}. A CharacterCollection is composed of sets. Each set can be composed of zero, one, or more characters. /!\ Sets do not necessarily contain only characters of the same class / utf8 value. Sets may also be used to group characters in other fashions (e.g. by number of strokes, by handwriting quality, etc...). Therefore the set name is not guaranteed to contain the utf8 value of the characters of that set. The utf8 value must be retrieved from each character individually. Building character collection objects ===================================== A character collection can be built from scratch progmatically: >>> char = Character() >>> charcol = CharacterCollection() >>> charcol.add_set("my set") >>> charcol.append_character("my set", char) Reading XML files ================= A character collection can be read from an XML file: >>> charcol = CharacterCollection() >>> charcol.read("myfile") Gzip-compressed and bzip2-compressed XML files can also be read: >>> charcol = CharacterCollection() >>> charcol.read("myfilegz", gzip=True) >>> charcol = Character() >>> charcol.read("myfilebz", bz2=True) A similar method read_string exists to read the XML from a string instead of a file. 
For convenience, you can directly load a character collection by passing it the file to load. In that case, compression is automatically detected based on file extension (.gz, .bz2). >>> charcol = Character("myfile.xml.gz") The recommended extension for XML character collection files is .charcol. Writing XML files ================= A character collection can be saved to an XML file by using the write() method. >>> charcol.write("myfile") The write method has gzip and bz2 arguments just like read(). In addition, there is a write_string method which generates a string instead of a file. For convenience, you can save a character collection with the save() method. It automatically detects compression based on the file extension. >>> charcol.save("mynewfile.xml.bz2") If the CharacterCollection object was passed a file when it was constructed, the path can ce omitted. >>> charcol = Character("myfile.gz") >>> charcol.save() Using .chardb files =================== XML files allow to retain human-readability and are ideal for small character collections. However, they force the whole database to be kept in memory. For larger collections, it's recommended to use .chardb files instead. Their loading is faster and the whole collection doesn't need be kept entirely in memory. However human-readability ist lost. >>> charcol = CharacterCollection("charcol.chardb") [...] >>> charcol.save() The .chardb extension is required. """ #: With WRITE_BACK set to True, proxy objects are returned in place of #: character, writing, stroke and point objects in order to automatically #: reflect changes to these objects back to the sqlite db. #: However, there is probably overhead usigng them. 
WRITE_BACK = True def get_auto_commit(self): return True if self._con.isolation_level is None else False def set_auto_commit(self, auto): self._con.isolation_level = None if auto else "" #: With AUTO_COMMIT set to true, data is immediately written to disk AUTO_COMMIT = property(get_auto_commit, set_auto_commit) DTD = \ """ """ def __init__(self, path=":memory:"): """ Construct a collection. @type path: str @param path: an XML file or a DB file (see also L{bind}) """ if path is None: path = ":memory:" if not path in ("", ":memory:") and not path.endswith(".chardb"): # this should be an XML character collection gzip, bz2 = _gzipbz2(path) self.bind(":memory:") self.read(path, gzip=gzip, bz2=bz2) self._path = path # contains the path to the xml file else: # this should be either a .chardb, ":memory:" or "" self.bind(path) self._path = None # DB utils def _e(self, req, *a, **kw): self._charpool.clear_pool() #print req, a, kw return self._c.execute(req, *a, **kw) def _em(self, req, *a, **kw): self._charpool.clear_pool() #print req, a, kw return self._c.executemany(req, *a, **kw) def _fo(self): return self._c.fetchone() def _fa(self): return self._c.fetchall() def _efo(self, req, *a, **kw): self._e(req, *a, **kw) return self._fo() def _efa(self, req, *a, **kw): self._e(req, *a, **kw) return self._fa() def _has_tables(self): self._e("SELECT count(type) FROM sqlite_master WHERE type = 'table'") return self._fo()[0] > 0 def _create_tables(self): self._c.executescript(""" CREATE TABLE character_sets( setid INTEGER PRIMARY KEY, name TEXT ); CREATE TABLE characters( charid INTEGER PRIMARY KEY, setid INTEGER REFERENCES character_sets, utf8 TEXT, n_strokes INTEGER, data BLOB, -- gz xml sha1 TEXT ); CREATE INDEX character_setid_index ON characters(setid); """) def get_character_from_row(self, row): # charid, setid, utf8, n_strokes, data, sha1 char = _convert_character(row['data']) char.charid = row['charid'] if self.WRITE_BACK: return CharacterProxy(self._charpool, char) else: 
return char def _update_set_ids(self): self._SETIDS = SortedDict() for row in self._efa("SELECT * FROM character_sets ORDER BY setid"): self._SETIDS[row['name'].encode("utf8")] = row['setid'] # Public API def __repr__(self): return "" % \ (self.get_total_n_characters(), id(self)) def bind(self, path): """ Bind database to a db file. All changes to the previous binded database will be lost if you haven't committed changes with commit(). @type path: str Possible values for path: ":memory:" for fully in memory database "" for a in memory database that uses temp files under pressure "/path/to/file.chardb" for file-based database """ self._con = sqlite3.connect(path) self._con.text_factory = sqlite3.OptimizedUnicode self._con.row_factory = _dict_factory #sqlite3.Row self._c = self._con.cursor() self._charpool = _CharPool(self._c) if not self._has_tables(): self._create_tables() self._update_set_ids() self._dbpath = path def get_db_filename(self): """ Returns the db file which is internally used by the collection. @rtype: str or None @return: file path or None if in memory db """ return None if self._dbpath in (":memory:", "") else self._dbpath def commit(self): """ Commit changes since last commit. """ self._charpool.clear_pool() self._con.commit() def save(self, path=None): """ Save collection to a file. @type path: str @param path: path where to write the file or None if use the path \ that was given to the constructor If path ends with .chardb, it's saved as binary db file. Otherwise, it will be saved as XML. In the latter case, the file extension is used to determine whether the file must be saved as plain, gzip-compressed or bzip2-compressed XML. If path is omitted, the path that was given to the CharacterCollection constructor is used. 
""" if path is None: if self._path is not None: # an XML file was provided to constructor gzip, bz2 = _gzipbz2(self._path) self.write(self._path, gzip=gzip, bz2=bz2) else: if path.endswith(".chardb"): if self._dbpath != path: # the collection changed its database name # FIXME: this can rewritten more efficiently with # the ATTACH command if os.path.exists(path): os.unlink(path) newcc = CharacterCollection(path) newcc.merge([self]) newcc.commit() del newcc self.bind(path) else: gzip, bz2 = _gzipbz2(path) self.write(path, gzip=gzip, bz2=bz2) self.commit() @staticmethod def from_character_directory(directory, extensions=["xml", "bz2", "gz"], recursive=True, check_duplicate=False): """ Creates a character collection from a directory containing individual character files. """ regexp = re.compile("\.(%s)$" % "|".join(extensions)) charcol = CharacterCollection() for name in os.listdir(directory): full_path = os.path.join(directory, name) if os.path.isdir(full_path) and recursive: charcol += CharacterCollection.from_character_directory( full_path, extensions) elif regexp.search(full_path): char = Character() gzip = False; bz2 = False if full_path.endswith(".gz"): gzip = True if full_path.endswith(".bz2"): bz2 = True try: char.read(full_path, gzip=gzip, bz2=bz2) except ValueError: continue # ignore malformed XML files utf8 = char.get_utf8() if utf8 is None: utf8 = "Unknown" charcol.add_set(utf8) if not check_duplicate or \ not char in charcol.get_characters(utf8): charcol.append_character(utf8, char) return charcol def concatenate(self, other, check_duplicate=False): """ Merge two charcols together and return a new charcol @type other: CharacterCollection """ new = CharacterCollection() new.merge([self, other], check_duplicate=check_duplicate) return new def merge(self, charcols, check_duplicate=False): """ Merge several charcacter collections into the current collection. 
def add_sets(self, set_names):
    """
    Add new sets to collection.

    Names that already exist in the collection are skipped, and a name
    that appears several times in set_names is inserted only once.

    @type set_names: list of str
    """
    seen = set()
    new_names = []
    for set_name in set_names:
        if set_name in self._SETIDS or set_name in seen:
            continue
        seen.add(set_name)
        new_names.append((set_name,))  # one parameter tuple per row

    self._em("INSERT INTO character_sets(name) VALUES (?)", new_names)
    self._update_set_ids()
def get_all_characters(self, limit=-1, offset=0):
    """
    Return all characters in collection.

    @type limit: int
    @param limit: the number of characters needed or -1 if all

    @type offset: int
    @param offset: the offset to start from (0 if from beginning)

    @rtype: list of L{Character}
    """
    # Forward the caller's window: the arguments used to be replaced by
    # the hard-coded defaults, so limit and offset had no effect.
    return list(self.get_all_characters_gen(limit=limit, offset=offset))
def remove_character(self, set_name, i):
    """
    Remove a character from a set at a given position.

    Nothing happens when the set holds no character at that position.

    @type set_name: str
    @param set_name: the set from which the character needs be removed

    @type i: int
    @param i: position
    """
    setid = self._SETIDS[set_name]
    row = self._efo("""SELECT charid FROM characters
                       WHERE setid=? ORDER BY charid LIMIT 1 OFFSET ?""",
                    (setid, i))
    # NOTE(review): _efo presumably returns None (sqlite3 fetchone) when
    # no row matches - confirm. The result must be checked before it is
    # subscripted, otherwise the "not found" case raises.
    if not row:
        return
    charid = row[0]
    if charid:
        self._e("DELETE FROM characters WHERE charid=?", (charid,))
def include_characters_from_text(self, text):
    """
    Only keep characters found in a text.

    Or put differently, remove all characters but those found in a text.

    @type text: str
    @param text: UTF-8 encoded byte string, or already decoded text
    """
    # Decode byte strings only; text that is already decoded is used
    # as is instead of being rejected.
    if isinstance(text, bytes):
        text = text.decode("utf8")

    dic = self._get_dict_from_text(text)
    # The values are embedded as SQL string literals, so a single quote
    # has to be doubled or it would terminate the literal.
    utf8values = ",".join(["'%s'" % k.replace("'", "''")
                           for k in dic.keys()])
    self._e("DELETE FROM characters WHERE utf8 NOT IN(%s)" % utf8values)
    self.remove_empty_sets()
def to_xml(self):
    """
    Converts collection to XML.

    The element names are the ones that the XML reader of this class
    handles: a <character-collection> root, one <set name="..."> per
    set, one <character> per sample with an optional <utf8> child,
    followed by the XML of the character's writing.

    @rtype: str
    """
    # NOTE(review): the tags in these templates were reconstructed from
    # the element names handled by _start_element/_char_data; the XML
    # declaration and the indentation widths are assumptions - confirm.
    from xml.sax.saxutils import escape

    parts = ['<?xml version="1.0" encoding="UTF-8"?>\n',
             "<character-collection>\n"]

    for set_name in self.get_set_list():
        # Set names and utf8 values are free text: escape them so that
        # characters such as "<", "&" or '"' still give well-formed XML.
        parts.append('<set name="%s">\n'
                     % escape(set_name, {'"': "&quot;"}))

        for character in self.get_characters(set_name):
            parts.append("  <character>\n")

            utf8 = character.get_utf8()
            if utf8:
                parts.append("    <utf8>%s</utf8>\n" % escape(utf8))

            for line in character.get_writing().to_xml().split("\n"):
                parts.append("    %s\n" % line)

            parts.append("  </character>\n")

        parts.append("</set>\n")

    parts.append("</character-collection>\n")

    # Joining once avoids repeated string concatenation.
    return "".join(parts)
def _start_element(self, name, attrs):
    """
    Handle an opening tag while parsing a collection from XML.

    Records the current tag name and, depending on the tag, registers a
    new set, starts a new character or stroke, or appends a point to
    the current stroke.

    @type name: str
    @param name: the name of the element being opened

    @type attrs: dict
    @param attrs: the attributes of the element

    @raise ValueError: if the first element is not
                       <character-collection> or if a <set> element has
                       no name attribute
    """
    self._tag = name

    if self._first_tag:
        self._first_tag = False
        if self._tag != "character-collection":
            raise ValueError(
                "The very first tag should be <character-collection>")

    if self._tag == "set":
        if "name" not in attrs:
            raise ValueError("<set> should have a name attribute")

        self._curr_set_name = attrs["name"].encode("UTF-8")
        self.add_set(self._curr_set_name)

    elif self._tag == "character":
        # Reset the per-character state filled in by the child elements.
        self._curr_char = Character()
        self._curr_writing = self._curr_char.get_writing()
        self._curr_width = None
        self._curr_height = None
        self._curr_utf8 = None

    elif self._tag == "stroke":
        self._curr_stroke = Stroke()

    elif self._tag == "point":
        point = Point()

        for key in ("x", "y", "pressure", "xtilt", "ytilt", "timestamp"):
            if key in attrs:
                value = attrs[key].encode("UTF-8")
                if key in ("pressure", "xtilt", "ytilt"):
                    value = float(value)
                else:
                    # x, y and timestamp are stored as integers but may
                    # be written with a decimal part.
                    value = int(float(value))
            else:
                value = None

            setattr(point, key, value)

        self._curr_stroke.append_point(point)
class SortedDict(dict):
    """
    A dictionary that keeps its keys in the order in which they're inserted.

    The order is stored in the keyOrder list; every mutating method
    keeps that list in sync with the underlying dict.
    """

    def __new__(cls, *args, **kwargs):
        instance = super(SortedDict, cls).__new__(cls, *args, **kwargs)
        instance.keyOrder = []
        return instance

    def __init__(self, data=None):
        """
        @type data: dict or iterable of (key, value) pairs
        @param data: initial content; None means empty
        """
        if data is None:
            data = {}
        elif not isinstance(data, dict):
            # dict.__init__ and the ordering pass below both iterate over
            # data: materialize it so that one-shot iterators still give
            # their keys to keyOrder.
            data = list(data)
        super(SortedDict, self).__init__(data)
        if isinstance(data, dict):
            # list() so that keyOrder is always a mutable list.
            self.keyOrder = list(data.keys())
        else:
            self.keyOrder = []
            seen = set()
            for key, value in data:
                if key not in seen:
                    seen.add(key)
                    self.keyOrder.append(key)

    def __deepcopy__(self, memo):
        from copy import deepcopy
        return self.__class__([(key, deepcopy(value, memo))
                               for key, value in self.iteritems()])

    def __setitem__(self, key, value):
        # Membership is tested on the dict (hash lookup), not on the list.
        if key not in self:
            self.keyOrder.append(key)
        super(SortedDict, self).__setitem__(key, value)

    def __delitem__(self, key):
        super(SortedDict, self).__delitem__(key)
        self.keyOrder.remove(key)

    def __iter__(self):
        for k in self.keyOrder:
            yield k

    def pop(self, k, *args):
        result = super(SortedDict, self).pop(k, *args)
        try:
            self.keyOrder.remove(k)
        except ValueError:
            # Key wasn't in the dictionary in the first place. No problem.
            pass
        return result

    def popitem(self):
        result = super(SortedDict, self).popitem()
        self.keyOrder.remove(result[0])
        return result

    def items(self):
        """Return the (key, value) pairs as a list, in insertion order."""
        return list(zip(self.keyOrder, self.values()))

    def iteritems(self):
        for key in self.keyOrder:
            yield key, super(SortedDict, self).__getitem__(key)

    def keys(self):
        """Return a copy of the keys as a list, in insertion order."""
        return self.keyOrder[:]

    def iterkeys(self):
        return iter(self.keyOrder)

    def values(self):
        """Return the values as a list, in insertion order."""
        getitem = super(SortedDict, self).__getitem__
        return [getitem(key) for key in self.keyOrder]

    def itervalues(self):
        for key in self.keyOrder:
            yield super(SortedDict, self).__getitem__(key)

    def update(self, dict_):
        """
        Add the content of dict_, keeping the order of first insertion.

        @type dict_: mapping or iterable of (key, value) pairs
        """
        pairs = dict_.items() if hasattr(dict_, "items") else dict_
        for k, v in pairs:
            self.__setitem__(k, v)

    def setdefault(self, key, default=None):
        if key not in self:
            self.keyOrder.append(key)
        return super(SortedDict, self).setdefault(key, default)

    def value_for_index(self, index):
        """Returns the value of the item at the given zero-based index."""
        return self[self.keyOrder[index]]

    def insert(self, index, key, value):
        """Inserts the key, value pair before the item with the given index."""
        if key in self.keyOrder:
            n = self.keyOrder.index(key)
            del self.keyOrder[n]
            # Removing an earlier entry shifts the target position left.
            if n < index:
                index -= 1
        self.keyOrder.insert(index, key)
        super(SortedDict, self).__setitem__(key, value)

    def copy(self):
        """Returns a copy of this object."""
        # This way of initializing the copy means it works for subclasses, too.
        obj = self.__class__(self)
        obj.keyOrder = self.keyOrder[:]
        return obj

    def __repr__(self):
        """
        Replaces the normal dict.__repr__ with a version that returns the keys
        in their sorted order.
        """
        return '{%s}' % ', '.join(['%r: %r' % (k, v)
                                   for k, v in self.items()])

    def clear(self):
        super(SortedDict, self).clear()
        self.keyOrder = []
def to_xml(self):
    """
    Converts point to XML.

    Only the attributes listed in KEYS whose value is set (not None)
    are written.

    @rtype: str
    """
    # get() rather than indexing: a key may be absent from the dict, in
    # which case it is treated like an unset (None) attribute.
    attrs = ["%s=\"%s\"" % (key, str(self[key]))
             for key in self.KEYS if self.get(key) is not None]
    # The template needs a placeholder for the attribute list; with an
    # empty template the % operator raises TypeError.
    return "<point %s />" % " ".join(attrs)
class Stroke(list):
    """
    A sequence of L{Points}.
    """

    def __init__(self):
        list.__init__(self)
        self._is_smoothed = False

    def get_coordinates(self):
        """
        Return (x,y) coordinates.

        @rtype: a list of tuples
        """
        return [(p.x, p.y) for p in self]

    def get_duration(self):
        """
        Return the time that it took to draw the stroke.

        @rtype: int or None
        @return: time in milliseconds or None if the information is not
                 available
        """
        if len(self) > 0:
            first, last = self[0].timestamp, self[-1].timestamp
            if last is not None and first is not None:
                return last - first
        return None

    def append_point(self, point):
        """
        Append point to stroke.

        @type point: L{Point}
        """
        self.append(point)

    def to_xml(self):
        """
        Converts stroke to XML.

        @rtype: str
        """
        # The element name matches the "stroke" tag handled by the XML
        # reader of the character collection.
        lines = ["<stroke>\n"]
        for point in self:
            lines.append("  %s\n" % point.to_xml())
        lines.append("</stroke>")
        return "".join(lines)

    def to_json(self):
        """
        Converts stroke to JSON.

        @rtype: str
        """
        s = "{\"points\" : ["
        s += ",".join([point.to_json() for point in self])
        s += "]}"
        return s

    def to_sexp(self):
        """
        Converts stroke to S-expressions.

        @rtype: str
        """
        return "(" + "".join([p.to_sexp() for p in self]) + ")"

    def __eq__(self, othr):
        if not othr.__class__.__name__ in ("Stroke", "StrokeProxy"):
            return False

        if len(self) != len(othr):
            return False

        for i in range(len(self)):
            if self[i] != othr[i]:
                return False

        return True

    def __ne__(self, othr):
        return not(self == othr)

    def copy_from(self, s):
        """
        Replace stroke with another stroke.

        @type s: L{Stroke}
        @param s: the stroke to copy from
        """
        # Snapshot the source before clearing, so that copying a stroke
        # onto itself does not wipe it.
        points = [p.copy() for p in s]
        is_smoothed = s.get_is_smoothed()

        self.clear()
        self._is_smoothed = is_smoothed
        for p in points:
            self.append_point(p)

    def copy(self):
        """
        Return a copy of stroke.

        @rtype: L{Stroke}
        """
        c = Stroke()
        c.copy_from(self)
        return c

    def get_is_smoothed(self):
        """
        Return whether the stroke has been smoothed already or not.

        @rtype: boolean
        """
        return self._is_smoothed

    def smooth(self):
        """
        Visually improve the rendering of stroke by averaging points
        with their neighbours.

        The method is based on a (simple) moving average algorithm.

        Let p = p(0), ..., p(N) be the set points of this stroke,
            w = w(-M), ..., w(0), ..., w(M) be a set of weights.

        This algorithm aims at replacing p with a set p' such as

            p'(i) = (w(-M)*p(i-M) + ... + w(0)*p(i) + ... + w(M)*p(i+M)) / S

        and where S = w(-M) + ... + w(0) + ... w(M). End points are not
        affected.
        """
        if self._is_smoothed:
            return

        weights = [1, 1, 2, 1, 1]  # Weights to be used
        times = 3  # Number of times to apply the algorithm

        if len(self) < len(weights):
            return

        offset = len(weights) // 2
        # Float divisor: with integer coordinates an integer division
        # would truncate the average before round() is applied.
        wsum = float(sum(weights))

        for n in range(times):
            s = self.copy()  # averages are computed from the old values

            for i in range(offset, len(self) - offset):
                x = 0
                y = 0

                for j, weight in enumerate(weights):
                    neighbour = s[i + j - offset]
                    x += weight * neighbour.x
                    y += weight * neighbour.y

                self[i].x = int(round(x / wsum))
                self[i].y = int(round(y / wsum))

        self._is_smoothed = True

    def clear(self):
        """
        Remove all points from stroke.
        """
        del self[:]
        self._is_smoothed = False

    def downsample(self, n):
        """
        Downsample by keeping only 1 sample every n samples.

        @type n: int
        """
        if len(self) == 0:
            return

        new_s = Stroke()
        for i, point in enumerate(self):
            if i % n == 0:
                new_s.append_point(point)

        self.copy_from(new_s)

    def downsample_threshold(self, threshold):
        """
        Downsample by removing consecutive samples for which
        the euclidean distance is inferior to threshold.

        The first and the last points are always kept.

        @type threshold: int
        """
        if len(self) == 0:
            return

        new_s = Stroke()
        new_s.append_point(self[0])

        last = 0
        # Every interior point is a candidate, including the one just
        # before the last point.
        for i in range(1, len(self) - 1):
            u = [self[last].x, self[last].y]
            v = [self[i].x, self[i].y]

            if euclidean_distance(u, v) > threshold:
                new_s.append_point(self[i])
                last = i

        # A single-point stroke must not get its only point twice.
        if len(self) > 1:
            new_s.append_point(self[-1])

        self.copy_from(new_s)

    def upsample(self, n):
        """
        'Artificially' increase sampling by adding n linearly spaced points
        between consecutive points.

        @type n: int
        """
        self._upsample(lambda d: n)

    def upsample_threshold(self, threshold):
        """
        'Artificially' increase sampling, using threshold to determine
        how many samples to add between consecutive points.

        @type threshold: int
        """
        self._upsample(lambda d: int(floor(float(d) / threshold - 1)))

    def _upsample(self, func):
        """
        'Artificially' increase sampling, using func(distance) to determine how
        many samples to add between consecutive points.
        """
        if len(self) == 0:
            return

        new_s = Stroke()

        for i in range(len(self) - 1):
            x1, y1 = self[i].x, self[i].y
            x2, y2 = self[i + 1].x, self[i + 1].y

            new_s.append_point(self[i])

            dx = x2 - x1
            dy = y2 - y1

            if dx == 0:
                # Vertical segment: the angle is pi / 2.
                cosalpha = 0.0
                sinalpha = 1.0
            else:
                alpha = atan(float(abs(dy)) / abs(dx))
                cosalpha = cos(alpha)
                sinalpha = sin(alpha)

            d = euclidean_distance([x1, y1], [x2, y2])
            # Sign of each delta (-1, 0 or 1), without the cmp() builtin.
            signx = (dx > 0) - (dx < 0)
            signy = (dy > 0) - (dy < 0)

            n = func(d)

            # func may return 0 or a negative count: nothing to add then
            # (and n + 1 could be zero).
            if n > 0:
                stepx = cosalpha * 1.0 / (n + 1) * d
                stepy = sinalpha * 1.0 / (n + 1) * d

                for j in range(1, n + 1):
                    new_s.append_point(Point(x=int(x1 + j * stepx * signx),
                                             y=int(y1 + j * stepy * signy)))

        new_s.append_point(self[-1])

        self.copy_from(new_s)

    def __repr__(self):
        return "<Stroke %d pts (ref %d)>" % (len(self), id(self))
def remove_stroke(self, i):
    """
    Remove the ith stroke.

    Nothing happens when there is no stroke at that position. Negative
    positions count from the end, as with lists.

    @type i: int
    @param i: position at which to delete a stroke (starts at 0)
    """
    n_strokes = self.get_n_strokes()
    # Check both bounds: an out-of-range negative position is ignored
    # just like a too large one, instead of raising IndexError.
    if -n_strokes <= i < n_strokes:
        del self._strokes[i]
def size(self):
    """
    Return writing size, i.e. the bounding box of all points.

    @rtype: (x, y, width, height)
    @return: (x,y) are the coordinates of the upper-left point
    """
    xs = []
    ys = []
    for stroke in self._strokes:
        for point in stroke:
            xs.append(point.x)
            ys.append(point.y)

    if not xs:
        # No point at all: keep the values this method has always returned
        # for an empty writing, so existing callers see no change.
        xmin, ymin = 4294967296, 4294967296 # 2^32
        xmax, ymax = 0, 0
    else:
        # Take the extrema from the points themselves. Seeding the maximum
        # with 0 (or the minimum with 2^32) gives a wrong box when all the
        # coordinates are negative (or larger than 2^32).
        xmin, xmax = min(xs), max(xs)
        ymin, ymax = min(ys), max(ys)

    return (xmin, ymin, xmax-xmin, ymax-ymin)
""" # Note: you should call normalize_position() after normalize_size() x, y, width, height = self.size() if float(width) / self._width > Writing.NORMALIZE_MIN_SIZE: xrate = self._width * Writing.NORMALIZE_PROPORTION / width else: # Don't normalize if too thin in width xrate = 1.0 if float(height) / self._height > Writing.NORMALIZE_MIN_SIZE: yrate = self._height * Writing.NORMALIZE_PROPORTION / height else: # Don't normalize if too thin in height yrate = 1.0 self.resize(xrate, yrate) def downsample(self, n): """ Downsample by keeping only 1 sample every n samples. @type n: int """ for s in self._strokes: s.downsample(n) def downsample_threshold(self, threshold): """ Downsample by removing consecutive samples for which the euclidean distance is inferior to threshold. @type threshod: int """ for s in self._strokes: s.downsample_threshold(threshold) def upsample(self, n): """ 'Artificially' increase sampling by adding n linearly spaced points between consecutive points. @type n: int """ for s in self._strokes: s.upsample(n) def upsample_threshold(self, threshold): """ 'Artificially' increase sampling, using threshold to determine how many samples to add between consecutive points. @type threshold: int """ for s in self._strokes: s.upsample_threshold(threshold) def get_size(self): """ Return the size of the drawing box. @rtype: tuple Not to be confused with size() which returns the size the writing. """ return (self.get_width(), self.get_height()) def set_size(self, w, h): self.set_width(w) self.set_height(h) def get_width(self): """ Return the width of the drawing box. @rtype: int """ return self._width def set_width(self, width): """ Set the drawing box width. This is necessary if the points which are added were not drawn in 1000x1000 drawing box. """ self._width = width def get_height(self): """ Return the height of the drawing box. @rtype: int """ return self._height def set_height(self, height): """ Set the drawing box height. 
def to_json(self):
    """
    Converts writing to JSON.

    The result is an object with the drawing box "width" and "height" and
    a "strokes" array made of each stroke's own to_json() output.

    @rtype: str
    """
    pieces = [
        "{ \"width\" : %d, " % self.get_width(),
        "\"height\" : %d, " % self.get_height(),
        "\"strokes\" : [",
        ", ".join([stroke.to_json() for stroke in self._strokes]),
        "]}",
    ]
    return "".join(pieces)
def read(self, file, gzip=False, bz2=False, compresslevel=9):
    """
    Read XML from a file.

    @type file: str or file
    @param file: path to file or file object
    @type gzip: boolean
    @param gzip: whether the file is gzip-compressed or not
    @type bz2: boolean
    @param bz2: whether the file is bzip2-compressed or not
    @type compresslevel: int
    @param compresslevel: compression level (see gzip module documentation)

    A file opened here from a path is always closed, even when parsing
    fails. A file object passed by the caller is left open.

    Raises ValueError if incorrect XML (or if the file cannot be read).
    Raises NotImplementedError if bz2 is requested but not available.
    """
    parser = self._get_parser()

    try:
        if isinstance(file, str):
            # gzipm / bz2m are module-level names defined outside this
            # block (presumably the gzip and bz2 modules -- TODO confirm).
            if gzip:
                fileobj = gzipm.GzipFile(file, compresslevel=compresslevel)
            elif bz2:
                try:
                    fileobj = bz2m.BZ2File(file, compresslevel=compresslevel)
                except NameError:
                    # bz2m is not defined: no bzip2 support on this system
                    raise NotImplementedError
            else:
                fileobj = open(file)

            # We opened the file, so we are responsible for closing it,
            # including when the parser raises.
            try:
                parser.ParseFile(fileobj)
            finally:
                fileobj.close()
        else:
            parser.ParseFile(file)
    except (IOError, xml.parsers.expat.ExpatError):
        raise ValueError
def write(self, file, gzip=False, bz2=False, compresslevel=9):
    """
    Write XML to a file.

    @type file: str or file
    @param file: path to file or file object
    @type gzip: boolean
    @param gzip: whether the file need be gzip-compressed or not
    @type bz2: boolean
    @param bz2: whether the file need be bzip2-compressed or not
    @type compresslevel: int
    @param compresslevel: compression level (see gzip module documentation)

    A file opened here from a path is always closed, even when writing
    fails. A file object passed by the caller is left open.

    Raises NotImplementedError if bz2 is requested but not available.
    """
    # Serialize first: if to_xml() fails, an existing file at that path
    # must not have been truncated already.
    data = self.to_xml()

    if isinstance(file, str):
        # gzipm / bz2m are module-level names defined outside this block
        # (presumably the gzip and bz2 modules -- TODO confirm).
        if gzip:
            fileobj = gzipm.GzipFile(file, "w", compresslevel=compresslevel)
        elif bz2:
            try:
                fileobj = bz2m.BZ2File(file, "w",
                                       compresslevel=compresslevel)
            except NameError:
                # bz2m is not defined: no bzip2 support on this system
                raise NotImplementedError
        else:
            fileobj = open(file, "w")

        try:
            fileobj.write(data)
        finally:
            fileobj.close()
    else:
        file.write(data)
Building character objects ========================== A character can be built from scratch progmatically: >>> s = Stroke() >>> s.append_point(Point(10, 20)) >>> w = Writing() >>> w.append_stroke(s) >>> c = Character() >>> c.set_writing(writing) Reading XML files ================= A character can be read from an XML file: >>> c = Character() >>> c.read("myfile") Gzip-compressed and bzip2-compressed XML files can also be read: >>> c = Character() >>> c.read("myfilegz", gzip=True) >>> c = Character() >>> c.read("myfilebz", bz2=True) A similar method read_string exists to read the XML from a string instead of a file. For convenience, you can directly load a character by passing it the file to load. In that case, compression is automatically detected based on file extension (.gz, .bz2). >>> c = Character("myfile.xml.gz") The recommended extension for XML character files is .xml. Writing XML files ================= A character can be saved to an XML file by using the write() method. >>> c.write("myfile") The write method has gzip and bz2 arguments just like read(). In addition, there is a write_string method which generates a string instead of a file. For convenience, you can save a character with the save() method. It automatically detects compression based on the file extension. >>> c.save("mynewfile.xml.bz2") If the Character object was passed a file when it was constructed, the path can ce omitted. >>> c = Character("myfile.gz") >>> c.save() >>> c = Character() >>> c.save() Traceback (most recent call last): File "", line 1, in File "tegaki/character.py", line 1238, in save raise ValueError, "A path must be specified" ValueError: A path must be specified """ DTD = \ """ """ def __init__(self, path=None): """ Creates a new Character. @type path: str or None @param path: path to file to load or None if empty character The file extension is used to determine whether the file is plain, gzip-compressed or bzip2-compressed XML. 
def save(self, path=None):
    """
    Save character to file.

    @type path: str
    @param path: path where to write the file or None if use the path \
                 that was given to the constructor

    The file extension is used to determine whether the file is plain,
    gzip-compressed or bzip2-compressed XML.

    Raises ValueError if no path was given here nor to the constructor.
    """
    if path is None:
        path = self._path
    if path is None:
        raise ValueError("A path must be specified")

    # str.endswith accepts a tuple of suffixes and already returns a bool.
    gzip = path.endswith((".gz", ".gzip"))
    bz2 = path.endswith((".bz2", ".bzip2"))

    self.write(path, gzip=gzip, bz2=bz2)
@rtype: str """ s = "{" attrs = ["\"utf8\" : \"%s\"" % self._utf8, "\"writing\" : " + self._writing.to_json()] s += ", ".join(attrs) s += "}" return s def to_sexp(self): """ Converts character to S-expressions. @rtype: str """ return "(character (value %s)" % self._utf8 + \ self._writing.to_sexp()[1:-1] def __eq__(self, char): if not char.__class__.__name__ in ("Character", "CharacterProxy"): return False return self._utf8 == char.get_utf8() and \ self._writing == char.get_writing() def __ne__(self, othr): return not(self == othr) self.clear() self.set_width(w.get_width()) self.set_height(w.get_height()) for s in w.get_strokes(True): self.append_stroke(s.copy()) def copy_from(self, c): """ Replace character with another character. @type c: L{Character} @param c: the character to copy from """ self.set_utf8(c.get_utf8()) self.set_writing(c.get_writing().copy()) def copy(self): """ Return a copy of character. @rtype: L{Character} """ c = Character() c.copy_from(self) return c def __repr__(self): return "" % (str(self.get_utf8()), id(self)) # Private... 
def _start_element(self, name, attrs): self._tag = name if self._first_tag: self._first_tag = False if self._tag != "character": raise ValueError, "The very first tag should be " if self._tag == "stroke": self._stroke = Stroke() elif self._tag == "point": point = Point() for key in ("x", "y", "pressure", "xtilt", "ytilt", "timestamp"): if attrs.has_key(key): value = attrs[key].encode("UTF-8") if key in ("pressure", "xtilt", "ytilt"): value = float(value) else: value = int(float(value)) else: value = None setattr(point, key, value) self._stroke.append_point(point) def _end_element(self, name): if name == "character": for s in ["_tag", "_stroke"]: if s in self.__dict__: del self.__dict__[s] if name == "stroke": if len(self._stroke) > 0: self._writing.append_stroke(self._stroke) self._stroke = None self._tag = None def _char_data(self, data): if self._tag == "utf8": self._utf8 = data.encode("UTF-8") elif self._tag == "width": self._writing.set_width(int(data)) elif self._tag == "height": self._writing.set_height(int(data)) tegaki-python-0.3.1/tegaki/engine.py0000644000175000017500000000664211342122456017243 0ustar mathieumathieu# -*- coding: utf-8 -*- # Copyright (C) 2009 The Tegaki project contributors # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
class Engine(object):
    """
    Base class for Recognizer and Trainer.
    """

    @classmethod
    def _get_search_path(cls, what):
        """
        Return a list of search path.

        @type what: str
        @param what: "models" or "engines"
        @rtype: list
        @return: directories to search, in search order; entries of the
                 TEGAKI_ENGINE_PATH / TEGAKI_MODEL_PATH environment
                 variable, if set, come last
        """
        libdir = os.path.dirname(os.path.abspath(__file__))

        try:
            # UNIX
            homedir = os.environ['HOME']
            homeengines = os.path.join(homedir, ".tegaki", what)
        except KeyError:
            # Windows
            homedir = os.environ['USERPROFILE']
            homeengines = os.path.join(homedir, "tegaki", what)

        search_path = [# For Unix
                       "/usr/local/share/tegaki/%s/" % what,
                       "/usr/share/tegaki/%s/" % what,
                       # for Maemo
                       "/media/mmc1/tegaki/%s/" % what,
                       "/media/mmc2/tegaki/%s/" % what,
                       # personal directory
                       homeengines,
                       # lib dir
                       os.path.join(libdir, what)]

        # For Windows (APPDATA is not set elsewhere, hence the KeyError)
        try:
            search_path += [os.path.join(os.environ["APPDATA"], "tegaki",
                                         what),
                            r"C:\Python25\share\tegaki\%s" % what,
                            r"C:\Python26\share\tegaki\%s" % what]
        except KeyError:
            pass

        # For OSX
        if platform.system() == "Darwin":
            search_path += [os.path.join(homedir, "Library",
                                         "Application Support", "tegaki",
                                         what),
                            os.path.join("/Library",
                                         "Application Support", "tegaki",
                                         what)]

        try:
            # A KeyError here means "what" has no associated variable.
            env = {"engines": "TEGAKI_ENGINE_PATH",
                   "models" : "TEGAKI_MODEL_PATH"}[what]

            if env in os.environ and \
               os.environ[env].strip() != "":
                search_path += os.environ[env].strip().split(os.path.pathsep)

        except KeyError:
            pass

        return search_path

    @classmethod
    def read_meta_file(cls, meta_file):
        """
        Read a .meta file.

        A meta file contains one "key = value" pair per line. Lines
        without a "=" are ignored. Only the first "=" separates the key
        from the value, so values may themselves contain "=".

        @type meta_file: str
        @param meta_file: meta file file to read
        @rtype: dict
        """
        ret = SortedDict()
        f = open(meta_file)
        try:
            for line in f:
                try:
                    key, value = [s.strip()
                                  for s in line.strip().split("=", 1)]
                except ValueError:
                    # no "=" on that line
                    continue
                ret[key] = value
        finally:
            f.close()
        return ret
try:
    import zinnia

    class ZinniaRecognizer(Recognizer):
        """
        Recognizer backed by the zinnia library.
        """

        RECOGNIZER_NAME = "zinnia"

        def __init__(self):
            Recognizer.__init__(self)
            self._recognizer = zinnia.Recognizer()

        def open(self, path):
            """
            Open a zinnia model.

            @type path: str
            @param path: model path

            Raises RecognizerError if could not open.
            """
            ret = self._recognizer.open(path)
            if not ret:
                raise RecognizerError("Could not open!")

        def recognize(self, writing, n=10):
            """
            Classify a writing.

            @type writing: L{Writing}
            @param writing: the handwriting to recognize
            @type n: int
            @param n: number of candidates requested
            @rtype: list
            @return: list of (value, score) tuples
            """
            s = zinnia.Character()

            s.set_width(writing.get_width())
            s.set_height(writing.get_height())

            for i, stroke in enumerate(writing.get_strokes()):
                for x, y in stroke:
                    s.add(i, x, y)

            # NOTE(review): n+1 results are requested and the last one is
            # dropped below; the reason is not visible here -- confirm
            # before changing.
            result = self._recognizer.classify(s, n+1)
            size = result.size()

            return [(result.value(i), result.score(i)) \
                        for i in range(0, (size - 1))]

    RECOGNIZER_CLASS = ZinniaRecognizer

    class ZinniaTrainer(Trainer):
        """
        Trainer backed by the zinnia library.
        """

        TRAINER_NAME = "zinnia"

        def __init__(self):
            Trainer.__init__(self)

        def train(self, charcol, meta, path=None):
            """
            Train a zinnia model and write its .meta file next to it.

            @type charcol: L{CharacterCollection}
            @param charcol: collection containing training data
            @type meta: dict
            @param meta: meta dict (must contain name and shortname)
            @type path: str
            @param path: path to the output model; if None, meta["path"]
                         is used when present, otherwise the personal
                         models directory

            Raises TrainerError if a character cannot be parsed by zinnia.
            """
            self._check_meta(meta)

            trainer = zinnia.Trainer()
            zinnia_char = zinnia.Character()

            for set_name in charcol.get_set_list():
                for character in charcol.get_characters(set_name):
                    if not zinnia_char.parse(character.to_sexp()):
                        raise TrainerError(zinnia_char.what())
                    trainer.add(zinnia_char)

            if not path:
                if "path" in meta:
                    path = meta["path"]
                else:
                    # Same personal directory as Engine._get_search_path,
                    # so that the trained model can be found afterwards.
                    try:
                        # UNIX
                        homemodels = os.path.join(os.environ['HOME'],
                                                  ".tegaki", "models")
                    except KeyError:
                        # Windows
                        homemodels = os.path.join(os.environ['USERPROFILE'],
                                                  "tegaki", "models")
                    path = os.path.join(homemodels, "zinnia",
                                        meta["name"] + ".model")
            else:
                path = os.path.abspath(path)

            # dirname is "" for a bare file name: nothing to create then
            # (os.makedirs("") would raise).
            dirname = os.path.dirname(path)
            if dirname and not os.path.exists(dirname):
                os.makedirs(dirname)

            # Only swap the extension: a plain str.replace would also
            # rewrite ".model" occurring inside a directory name.
            if path.endswith(".model"):
                meta_file = path[:-len(".model")] + ".meta"
            else:
                meta_file = path + ".meta"

            trainer.train(path)
            self._write_meta_file(meta, meta_file)

    TRAINER_CLASS = ZinniaTrainer

except ImportError:
    # zinnia is not installed: this engine is simply not available
    pass
See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # Contributors to this file: # - Mathieu Blondel import glob import os import imp from cStringIO import StringIO from tegaki.engine import Engine from tegaki.dictutils import SortedDict class TrainerError(Exception): """ Raised when something went wrong in a Trainer. """ pass class Trainer(Engine): """ Base Trainer class. A trainer can train models based on sample data annotated with labels. """ def __init__(self): pass @classmethod def get_available_trainers(cls): """ Return trainers installed on the system. @rtype: dict @return: a dict where keys are trainer names and values \ are trainer classes """ if not "available_trainers" in cls.__dict__: cls._load_available_trainers() return cls.available_trainers @classmethod def _load_available_trainers(cls): cls.available_trainers = SortedDict() for directory in cls._get_search_path("engines"): if not os.path.exists(directory): continue for f in glob.glob(os.path.join(directory, "*.py")): if f.endswith("__init__.py") or f.endswith("setup.py"): continue module_name = os.path.basename(f).replace(".py", "") module_name += "trainer" module = imp.load_source(module_name, f) try: name = module.TRAINER_CLASS.TRAINER_NAME cls.available_trainers[name] = module.TRAINER_CLASS except AttributeError: pass def set_options(self, options): """ Process trainer/model specific options. @type options: dict @param options: a dict where keys are option names and values are \ option values """ pass # To be implemented by child class def train(self, character_collection, meta, path=None): """ Train a model. 
@type character_collection: L{CharacterCollection} @param character_collection: collection containing training data @type meta: dict @param meta: meta dict obtained with L{Engine.read_meta_file} @type path: str @param path: path to the ouput model \ (if None, the personal directory is assumed) The meta dict needs the following keys: - name: full name (mandatory) - shortname: name with less than 3 characters (mandatory) - language: model language (optional) """ raise NotImplementedError def _check_meta(self, meta): if not meta.has_key("name") or not meta.has_key("shortname"): raise TrainerError, "meta must contain a name and a shortname" def _write_meta_file(self, meta, meta_file): io = StringIO() for k,v in meta.items(): io.write("%s = %s\n" % (k,v)) if os.path.exists(meta_file): f = open(meta_file) contents = f.read() f.close() # don't rewrite the file if same if io.getvalue() == contents: return f = open(meta_file, "w") f.write(io.getvalue()) f.close() tegaki-python-0.3.1/tegaki/arrayutils.py0000644000175000017500000001605411342122456020173 0ustar mathieumathieu# -*- coding: utf-8 -*- # Copyright (C) 2008 The Tegaki project contributors # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # Contributors to this file: # - Mathieu Blondel __doctest__ = True def array_sample(arr, rate): """ Sample array. 
def array_flatten(l, ltypes=(list, tuple)):
    """
    Reduce array of possibly multiple dimensions to one dimension.

    The list is flattened in place and returned. Empty sub-sequences are
    removed.

    @type l: list/tuple/array
    @param l: the list/tuple/array to flatten
    @type ltypes: tuple of types
    @param ltypes: the types considered as nested sequences
    @rtype: list
    @return: the flatten list

    >>> array_flatten([[1,2,3], [4,5], [[7,8]]])
    [1, 2, 3, 4, 5, 7, 8]
    >>> array_flatten([1, []])
    [1]
    """
    i = 0
    while i < len(l):
        while isinstance(l[i], ltypes):
            if not l[i]:
                # Drop the empty sub-sequence, then step back so that the
                # element which took its place (if any) is examined by the
                # outer loop. Without this, l[i] is read past the end when
                # the empty sub-sequence was the last element.
                l.pop(i)
                i -= 1
                break
            else:
                # Splice the sub-sequence in place of itself
                l[i:i+1] = list(l[i])
        i += 1
    return l

def array_reshape(arr, n):
    """
    Reshape one-dimensional array to a list of n-element lists.

    Trailing elements which do not fill a complete n-element list are
    left out.

    @type arr: list/tuple/array
    @param arr: the array to reshape
    @type n: int
    @param n: the number of elements in each list
    @rtype: list
    @return: the reshaped array

    >>> array_reshape([1,2,3,4,5,6,7,8,9], 3)
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    """
    newarr = []
    subarr = []
    count = 0
    for ele in arr:
        subarr.append(ele)
        count += 1
        if count % n == 0:
            newarr.append(subarr)
            subarr = []
    return newarr

def array_split(seq, p):
    """
    Split an array into p arrays of about the same size.

    The first len(seq) % p arrays get one more item than the others.

    @type seq: list/tuple/array
    @param seq: the array to split
    @type p: int
    @param p: the split size
    @rtype: list
    @return: the split array

    >>> array_split([1,2,3,4,5,6,7], 3)
    [[1, 2, 3], [4, 5], [6, 7]]
    """
    newseq = []
    # Explicit floor division: slice bounds must be integers whatever the
    # meaning of "/" is.
    n = len(seq) // p # min items per subsequence
    r = len(seq) % p # remaindered items
    b, e = 0, n + min(1, r) # first split
    for i in range(p):
        newseq.append(seq[b:e])
        r = max(0, r-1) # use up remainders
        b, e = e, e + n + min(1, r) # min(1,r) is always 0 or 1
    return newseq
@type arr: list/tuple/array @rtype: float @return: mean >>> array_mean([100, 150, 300]) 183.33333333333334 """ return float(sum(arr)) / float(len(arr)) def array_variance(arr, mean=None): """ Calculate the variance of the elements contained in an array. @type arr: list/tuple/array @rtype: float @return: variance >>> array_variance([100, 150, 300]) 7222.2222222222226 """ if mean is None: mean = array_mean(arr) var = array_mean([(val - mean) ** 2 for val in arr]) if var == 0.0: return 1.0 else: return var def array_mean_vector(vectors): """ Calculate the mean of the vectors, element-wise. @type arr: list of vectors @rtype: list of floats @return: list of means >>> array_mean_vector([[10,20], [100, 200]]) [55.0, 110.0] """ assert(len(vectors) > 0) n_dimensions = len(vectors[0]) mean_vector = [] for i in range(n_dimensions): arr = [vector[i] for vector in vectors] mean_vector.append(array_mean(arr)) return mean_vector def array_variance_vector(vectors, means=None): """ Calculate the variance of the vectors, element-wise. @type arr: list of vectors @rtype: list of floats @return: list of variances >>> array_variance_vector([[10,20], [100, 200]]) [2025.0, 8100.0] """ assert(len(vectors) > 0) n_dimensions = len(vectors[0]) if means is not None: assert(n_dimensions == len(means)) else: means = array_mean_vector(vectors) variance_vector = [] for i in range(n_dimensions): arr = [vector[i] for vector in vectors] variance_vector.append(array_variance(arr, means[i])) return variance_vector def array_covariance_matrix(vectors, non_diagonal=False): """ Calculate the covariance matrix of vectors. 
def array_add(arr1, arr2):
    """
    Add two arrays element-wise.

    Both arrays must have the same length.

    @type arr1: list/tuple/array
    @type arr2: list/tuple/array
    @rtype: list
    @return: the element-wise sums

    >>> array_add([1,2],[3,4])
    [4, 6]
    """
    # The two lengths must be compared with each other (the check used to
    # compare arr1 with itself and could never fail).
    assert(len(arr1) == len(arr2))
    return [a + b for a, b in zip(arr1, arr2)]

def array_mul(arr1, arr2):
    """
    Multiply two arrays element-wise.

    Both arrays must have the same length.

    @type arr1: list/tuple/array
    @type arr2: list/tuple/array
    @rtype: list
    @return: the element-wise products

    >>> array_mul([1,2],[3,4])
    [3, 8]
    """
    # Same check as in array_add
    assert(len(arr1) == len(arr2))
    return [a * b for a, b in zip(arr1, arr2)]
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

# Contributors to this file:
# - Mathieu Blondel

import glob
import os
import imp

from tegaki.engine import Engine
from tegaki.dictutils import SortedDict

class RecognizerError(Exception):
    """
    Raised when something went wrong in a Recognizer.
    """
    pass

class Recognizer(Engine):
    """
    Base Recognizer class.

    A recognizer can recognize handwritten characters based on a model.

    The L{open} method should be used to load a model from an absolute path
    on the disk.

    The L{set_model} method should be used to load a model from its name.
    Two models can't have the same name within one recognizer.
    However, two models can be named the same if they belong to two
    different recognizers.

    Recognizers usually have a corresponding L{Trainer}.
    """

    def __init__(self):
        # Name of the currently selected model (None until set_model()).
        self._model = None

    @classmethod
    def get_available_recognizers(cls):
        """
        Return recognizers installed on the system.

        The result is computed on first call and cached on the class.

        @rtype: dict
        @return: a dict where keys are recognizer names and values
                 are recognizer classes
        """
        # Look in cls.__dict__ (not via attribute lookup) so that the cache
        # is detected on this very class only.
        if "available_recognizers" not in cls.__dict__:
            cls._load_available_recognizers()
        return cls.available_recognizers

    @classmethod
    def _load_available_recognizers(cls):
        """
        Scan the "engines" search path and register every recognizer found.

        Each *.py file (except __init__.py and setup.py) is loaded as a
        module; modules exposing RECOGNIZER_CLASS.RECOGNIZER_NAME are stored
        in cls.available_recognizers, the others are ignored.
        """
        cls.available_recognizers = SortedDict()

        # NOTE(review): _get_search_path is not defined in this class;
        # presumably inherited from Engine -- confirm.
        for directory in cls._get_search_path("engines"):
            if not os.path.exists(directory):
                continue

            for f in glob.glob(os.path.join(directory, "*.py")):
                if f.endswith("__init__.py") or f.endswith("setup.py"):
                    continue

                # Strip only the extension; str.replace would also remove
                # any ".py" occurring in the middle of the file name.
                module_name = os.path.splitext(os.path.basename(f))[0]
                module_name += "recognizer"
                module = imp.load_source(module_name, f)

                # Keep the try body minimal so that only "this module is
                # not a recognizer plugin" is silently skipped.
                try:
                    klass = module.RECOGNIZER_CLASS
                    name = klass.RECOGNIZER_NAME
                except AttributeError:
                    continue

                cls.available_recognizers[name] = klass

    @staticmethod
    def get_all_available_models():
        """
        Return available models from all recognizers.

        @rtype: list
        @return: a list of [recognizer_name, model_name, meta_dict] entries
                 (each entry is itself a list)
        """
        all_models = []
        for r_name, klass in Recognizer.get_available_recognizers().items():
            for model_name, meta in klass.get_available_models().items():
                all_models.append([r_name, model_name, meta])
        return all_models

    @classmethod
    def get_available_models(cls):
        """
        Return available models for the current recognizer.

        The result is computed on first call and cached on the class.

        @rtype: dict
        @return: a dict where keys are models names and values are meta dict
        """
        if "available_models" in cls.__dict__:
            return cls.available_models
        else:
            name = cls.RECOGNIZER_NAME
            cls.available_models = cls._get_available_models(name)
            return cls.__dict__["available_models"]

    @classmethod
    def _get_available_models(cls, recognizer):
        """
        Scan the "models" search path for the models of one recognizer.

        @type recognizer: str
        @param recognizer: recognizer name, used as sub-directory name

        @rtype: dict
        @return: a dict where keys are model names and values are meta dict
        """
        available_models = SortedDict()

        for directory in cls._get_search_path("models"):
            directory = os.path.join(directory, recognizer)

            if not os.path.exists(directory):
                continue

            meta_files = glob.glob(os.path.join(directory, "*.meta"))

            for meta_file in meta_files:
                # NOTE(review): read_meta_file is not defined in this class;
                # presumably inherited from Engine and returning a dict --
                # confirm.
                meta = Recognizer.read_meta_file(meta_file)

                if "name" not in meta or "shortname" not in meta:
                    continue

                # Swap only the extension; str.replace would also rewrite a
                # ".meta" occurring in a directory name of the path.
                model_file = os.path.splitext(meta_file)[0] + ".model"

                if "path" in meta and not os.path.exists(meta["path"]):
                    # skip model if specified path is incorrect
                    continue
                elif "path" not in meta and os.path.exists(model_file):
                    # if path option is missing, assume the .model file
                    # is in the same directory
                    meta["path"] = model_file

                # NOTE: a meta without "path" and without a sibling .model
                # file is still registered here; set_model() will then fail
                # on meta["path"].
                available_models[meta["name"]] = meta

        return available_models

    def open(self, path):
        """
        Open a model.

        @type path: str
        @param path: model path

        Raises RecognizerError if could not open.
        """
        raise NotImplementedError

    def set_options(self, options):
        """
        Process recognizer/model specific options.

        The base implementation does nothing.

        @type options: dict
        @param options: a dict where keys are option names and values are
                        option values
        """
        pass

    def get_model(self):
        """
        Return the currently selected model.

        @rtype: str
        @return: name which identifies model uniquely on the system
        """
        return self._model

    def set_model(self, model_name):
        """
        Set the currently selected model.

        @type model_name: str
        @param model_name: name which identifies model uniquely on the system

        model_name must exist for that recognizer.

        Raises RecognizerError if the model does not exist.
        """
        available_models = self.__class__.get_available_models()

        if model_name not in available_models:
            raise RecognizerError("Model does not exist")

        self._model = model_name

        meta = available_models[model_name]

        # The whole meta dict is handed over as options before opening.
        self.set_options(meta)

        path = meta["path"]

        self.open(path)

    # To be implemented by child class
    def recognize(self, writing, n=10):
        """
        Recognizes handwriting.

        @type writing: L{Writing}
        @param writing: the handwriting to recognize

        @type n: int
        @param n: the number of candidates to return

        @rtype: list
        @return: a list of tuple (label, probability/distance)

        A model must be loaded with open or set_model() beforehand.
        """
        raise NotImplementedError

if __name__ == "__main__":
    # Usage: recognizer.py <recognizer name> <model name> <character .xml>
    import sys
    from tegaki.character import Character

    recognizer = sys.argv[1] # name of recognizer
    model = sys.argv[2] # name of model file
    char = Character()
    char.read(sys.argv[3]) # path of .xml file
    writing = char.get_writing()

    recognizers = Recognizer.get_available_recognizers()
    # Single-argument print with %-formatting gives the same output under
    # both the Python 2 print statement and the Python 3 print function.
    print("Available recognizers %s" % (recognizers,))

    if recognizer not in recognizers:
        raise Exception("Not an available recognizer")

    recognizer_klass = recognizers[recognizer]
    recognizer = recognizer_klass()

    models = recognizer_klass.get_available_models()
    print("Available models %s" % (models,))

    if model not in models:
        raise Exception("Not an available model")

    recognizer.set_model(model)

    print(recognizer.recognize(writing))
tegakiroot=/path/to/hwr/ export PYTHONPATH=$tegakiroot/tegaki-python:$tegakiroot/tegaki-pygtk In order to download the latest source code, please use git. $ git clone http://www.mblondel.org/code/hwr.git License ------- This package is distributed under the terms of the GNU General Public License. See the COPYING file for more information. Homepage -------- http://www.tegaki.org tegaki-python-0.3.1/ChangeLog0000644000175000017500000002344311352066723015735 0ustar mathieumathieucommit 5b0b86343c91cbf3d7ee683e82f41f9df75edf4d Author: Mathieu Blondel Date: Mon Mar 22 15:32:33 2010 +0900 [all] Set current version to 0.3.1. commit d232e5c84913508872ab3c161d1dfbf3cbefad6a Author: Mathieu Blondel Date: Tue Jan 12 13:43:05 2010 +0900 [tegaki-python] Fixed bug in CharacterCollection.remove_samples. commit 48884434905f6c8e8b103b1acccaf26dfb957365 Author: Mathieu Blondel Date: Wed Dec 23 20:25:51 2009 +0900 [tegaki-python] Added get_size() and set_size(). commit dc82b0befb3339f69193be3f68900ace4eff0045 Author: Mathieu Blondel Date: Fri Dec 11 22:24:28 2009 +0900 [tegaki-python] Added __repr__ for a few objects. commit 9b396782428e65e2b97b2dfe1b5b8a9362287104 Author: Mathieu Blondel Date: Thu Dec 10 17:44:16 2009 +0900 [tegaki-python] Fixed pb in CharacterCollection.get_n_characters(). commit 47afc05133c10b29a38d7aec1e1f180d69f4e365 Author: Mathieu Blondel Date: Wed Dec 9 17:32:18 2009 +0900 [tegaki-python] Updated ToDo list. commit 62a09950deea0094f16974bf509c3ee03bfca2da Author: Mathieu Blondel Date: Wed Dec 9 14:10:39 2009 +0900 [tegaki-python] Various optimizations to CharacterCollection. commit 3ce946ca19fe4b13a737b5e3ec2834cdab30ee6c Author: Mathieu Blondel Date: Fri Dec 4 21:52:28 2009 +0900 [tegaki-python] Added get_total_n_strokes to CharacterCollection. commit e26b64edc059dd40a33f259546b6dc2da1ace52d Author: Mathieu Blondel Date: Thu Dec 3 18:48:04 2009 +0900 [tegaki-python] Added get_random_characters() to CharacterCollection. 
commit cb9274dd1e879e80ecb219d268a3f3c9b7de5cb7 Author: Mathieu Blondel Date: Thu Dec 3 16:36:33 2009 +0900 [tegaki-python] Renamed PROXY option to WRITE_BACK. commit 50cad5c4779dde302de7fec6fb6af5b763797617 Author: Mathieu Blondel Date: Thu Dec 3 16:30:42 2009 +0900 [tegaki-python] Added get_average_n_strokes to CharacterCollection. commit 9d01b64a775f15733a9a62098fe534faa9b33d79 Author: Mathieu Blondel Date: Thu Dec 3 16:22:11 2009 +0900 [tegaki-python] Sync SortedDict with upstream. commit a656fed9c7473daa790fc52179bd602800e4ff7e Author: Mathieu Blondel Date: Thu Dec 3 16:01:51 2009 +0900 [tegaki-python] Fixed variance. commit b0106a723d71c2f79f01d9a8f8b6e2bb05f18a13 Author: Mathieu Blondel Date: Thu Dec 3 12:43:49 2009 +0900 [tegaki-python] Added generator-based methods for get_characters and get_all_characters. Use these methods when you retrieve a lot of characters. commit 4257abf1f0c8d5302bc874ce46dae783def9f12d Author: Mathieu Blondel Date: Thu Dec 3 12:05:37 2009 +0900 [tegaki-python] Run doctests as well in runtests.py. commit 167c43ea557d08f4b4e31ccda45750bd231211ee Author: Mathieu Blondel Date: Wed Dec 2 20:57:02 2009 +0900 [tegaki-python] More documentation about Character and CharacterCollection. commit f95f32762ac38cd12b6659593353dd7b5c1a49be Author: Mathieu Blondel Date: Wed Dec 2 20:37:26 2009 +0900 [tegaki-python] Added add_sets() method to CharacterCollection. commit 164d927e1b399e411c7d8519c70845d344b321dc Author: Mathieu Blondel Date: Wed Dec 2 20:22:28 2009 +0900 [tegaki-python] Handle XML files CharacterCollection constructor and add a new save() method. commit 3ad6e525dd61bfa15ae2539042d97ee9759ede64 Author: Mathieu Blondel Date: Wed Dec 2 18:34:26 2009 +0900 [tegaki-python] Added optional path argument to Character constructor and a new save() method. commit a092a357343c67826efb1466ad33bc470c3487d7 Author: Mathieu Blondel Date: Wed Dec 2 16:45:03 2009 +0900 [tegaki-python] Added inline documentation. 
commit 58fbebb75d7ee6b5484c2838c0272b950dc1e35a Author: Mathieu Blondel Date: Tue Dec 1 17:52:32 2009 +0900 [tegaki-python] Moved read_meta_file to Engine class. commit 9829c06728ffe4ab00c7f5766090d467e178e9c1 Author: Mathieu Blondel Date: Tue Dec 1 17:26:17 2009 +0900 [tegaki-python] CharacterCollection now uses sqlite as backend. commit 04f1db381e24f20f3b406694e092a810186e944a Author: Mathieu Blondel Date: Sun Nov 29 21:26:48 2009 +0900 [tegaki-python] Fixed system-wide path on Mac OS X. commit 23428970b5125e6effc233ff3261e6cb368ff93d Author: Mathieu Blondel Date: Sun Nov 1 20:56:48 2009 +0900 [all] Include ChangeLog. commit ac086c4a5e34df5552483b26cee5ff1f6d0aad2b Author: Mathieu Blondel Date: Sun Nov 1 19:34:42 2009 +0900 [tegaki-python] Removed NEWS file. commit 4ac1c409c8b6e80fe86e6109837685f7fa390556 Author: Mathieu Blondel Date: Sun Oct 25 20:16:59 2009 +0900 [tegaki-python] Bumped to version 0.3 commit 3014ea2a47ba070169e270b1fd2ff104708bd64b Author: Mathieu Blondel Date: Mon Oct 19 17:38:21 2009 +0900 [tegaki-python] Added include_characters_from_files() and exclude_characters_from_files() to CharacterCollection. commit 5cad9cbf608c32adc6209848a28bb5b02b8cccb7 Author: Mathieu Blondel Date: Sun Oct 11 05:21:05 2009 +0900 [tegaki-engines] Fixed bug when meta file is a relative path. commit e055d58ef725ab0d7f1a94cebcc3edcc7983155b Author: Mathieu Blondel Date: Sun Sep 27 02:27:43 2009 +0900 [tegaki-python] Added directories for win32 and osx plus refactored some code. commit 799a160957b39ee1d87cad99c64366dfb368bbf7 Author: Mathieu Blondel Date: Sun Sep 27 01:30:07 2009 +0900 [all] Distutils on python 2.6 for win32 doesn't like end slashes for directories. commit 8d081be5fe5dafa2e295cd2c3f8abbaaf2626fde Author: Mathieu Blondel Date: Fri Sep 18 01:44:35 2009 +0900 [tegaki-python] Added remove_samples() to CharacterCollection. 
commit 2686e5c55b1a801508a1ff9441de83d15d51dbbd Author: Mathieu Blondel Date: Thu Sep 17 20:23:57 2009 +0900 [tegaki-python] Added a check_duplicate option to __add__. commit 2261ed94de25d0202d1e841f1f5c6ce9053035c1 Author: Mathieu Blondel Date: Thu Sep 17 00:32:28 2009 +0900 [tegaki-lab] Added Jahmm (http://code.google.com/p/jahmm/) support when run with Jython2.5. commit dee6ba1a576041f0f1c8d34a00e74cfb1685bd2a Author: Mathieu Blondel Date: Sat Sep 12 18:07:23 2009 +0900 [tegaki-python] A few new methods + tests. commit 6d85820de3d29bc6db4893c6ab2fa94ade514019 Author: Mathieu Blondel Date: Wed Aug 19 10:19:48 2009 +0900 [tegaki-python] The engines didn't get installed. commit c62e4450127c064d4f11ab708a2e1696d62babf6 Author: Mathieu Blondel Date: Fri Jul 31 21:30:41 2009 +0900 [tegaki-python] Don't rewrite .meta file if its content didn't change. commit 59f1a1d79a7ba289abeba9434267d2be2b12468b Author: Mathieu Blondel Date: Fri Jul 31 20:44:17 2009 +0900 [tegaki-python] Support recognizer/trainer options. commit 296311816535680513cbaf29d4d359f0a9049d35 Author: ian johnson Date: Tue Jul 28 14:18:55 2009 -0400 [tegaki-db] merging with Erik's branch commit ef55bbf1902c3fcd2252cf327829c4a6c664cc24 Merge: 6d58e08 19e72ea Author: ian johnson Date: Tue Jul 28 13:16:47 2009 -0400 Merge branch 'master' of git://github.com/enoex/hwr commit 19e72eaf1dd3cc26654f8e9e93af4e5b32dff3b8 Author: erik Date: Mon Jul 27 16:33:32 2009 -0400 Updated layout for most pages commit a33a4ad2adb308212537b13a76b1f50ff3952636 Author: Mathieu Blondel Date: Sun Jul 26 16:18:04 2009 +0200 [tegaki-python] Don't load empty strokes. commit 7e679657da6bf4aac8a215f207aafdc74329ea08 Author: Mathieu Blondel Date: Sun Jul 26 16:06:37 2009 +0200 [tegaki-python] Don't load modules called setup.py. commit a10ef239742e7d42c38a9183b3058f723a3cb21b Author: Mathieu Blondel Date: Fri Jul 24 08:34:53 2009 +0200 [tegaki-python] Added TEGAKI_ENGINE_PATH and TEGAKI_MODEL_PATH environment variables. 
commit 430adef612884ea7b2bc619851eac391932063c7 Author: Mathieu Blondel Date: Fri Jul 24 05:20:52 2009 +0200 [tegaki-python] Merged recognizers and trainers into one file. This is more convenient. commit 05cb3ee3471599829412d8ccc3bc4adabe581dea Author: Mathieu Blondel Date: Thu Jul 23 20:38:38 2009 +0900 [tegaki-python] Pluginify Recognizer and Trainer. commit 072589a2417565b3697ae3abc8e547b06ccde39d Author: Mathieu Blondel Date: Sun Jul 19 11:45:53 2009 +0200 [all] Update project URL. commit f09bf2f5a99907d9484758f214b8f8c9cda3ba9e Author: Mathieu Blondel Date: Sun Jul 19 11:37:41 2009 +0200 [tegaki-python] Fixed setup.py. commit beaaed5713b0ce999a1e13fec2f4b9a2b864f8f9 Author: Mathieu Blondel Date: Sun Jul 19 07:07:18 2009 +0200 [tegaki-python] Updated README. commit be3647c410dadf5a647973312728976812e15c64 Author: Mathieu Blondel Date: Sun Jul 19 05:48:36 2009 +0200 [tegaki-python] Added NEWS file. commit 10ea23607f9a0879e1c1c459cf83e763286871da Author: Mathieu Blondel Date: Sun Jul 19 05:23:37 2009 +0200 [tegaki,tegaki-gtk] Renamed the packages to respectively tegaki-python and tegaki-pygtk to make it clear that they are Python packages. 
# -*- coding: utf-8 -*-

from distutils.core import setup
import os

def getversion():
    """
    Return the package version declared in tegaki/__init__.py.

    The file is parsed as text (looking for a line of the form
    VERSION = '...') instead of being imported.

    @rtype: str
    @return: the version string

    Raises RuntimeError if no VERSION assignment is found.
    """
    import re

    currdir = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(currdir, "tegaki", "__init__.py")

    # try/finally (rather than "with") closes the file even if read()
    # fails, without requiring a newer Python than the rest of the package.
    f = open(path)
    try:
        buf = f.read()
    finally:
        f.close()

    match = re.search(r"VERSION = '([^']*)'", buf)
    if match is None:
        raise RuntimeError("VERSION not found in %s" % path)
    return match.group(1)

# Please run
# python setup.py install

setup(
    name = 'tegaki-python',
    description = 'Tegaki Python library',
    author = 'Mathieu Blondel',
    author_email = 'mathieu ÂT mblondel DÔT org',
    url = 'http://www.tegaki.org',
    version = getversion(),
    license='GPL',
    packages = ['tegaki', 'tegaki.engines'],
    package_dir = {'tegaki':'tegaki', 'tegaki.engines':'tegaki/engines'}
)