From 8d32bef706153674359da84bb4d600b255976a83 Mon Sep 17 00:00:00 2001 From: Stephen Pascoe Date: Sun, 23 Oct 2011 21:42:03 +0100 Subject: [PATCH 1/2] Option to create canonical packing by sorting dictionary keys. --- msgpack/_msgpack.pyx | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/msgpack/_msgpack.pyx b/msgpack/_msgpack.pyx index 7a817466..2b97f861 100644 --- a/msgpack/_msgpack.pyx +++ b/msgpack/_msgpack.pyx @@ -50,6 +50,7 @@ cdef class Packer(object): cdef object _berrors cdef char *encoding cdef char *unicode_errors + cdef bool _canonical def __cinit__(self): cdef int buf_size = 1024*1024 @@ -59,7 +60,8 @@ cdef class Packer(object): self.pk.buf_size = buf_size self.pk.length = 0 - def __init__(self, default=None, encoding='utf-8', unicode_errors='strict'): + def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', + canonical=False): if default is not None: if not PyCallable_Check(default): raise TypeError("default must be a callable.") @@ -79,6 +81,8 @@ cdef class Packer(object): self._berrors = unicode_errors self.unicode_errors = PyBytes_AsString(self._berrors) + self._canonical = canonical + def __dealloc__(self): free(self.pk.buf); @@ -131,11 +135,20 @@ cdef class Packer(object): d = o ret = msgpack_pack_map(&self.pk, len(d)) if ret == 0: - for k,v in d.items(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break + if self._canonical: + # Unfortunately can't use d.items() because a closure would + # be required + for k in sorted(d.keys()): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(d[k], nest_limit-1) + if ret != 0: break + else: + for k,v in d.items(): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break elif PySequence_Check(o): ret = msgpack_pack_array(&self.pk, len(o)) if ret == 0: @@ -159,16 +172,16 @@ cdef class 
Packer(object): return buf -def pack(object o, object stream, default=None, encoding='utf-8', unicode_errors='strict'): +def pack(object o, object stream, default=None, encoding='utf-8', unicode_errors='strict', canonical=False): """ pack an object `o` and write it to stream).""" - packer = Packer(default=default, encoding=encoding, unicode_errors=unicode_errors) + packer = Packer(default=default, encoding=encoding, unicode_errors=unicode_errors, canonical=canonical) stream.write(packer.pack(o)) -def packb(object o, default=None, encoding='utf-8', unicode_errors='strict'): +def packb(object o, default=None, encoding='utf-8', unicode_errors='strict', canonical=False): """ pack o and return packed bytes.""" - packer = Packer(default=default, encoding=encoding, unicode_errors=unicode_errors) + packer = Packer(default=default, encoding=encoding, unicode_errors=unicode_errors, canonical=canonical) return packer.pack(o) From ecdbd4573650071c224ac4696f983e24a4908372 Mon Sep 17 00:00:00 2001 From: Stephen Pascoe Date: Sun, 23 Oct 2011 22:11:13 +0100 Subject: [PATCH 2/2] Test case for canonicalise option. --- test/test_canonical.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 test/test_canonical.py diff --git a/test/test_canonical.py b/test/test_canonical.py new file mode 100644 index 00000000..d143ac06 --- /dev/null +++ b/test/test_canonical.py @@ -0,0 +1,29 @@ + +from nose.tools import * + +from msgpack import packb + + +def test_canonical(): + # This test serialises a dictionary 3 times. + # Without canonicalisation each serialisation would be different + # (on Python 2.6 anyway) + d = {'year': 1, 'month': 2, 'day': 3} + s1 = packb(d, canonical=True) + + for i in range(30): + d[i] = i + for i in range(30): + del d[i] + s2 = packb(d, canonical=True) + + d = dict(d) + s3 = packb(d, canonical=True) + + # eq_(a, b, msg) treats a third argument as the failure message, + # so the three serialisations must be compared pairwise. + eq_(s1, s2) + eq_(s2, s3) + +if __name__ == '__main__': + from nose import main + main()