Update to 1.1

3 years ago

author
mitsuhiko
date
Sat Jun 14 20:54:44 2008 +0200
changeset 1
d9eb4efa4e5d
parent 0
29c66871f42d
child 2
c52766fbeb5e

Update to 1.1

phpserialize.py file | annotate | diff | revisions
setup.py file | annotate | diff | revisions
     1.1 --- a/phpserialize.py	Sat Jun 14 19:19:48 2008 +0200
     1.2 +++ b/phpserialize.py	Sat Jun 14 20:54:44 2008 +0200
     1.3 @@ -1,30 +1,122 @@
     1.4  # -*- coding: utf-8 -*-
     1.5 -"""
     1.6 -    PHP Serialize / Unserialize
     1.7 -    ===========================
     1.8 +r"""
     1.9 +    phpserialize
    1.10 +    ~~~~~~~~~~~~
    1.11  
    1.12      a port of the ``serialize`` and ``unserialize`` functions of
    1.13 -    php to python.
    1.14 +    php to python.  This module implements the python serialization
    1.15 +    interface (eg: provides `dumps`, `loads` and similar functions).
    1.16  
    1.17 +    Usage
    1.18 +    =====
    1.19 +
    1.20 +    >>> from phpserialize import *
    1.21 +    >>> obj = dumps("Hello World")
    1.22 +    >>> loads(obj)
    1.23 +    'Hello World'
    1.24 +
    1.25 +    Due to the fact that PHP doesn't know the concept of lists, lists
    1.26 +    are serialized like hash-maps in PHP.  As a matter of fact the
    1.27 +    reverse value of a serialized list is a dict:
    1.28 +
    1.29 +    >>> loads(dumps(range(2)))
    1.30 +    {0: 0, 1: 1}
    1.31 +
    1.32 +    If you want to have a list again, you can use the `dict_to_list`
    1.33 +    helper function:
    1.34 +
    1.35 +    >>> dict_to_list(loads(dumps(range(2))))
    1.36 +    [0, 1]
    1.37 +
    1.38 +    It's also possible to convert into a tuple by using the `dict_to_tuple`
    1.39 +    function:
    1.40 +
    1.41 +    >>> dict_to_tuple(loads(dumps((1, 2, 3))))
    1.42 +    (1, 2, 3)
    1.43 +
    1.44 +    Another problem is unicode strings.  By default unicode strings are
    1.45 +    encoded to 'utf-8' but not decoded on `unserialize`.  The reason for
    1.46 +    this is that phpserialize can't guess if you have binary or text data
    1.47 +    in the strings:
    1.48 +
    1.49 +    >>> loads(dumps(u'Hello W\xf6rld'))
    1.50 +    'Hello W\xc3\xb6rld'
    1.51 +
    1.52 +    If you know that you have only text data of a known charset in the result
    1.53 +    you can decode strings by setting `decode_strings` to True when calling
    1.54 +    loads:
    1.55 +
    1.56 +    >>> loads(dumps(u'Hello W\xf6rld'), decode_strings=True)
    1.57 +    u'Hello W\xf6rld'
    1.58 +
    1.59 +    Dictionary keys are limited to strings and integers.  `None` is converted
    1.60 +    into an empty string and floats and booleans into integers for PHP
    1.61 +    compatibility:
    1.62 +
    1.63 +    >>> loads(dumps({None: 14, 42.23: 'foo', True: [1, 2, 3]}))
    1.64 +    {'': 14, 1: {0: 1, 1: 2, 2: 3}, 42: 'foo'}
    1.65 +
    1.66 +    It also provides functions to read from file-like objects:
    1.67 +
    1.68 +    >>> from StringIO import StringIO
    1.69 +    >>> stream = StringIO('a:2:{i:0;i:1;i:1;i:2;}')
    1.70 +    >>> dict_to_list(load(stream))
    1.71 +    [1, 2]
    1.72 +
    1.73 +    And to write to those:
    1.74 +
    1.75 +    >>> stream = StringIO()
    1.76 +    >>> dump([1, 2], stream)
    1.77 +    >>> stream.getvalue()
    1.78 +    'a:2:{i:0;i:1;i:1;i:2;}'
    1.79 +
    1.80 +    Like `pickle`, chaining of objects is supported:
    1.81 +
    1.82 +    >>> stream = StringIO()
    1.83 +    >>> dump([1, 2], stream)
    1.84 +    >>> dump("foo", stream)
    1.85 +    >>> stream.seek(0)
    1.86 +    >>> load(stream)
    1.87 +    {0: 1, 1: 2}
    1.88 +    >>> load(stream)
    1.89 +    'foo'
    1.90 +
    1.91 +    This feature however is not supported in PHP.  PHP will only unserialize
    1.92 +    the first object.
    1.93 +
    1.94 +    CHANGELOG
    1.95 +    =========
    1.96 +
    1.97 +    1.1
    1.98 +        -   added `dict_to_list` and `dict_to_tuple`
    1.99 +        -   added support for unicode
   1.100 +        -   allowed chaining of objects like pickle does.
   1.101 +
   1.102 +
   1.103 +    :copyright: 2007-2008 by Armin Ronacher.
   1.104      license: BSD
   1.105  """
   1.106 +from StringIO import StringIO
   1.107 +
   1.108  __author__ = 'Armin Ronacher <armin.ronacher@active-4.com>'
   1.109 -__version__ = '1.0'
   1.110 +__version__ = '1.1'
   1.111  
   1.112  
   1.113 -def serialize(data):
   1.114 -    """
   1.115 -    PHP serializes an object
   1.116 +def dumps(data, charset='utf-8', errors='strict'):
   1.117 +    """Return the PHP-serialized representation of the object as a string,
   1.118 +    instead of writing it to a file like `dump` does.
   1.119      """
   1.120      def _serialize(obj, keypos):
   1.121          if keypos:
   1.122              if isinstance(obj, (int, long, float, bool)):
   1.123                  return 'i:%i;' % obj
   1.124              if isinstance(obj, basestring):
   1.125 +                if isinstance(obj, unicode):
   1.126 +                    obj = obj.encode(charset, errors)
   1.127                  return 's:%i:"%s";' % (len(obj), obj)
   1.128              if obj is None:
   1.129                  return 's:0:"";'
   1.130 -            raise ValueError()
   1.131 +            raise TypeError('can\'t serialize %r as key' % type(obj))
   1.132          else:
   1.133              if obj is None:
   1.134                  return 'N;'
   1.135 @@ -35,6 +127,8 @@
   1.136              if isinstance(obj, float):
   1.137                  return 'd:%s;' % obj
   1.138              if isinstance(obj, basestring):
   1.139 +                if isinstance(obj, unicode):
   1.140 +                    obj = obj.encode(charset, errors)
   1.141                  return 's:%i:"%s";' % (len(obj), obj)
   1.142              if isinstance(obj, (list, tuple, dict)):
   1.143                  out = []
   1.144 @@ -46,70 +140,118 @@
   1.145                      out.append(_serialize(key, True))
   1.146                      out.append(_serialize(value, False))
   1.147                  return 'a:%i:{%s}' % (len(obj), ''.join(out))
   1.148 -            raise ValueError()
   1.149 +            raise TypeError('can\'t serialize %r' % type(obj))
   1.150      return _serialize(data, False)
   1.151  
   1.152  
   1.153 -def unserialize(data):
   1.154 +def load(fp, charset='utf-8', errors='strict', decode_strings=False):
   1.155 +    """Read a string from the open file object `fp` and interpret it as a
   1.156 +    data stream of PHP-serialized objects, reconstructing and returning
   1.157 +    the original object hierarchy.
   1.158 +
   1.159 +    `fp` must provide a `read()` method that takes an integer argument.  That
   1.160 +    method should return strings.  Thus `fp` can be a file object opened for
   1.161 +    reading, a `StringIO` object, or any other custom object that meets this
   1.162 +    interface.
   1.163 +
   1.164 +    `load` will read exactly one object from the stream.  See the docstring of
   1.165 +    the module for this chained behavior.
   1.166      """
   1.167 -    Loads a php serialized string
   1.168 -    """
   1.169 -    def _unserialize(s, start):
   1.170 -        type_ = s[start].lower()
   1.171 -        end = s.find(':', start + 3)
   1.172 +    def _expect(e):
   1.173 +        v = fp.read(len(e))
   1.174 +        if v != e:
   1.175 +            raise ValueError('failed expectation, expected %r got %r' % (e, v))
   1.176 +
   1.177 +    def _read_until(delim):
   1.178 +        buf = []
   1.179 +        while 1:
   1.180 +            char = fp.read(1)
   1.181 +            if char == delim:
   1.182 +                break
   1.183 +            elif not char:
   1.184 +                raise ValueError('unexpected end of stream')
   1.185 +            buf.append(char)
   1.186 +        return ''.join(buf)
   1.187 +
   1.188 +    def _unserialize():
   1.189 +        type_ = fp.read(1).lower()
   1.190          if type_ == 'n':
   1.191 -            return None, start + 1
   1.192 +            _expect(';')
   1.193 +            return None
   1.194          if type_ in 'idb':
   1.195 -            pos = start + 2
   1.196 -            buf = []
   1.197 -            while True:
   1.198 -                char = s[pos]
   1.199 -                if char != ';':
   1.200 -                    buf.append(char)
   1.201 +            _expect(':')
   1.202 +            data = _read_until(';')
   1.203 +            if type_ == 'i':
   1.204 +                return int(data)
   1.205 +            if type_ == 'd':
   1.206 +                return float(data)
   1.207 +            return int(data) != 0
   1.208 +        if type_ == 's':
   1.209 +            _expect(':')
   1.210 +            length = int(_read_until(':'))
   1.211 +            _expect('"')
   1.212 +            data = fp.read(length)
   1.213 +            _expect('"')
   1.214 +            if decode_strings:
   1.215 +                data = data.decode(charset, errors)
   1.216 +            _expect(';')
   1.217 +            return data
   1.218 +        if type_ == 'a':
   1.219 +            _expect(':')
   1.220 +            items = int(_read_until(':')) * 2
   1.221 +            _expect('{')
   1.222 +            result = {}
   1.223 +            last_item = Ellipsis
   1.224 +            for idx in xrange(items):
   1.225 +                item = _unserialize()
   1.226 +                if last_item is Ellipsis:
   1.227 +                    last_item = item
   1.228                  else:
   1.229 -                    if type_ == 'i':
   1.230 -                        rv = int(''.join(buf))
   1.231 -                    elif type_ == 'd':
   1.232 -                        rv = float(''.join(buf))
   1.233 -                    else:
   1.234 -                        rv = int(''.join(buf)) != 0
   1.235 -                    return rv, pos
   1.236 -                pos += 1
   1.237 -        if type_ == 's':
   1.238 -            pos = end + 2
   1.239 -            end = pos + int(s[start + 2:end])
   1.240 -            data = s[pos:end]
   1.241 -            return data, end + 1
   1.242 -        if type_ == 'a':
   1.243 -            i = 0
   1.244 -            result = {}
   1.245 -            pos = end + 2
   1.246 -            data = s
   1.247 -            last_item = Ellipsis
   1.248 -            first_length = int(s[start + 2:end])
   1.249 -            while i < first_length * 2:
   1.250 -                item, pos = _unserialize(data, pos)
   1.251 -                if not last_item is Ellipsis:
   1.252                      result[last_item] = item
   1.253                      last_item = Ellipsis
   1.254 -                else:
   1.255 -                    last_item = item
   1.256 -                i += 1
   1.257 -                pos += 1
   1.258 -            return result, pos
   1.259 -        raise ValueError()
   1.260 -    return _unserialize(data, 0)[0]
   1.261 +            _expect('}')
   1.262 +            return result
   1.263 +        raise ValueError('unexpected opcode')
   1.264  
   1.265 +    return _unserialize()
   1.266  
   1.267 -# generic python accessing functions
   1.268  
   1.269 -def dump(obj, fp):
   1.270 -    data = serialize(obj)
   1.271 -    fp.write(data)
   1.272 +def loads(data, charset='utf-8', errors='strict', decode_strings=False):
   1.273 +    """Read a PHP-serialized object hierarchy from a string.  Characters in the
   1.274 +    string past the object's representation are ignored.
   1.275 +    """
   1.276 +    return load(StringIO(data), charset, errors, decode_strings)
   1.277  
   1.278 -def load(fp):
   1.279 -    data = fp.read()
   1.280 -    return unserialize(data)
   1.281  
   1.282 -dumps = serialize
   1.283 -loads = unserialize
   1.284 +def dump(data, fp, charset='utf-8', errors='strict'):
   1.285 +    """Write a PHP-serialized representation of `data` to the open file object
   1.286 +    `fp`.  Unicode strings are encoded to `charset` with the error handling
   1.287 +    of `errors`.
   1.288 +
   1.289 +    `fp` must have a `write()` method that accepts a single string argument.
   1.290 +    It can thus be a file object opened for writing, a `StringIO` object, or
   1.291 +    any other custom object that meets this interface.
   1.292 +    """
   1.293 +    fp.write(dumps(data, charset, errors))
   1.294 +
   1.295 +
   1.296 +def dict_to_list(d):
   1.297 +    """Converts a dict keyed by 0..len(d)-1 into a list ordered by key."""
   1.298 +    try:
   1.299 +        return [d[x] for x in xrange(len(d))]
   1.300 +    except KeyError:
   1.301 +        raise ValueError('dict is not a sequence')
   1.302 +
   1.303 +
   1.304 +def dict_to_tuple(d):
   1.305 +    """Converts a dict keyed by 0..len(d)-1 into a tuple ordered by key."""
   1.306 +    return tuple(dict_to_list(d))
   1.307 +
   1.308 +
   1.309 +serialize = dumps
   1.310 +unserialize = loads
   1.311 +
   1.312 +
   1.313 +if __name__ == '__main__':
   1.314 +    import doctest
   1.315 +    doctest.testmod()
     2.1 --- a/setup.py	Sat Jun 14 19:19:48 2008 +0200
     2.2 +++ b/setup.py	Sat Jun 14 20:54:44 2008 +0200
     2.3 @@ -3,8 +3,8 @@
     2.4      name='phpserialize',
     2.5      author='Armin Ronacher',
     2.6      author_email='armin.ronacher@active-4.com',
     2.7 -    version='1.0',
     2.8 -    url='http://trac.pocoo.org/repos/sandbox/phpserialize',
     2.9 +    version='1.1',
    2.10 +    url='http://dev.pocoo.org/hg/phpserialize-main',
    2.11      py_modules=['phpserialize'],
    2.12      description='a port of the serialize and unserialize '
    2.13                  'functions of php to python.',

mercurial