3 years ago
Update to 1.1
| phpserialize.py | file | annotate | diff | revisions | |
| setup.py | file | annotate | diff | revisions |
1.1 --- a/phpserialize.py Sat Jun 14 19:19:48 2008 +0200 1.2 +++ b/phpserialize.py Sat Jun 14 20:54:44 2008 +0200 1.3 @@ -1,30 +1,122 @@ 1.4 # -*- coding: utf-8 -*- 1.5 -""" 1.6 - PHP Serialize / Unserialize 1.7 - =========================== 1.8 +r""" 1.9 + phpserialize 1.10 + ~~~~~~~~~~~~ 1.11 1.12 a port of the ``serialize`` and ``unserialize`` functions of 1.13 - php to python. 1.14 + php to python. This module implements the python serialization 1.15 + interface (eg: provides `dumps`, `loads` and similar functions). 1.16 1.17 + Usage 1.18 + ===== 1.19 + 1.20 + >>> from phpserialize import * 1.21 + >>> obj = dumps("Hello World") 1.22 + >>> loads(obj) 1.23 + 'Hello World' 1.24 + 1.25 + Due to the fact that PHP doesn't know the concept of lists, lists 1.26 + are serialized like hash-maps in PHP. As a matter of fact the 1.27 + reverse value of a serialized list is a dict: 1.28 + 1.29 + >>> loads(dumps(range(2))) 1.30 + {0: 0, 1: 1} 1.31 + 1.32 + If you want to have a list again, you can use the `dict_to_list` 1.33 + helper function: 1.34 + 1.35 + >>> dict_to_list(loads(dumps(range(2)))) 1.36 + [0, 1] 1.37 + 1.38 + It's also possible to convert into a tuple by using the `dict_to_tuple` 1.39 + function: 1.40 + 1.41 + >>> dict_to_tuple(loads(dumps((1, 2, 3)))) 1.42 + (1, 2, 3) 1.43 + 1.44 + Another problem are unicode strings. By default unicode strings are 1.45 + encoded to 'utf-8' but not decoded on `unserialize`. The reason for 1.46 + this is that phpserialize can't guess if you have binary or text data 1.47 + in the strings: 1.48 + 1.49 + >>> loads(dumps(u'Hello W\xf6rld')) 1.50 + 'Hello W\xc3\xb6rld' 1.51 + 1.52 + If you know that you have only text data of a known charset in the result 1.53 + you can decode strings by setting `decode_strings` to True when calling 1.54 + loads: 1.55 + 1.56 + >>> loads(dumps(u'Hello W\xf6rld'), decode_strings=True) 1.57 + u'Hello W\xf6rld' 1.58 + 1.59 + Dictionary keys are limited to strings and integers. `None` is converted 1.60 + into an empty string and floats and booleans into integers for PHP 1.61 + compatibility: 1.62 + 1.63 + >>> loads(dumps({None: 14, 42.23: 'foo', True: [1, 2, 3]})) 1.64 + {'': 14, 1: {0: 1, 1: 2, 2: 3}, 42: 'foo'} 1.65 + 1.66 + It also provides functions to read from file-like objects: 1.67 + 1.68 + >>> from StringIO import StringIO 1.69 + >>> stream = StringIO('a:2:{i:0;i:1;i:1;i:2;}') 1.70 + >>> dict_to_list(load(stream)) 1.71 + [1, 2] 1.72 + 1.73 + And to write to those: 1.74 + 1.75 + >>> stream = StringIO() 1.76 + >>> dump([1, 2], stream) 1.77 + >>> stream.getvalue() 1.78 + 'a:2:{i:0;i:1;i:1;i:2;}' 1.79 + 1.80 + Like `pickle` chaining of objects is supported: 1.81 + 1.82 + >>> stream = StringIO() 1.83 + >>> dump([1, 2], stream) 1.84 + >>> dump("foo", stream) 1.85 + >>> stream.seek(0) 1.86 + >>> load(stream) 1.87 + {0: 1, 1: 2} 1.88 + >>> load(stream) 1.89 + 'foo' 1.90 + 1.91 + This feature however is not supported in PHP. PHP will only unserialize 1.92 + the first object. 1.93 + 1.94 + CHANGELOG 1.95 + ========= 1.96 + 1.97 + 1.1 1.98 + - added `dict_to_list` and `dict_to_tuple` 1.99 + - added support for unicode 1.100 + - allowed chaining of objects like pickle does. 1.101 + 1.102 + 1.103 + :copyright: 2007-2008 by Armin Ronacher. 1.104 license: BSD 1.105 """ 1.106 +from StringIO import StringIO 1.107 + 1.108 __author__ = 'Armin Ronacher <armin.ronacher@active-4.com>' 1.109 -__version__ = '1.0' 1.110 +__version__ = '1.1' 1.111 1.112 1.113 -def serialize(data): 1.114 - """ 1.115 - PHP serializes an object 1.116 +def dumps(data, charset='utf-8', errors='strict'): 1.117 + """Return the PHP-serialized representation of the object as a string, 1.118 + instead of writing it to a file like `dump` does. 1.119 """ 1.120 def _serialize(obj, keypos): 1.121 if keypos: 1.122 if isinstance(obj, (int, long, float, bool)): 1.123 return 'i:%i;' % obj 1.124 if isinstance(obj, basestring): 1.125 + if isinstance(obj, unicode): 1.126 + obj = obj.encode(charset, errors) 1.127 return 's:%i:"%s";' % (len(obj), obj) 1.128 if obj is None: 1.129 return 's:0:"";' 1.130 - raise ValueError() 1.131 + raise TypeError('can\'t serialize %r as key' % type(obj)) 1.132 else: 1.133 if obj is None: 1.134 return 'N;' 1.135 @@ -35,6 +127,8 @@ 1.136 if isinstance(obj, float): 1.137 return 'd:%s;' % obj 1.138 if isinstance(obj, basestring): 1.139 + if isinstance(obj, unicode): 1.140 + obj = obj.encode(charset, errors) 1.141 return 's:%i:"%s";' % (len(obj), obj) 1.142 if isinstance(obj, (list, tuple, dict)): 1.143 out = [] 1.144 @@ -46,70 +140,118 @@ 1.145 out.append(_serialize(key, True)) 1.146 out.append(_serialize(value, False)) 1.147 return 'a:%i:{%s}' % (len(obj), ''.join(out)) 1.148 - raise ValueError() 1.149 + raise TypeError('can\'t serialize %r' % type(obj)) 1.150 return _serialize(data, False) 1.151 1.152 1.153 -def unserialize(data): 1.154 +def load(fp, charset='utf-8', errors='strict', decode_strings=False): 1.155 + """Read a string from the open file object `fp` and interpret it as a 1.156 + data stream of PHP-serialized objects, reconstructing and returning 1.157 + the original object hierarchy. 1.158 + 1.159 + `fp` must provide a `read()` method that takes an integer argument. Both 1.160 + method should return strings. Thus `fp` can be a file object opened for 1.161 + reading, a `StringIO` object, or any other custom object that meets this 1.162 + interface. 1.163 + 1.164 + `load` will read exactly one object from the stream. See the docstring of 1.165 + the module for this chained behavior. 1.166 """ 1.167 - Loads a php serialized string 1.168 - """ 1.169 - def _unserialize(s, start): 1.170 - type_ = s[start].lower() 1.171 - end = s.find(':', start + 3) 1.172 + def _expect(e): 1.173 + v = fp.read(len(e)) 1.174 + if v != e: 1.175 + raise ValueError('failed expectation, expected %r got %r' % (e, v)) 1.176 + 1.177 + def _read_until(delim): 1.178 + buf = [] 1.179 + while 1: 1.180 + char = fp.read(1) 1.181 + if char == delim: 1.182 + break 1.183 + elif not char: 1.184 + raise ValueError('unexpected end of stream') 1.185 + buf.append(char) 1.186 + return ''.join(buf) 1.187 + 1.188 + def _unserialize(): 1.189 + type_ = fp.read(1).lower() 1.190 if type_ == 'n': 1.191 - return None, start + 1 1.192 + _expect(';') 1.193 + return None 1.194 if type_ in 'idb': 1.195 - pos = start + 2 1.196 - buf = [] 1.197 - while True: 1.198 - char = s[pos] 1.199 - if char != ';': 1.200 - buf.append(char) 1.201 + _expect(':') 1.202 + data = _read_until(';') 1.203 + if type_ == 'i': 1.204 + return int(data) 1.205 + if type_ == 'd': 1.206 + return float(data) 1.207 + return int(data) != 0 1.208 + if type_ == 's': 1.209 + _expect(':') 1.210 + length = int(_read_until(':')) 1.211 + _expect('"') 1.212 + data = fp.read(length) 1.213 + _expect('"') 1.214 + if decode_strings: 1.215 + data = data.decode(charset, errors) 1.216 + _expect(';') 1.217 + return data 1.218 + if type_ == 'a': 1.219 + _expect(':') 1.220 + items = int(_read_until(':')) * 2 1.221 + _expect('{') 1.222 + result = {} 1.223 + last_item = Ellipsis 1.224 + for idx in xrange(items): 1.225 + item = _unserialize() 1.226 + if last_item is Ellipsis: 1.227 + last_item = item 1.228 else: 1.229 - if type_ == 'i': 1.230 - rv = int(''.join(buf)) 1.231 - elif type_ == 'd': 1.232 - rv = float(''.join(buf)) 1.233 - else: 1.234 - rv = int(''.join(buf)) != 0 1.235 - return rv, pos 1.236 - pos += 1 1.237 - if type_ == 's': 1.238 - pos = end + 2 1.239 - end = pos + int(s[start + 2:end]) 1.240 - data = s[pos:end] 1.241 - return data, end + 1 1.242 - if type_ == 'a': 1.243 - i = 0 1.244 - result = {} 1.245 - pos = end + 2 1.246 - data = s 1.247 - last_item = Ellipsis 1.248 - first_length = int(s[start + 2:end]) 1.249 - while i < first_length * 2: 1.250 - item, pos = _unserialize(data, pos) 1.251 - if not last_item is Ellipsis: 1.252 result[last_item] = item 1.253 last_item = Ellipsis 1.254 - else: 1.255 - last_item = item 1.256 - i += 1 1.257 - pos += 1 1.258 - return result, pos 1.259 - raise ValueError() 1.260 - return _unserialize(data, 0)[0] 1.261 + _expect('}') 1.262 + return result 1.263 + raise ValueError('unexpected opcode') 1.264 1.265 + return _unserialize() 1.266 1.267 -# generic python accessing functions 1.268 1.269 -def dump(obj, fp): 1.270 - data = serialize(obj) 1.271 - fp.write(data) 1.272 +def loads(data, charset='utf-8', errors='strict', decode_strings=False): 1.273 + """Read a PHP-serialized object hierarchy from a string. Characters in the 1.274 + string past the object's representation are ignored. 1.275 + """ 1.276 + return load(StringIO(data), charset, errors, decode_strings) 1.277 1.278 -def load(fp): 1.279 - data = fp.read() 1.280 - return unserialize(data) 1.281 1.282 -dumps = serialize 1.283 -loads = unserialize 1.284 +def dump(data, fp, charset='utf-8', errors='strict'): 1.285 + """Write a PHP-serialized representation of obj to the open file object 1.286 + `fp`. Unicode strings are encoded to `charset` with the error handling 1.287 + of `errors`. 1.288 + 1.289 + `fp` must have a `write()` method that accepts a single string argument. 1.290 + It can thus be a file object opened for writing, a `StringIO` object, or 1.291 + any other custom object that meets this interface. 1.292 + """ 1.293 + fp.write(dumps(data, charset, errors)) 1.294 + 1.295 + 1.296 +def dict_to_list(d): 1.297 + """Converts an ordered dict into a list.""" 1.298 + try: 1.299 + return [d[x] for x in xrange(len(d))] 1.300 + except KeyError: 1.301 + raise ValueError('dict is not a sequence') 1.302 + 1.303 + 1.304 +def dict_to_tuple(d): 1.305 + """Converts an ordered dict into a tuple.""" 1.306 + return tuple(dict_to_list(d)) 1.307 + 1.308 + 1.309 +serialize = dumps 1.310 +unserialize = loads 1.311 + 1.312 + 1.313 +if __name__ == '__main__': 1.314 + import doctest 1.315 + doctest.testmod()
2.1 --- a/setup.py Sat Jun 14 19:19:48 2008 +0200 2.2 +++ b/setup.py Sat Jun 14 20:54:44 2008 +0200 2.3 @@ -3,8 +3,8 @@ 2.4 name='phpserialize', 2.5 author='Armin Ronacher', 2.6 author_email='armin.ronacher@active-4.com', 2.7 - version='1.0', 2.8 - url='http://trac.pocoo.org/repos/sandbox/phpserialize', 2.9 + version='1.1', 2.10 + url='http://dev.pocoo.org/hg/phpserialize-main', 2.11 py_modules=['phpserialize'], 2.12 description='a port of the serialize and unserialize ' 2.13 'functions of php to python.',