| 1 | # -*- coding: utf-8 -*- |
|---|
| 2 | """ |
|---|
| 3 | zine.utils.net |
|---|
| 4 | ~~~~~~~~~~~~~~ |
|---|
| 5 | |
|---|
| 6 | This module implements various network related functions and among |
|---|
| 7 | others a minimal urllib implementation that supports timeouts. |
|---|
| 8 | |
|---|
| 9 | :copyright: (c) 2010 by the Zine Team, see AUTHORS for more details. |
|---|
| 10 | :license: BSD, see LICENSE for more details. |
|---|
| 11 | """ |
|---|
| 12 | from cStringIO import StringIO, InputType |
|---|
| 13 | import os |
|---|
| 14 | import urlparse |
|---|
| 15 | import socket |
|---|
| 16 | import httplib |
|---|
| 17 | |
|---|
| 18 | from werkzeug import Headers, url_decode, cached_property |
|---|
| 19 | from werkzeug.contrib.iterio import IterO |
|---|
| 20 | |
|---|
| 21 | from zine.application import Response, get_application |
|---|
| 22 | from zine.utils.exceptions import ZineException |
|---|
| 23 | |
|---|
| 24 | |
|---|
| 25 | def open_url(url, data=None, timeout=None, |
|---|
| 26 | allow_internal_requests=True, **kwargs): |
|---|
| 27 | """This function parses the URL and opens the connection. The |
|---|
| 28 | following protocols are supported: |
|---|
| 29 | |
|---|
| 30 | - `http` |
|---|
| 31 | - `https` |
|---|
| 32 | |
|---|
| 33 | Per default requests to Zine itself trigger an internal request. This |
|---|
| 34 | can be disabled by setting `allow_internal_requests` to False. |
|---|
| 35 | """ |
|---|
| 36 | app = get_application() |
|---|
| 37 | if timeout is None: |
|---|
| 38 | timeout = app.cfg['default_network_timeout'] |
|---|
| 39 | parts = urlparse.urlsplit(url) |
|---|
| 40 | if app is not None: |
|---|
| 41 | blog_url = urlparse.urlsplit(app.cfg['blog_url']) |
|---|
| 42 | if allow_internal_requests and \ |
|---|
| 43 | parts.scheme in ('http', 'https') and \ |
|---|
| 44 | blog_url.netloc == parts.netloc and \ |
|---|
| 45 | parts.path.startswith(blog_url.path): |
|---|
| 46 | path = parts.path[len(blog_url.path):].lstrip('/') |
|---|
| 47 | method = kwargs.pop('method', None) |
|---|
| 48 | if method is None: |
|---|
| 49 | method = data is not None and 'POST' or 'GET' |
|---|
| 50 | make_response = lambda *a: URLResponse(url, *a) |
|---|
| 51 | return app.perform_subrequest(path.decode('utf-8'), |
|---|
| 52 | url_decode(parts.query), |
|---|
| 53 | method, data, timeout=timeout, |
|---|
| 54 | response_wrapper=make_response, |
|---|
| 55 | **kwargs) |
|---|
| 56 | handler = _url_handlers.get(parts.scheme) |
|---|
| 57 | if handler is None: |
|---|
| 58 | raise URLError('unsupported URL schema %r' % parts.scheme) |
|---|
| 59 | if isinstance(data, basestring): |
|---|
| 60 | data = StringIO(data) |
|---|
| 61 | try: |
|---|
| 62 | obj = handler(parts, timeout, **kwargs) |
|---|
| 63 | return obj.open(data) |
|---|
| 64 | except Exception, e: |
|---|
| 65 | if not isinstance(e, NetException): |
|---|
| 66 | e = NetException('%s: %s' % (e.__class__.__name__, str(e))) |
|---|
| 67 | raise e |
|---|
| 68 | |
|---|
| 69 | |
|---|
| 70 | def create_connection(address, timeout=30): |
|---|
| 71 | """Connect to address and return the socket object.""" |
|---|
| 72 | msg = "getaddrinfo returns an empty list" |
|---|
| 73 | host, port = address |
|---|
| 74 | |
|---|
| 75 | for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): |
|---|
| 76 | af, socktype, proto, canonname, sa = res |
|---|
| 77 | sock = None |
|---|
| 78 | try: |
|---|
| 79 | sock = socket.socket(af, socktype, proto) |
|---|
| 80 | sock.settimeout(timeout) |
|---|
| 81 | sock.connect(sa) |
|---|
| 82 | return sock |
|---|
| 83 | except socket.error, msg: |
|---|
| 84 | if sock is not None: |
|---|
| 85 | sock.close() |
|---|
| 86 | |
|---|
| 87 | raise ConnectionError(msg) |
|---|
| 88 | |
|---|
| 89 | |
|---|
def get_content_length(data_or_fp):
    """Return the content length of a string or file-like object.

    The strategies are tried in this order: `len()`, the buffered value
    of a cStringIO input object, and finally the size reported by
    `os.fstat` for the object's file descriptor.

    :param data_or_fp: a string, sized object or file-like object.
    :return: the length as integer or `None` if it can't be determined.
    """
    try:
        return len(data_or_fp)
    except TypeError:
        pass

    # cStringIO input objects are not backed by a file descriptor, so
    # measure the buffered value instead.
    if isinstance(data_or_fp, InputType):
        buffered = data_or_fp.getvalue()
        return len(buffered)

    try:
        descriptor = data_or_fp.fileno()
        return os.fstat(descriptor).st_size
    except (AttributeError, OSError):
        return None
|---|
| 105 | |
|---|
| 106 | |
|---|
class NetException(ZineException):
    """Base class for all exceptions of this module.  `open_url` wraps
    every foreign exception raised by a handler into this type.
    """
|---|
| 109 | |
|---|
| 110 | |
|---|
class CannotSendRequest(NetException):
    """Raised by `HTTPHandler.open` if the handler is not idle, that is
    if it already started to send a request.
    """
|---|
| 113 | |
|---|
| 114 | |
|---|
class BadStatusLine(NetException):
    """Network exception for bad HTTP status lines.

    NOTE(review): nothing in this module raises it; presumably meant for
    responses with an unparsable status line -- confirm with callers.
    """
|---|
| 117 | |
|---|
| 118 | |
|---|
class URLError(NetException):
    """Raised for URLs that can't be handled: an unsupported scheme, a
    malformed IPv6 address or an invalid port number.
    """
|---|
| 121 | |
|---|
| 122 | |
|---|
class ConnectionError(NetException):
    """Raised by `create_connection` if no connection to any of the
    resolved addresses could be established.
    """
|---|
| 125 | |
|---|
| 126 | |
|---|
| 127 | class URLHandler(object): |
|---|
| 128 | |
|---|
| 129 | default_port = 0 |
|---|
| 130 | |
|---|
| 131 | def __init__(self, parsed_url, timeout=30): |
|---|
| 132 | self.parsed_url = parsed_url |
|---|
| 133 | self.timeout = timeout |
|---|
| 134 | self.closed = False |
|---|
| 135 | self._socket = None |
|---|
| 136 | self._buffer = [] |
|---|
| 137 | |
|---|
| 138 | @property |
|---|
| 139 | def addr(self): |
|---|
| 140 | """The address tuple.""" |
|---|
| 141 | netloc = self.parsed_url.netloc |
|---|
| 142 | if netloc.startswith('['): |
|---|
| 143 | host_end = netloc.find(']') |
|---|
| 144 | if host_end < 0: |
|---|
| 145 | raise URLError('invalid ipv6 address') |
|---|
| 146 | host = netloc[1:host_end] |
|---|
| 147 | port = netloc[host_end + 2:] |
|---|
| 148 | else: |
|---|
| 149 | pieces = netloc.split(':', 1) |
|---|
| 150 | if len(pieces) == 1: |
|---|
| 151 | host = pieces[0] |
|---|
| 152 | port = None |
|---|
| 153 | else: |
|---|
| 154 | host, port = pieces |
|---|
| 155 | if not port: |
|---|
| 156 | port = self.default_port |
|---|
| 157 | else: |
|---|
| 158 | try: |
|---|
| 159 | port = int(port) |
|---|
| 160 | except ValueError: |
|---|
| 161 | raise URLError('not a valid port number') |
|---|
| 162 | return host, port |
|---|
| 163 | |
|---|
| 164 | @property |
|---|
| 165 | def host_string(self): |
|---|
| 166 | host, port = self.addr |
|---|
| 167 | try: |
|---|
| 168 | host = host.encode('ascii') |
|---|
| 169 | except UnicodeError: |
|---|
| 170 | host = host.encode('idna') |
|---|
| 171 | if port != self.default_port: |
|---|
| 172 | host = '%s:%d' % (host, port) |
|---|
| 173 | return host |
|---|
| 174 | |
|---|
| 175 | @property |
|---|
| 176 | def host(self): |
|---|
| 177 | return self.addr[0] |
|---|
| 178 | |
|---|
| 179 | @property |
|---|
| 180 | def port(self): |
|---|
| 181 | return self.addr[1] |
|---|
| 182 | |
|---|
| 183 | @property |
|---|
| 184 | def url(self): |
|---|
| 185 | return urlparse.urlunsplit(self.parsed_url) |
|---|
| 186 | |
|---|
| 187 | @property |
|---|
| 188 | def socket(self): |
|---|
| 189 | if self._socket is None: |
|---|
| 190 | if self.closed: |
|---|
| 191 | raise TypeError('handler closed') |
|---|
| 192 | self._socket = self.connect() |
|---|
| 193 | return self._socket |
|---|
| 194 | |
|---|
| 195 | def connect(self): |
|---|
| 196 | return create_connection(self.addr, self.timeout) |
|---|
| 197 | |
|---|
| 198 | def close(self): |
|---|
| 199 | if self._socket is not None: |
|---|
| 200 | self._socket.close() |
|---|
| 201 | self._socket = None |
|---|
| 202 | self.closed = True |
|---|
| 203 | |
|---|
| 204 | def send(self, data): |
|---|
| 205 | if self._buffer: |
|---|
| 206 | self.send_buffer() |
|---|
| 207 | if data is None: |
|---|
| 208 | return |
|---|
| 209 | try: |
|---|
| 210 | if hasattr(data, 'read'): |
|---|
| 211 | while 1: |
|---|
| 212 | s = data.read(8192) |
|---|
| 213 | if not s: |
|---|
| 214 | break |
|---|
| 215 | self.socket.sendall(s) |
|---|
| 216 | else: |
|---|
| 217 | self.socket.sendall(data) |
|---|
| 218 | except socket.error, v: |
|---|
| 219 | if v[0] == 32: # Broken pipe |
|---|
| 220 | self.close() |
|---|
| 221 | raise |
|---|
| 222 | |
|---|
| 223 | def send_buffered(self, data): |
|---|
| 224 | if hasattr(data, 'read'): |
|---|
| 225 | data = data.read() |
|---|
| 226 | self._buffer.append(data) |
|---|
| 227 | |
|---|
| 228 | def send_buffer(self): |
|---|
| 229 | buffer = ''.join(self._buffer) |
|---|
| 230 | del self._buffer[:] |
|---|
| 231 | self.send(buffer) |
|---|
| 232 | |
|---|
| 233 | def open(self, data=None): |
|---|
| 234 | """Return a `URLResponse` object.""" |
|---|
| 235 | return Response() |
|---|
| 236 | |
|---|
| 237 | |
|---|
class HTTPHandler(URLHandler):
    """Opens HTTP connections."""
    default_port = 80
    http_version = '1.1'

    # request states: nothing sent / sending in progress / request sent
    STATE_IDLE, STATE_SENDING, STATE_SENT = range(3)

    def __init__(self, parsed_url, timeout=30, method=None):
        """
        :param parsed_url: the split URL to request.
        :param timeout: the timeout used when connecting.
        :param method: the HTTP method.  If `None`, `open()` picks `POST`
                       or `GET` depending on whether data is given.
        """
        URLHandler.__init__(self, parsed_url, timeout)
        self.headers = Headers()
        self._state = self.STATE_IDLE
        self._method = method

    @property
    def method(self):
        """The HTTP method; `GET` if none was set so far."""
        return self._method or 'GET'

    def send(self, data):
        """Like `URLHandler.send` but marks an idle handler as sending."""
        if self._state == self.STATE_IDLE:
            self._state = self.STATE_SENDING
        return URLHandler.send(self, data)

    def send_request(self, data):
        """Send the request line, the headers and the body `data` (a
        string, an object with a `read()` method or `None`).
        """
        path = self.parsed_url.path or '/'
        if self.parsed_url.query:
            path += '?' + self.parsed_url.query
        # use the property, not `_method`: the latter is `None` until
        # `open()` ran, which would yield a "None <path>" request line.
        self.send_buffered('%s %s HTTP/%s\r\n' % (self.method, str(path),
                                                  self.http_version))
        self.send_buffered('\r\n'.join('%s: %s' % item for item in
                           self.headers.to_list()) + '\r\n\r\n')
        if isinstance(data, basestring):
            # string bodies go out together with the headers
            self.send_buffered(data)
            data = None
        self.send(data)
        self._state = self.STATE_SENT

    def open(self, data=None):
        """Send the request and return an `HTTPResponse`.

        For HTTP/1.1 a `Host` and an `Accept-Encoding: identity` header
        are added if missing, as is a `Content-Length` header if the
        length of `data` can be determined.

        :param data: optional request body.
        :raises CannotSendRequest: if the handler is not idle.
        """
        # reject reuse first so that a failed call leaves no traces
        if self._state != self.STATE_IDLE:
            raise CannotSendRequest()

        # if no method is set switch between GET and POST based on
        # the data.  This is for example the case if the URL was
        # opened with open_url().
        if self._method is None:
            if data is not None:
                self._method = 'POST'
            else:
                self._method = 'GET'

        if self.http_version == '1.1':
            if 'host' not in self.headers:
                self.headers['Host'] = self.host_string
            if 'accept-encoding' not in self.headers:
                self.headers['Accept-Encoding'] = 'identity'

        if 'content-length' not in self.headers:
            content_length = get_content_length(data)
            if content_length is not None:
                self.headers['Content-Length'] = content_length

        self.send_request(data)
        return HTTPResponse(self)
|---|
| 300 | |
|---|
| 301 | |
|---|
class HTTPSHandler(HTTPHandler):
    """Opens HTTPS connections."""
    default_port = 443

    def __init__(self, parsed_url, timeout=30,
                 default_method=None, key_file=None,
                 cert_file=None, method=None):
        """
        :param parsed_url: the split URL to request.
        :param timeout: the timeout used when connecting.
        :param default_method: the HTTP method (historic name).
        :param key_file: private key file handed to the SSL wrapper.
        :param cert_file: certificate file handed to the SSL wrapper.
        :param method: the HTTP method under the name `HTTPHandler` uses.
                       `open_url` forwards its keyword arguments to every
                       handler, so `method=` has to be accepted here as
                       well.  It wins over `default_method` if both are
                       given.
        """
        if method is None:
            method = default_method
        HTTPHandler.__init__(self, parsed_url, timeout, method)
        self.key_file = key_file
        self.cert_file = cert_file

    def connect(self):
        """Open a socket like `HTTPHandler.connect` and wrap it in SSL.

        NOTE(review): only the key and certificate file are passed on, no
        CA bundle -- presumably the peer certificate is not verified;
        confirm against the `ssl.wrap_socket` defaults.
        """
        try:
            # 2.6 and higher
            from ssl import wrap_socket
        except ImportError:
            # 2.4 and 2.5
            from httplib import FakeSocket
            def wrap_socket(sock, key, cert):
                ssl = socket.ssl(sock, key, cert)
                return FakeSocket(sock, ssl)
        raw_socket = HTTPHandler.connect(self)
        try:
            return wrap_socket(raw_socket, self.key_file, self.cert_file)
        except Exception:
            # don't leak the connected socket if wrapping fails
            raw_socket.close()
            raise
|---|
| 325 | |
|---|
| 326 | |
|---|
class URLResponse(Response):
    """A `Response` that additionally remembers the URL it belongs to."""

    def __init__(self, url, body, status=200, headers=None):
        """
        :param url: the URL of the response, stored as `url`.
        :param body: passed to `Response` as body.
        :param status: passed to `Response` as status.
        :param headers: passed to `Response` as headers.
        """
        Response.__init__(self, body, status, headers)
        self.url = url

    @cached_property
    def stream(self):
        """The response iterable wrapped in an `IterO` object."""
        chunks = self.response
        return IterO(chunks)
|---|
| 336 | |
|---|
| 337 | |
|---|
class HTTPResponse(URLResponse):
    """The response to a request sent by an `HTTPHandler`.  Status line
    and headers are parsed with `httplib.HTTPResponse`, the body is read
    lazily from it.
    """

    def __init__(self, http_handler):
        """
        :param http_handler: the handler that sent the request.  Its
                             socket, method and URL are used.  If the
                             response head can't be read the handler is
                             closed and the error re-raised.
        """
        self._socket = http_handler.socket
        resp = httplib.HTTPResponse(self._socket,
                                    method=http_handler._method)
        try:
            resp.begin()
        except Exception:
            # nobody gets hold of this object if reading the head fails,
            # so release the connection here instead of leaking it.
            resp.close()
            http_handler.close()
            self._socket = None
            raise
        headers = resp.getheaders()
        def make_iterable():
            while 1:
                data = resp.read(8092)
                if not data:
                    break
                yield data
        URLResponse.__init__(self, http_handler.url, make_iterable(),
                             resp.status, headers)
        self._httplib_resp = resp

    def close(self):
        """Close the response, the socket and the httplib response."""
        Response.close(self)
        if self._socket is not None:
            self._socket.close()
            self._socket = None
        if self._httplib_resp is not None:
            self._httplib_resp.close()
            self._httplib_resp = None
|---|
| 364 | |
|---|
| 365 | |
|---|
# maps URL schemes to the handler class `open_url` uses for them
_url_handlers = dict(
    http=HTTPHandler,
    https=HTTPSHandler
)
|---|