| 1 | # -*- coding: utf-8 -*- |
|---|
| 2 | """ |
|---|
| 3 | zine.pingback |
|---|
| 4 | ~~~~~~~~~~~~~ |
|---|
| 5 | |
|---|
| 6 | This module implements the pingback API and a function to emit pingbacks |
|---|
| 7 | to different blogs. The implementation here is a `Pingback 1.0`_ |
|---|
| 8 | implementation, compatible with the Pingback specification by Ian Hickson. |
|---|
| 9 | |
|---|
| 10 | .. _Pingback 1.0: http://www.hixie.ch/specs/pingback/pingback-1.0 |
|---|
| 11 | |
|---|
| 12 | Note that pingback support is implemented in the `Zine` core and |
|---|
| 13 | can't be removed. You can however disable it in the configuration if |
|---|
| 14 | you want. Plugins can hook into the pingback system by registering |
|---|
| 15 | a callback for a URL endpoint using `app.add_pingback_endpoint` during |
|---|
| 16 | the application setup. |
|---|
| 17 | |
|---|
| 18 | Important |
|---|
| 19 | ========= |
|---|
| 20 | |
|---|
| 21 | Due to a broken design for trackback we will *never* support trackbacks |
|---|
| 22 | in the `Zine` core. Neither do we handle incoming trackbacks, nor |
|---|
| 23 | do we emit trackbacks. |
|---|
| 24 | |
|---|
| 25 | |
|---|
| 26 | :copyright: (c) 2010 by the Zine Team, see AUTHORS for more details. |
|---|
| 27 | :license: BSD, see LICENSE for more details. |
|---|
| 28 | """ |
|---|
| 29 | import re |
|---|
| 30 | from xmlrpclib import ServerProxy |
|---|
| 31 | |
|---|
| 32 | from werkzeug.routing import RequestRedirect, NotFound |
|---|
| 33 | from werkzeug import unescape |
|---|
| 34 | |
|---|
| 35 | from zine.api import get_request, get_application, url_for, db, _ |
|---|
| 36 | from zine.models import Post, Comment |
|---|
| 37 | from zine.utils.exceptions import UserException |
|---|
| 38 | from zine.utils.xml import XMLRPC, Fault, strip_tags |
|---|
| 39 | from zine.utils.net import open_url, NetException |
|---|
| 40 | |
|---|
| 41 | |
|---|
# Patterns used to scrape fetched HTML documents.  The case-insensitive
# flag is passed as an argument rather than as a trailing inline ``(?i)``:
# global inline flags that are not at the start of a pattern are deprecated
# and rejected by newer versions of the ``re`` module.

# captures the contents of the document's <title> element
_title_re = re.compile(r'<title>(.*?)</title>', re.IGNORECASE)

# captures the href of a <link rel="pingback" href="..."> element
_pingback_re = re.compile(r'<link rel="pingback" href="([^"]+)" ?/?>',
                          re.IGNORECASE)

# splits a document on blank lines and on opening p/div/h<digit> tags
_chunk_re = re.compile(r'\n\n|<(?:p|div|h\d)[^>]*>')
|---|
| 45 | |
|---|
| 46 | |
|---|
class PingbackError(UserException):
    """A failed pingback, identified by a numeric pingback fault code.

    Besides the fault code an optional internal message can be attached;
    it is what ends up in the XMLRPC fault created by `as_fault`.
    """

    # Temporarily shadow ``_`` with an identity function so the literals
    # below are stored untranslated (presumably so they are still picked up
    # by message extraction); `message` translates them on access.
    _ = lambda x: x
    default_messages = {
        16: _(u'source URL does not exist'),
        17: _(u'The source URL does not contain a link to the target URL'),
        32: _(u'The specified target URL does not exist'),
        33: _(u'The specified target URL cannot be used as a target'),
        48: _(u'The pingback has already been registered'),
        49: _(u'Access Denied')
    }
    del _

    def __init__(self, fault_code, internal_message=None):
        UserException.__init__(self)
        self.fault_code = fault_code
        self._internal_message = internal_message

    def as_fault(self):
        """Build the XMLRPC `Fault` that corresponds to this error."""
        text = self.internal_message
        if not text:
            text = 'unknown server error'
        return Fault(self.fault_code, text)

    @property
    def ignore_silently(self):
        """`True` for the fault codes that may be ignored silently."""
        silent_codes = (17, 33, 48, 49)
        return self.fault_code in silent_codes

    @property
    def means_missing(self):
        """`True` if the fault code says the resource is missing or
        does not accept pingbacks.
        """
        missing_codes = (32, 33)
        return self.fault_code in missing_codes

    @property
    def internal_message(self):
        """The explicitly provided internal message, falling back to the
        untranslated default for the fault code and then to a generic text.
        """
        explicit = self._internal_message
        if explicit is None:
            return self.default_messages.get(self.fault_code) or 'server error'
        return explicit

    @property
    def message(self):
        """The translated message for the fault code."""
        default = self.default_messages.get(self.fault_code)
        if default is None:
            return _(u'An unknown server error (%s) occurred') % self.fault_code
        return _(default)
|---|
| 98 | |
|---|
| 99 | |
|---|
| 100 | def pingback(source_uri, target_uri): |
|---|
| 101 | """Try to notify the server behind `target_uri` that `source_uri` |
|---|
| 102 | points to `target_uri`. If that fails an `PingbackError` is raised. |
|---|
| 103 | """ |
|---|
| 104 | try: |
|---|
| 105 | response = open_url(target_uri) |
|---|
| 106 | except: |
|---|
| 107 | raise PingbackError(32) |
|---|
| 108 | |
|---|
| 109 | try: |
|---|
| 110 | pingback_uri = response.headers['X-Pingback'] |
|---|
| 111 | except KeyError: |
|---|
| 112 | match = _pingback_re.search(response.data) |
|---|
| 113 | if match is None: |
|---|
| 114 | raise PingbackError(33) |
|---|
| 115 | pingback_uri = unescape(match.group(1)) |
|---|
| 116 | |
|---|
| 117 | rpc = ServerProxy(pingback_uri) |
|---|
| 118 | try: |
|---|
| 119 | return rpc.pingback.ping(source_uri, target_uri) |
|---|
| 120 | except Fault, e: |
|---|
| 121 | raise PingbackError(e.faultCode) |
|---|
| 122 | except: |
|---|
| 123 | raise PingbackError(32) |
|---|
| 124 | |
|---|
| 125 | |
|---|
| 126 | def handle_pingback_request(source_uri, target_uri): |
|---|
| 127 | """This method is exported via XMLRPC as `pingback.ping` by the |
|---|
| 128 | pingback API. |
|---|
| 129 | """ |
|---|
| 130 | app = get_application() |
|---|
| 131 | |
|---|
| 132 | # next we check if the source URL does indeed exist |
|---|
| 133 | try: |
|---|
| 134 | response = open_url(source_uri) |
|---|
| 135 | except NetException: |
|---|
| 136 | raise Fault(16, 'The source URL does not exist.') |
|---|
| 137 | |
|---|
| 138 | # we only accept pingbacks for links below our blog URL |
|---|
| 139 | blog_url = app.cfg['blog_url'] |
|---|
| 140 | if not blog_url.endswith('/'): |
|---|
| 141 | blog_url += '/' |
|---|
| 142 | if not target_uri.startswith(blog_url): |
|---|
| 143 | raise Fault(32, 'The specified target URL does not exist.') |
|---|
| 144 | path_info = target_uri[len(blog_url):] |
|---|
| 145 | handler = endpoint = values = None |
|---|
| 146 | |
|---|
| 147 | while 1: |
|---|
| 148 | try: |
|---|
| 149 | endpoint, values = app.url_adapter.match(path_info) |
|---|
| 150 | except RequestRedirect, e: |
|---|
| 151 | path_info = e.new_url[len(blog_url):] |
|---|
| 152 | except NotFound, e: |
|---|
| 153 | break |
|---|
| 154 | else: |
|---|
| 155 | if endpoint in app.pingback_endpoints: |
|---|
| 156 | handler = app.pingback_endpoints[endpoint] |
|---|
| 157 | |
|---|
| 158 | # if we have an endpoint based handler use that one first |
|---|
| 159 | raise_later = None |
|---|
| 160 | if handler is not None: |
|---|
| 161 | try: |
|---|
| 162 | handler(response, target_uri, **values) |
|---|
| 163 | except PingbackError, e: |
|---|
| 164 | raise_later = e |
|---|
| 165 | |
|---|
| 166 | # if the handler was none or an acception happend in the |
|---|
| 167 | if handler is None or (raise_later is not None and |
|---|
| 168 | raise_later.means_missing): |
|---|
| 169 | for handler in app.pingback_url_handlers: |
|---|
| 170 | try: |
|---|
| 171 | if handler(response, target_uri, path_info): |
|---|
| 172 | raise_later = None |
|---|
| 173 | break |
|---|
| 174 | except PingbackError, e: |
|---|
| 175 | raise_later = e |
|---|
| 176 | # fatal error, abort |
|---|
| 177 | if not raise_later.means_missing: |
|---|
| 178 | break |
|---|
| 179 | else: |
|---|
| 180 | raise_later = PingbackError(33) |
|---|
| 181 | |
|---|
| 182 | # now if we have an exception raise it as XMLRPC fault |
|---|
| 183 | if raise_later is not None: |
|---|
| 184 | raise raise_later.as_fault() |
|---|
| 185 | |
|---|
| 186 | # return some debug info |
|---|
| 187 | return u'\n'.join(( |
|---|
| 188 | 'endpoint: %r', |
|---|
| 189 | 'values: %r', |
|---|
| 190 | 'path_info: %r', |
|---|
| 191 | 'source_uri: %s', |
|---|
| 192 | 'target_uri: %s', |
|---|
| 193 | 'handler: %r' |
|---|
| 194 | )) % (endpoint, values, path_info, source_uri, target_uri, handler) |
|---|
| 195 | |
|---|
| 196 | |
|---|
# Captures up to 120 characters on either side of the NUL character that
# `get_excerpt` puts in place of the link.  Compiled once here instead of
# once per examined chunk.
_excerpt_context_re = re.compile(r'(?:^|\b)(.{0,120})\0(.{0,120})(?:\b|$)')


def get_excerpt(response, url_hint, body_limit=1024 * 512):
    """Get an excerpt from the given `response`.  `url_hint` is the URL
    which will be used as anchor for the excerpt.  The return value is a
    tuple in the form ``(title, body)``.  If one of the two items could
    not be calculated it will be `None`.

    :param response: a response object with a `data` attribute, or a string
        in which case it is treated as URL and fetched with `open_url`.
    :param url_hint: the URL whose link is searched for in the document.
    :param body_limit: only this many leading characters of the response
        data are examined.
    """
    if isinstance(response, basestring):
        response = open_url(response)
    contents = response.data[:body_limit]

    title = None
    title_match = _title_re.search(contents)
    if title_match is not None:
        # a title that is empty after stripping the tags counts as missing
        title = strip_tags(title_match.group(1)) or None

    # flags are passed explicitly; trailing inline flags are deprecated
    link_re = re.compile(r'<a[^>]+?"\s*%s\s*"[^>]*>(.*?)</a>' %
                         re.escape(url_hint), re.IGNORECASE | re.DOTALL)
    for chunk in _chunk_re.split(contents):
        match = link_re.search(chunk)
        if not match:
            continue
        before = chunk[:match.start()]
        after = chunk[match.end():]
        # mark the position of the link with a NUL character, after making
        # sure the surrounding text contains none itself
        raw_body = '%s\0%s' % (strip_tags(before).replace('\0', ''),
                               strip_tags(after).replace('\0', ''))
        body_match = _excerpt_context_re.search(raw_body)
        if body_match:
            break
    else:
        # no chunk contained a usable link to `url_hint`
        return title, None

    before, after = body_match.groups()
    link_text = strip_tags(match.group(1))
    if len(link_text) > 60:
        link_text = link_text[:60] + u' …'

    # splitting and re-joining normalizes the whitespace of the excerpt
    bits = before.split()
    bits.append(link_text)
    bits.extend(after.split())
    return title, u'[…] %s […]' % u' '.join(bits)
|---|
| 235 | |
|---|
| 236 | |
|---|
def inject_header(f):
    """Decorator for view functions that adds the `X-Pingback` header to
    the returned response whenever its status code is 200.

    The wrapper takes over the name, module and docstring of `f`.
    """
    def oncall(*args, **kwargs):
        response = f(*args, **kwargs)
        if response.status_code != 200:
            # only successful responses advertise the pingback service
            return response
        response.headers['X-Pingback'] = url_for('services/pingback',
                                                 _external=True)
        return response

    # make the wrapper look like the decorated view
    for attr in ('__name__', '__module__', '__doc__'):
        setattr(oncall, attr, getattr(f, attr))
    return oncall
|---|
| 251 | |
|---|
| 252 | |
|---|
def pingback_post(response, target_uri, slug):
    """This is the pingback handler for posts.

    :param response: the fetched source document; its `url` is stored as
        the URL of the pingback comment.
    :param target_uri: the pinged URL, used to locate the link in the
        source document.
    :param slug: the slug the post is looked up by.
    :return: `False` if no post with that slug exists, `True` once the
        pingback was stored as comment.
    :raises PingbackError: 33 if the post has pings disabled, 49 if the
        post can't be read, 17 if the source has no title or no usable
        link to the target, 48 if a pingback from that source URL is
        already registered.
    """
    post = Post.query.filter_by(slug=slug).first()
    if post is None:
        # not ours, let other handlers have a look at the URL
        return False

    # `post` is known to exist from here on, so only the flag is checked
    if not post.pings_enabled:
        raise PingbackError(33, 'no such post')
    elif not post.can_read():
        raise PingbackError(49, 'access denied')
    title, excerpt = get_excerpt(response, target_uri)
    if not title:
        raise PingbackError(17, 'no title provided')
    elif not excerpt:
        raise PingbackError(17, 'no useable link to target')
    # NOTE(review): the duplicate check matches on the source URL only and
    # is not restricted to `post` -- confirm that this is intended.
    old_pingback = Comment.query.filter(
        (Comment.is_pingback == True) &
        (Comment.www == response.url)
    ).first()
    if old_pingback:
        raise PingbackError(48, 'pingback has already been registered')
    # the instance is not kept; presumably creating it is enough for the
    # following commit to persist it -- verify against the Comment model
    Comment(post, title, excerpt, '', response.url, is_pingback=True,
            submitter_ip=get_request().remote_addr, parser='text')
    db.commit()
    return True
|---|
| 278 | |
|---|
| 279 | |
|---|
# the pingback service the application registers on creation; it exposes
# `handle_pingback_request` under the XMLRPC method name `pingback.ping`
service = XMLRPC('pingback')
service.register_function(handle_pingback_request, 'pingback.ping')

# a dict of default pingback endpoints (non plugin endpoints)
# these are used as defaults for pingback endpoints on startup
endpoints = {}

# a list of default pingback URL handlers (non plugin handlers)
# that are called one after another to find out if a yet unhandled
# URL reacts to pingbacks.
url_handlers = [pingback_post]
|---|