Zine

open source content publishing system


source: zine/pingback.py @ 1279:088d2f519391

Revision 1279:088d2f519391, 9.7 KB checked in by Georg Brandl <georg@…>, 2 years ago (diff)

Update copyright notices.

Line 
1# -*- coding: utf-8 -*-
2"""
3    zine.pingback
4    ~~~~~~~~~~~~~
5
6    This module implements the pingback API and a function to emit pingbacks
7    to different blogs.  The implementation here is a `Pingback 1.0`_
8    implementation, compatible with the Pingback specification by Ian Hickson.
9
10    .. _Pingback 1.0: http://www.hixie.ch/specs/pingback/pingback-1.0
11
12    Note that pingback support is implemented in the `Zine` core and
13    can't be removed.  You can however disable it in the configuration if
14    you want.  Plugins can hook into the pingback system by registering
15    a callback for a URL endpoint using `app.add_pingback_endpoint` during
16    the application setup.
17
18    Important
19    =========
20
21    Due to a broken design for trackback we will *never* support trackbacks
22    in the `Zine` core.  Neither do we handle incoming trackbacks, nor
23    do we emit trackbacks.
24
25
26    :copyright: (c) 2010 by the Zine Team, see AUTHORS for more details.
27    :license: BSD, see LICENSE for more details.
28"""
29import re
30from xmlrpclib import ServerProxy
31
32from werkzeug.routing import RequestRedirect, NotFound
33from werkzeug import unescape
34
35from zine.api import get_request, get_application, url_for, db, _
36from zine.models import Post, Comment
37from zine.utils.exceptions import UserException
38from zine.utils.xml import XMLRPC, Fault, strip_tags
39from zine.utils.net import open_url, NetException
40
41
42_title_re = re.compile(r'<title>(.*?)</title>(?i)')
43_pingback_re = re.compile(r'<link rel="pingback" href="([^"]+)" ?/?>(?i)')
44_chunk_re = re.compile(r'\n\n|<(?:p|div|h\d)[^>]*>')
45
46
class PingbackError(UserException):
    """Exception that carries a pingback fault code.

    It is raised by `pingback` when notifying a remote server fails and
    by pingback handlers to reject an incoming ping; in the latter case
    `handle_pingback_request` converts it into an XMLRPC fault by calling
    `as_fault`.

    :param fault_code: the numeric pingback fault code.
    :param internal_message: optional untranslated message that is sent
                             along with the XMLRPC fault.  If omitted the
                             default message for the fault code is used.
    """

    # Mark the default messages for translation without translating them
    # while the class body executes: a throwaway identity function shadows
    # the real gettext function for the dict literal and is deleted right
    # after so that the methods below see the module level `_` again.
    _ = lambda x: x
    default_messages = {
        16: _(u'source URL does not exist'),
        17: _(u'The source URL does not contain a link to the target URL'),
        32: _(u'The specified target URL does not exist'),
        33: _(u'The specified target URL cannot be used as a target'),
        48: _(u'The pingback has already been registered'),
        49: _(u'Access Denied')
    }
    del _

    def __init__(self, fault_code, internal_message=None):
        UserException.__init__(self)
        self._internal_message = internal_message
        self.fault_code = fault_code

    def as_fault(self):
        """Build the XMLRPC `Fault` object that represents this error."""
        reason = self.internal_message or 'unknown server error'
        return Fault(self.fault_code, reason)

    @property
    def ignore_silently(self):
        """`True` for the fault codes 17, 33, 48 and 49, which can be
        ignored silently.
        """
        return self.fault_code in (17, 33, 48, 49)

    @property
    def means_missing(self):
        """`True` for the fault codes 32 and 33, i.e. if the resource is
        missing or does not accept pingbacks.
        """
        return self.fault_code in (32, 33)

    @property
    def internal_message(self):
        """The untranslated message: the one passed to the constructor
        if there was one, otherwise the default message for the fault
        code with ``'server error'`` as last resort.
        """
        if self._internal_message is None:
            fallback = self.default_messages.get(self.fault_code)
            return fallback or 'server error'
        return self._internal_message

    @property
    def message(self):
        """The translated default message for the fault code, or a
        generic translated message that mentions the code if the code
        is unknown.
        """
        template = self.default_messages.get(self.fault_code)
        if template is None:
            return (_(u'An unknown server error (%s) occurred') %
                    self.fault_code)
        return _(template)
98
99
def pingback(source_uri, target_uri):
    """Try to notify the server behind `target_uri` that `source_uri`
    points to `target_uri`.  If that fails a `PingbackError` is raised.

    The pingback endpoint is read from the ``X-Pingback`` header of the
    response for `target_uri`; if that header is missing the response
    body is searched for a ``<link rel="pingback">`` element instead.

    :param source_uri: the URL of the document containing the link.
    :param target_uri: the URL that is linked to.
    :return: whatever the remote ``pingback.ping`` call returns.
    :raise PingbackError: with fault code 32 if `target_uri` cannot be
                          opened or the XMLRPC call fails unexpectedly,
                          with 33 if no pingback endpoint is advertised,
                          or with the remote fault code if the server
                          answers with an XMLRPC fault.
    """
    # `except Exception` rather than a bare `except` so that
    # KeyboardInterrupt and SystemExit are not swallowed and reported
    # as "target does not exist".
    try:
        response = open_url(target_uri)
    except Exception:
        raise PingbackError(32)

    try:
        pingback_uri = response.headers['X-Pingback']
    except KeyError:
        # no header, look for the autodiscovery link in the document
        match = _pingback_re.search(response.data)
        if match is None:
            raise PingbackError(33)
        pingback_uri = unescape(match.group(1))

    rpc = ServerProxy(pingback_uri)
    try:
        return rpc.pingback.ping(source_uri, target_uri)
    except Fault as e:
        # forward the fault code the remote server reported
        raise PingbackError(e.faultCode)
    except Exception:
        raise PingbackError(32)
124
125
def handle_pingback_request(source_uri, target_uri):
    """This method is exported via XMLRPC as `pingback.ping` by the
    pingback API.

    The target URL is resolved to an endpoint of the application.  If a
    pingback handler is registered for that endpoint it is tried first;
    if there is none, or if it reports that the resource is missing, the
    URL based handlers are tried one after another.

    :param source_uri: the URL of the document that links to us.
    :param target_uri: the URL below the blog URL that was linked.
    :return: a string with debug information about the handled ping.
    :raise Fault: 16 if the source URL cannot be opened, 32 if the target
                  is not below the blog URL, or the fault of the
                  `PingbackError` a handler raised (33 if no handler
                  accepted the ping).
    """
    app = get_application()

    # first we check if the source URL does indeed exist
    try:
        response = open_url(source_uri)
    except NetException:
        raise Fault(16, 'The source URL does not exist.')

    # we only accept pingbacks for links below our blog URL
    blog_url = app.cfg['blog_url']
    if not blog_url.endswith('/'):
        blog_url += '/'
    if not target_uri.startswith(blog_url):
        raise Fault(32, 'The specified target URL does not exist.')
    path_info = target_uri[len(blog_url):]
    handler = endpoint = values = None

    # resolve the path to an endpoint, following routing redirects
    while True:
        try:
            endpoint, values = app.url_adapter.match(path_info)
        except RequestRedirect as e:
            # try again with the path the router redirects to
            path_info = e.new_url[len(blog_url):]
        except NotFound:
            break
        else:
            if endpoint in app.pingback_endpoints:
                handler = app.pingback_endpoints[endpoint]
            # the URL matched, stop here.  Without this `break` a
            # successful match would repeat the same lookup forever.
            break

    # if we have an endpoint based handler use that one first
    raise_later = None
    if handler is not None:
        try:
            handler(response, target_uri, **values)
        except PingbackError as e:
            raise_later = e

    # if there was no endpoint handler, or if it reported the resource as
    # missing, fall back to the URL based handlers
    if handler is None or (raise_later is not None and
                           raise_later.means_missing):
        for handler in app.pingback_url_handlers:
            try:
                if handler(response, target_uri, path_info):
                    raise_later = None
                    break
            except PingbackError as e:
                raise_later = e
                # fatal error, abort
                if not raise_later.means_missing:
                    break
        else:
            # no handler accepted the ping
            raise_later = PingbackError(33)

    # now if we have an exception raise it as XMLRPC fault
    if raise_later is not None:
        raise raise_later.as_fault()

    # return some debug info
    return u'\n'.join((
        'endpoint: %r',
        'values: %r',
        'path_info: %r',
        'source_uri: %s',
        'target_uri: %s',
        'handler: %r'
    )) % (endpoint, values, path_info, source_uri, target_uri, handler)
195
196
def get_excerpt(response, url_hint, body_limit=1024 * 512):
    """Get an excerpt from the given `response`.  `url_hint` is the URL
    which will be used as anchor for the excerpt.  The return value is a
    tuple in the form ``(title, body)``.  If one of the two items could
    not be calculated it will be `None`.

    :param response: a response object with a `data` attribute, or a
                     string with a URL that is opened first.
    :param url_hint: the URL of the link the excerpt is built around.
    :param body_limit: only this many leading characters of the response
                       data are inspected.
    """
    if isinstance(response, basestring):
        response = open_url(response)
    contents = response.data[:body_limit]

    title = None
    title_match = _title_re.search(contents)
    if title_match is not None:
        # an empty title counts as "no title"
        title = strip_tags(title_match.group(1)) or None

    # The flags are passed as arguments: trailing inline flags such as
    # ``(?is)`` are rejected by newer versions of the `re` module.
    link_re = re.compile(r'<a[^>]+?"\s*%s\s*"[^>]*>(.*?)</a>' %
                         re.escape(url_hint), re.I | re.S)
    # up to 120 characters on either side of the link; compiled once
    # here instead of once per chunk
    body_re = re.compile(r'(?:^|\b)(.{0,120})\0(.{0,120})(?:\b|$)')

    for chunk in _chunk_re.split(contents):
        match = link_re.search(chunk)
        if not match:
            continue
        before = chunk[:match.start()]
        after = chunk[match.end():]
        # a NUL byte marks the position of the link in the tag-free text
        raw_body = '%s\0%s' % (strip_tags(before).replace('\0', ''),
                               strip_tags(after).replace('\0', ''))
        body_match = body_re.search(raw_body)
        if body_match:
            break
    else:
        # no chunk contains a usable link to `url_hint`
        return title, None

    before, after = body_match.groups()
    link_text = strip_tags(match.group(1))
    if len(link_text) > 60:
        link_text = link_text[:60] + u' …'

    # collapse all whitespace while joining the three parts
    bits = before.split()
    bits.append(link_text)
    bits.extend(after.split())
    return title, u'[…] %s […]' % u' '.join(bits)
235
236
def inject_header(f):
    """View decorator that adds the `X-Pingback` header, pointing to the
    external URL of the ``services/pingback`` endpoint, to every response
    the view returns with a status code of 200.  Other responses are
    passed through untouched.
    """
    def oncall(*args, **kwargs):
        response = f(*args, **kwargs)
        if response.status_code != 200:
            return response
        response.headers['X-Pingback'] = url_for('services/pingback',
                                                 _external=True)
        return response

    # make the wrapper look like the decorated view
    for attribute in '__name__', '__module__', '__doc__':
        setattr(oncall, attribute, getattr(f, attribute))
    return oncall
251
252
def pingback_post(response, target_uri, slug):
    """Pingback handler for posts.

    Looks the post up by `slug` and stores the ping as a pingback
    comment on it.

    :return: `False` if there is no post with that slug, `True` if the
             pingback was stored.
    :raise PingbackError: 33 if pings are disabled for the post, 49 if
                          the post cannot be read, 17 if the source has
                          no title or no usable link to the target, 48 if
                          a pingback from the source URL already exists.
    """
    post = Post.query.filter_by(slug=slug).first()
    if post is None:
        return False

    if not post.pings_enabled:
        raise PingbackError(33, 'no such post')
    if not post.can_read():
        raise PingbackError(49, 'access denied')

    title, excerpt = get_excerpt(response, target_uri)
    if not title:
        raise PingbackError(17, 'no title provided')
    if not excerpt:
        raise PingbackError(17, 'no useable link to target')

    # NOTE(review): this looks for a pingback from the source URL on any
    # post, not only on `post` -- confirm that this is intended.
    from_same_source = (
        (Comment.is_pingback == True) &
        (Comment.www == response.url)
    )
    if Comment.query.filter(from_same_source).first():
        raise PingbackError(48, 'pingback has already been registered')

    # the comment object is created for its side effect only; the commit
    # below is expected to persist it
    Comment(post, title, excerpt, '', response.url, is_pingback=True,
            submitter_ip=get_request().remote_addr, parser='text')
    db.commit()
    return True
278
279
# The XMLRPC service that the application registers on creation.  It
# exposes `handle_pingback_request` under the name `pingback.ping`.
service = XMLRPC('pingback')
service.register_function(handle_pingback_request, 'pingback.ping')

# Default pingback endpoints (the ones not provided by plugins).  On
# startup they are used as defaults for the pingback endpoints.
endpoints = {}

# Default pingback URL handlers (the ones not provided by plugins).  They
# are called one after another to find out if a yet unhandled URL reacts
# to pingbacks.
url_handlers = [pingback_post]
Note: See TracBrowser for help on using the repository browser.