Zine

open source content publishing system


source: zine/parsers.py @ 1287:59531dfdacf8

Revision 1287:59531dfdacf8, 8.1 KB checked in by Georg Brandl <georg@…>, 2 years ago (diff)

Remove "informations".

Line 
1# -*- coding: utf-8 -*-
2"""
3    zine.parsers
4    ~~~~~~~~~~~~
5
6    This module holds the base parser information and the dict of
7    default parsers.
8
9    :copyright: (c) 2010 by the Zine Team, see AUTHORS for more details.
10    :license: BSD, see LICENSE for more details.
11"""
12from zine.i18n import lazy_gettext
13from zine.application import iter_listeners, get_application
14from zine.utils.zeml import parse_html, parse_zeml, sanitize, split_intro, \
15     Element, RootElement
16from zine.utils.xml import replace_entities
17
18
def parse(input_data, parser=None, reason='unknown'):
    """Generate a doc tree out of the data provided.

    The input is split into lines and re-joined with a single newline
    character before it is handed to the parser (this also drops a trailing
    line break).  After parsing, every ``process-doc-tree`` listener is
    called with ``(tree, input_data, reason)`` so that plugins can modify the
    tree in place; a listener may also return a new tree, which then
    replaces the current one for the listeners that follow.

    The reason is useful for plugins to find out if they want to render it
    or not.  For example a normal blog post would have the reason 'post', a
    comment 'comment', an isolated page from a plugin maybe 'page' etc.

    :param input_data: the markup source to parse.
    :param parser: the key of a parser in ``app.parsers``.  If `None`, the
                   parser configured as ``default_parser`` is used.
    :param reason: the parsing reason, passed through to the parser and to
                   the listeners.
    :return: the (possibly plugin-modified) doc tree.
    :raises ValueError: if `parser` is given but no such parser exists.
    """
    input_data = u'\n'.join(input_data.splitlines())
    app = get_application()

    if parser is None:
        try:
            parser = app.parsers[app.cfg['default_parser']]
        except KeyError:
            # the plugin that provided the default parser is no longer
            # available.  reset the config value to the builtin parser
            # and look the parser up again.
            transaction = app.cfg.edit()
            transaction.revert_to_default('default_parser')
            transaction.commit()
            parser = app.parsers[app.cfg['default_parser']]
    else:
        try:
            parser = app.parsers[parser]
        except KeyError:
            raise ValueError('parser %r does not exist' % (parser,))

    tree = parser.parse(input_data, reason)

    #! allow plugins to alter the doctree.
    for callback in iter_listeners('process-doc-tree'):
        replacement = callback(tree, input_data, reason)
        # a listener returning None means "modified in place (or not at all)"
        if replacement is not None:
            tree = replacement

    return tree
55
56
def render_preview(text, parser, component='post'):
    """Render a preview for the given text using the parser provided.

    The text is parsed with the reason ``'<component>-preview'`` and the
    resulting tree is split into intro and body.

    :param text: the markup source to preview.
    :param parser: the parser key, passed on to :func:`parse`.
    :param component: the kind of content being previewed; used to build
                      the parsing reason.
    :return: the body as HTML; if there is an intro, it is prepended inside
             a ``<div class="intro">`` wrapper.
    """
    tree = parse(text, parser, '%s-preview' % component)
    intro, body = split_intro(tree)
    if not intro:
        return body.to_html()
    return u'<div class="intro">%s</div>%s' % (intro.to_html(),
                                               body.to_html())
67
68
class MarkupExtension(object):
    """Handler for a markup language-agnostic markup extension.

    The following attributes must/can be set on subclasses:

        `name`
            The name under which the extension is accessible. This is the tag
            name for XML-like markup languages, or the directive name for
            reStructuredText (reST), etc.
        `is_block_level`
            True if the element is to be rendered as a block-level element.
            This may also change how the element is accessed; for example, in
            reST, inline elements are used as roles, while block-level elements
            are used as directives.
        `is_void`
            True if the element doesn't have content.
        `is_isolated`
            True if the element's contents should not be parsed by the markup
            parser and converted to a ZEML tree.
        `broken_by`
            A sequence of element names by which this element is implicitly
            closed.  Applies only to XML-like markup languages.
        `attributes`
            A set of allowed attribute (option) names.  Note that inline elements
            may not support attributes in all markup languages.
        `argument_attribute`
            For markup languages that support arguments to elements as well
            as attributes, if this is the name of an attribute given in
            `attributes`, the element will accept one argument and map it
            to the given attribute.  Note that inline elements may not support
            arguments in all markup languages.

    The `process` method must be overridden.  It is given three arguments:

        `attributes`
            A dictionary of attributes (options) of the markup element.
        `content`
            The content of the element; if `is_isolated` is False, this has
            already been parsed with the markup parser and is a ZEML tree,
            otherwise it is raw text.
        `reason`
            The parsing reason -- either "post", "comment", "post-preview",
            "comment-preview", or "system".  The element can change behavior
            depending on the reason, for example disable potentially unsafe
            features for comments.

    It must return a ZEML tree.
    """

    name = None
    is_void = False
    is_isolated = False
    is_block_level = True
    broken_by = None
    # class-level default shared by every subclass that does not override
    # it; subclasses should assign their own set instead of mutating this one.
    attributes = set()
    argument_attribute = None

    def __init__(self, app):
        """Remember the application this extension belongs to."""
        self.app = app

    def process(self, attributes, content, reason):
        """Called each time the element is encountered.

        Subclasses have to override this; the base implementation always
        raises :exc:`NotImplementedError`.
        """
        raise NotImplementedError()
132
133
class BaseParser(object):
    """Baseclass for all kinds of parsers."""

    #: the localized name of the parser.
    name = None

    def __init__(self, app):
        """Remember the application this parser belongs to."""
        self.app = app

    def parse(self, input_data, reason):
        """Return a ZEML tree.

        :param input_data: the markup source to parse.
        :param reason: the parsing reason (for example 'post' or 'comment').

        Subclasses have to override this; the base implementation always
        raises :exc:`NotImplementedError`.
        """
        raise NotImplementedError()
146
147
class ZEMLParser(BaseParser):
    """The parser for the ZEML Markup language."""

    name = lazy_gettext('Zine-Markup')

    def parse(self, input_data, reason):
        """Parse ZEML markup into a ZEML tree.

        The application's markup extensions are made available to the
        markup.  If the reason is 'comment' the tree is passed through
        `sanitize` before it is returned.
        """
        tree = parse_zeml(input_data, reason, self.app.markup_extensions)
        # NOTE(review): only the exact reason 'comment' is sanitized; other
        # reasons such as 'comment-preview' are not -- confirm this is
        # intended.
        if reason == 'comment':
            tree = sanitize(tree)
        return tree
158
159
class HTMLParser(BaseParser):
    """A parser that understands plain old HTML."""

    name = lazy_gettext('HTML')

    def parse(self, input_data, reason):
        """Parse HTML into a ZEML tree.

        If the reason is 'comment' the tree is passed through `sanitize`
        before it is returned.
        """
        tree = parse_html(input_data)
        # NOTE(review): only the exact reason 'comment' is sanitized; other
        # reasons such as 'comment-preview' are not -- confirm this is
        # intended.
        if reason == 'comment':
            tree = sanitize(tree)
        return tree
170
171
class PlainTextParser(BaseParser):
    """Parses simple text into a ZEML tree by utilizing pottymouth."""

    name = lazy_gettext('Text')

    def _to_text(self, token):
        """Convert a token to normal text.

        The token is converted to unicode and passed through
        `replace_entities`.
        """
        return replace_entities(unicode(token))

    def _to_zeml(self, node, untrusted=False):
        """Convert a potty-mouth node into a ZEML tree.

        :param node: the root node returned by the pottymouth parser.
        :param untrusted: if true, every ``a`` element gets
                          ``rel="nofollow"``.
        :return: a `RootElement` holding the converted tree.
        """
        # imported lazily, like in `parse`
        from zine._ext.pottymouth import Token

        def add_text(node, text):
            # text following a child element lives in that child's tail;
            # text before the first child lives in the node's own text.
            if node.children:
                node.children[-1].tail += text
            else:
                node.text += text

        def convert(node, is_root):
            if is_root:
                result = RootElement()
            else:
                result = Element(node.name)
            if node._attributes:
                result.attributes.update(node._attributes)

            for item in node:
                if isinstance(item, (str, unicode, Token)):
                    # plain strings and tokens become text
                    add_text(result, self._to_text(item))
                else:
                    child = convert(item, False)
                    # remove the useless empty spans: splice the span's
                    # text, children and tail directly into the parent.
                    if child.name == 'span' and not child.attributes:
                        add_text(result, child.text)
                        result.children.extend(child.children)
                        add_text(result, child.tail)
                    else:
                        result.children.append(child)

            # fixes an output bug from pottymouth: a paragraph whose only
            # child is a blockquote is replaced by that blockquote.
            if len(result.children) == 1 and node.name == 'p' and \
               result.children[0].name == 'blockquote':
                result = result.children[0]

            # untrusted posts get nofollow on links
            if untrusted and result.name == 'a':
                result.attributes['rel'] = 'nofollow'

            return result

        return convert(node, True)

    def parse(self, input_data, reason):
        """Parse plain text into a ZEML tree.

        The pottymouth parser is created with its emdash, ellipsis, smart
        quotes, youtube, image and italic options switched off; ``all_links``
        is enabled unless the ``plaintext_parser_nolinks`` config value is
        set.  The tree is converted as untrusted if the reason is 'comment'.
        """
        from zine._ext.pottymouth import PottyMouth
        parser = PottyMouth(emdash=False, ellipsis=False, smart_quotes=False,
                            youtube=False, image=False, italic=False,
                            all_links=not self.app.cfg['plaintext_parser_nolinks'])
        node = parser.parse(input_data)
        # NOTE(review): only the exact reason 'comment' is treated as
        # untrusted; other reasons such as 'comment-preview' are not --
        # confirm this is intended.
        return self._to_zeml(node, reason == 'comment')
230
231
# maps the parser key to the class of each parser defined in this module.
all_parsers = {
    'zeml':             ZEMLParser,
    'html':             HTMLParser,
    'text':             PlainTextParser,
}
Note: See TracBrowser for help on using the repository browser.