23 months ago
Removed tag 2.5
1 # -*- coding: utf-8 -*-
2 """
3 jinja2.utils
4 ~~~~~~~~~~~~
6 Utility functions.
8 :copyright: (c) 2010 by the Jinja Team.
9 :license: BSD, see LICENSE for more details.
10 """
11 import re
12 import sys
13 import errno
14 try:
15 from thread import allocate_lock
16 except ImportError:
17 from dummy_thread import allocate_lock
18 from collections import deque
19 from itertools import imap
# Splits text on whitespace runs; the capturing group keeps the whitespace
# itself in the result so the text can be re-joined unchanged.
_word_split_re = re.compile(r'(\s+)')

# Separates a word into leading punctuation, the interesting middle part
# and trailing punctuation.  The pattern is applied to text that has
# already been HTML-escaped, so angle brackets must also be recognised in
# their entity form (``&lt;`` / ``&gt;``).
_punctuation_re = re.compile(
    '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % (
        '|'.join(map(re.escape, ('(', '<', '&lt;'))),
        '|'.join(map(re.escape, ('.', ',', ')', '>', '\n', '&gt;')))
    )
)

# Very loose check for something that looks like an e-mail address.
_simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')

# Matches HTML comments and tags.
_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')

# Matches an entity reference and captures the part between ``&`` and ``;``.
_entity_re = re.compile(r'&([^;]+);')

_letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
_digits = '0123456789'

# special singleton representing missing values for the runtime
missing = type('MissingType', (), {'__repr__': lambda x: 'missing'})()

# code objects registered through the ``internalcode`` decorator
internal_code = set()
# Concatenate an iterable of strings into a single unicode string.
# Unfortunately there is a bug in Python 2.4 and lower that causes
# unicode.join to trash the traceback of an exception raised by the
# iterable it consumes.  The code below probes for that bug once, when the
# module is imported, and only installs the workaround when it is needed.
_concat = u''.join
try:
    def _test_gen_bug():
        # raise from inside a generator; the function object itself is
        # used as exception argument so the handler below can tell
        # whether the very same exception made it through the join.
        raise TypeError(_test_gen_bug)
        yield None
    _concat(_test_gen_bug())
except TypeError, _error:
    if not _error.args or _error.args[0] is not _test_gen_bug:
        # the exception that arrived is not the one raised above:
        # wrap the join so that the generator is exhausted first.
        def concat(gen):
            try:
                return _concat(list(gen))
            except:
                # this hack is needed so that the current frame
                # does not show up in the traceback.
                exc_type, exc_value, tb = sys.exc_info()
                raise exc_type, exc_value, tb.tb_next
    else:
        # the original exception came through untouched, the plain
        # bound join method can be used directly.
        concat = _concat
    # the probe helpers are not needed after import.
    del _test_gen_bug, _error
# Interpreters that lack a builtin ``next()`` get a minimal replacement
# which simply forwards to the iterator's ``next`` method without any
# exception handling.
try:
    next
except NameError:
    def next(x):
        return x.next()
else:
    next = next
# Filenames handled by Jinja are unicode *or* bytestrings on 2.x and
# unicode only on 3.x (compile cannot handle bytes there).  This helper
# is used in a couple of places to normalise a filename accordingly.
if sys.version_info >= (3, 0):
    def _encode_filename(filename):
        # nothing to encode on 3.x, only verify the type.
        assert filename is None or isinstance(filename, str), \
            'filenames must be strings'
        return filename
else:
    def _encode_filename(filename):
        # unicode filenames become utf-8 bytestrings, everything else
        # is passed through unchanged.
        if not isinstance(filename, unicode):
            return filename
        return filename.encode('utf-8')
91 from keyword import iskeyword as is_python_keyword
# Common types.  The ``types`` module offers these too but it is not
# available in IronPython out of the box, so they are derived from live
# objects here, which also avoids implementation specific details.
class _Probe(object):
    def method(self):
        pass


def _probe_generator():
    yield None


FunctionType = type(_probe_generator)
GeneratorType = type(_probe_generator())
MethodType = type(_Probe.method)

# a traceback gives access to a frame, and the frame to a code object.
try:
    raise TypeError()
except TypeError:
    _probe_tb = sys.exc_info()[2]
TracebackType = type(_probe_tb)
FrameType = type(_probe_tb.tb_frame)
CodeType = type(_probe_tb.tb_frame.f_code)

del _Probe, _probe_tb, _probe_generator
def contextfunction(f):
    """Mark a function or method as context callable.

    A context callable receives the active :class:`Context` as first
    argument when it is called from a template.  That is handy when the
    function needs the context itself or functions provided on it.  A
    function returning the sorted names of all variables exported by the
    current template could look like this::

        @contextfunction
        def get_exported_names(context):
            return sorted(context.exported_vars)

    The decorated object is flagged in place and returned.
    """
    setattr(f, 'contextfunction', True)
    return f
def evalcontextfunction(f):
    """Mark a function or method as eval context callable.

    This decorator is similar to :func:`contextfunction`, but instead of
    the context an evaluation context object is passed.  For more
    information about the eval context, see :ref:`eval-context`.

    The decorated object is flagged in place and returned.

    .. versionadded:: 2.4
    """
    setattr(f, 'evalcontextfunction', True)
    return f
def environmentfunction(f):
    """Mark a function or method as environment callable.

    Works exactly like the :func:`contextfunction` decorator, except that
    the first argument is the active :class:`Environment` and not the
    context.

    The decorated object is flagged in place and returned.
    """
    setattr(f, 'environmentfunction', True)
    return f
def internalcode(f):
    """Mark the function as internally used.

    The function's code object is added to the module level
    ``internal_code`` set; the function itself is returned unchanged.
    """
    code = f.func_code
    internal_code.add(code)
    return f
def is_undefined(obj):
    """Tell whether the object passed is undefined.

    This is nothing more than an instance check against
    :class:`Undefined`, it just reads nicer.  Custom filters or tests that
    want to react to undefined variables can use it.  A custom default
    filter for instance can look like this::

        def default(var, default=''):
            if is_undefined(var):
                return default
            return var
    """
    # imported lazily, at call time rather than at module import time.
    from jinja2.runtime import Undefined as undefined_type
    return isinstance(obj, undefined_type)
def consume(iterable):
    """Exhaust an iterable and discard everything it yields."""
    for _ in iterable:
        continue
def clear_caches():
    """Empty the internal environment and lexer caches.

    Jinja2 keeps these caches so that it does not have to recreate
    environments and lexers all the time.  Normally there is no need to
    care about them, but when measuring memory consumption it can be
    useful to clean them.
    """
    from jinja2.environment import _spontaneous_environments
    from jinja2.lexer import _lexer_cache
    for cache in (_spontaneous_environments, _lexer_cache):
        cache.clear()
def import_string(import_name, silent=False):
    """Import an object based on a string.

    This is useful if you want to use import paths as endpoints or
    something similar.  An import path can be specified either in dotted
    notation (``xml.sax.saxutils.escape``) or with a colon as object
    delimiter (``xml.sax.saxutils:escape``).  A name without any delimiter
    is imported as a module.

    If `silent` is True the return value will be `None` if the import
    fails, otherwise the :exc:`ImportError` or :exc:`AttributeError` is
    re-raised.

    :return: imported object
    """
    try:
        if ':' in import_name:
            module_name, attribute = import_name.split(':', 1)
        elif '.' in import_name:
            # everything up to the last dot is the module path.
            module_name, attribute = import_name.rsplit('.', 1)
        else:
            return __import__(import_name)
        # a non-empty fromlist makes __import__ return the innermost
        # module instead of the top level package.
        module = __import__(module_name, None, None, [attribute])
        return getattr(module, attribute)
    except (ImportError, AttributeError):
        if silent:
            return None
        raise
def open_if_exists(filename, mode='rb'):
    """Open `filename` with the given `mode` and return the file object.

    `None` is returned if opening fails because the file does not exist
    (``ENOENT``) or the name refers to a directory (``EISDIR``).  Every
    other :exc:`IOError` is re-raised.
    """
    try:
        handle = open(filename, mode)
    except IOError:
        # fetch the active exception through sys.exc_info()
        if sys.exc_info()[1].errno in (errno.ENOENT, errno.EISDIR):
            return None
        raise
    return handle
def object_type_repr(obj):
    """Return the name of the object's type followed by ``' object'``.

    For the recognized singletons `None` and `Ellipsis` the name of the
    object itself is returned instead.  Types that do not live in the
    builtin module are prefixed with their module name.
    """
    for singleton, label in ((None, 'None'), (Ellipsis, 'Ellipsis')):
        if obj is singleton:
            return label
    cls = obj.__class__
    # __builtin__ in 2.x, builtins in 3.x
    if cls.__module__ in ('__builtin__', 'builtins'):
        return '%s object' % cls.__name__
    return '%s object' % (cls.__module__ + '.' + cls.__name__)
def pformat(obj, verbose=False):
    """Pretty print an object into a string.

    The `pretty` library is used if it can be imported (`verbose` is
    forwarded to it), otherwise the builtin `pprint` module does the job.
    """
    try:
        from pretty import pretty as render
        return render(obj, verbose=verbose)
    except ImportError:
        import pprint
        return pprint.pformat(obj)
def urlize(text, trim_url_limit=None, nofollow=False):
    """Convert any URLs in text into clickable links.

    Works on http://, https:// and www. links.  Links can have trailing
    punctuation (periods, commas, close-parens) and leading punctuation
    (opening parens) and it'll still do the right thing.  The text is
    HTML-escaped before it is processed.

    If trim_url_limit is not None, the URLs in link text will be limited
    to trim_url_limit characters.

    If nofollow is True, the URLs in link text will get a rel="nofollow"
    attribute.
    """
    def trim_url(x, limit=trim_url_limit):
        # shorten the visible link text, never the href.
        if limit is None:
            return x
        suffix = ''
        if len(x) >= limit:
            suffix = '...'
        return (x[:limit] + suffix) or x

    if nofollow:
        nofollow_attr = ' rel="nofollow"'
    else:
        nofollow_attr = ''

    words = _word_split_re.split(unicode(escape(text)))
    for i, word in enumerate(words):
        match = _punctuation_re.match(word)
        if not match:
            continue
        lead, middle, trail = match.groups()

        # bare host names: either a www. prefix or something that starts
        # with a letter or digit and ends in a well known TLD.
        is_bare_host = middle.startswith('www.') or (
            '@' not in middle and
            not middle.startswith('http://') and
            len(middle) > 0 and
            middle[0] in _letters + _digits and (
                middle.endswith('.org') or
                middle.endswith('.net') or
                middle.endswith('.com')
            ))
        if is_bare_host:
            middle = '<a href="http://%s"%s>%s</a>' % (
                middle, nofollow_attr, trim_url(middle))

        # fully qualified links
        if middle.startswith('http://') or middle.startswith('https://'):
            middle = '<a href="%s"%s>%s</a>' % (
                middle, nofollow_attr, trim_url(middle))

        # e-mail addresses
        if '@' in middle and not middle.startswith('www.') and \
           ':' not in middle and _simple_email_re.match(middle):
            middle = '<a href="mailto:%s">%s</a>' % (middle, middle)

        replacement = lead + middle + trail
        if replacement != word:
            words[i] = replacement
    return u''.join(words)
def generate_lorem_ipsum(n=5, html=True, min=20, max=100):
    """Generate some lorem ipsum for the template.

    `n` paragraphs are produced, each with a random number of words taken
    from ``randrange(min, max)``.  With `html` enabled the paragraphs are
    wrapped in ``<p>`` tags and returned as :class:`Markup`, otherwise
    they are returned as plain text separated by blank lines.
    """
    from jinja2.constants import LOREM_IPSUM_WORDS
    from random import choice, randrange
    vocabulary = LOREM_IPSUM_WORDS.split()
    paragraphs = []

    for _ in xrange(n):
        capitalize_next = True
        last_comma = last_fullstop = 0
        previous = None
        pieces = []

        # each paragraph contains out of 20 to 100 words.
        for idx in xrange(randrange(min, max)):
            # never use the same word twice in a row
            word = choice(vocabulary)
            while word == previous:
                word = choice(vocabulary)
            previous = word

            if capitalize_next:
                word = word.capitalize()
                capitalize_next = False
            # add commas
            if idx - randrange(3, 8) > last_comma:
                last_comma = idx
                last_fullstop += 2
                word += ','
            # add end of sentences
            if idx - randrange(10, 20) > last_fullstop:
                last_comma = last_fullstop = idx
                word += '.'
                capitalize_next = True
            pieces.append(word)

        # ensure that the paragraph ends with a dot.
        paragraph = u' '.join(pieces)
        if paragraph.endswith(','):
            paragraph = paragraph[:-1] + '.'
        elif not paragraph.endswith('.'):
            paragraph += '.'
        paragraphs.append(paragraph)

    if html:
        wrapped = [u'<p>%s</p>' % escape(x) for x in paragraphs]
        return Markup(u'\n'.join(wrapped))
    return u'\n\n'.join(paragraphs)
class Markup(unicode):
    r"""Marks a string as being safe for inclusion in HTML/XML output without
    needing to be escaped.  This implements the `__html__` interface a couple
    of frameworks and web applications use.  :class:`Markup` is a direct
    subclass of `unicode` and provides all the methods of `unicode` just that
    it escapes arguments passed and always returns `Markup`.

    The `escape` function returns markup objects so that double escaping can't
    happen.  If you want to use autoescaping in Jinja just enable the
    autoescaping feature in the environment.

    The constructor of the :class:`Markup` class can be used for three
    different things:  When passed an unicode object it's assumed to be safe,
    when passed an object with an HTML representation (has an `__html__`
    method) that representation is used, otherwise the object passed is
    converted into a unicode string and then assumed to be safe:

    >>> Markup("Hello <em>World</em>!")
    Markup(u'Hello <em>World</em>!')
    >>> class Foo(object):
    ...  def __html__(self):
    ...   return '<a href="#">foo</a>'
    ...
    >>> Markup(Foo())
    Markup(u'<a href="#">foo</a>')

    If you want object passed being always treated as unsafe you can use the
    :meth:`escape` classmethod to create a :class:`Markup` object:

    >>> Markup.escape("Hello <em>World</em>!")
    Markup(u'Hello &lt;em&gt;World&lt;/em&gt;!')

    Operations on a markup string are markup aware which means that all
    arguments are passed through the :func:`escape` function:

    >>> em = Markup("<em>%s</em>")
    >>> em % "foo & bar"
    Markup(u'<em>foo &amp; bar</em>')
    >>> strong = Markup("<strong>%(text)s</strong>")
    >>> strong % {'text': '<blink>hacker here</blink>'}
    Markup(u'<strong>&lt;blink&gt;hacker here&lt;/blink&gt;</strong>')
    >>> Markup("<em>Hello</em> ") + "<foo>"
    Markup(u'<em>Hello</em> &lt;foo&gt;')
    """
    __slots__ = ()

    def __new__(cls, base=u'', encoding=None, errors='strict'):
        # objects with an HTML representation provide the markup themselves
        if hasattr(base, '__html__'):
            base = base.__html__()
        if encoding is None:
            return unicode.__new__(cls, base)
        return unicode.__new__(cls, base, encoding, errors)

    def __html__(self):
        return self

    def __add__(self, other):
        if hasattr(other, '__html__') or isinstance(other, basestring):
            return self.__class__(unicode(self) + unicode(escape(other)))
        return NotImplemented

    def __radd__(self, other):
        if hasattr(other, '__html__') or isinstance(other, basestring):
            return self.__class__(unicode(escape(other)) + unicode(self))
        return NotImplemented

    def __mul__(self, num):
        if isinstance(num, (int, long)):
            return self.__class__(unicode.__mul__(self, num))
        return NotImplemented
    __rmul__ = __mul__

    def __mod__(self, arg):
        # wrap the arguments so that they are escaped at the moment the
        # format operation converts them to strings.
        if isinstance(arg, tuple):
            arg = tuple(imap(_MarkupEscapeHelper, arg))
        else:
            arg = _MarkupEscapeHelper(arg)
        return self.__class__(unicode.__mod__(self, arg))

    def __repr__(self):
        return '%s(%s)' % (
            self.__class__.__name__,
            unicode.__repr__(self)
        )

    def join(self, seq):
        return self.__class__(unicode.join(self, imap(escape, seq)))
    join.__doc__ = unicode.join.__doc__

    def split(self, *args, **kwargs):
        return map(self.__class__, unicode.split(self, *args, **kwargs))
    split.__doc__ = unicode.split.__doc__

    def rsplit(self, *args, **kwargs):
        return map(self.__class__, unicode.rsplit(self, *args, **kwargs))
    rsplit.__doc__ = unicode.rsplit.__doc__

    def splitlines(self, *args, **kwargs):
        return map(self.__class__, unicode.splitlines(self, *args, **kwargs))
    splitlines.__doc__ = unicode.splitlines.__doc__

    def unescape(self):
        r"""Unescape markup again into an unicode string.  This also resolves
        known HTML4 and XHTML entities:

        >>> Markup("Main &raquo; <em>About</em>").unescape()
        u'Main \xbb <em>About</em>'
        """
        from jinja2.constants import HTML_ENTITIES
        def handle_match(m):
            name = m.group(1)
            if name in HTML_ENTITIES:
                return unichr(HTML_ENTITIES[name])
            try:
                if name[:2] in ('#x', '#X'):
                    return unichr(int(name[2:], 16))
                elif name.startswith('#'):
                    return unichr(int(name[1:]))
            except ValueError:
                pass
            # unknown or malformed entities are dropped
            return u''
        return _entity_re.sub(handle_match, unicode(self))

    def striptags(self):
        r"""Unescape markup into an unicode string and strip all tags.  This
        also resolves known HTML4 and XHTML entities.  Whitespace is
        normalized to one:

        >>> Markup("Main &raquo;  <em>About</em>").striptags()
        u'Main \xbb About'
        """
        stripped = u' '.join(_striptags_re.sub('', self).split())
        return Markup(stripped).unescape()

    @classmethod
    def escape(cls, s):
        """Escape the string.  Works like :func:`escape` with the difference
        that for subclasses of :class:`Markup` this function would return the
        correct subclass.
        """
        rv = escape(s)
        if rv.__class__ is not cls:
            return cls(rv)
        return rv

    def make_wrapper(name):
        # build a method that escapes its string arguments, calls the
        # `unicode` method of the same name and wraps the (string) result
        # in the markup class again.
        orig = getattr(unicode, name)
        def func(self, *args, **kwargs):
            args = _escape_argspec(list(args), enumerate(args))
            _escape_argspec(kwargs, kwargs.iteritems())
            return self.__class__(orig(self, *args, **kwargs))
        func.__name__ = orig.__name__
        func.__doc__ = orig.__doc__
        return func

    for method in '__getitem__', 'capitalize', \
                  'title', 'lower', 'upper', 'replace', 'ljust', \
                  'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
                  'translate', 'expandtabs', 'swapcase', 'zfill':
        locals()[method] = make_wrapper(method)

    # new in python 2.5.  These return a 3-tuple and therefore cannot go
    # through `make_wrapper`, which would turn the whole tuple into a
    # single markup string; every item of the tuple is wrapped instead.
    if hasattr(unicode, 'partition'):
        def partition(self, sep):
            return tuple(map(self.__class__,
                             unicode.partition(self, escape(sep))))
        partition.__doc__ = unicode.partition.__doc__

        def rpartition(self, sep):
            return tuple(map(self.__class__,
                             unicode.rpartition(self, escape(sep))))
        rpartition.__doc__ = unicode.rpartition.__doc__

    # new in python 2.6
    if hasattr(unicode, 'format'):
        format = make_wrapper('format')

    # not in python 3
    if hasattr(unicode, '__getslice__'):
        __getslice__ = make_wrapper('__getslice__')

    del method, make_wrapper
def _escape_argspec(obj, iterable):
    """Helper for various string-wrapped functions.

    Walks over the ``(key, value)`` pairs of `iterable` and stores the
    escaped value under `key` in `obj` for every value that is a string
    or has an `__html__` method.  `obj` is modified in place and returned.
    """
    for key, value in iterable:
        if not (hasattr(value, '__html__') or isinstance(value, basestring)):
            continue
        obj[key] = escape(value)
    return obj
class _MarkupEscapeHelper(object):
    """Helper for Markup.__mod__

    Wraps a format argument so that its string conversions go through
    :func:`escape`, while numeric conversions reach the wrapped object
    directly.
    """

    def __init__(self, obj):
        self.obj = obj

    def __getitem__(self, item):
        # keeps items looked up on the wrapped object wrapped as well
        return _MarkupEscapeHelper(self.obj[item])

    def __str__(self):
        return str(escape(self.obj))

    def __unicode__(self):
        return unicode(escape(self.obj))

    def __repr__(self):
        return str(escape(repr(self.obj)))

    def __int__(self):
        return int(self.obj)

    def __float__(self):
        return float(self.obj)
class LRUCache(object):
    """A simple LRU Cache implementation.

    Values are stored in a dict while a deque tracks the usage order of
    the keys: the least recently used key sits on the left end, the most
    recently used one on the right end.  Once `capacity` items are stored,
    adding a new key evicts the least recently used one.
    """

    # this is fast for small capacities (something below 1000) but doesn't
    # scale.  But as long as it's only used as storage for templates this
    # won't do any harm.

    def __init__(self, capacity):
        self.capacity = capacity
        self._mapping = {}
        self._queue = deque()
        self._postinit()

    def _postinit(self):
        # alias all queue methods for faster lookup.  The aliases are
        # bound to the current `_queue` object, so the queue must never
        # be replaced without calling this method again.
        self._popleft = self._queue.popleft
        self._pop = self._queue.pop
        if hasattr(self._queue, 'remove'):
            self._remove = self._queue.remove
        self._wlock = allocate_lock()
        self._append = self._queue.append

    def _remove(self, obj):
        """Python 2.4 compatibility."""
        for idx, item in enumerate(self._queue):
            if item == obj:
                del self._queue[idx]
                break

    def __getstate__(self):
        # the lock and the bound method aliases are left out; they are
        # recreated by `_postinit` in `__setstate__`.
        return {
            'capacity':     self.capacity,
            '_mapping':     self._mapping,
            '_queue':       self._queue
        }

    def __setstate__(self, d):
        self.__dict__.update(d)
        self._postinit()

    def __getnewargs__(self):
        return (self.capacity,)

    def copy(self):
        """Return a shallow copy of the instance."""
        rv = self.__class__(self.capacity)
        rv._mapping.update(self._mapping)
        # fill the queue the copy already owns instead of replacing it:
        # the method aliases created by `_postinit` are bound to that
        # queue object and would otherwise operate on a stale, empty one.
        rv._queue.extend(self._queue)
        return rv

    def get(self, key, default=None):
        """Return an item from the cache dict or `default`"""
        try:
            return self[key]
        except KeyError:
            return default

    def setdefault(self, key, default=None):
        """Set `default` if the key is not in the cache otherwise
        leave unchanged. Return the value of this key.
        """
        try:
            return self[key]
        except KeyError:
            self[key] = default
            return default

    def clear(self):
        """Clear the cache."""
        self._wlock.acquire()
        try:
            self._mapping.clear()
            self._queue.clear()
        finally:
            self._wlock.release()

    def __contains__(self, key):
        """Check if a key exists in this cache."""
        return key in self._mapping

    def __len__(self):
        """Return the current size of the cache."""
        return len(self._mapping)

    def __repr__(self):
        return '<%s %r>' % (
            self.__class__.__name__,
            self._mapping
        )

    def __getitem__(self, key):
        """Get an item from the cache. Moves the item up so that it has the
        highest priority then.

        Raise a `KeyError` if it does not exist.
        """
        rv = self._mapping[key]
        if self._queue[-1] != key:
            try:
                self._remove(key)
            except ValueError:
                # if something removed the key from the container
                # when we read, ignore the ValueError that we would
                # get otherwise.
                pass
            self._append(key)
        return rv

    def __setitem__(self, key, value):
        """Sets the value for an item. Moves the item up so that it
        has the highest priority then.
        """
        self._wlock.acquire()
        try:
            if key in self._mapping:
                try:
                    self._remove(key)
                except ValueError:
                    # __getitem__ is not locked, it might happen
                    pass
            elif len(self._mapping) == self.capacity:
                # cache is full: evict the least recently used key
                del self._mapping[self._popleft()]
            self._append(key)
            self._mapping[key] = value
        finally:
            self._wlock.release()

    def __delitem__(self, key):
        """Remove an item from the cache dict.
        Raise a `KeyError` if it does not exist.
        """
        self._wlock.acquire()
        try:
            del self._mapping[key]
            try:
                self._remove(key)
            except ValueError:
                # __getitem__ is not locked, it might happen
                pass
        finally:
            self._wlock.release()

    def items(self):
        """Return a list of items, most recently used first."""
        result = [(key, self._mapping[key]) for key in list(self._queue)]
        result.reverse()
        return result

    def iteritems(self):
        """Iterate over all items."""
        return iter(self.items())

    def values(self):
        """Return a list of all values."""
        return [x[1] for x in self.items()]

    def itervalues(self):
        """Iterate over all values."""
        return iter(self.values())

    # the method used to be available under this misspelled name only;
    # keep it as an alias so that existing callers continue to work.
    itervalue = itervalues

    def keys(self):
        """Return a list of all keys ordered by most recent usage."""
        return list(self)

    def iterkeys(self):
        """Iterate over all keys in the cache dict, ordered by
        the most recent usage.
        """
        return reversed(tuple(self._queue))

    __iter__ = iterkeys

    def __reversed__(self):
        """Iterate over the keys in the cache dict, oldest items
        coming first.
        """
        return iter(tuple(self._queue))

    __copy__ = copy
# Register the LRU cache as a mutable mapping on interpreters whose
# collections module provides the abstract base class.
try:
    from collections import MutableMapping
except ImportError:
    pass
else:
    MutableMapping.register(LRUCache)
class Cycler(object):
    """A cycle helper for templates.

    Holds a fixed sequence of items and a position into it; :meth:`next`
    hands out the items one after another and starts over after the last.
    """

    def __init__(self, *items):
        if len(items) == 0:
            raise RuntimeError('at least one item has to be provided')
        self.items = items
        self.reset()

    def reset(self):
        """Resets the cycle."""
        self.pos = 0

    def current(self):
        """Returns the current item."""
        return self.items[self.pos]
    current = property(current)

    def next(self):
        """Goes one item ahead and returns it."""
        rv = self.current
        following = self.pos + 1
        # wrap around after the last item
        self.pos = following % len(self.items)
        return rv
class Joiner(object):
    """A joining helper for templates.

    Calling the object returns an empty string the first time and the
    separator on every later call.
    """

    def __init__(self, sep=u', '):
        self.sep = sep
        self.used = False

    def __call__(self):
        if self.used:
            return self.sep
        # first call: remember it and emit nothing
        self.used = True
        return u''
# we have to import it down here as the speedups module imports the
# markup type which is defined above.
try:
    from jinja2._speedups import escape, soft_unicode
except ImportError:
    def escape(s):
        """Convert the characters &, <, >, ' and " in string s to HTML-safe
        sequences.  Use this if you need to display text that might contain
        such characters in HTML.  Marks return value as markup string.

        Objects that have an `__html__` method are not escaped; the return
        value of that method is used instead.
        """
        if hasattr(s, '__html__'):
            return s.__html__()
        # the ampersand has to be replaced first, otherwise the
        # ampersands of the entities inserted below would be escaped too.
        return Markup(unicode(s)
            .replace('&', '&amp;')
            .replace('>', '&gt;')
            .replace('<', '&lt;')
            .replace("'", '&#39;')
            .replace('"', '&#34;')
        )

    def soft_unicode(s):
        """Make a string unicode if it isn't already.  That way a markup
        string is not converted back to unicode.
        """
        if not isinstance(s, unicode):
            s = unicode(s)
        return s
# partials: use the one from the standard library where available and
# fall back to a minimal pure Python version otherwise.
try:
    from functools import partial
except ImportError:
    class partial(object):
        """Minimal stand-in for :func:`functools.partial`.

        Stores a callable together with leading positional arguments and
        keyword arguments.  When called, the positional arguments of the
        call are appended to the stored ones, and the keyword arguments of
        the call extend and override the stored ones.
        """

        def __init__(self, _func, *args, **kwargs):
            self._func = _func
            self._args = args
            self._kwargs = kwargs

        def __call__(self, *args, **kwargs):
            # start from the stored keywords so that keywords supplied
            # with the call take precedence, like functools.partial does.
            merged = dict(self._kwargs)
            merged.update(kwargs)
            return self._func(*(self._args + args), **merged)