# Source code for django_babel.extract

# -*- coding: utf-8 -*-
try:
    # Django >= 2.1
    from django.template.base import Lexer, TokenType
    TOKEN_TEXT = TokenType.TEXT
    TOKEN_VAR = TokenType.VAR
    TOKEN_BLOCK = TokenType.BLOCK
except ImportError:
    # Django < 2.1
    from django.template.base import Lexer, TOKEN_TEXT, TOKEN_VAR, TOKEN_BLOCK

from django.utils.translation import trim_whitespace
from django.utils.encoding import smart_text

try:
    from django.utils.translation.trans_real import (
        inline_re, block_re, endblock_re, plural_re, constant_re)
except ImportError:
    # Django 1.11+
    from django.utils.translation.template import (
        inline_re, block_re, endblock_re, plural_re, constant_re)


def join_tokens(tokens, trim=False):
    """Concatenate token strings into a single message.

    :param tokens: an iterable of strings to be joined with no separator
    :param trim: when true, the joined text is passed through
                 ``trim_whitespace`` before being returned
    :return: the joined (and optionally trimmed) message string
    """
    joined = ''.join(tokens)
    return trim_whitespace(joined) if trim else joined


def strip_quotes(s):
    """Remove one pair of matching surrounding quotes from ``s``.

    :param s: the string to unquote
    :return: ``s`` without its first and last character when both are the
             same quote character (``'`` or ``"``); otherwise ``s``
             unchanged
    """
    # The length guard keeps an empty string from raising IndexError and
    # stops a lone quote character from being treated as a quoted pair.
    if len(s) >= 2 and s[0] == s[-1] and s.startswith(("'", '"')):
        return s[1:-1]
    return s


def extract_django(fileobj, keywords, comment_tags, options):
    """Extract messages from Django template files.

    The template source is tokenized with Django's template ``Lexer`` and
    the token stream is scanned for translation tags, translation blocks
    and ``_("...")`` constants inside tags, variables and filter arguments.

    :param fileobj: the file-like object the messages should be extracted
                    from; the result of ``read()`` is decoded with the
                    configured encoding
    :param keywords: a list of keywords (i.e. function names) that should
                     be recognized as translation functions (not consulted
                     by this implementation)
    :param comment_tags: a list of translator tags to search for and
                         include in the results (not consulted by this
                         implementation; the yielded comments list is
                         always empty)
    :param options: a dictionary of additional options; ``'encoding'``
                    selects the codec used to decode the file and defaults
                    to ``'utf8'``
    :return: an iterator over ``(lineno, funcname, message, comments)``
             tuples, where ``funcname`` is ``None``, ``'pgettext'``,
             ``'ngettext'`` or ``'npgettext'``
    :rtype: ``iterator``
    :raises SyntaxError: if a translation block contains a block tag other
                         than its plural or end tag
    """
    # State carried across tokens while inside a translation block.
    intrans = False
    inplural = False
    trimmed = False
    message_context = None
    singular = []
    plural = []
    lineno = 1

    encoding = options.get('encoding', 'utf8')
    text = fileobj.read().decode(encoding)

    try:
        text_lexer = Lexer(text)
    except TypeError:
        # Django 1.9 changed the way we invoke Lexer; older versions
        # require two parameters.
        text_lexer = Lexer(text, None)

    for t in text_lexer.tokenize():
        # Newlines are counted before the token is handled, so the reported
        # line number is the line on which the token ends.
        lineno += t.contents.count('\n')

        if intrans:
            if t.token_type == TOKEN_BLOCK:
                endbmatch = endblock_re.match(t.contents)
                pluralmatch = plural_re.match(t.contents)
                if endbmatch:
                    # End of the translation block: emit the collected
                    # message in the form matching plural/context state.
                    if inplural:
                        if message_context:
                            yield (
                                lineno,
                                'npgettext',
                                [smart_text(message_context),
                                 smart_text(join_tokens(singular, trimmed)),
                                 smart_text(join_tokens(plural, trimmed))],
                                [],
                            )
                        else:
                            yield (
                                lineno,
                                'ngettext',
                                (smart_text(join_tokens(singular, trimmed)),
                                 smart_text(join_tokens(plural, trimmed))),
                                [],
                            )
                    else:
                        if message_context:
                            yield (
                                lineno,
                                'pgettext',
                                [smart_text(message_context),
                                 smart_text(join_tokens(singular, trimmed))],
                                [],
                            )
                        else:
                            yield (
                                lineno,
                                None,
                                smart_text(join_tokens(singular, trimmed)),
                                [],
                            )

                    # Reset the block state for the next translation block.
                    intrans = False
                    inplural = False
                    message_context = None
                    singular = []
                    plural = []
                elif pluralmatch:
                    inplural = True
                else:
                    raise SyntaxError('Translation blocks must not include '
                                      'other block tags: %s' % t.contents)
            elif t.token_type == TOKEN_VAR:
                # Template variables become named %-style placeholders.
                if inplural:
                    plural.append('%%(%s)s' % t.contents)
                else:
                    singular.append('%%(%s)s' % t.contents)
            elif t.token_type == TOKEN_TEXT:
                if inplural:
                    plural.append(t.contents)
                else:
                    singular.append(t.contents)
        else:
            if t.token_type == TOKEN_BLOCK:
                imatch = inline_re.match(t.contents)
                bmatch = block_re.match(t.contents)
                cmatches = constant_re.findall(t.contents)
                if imatch:
                    # Inline translation tag, optionally with a context.
                    g = imatch.group(1)
                    g = strip_quotes(g)
                    message_context = imatch.group(3)
                    if message_context:
                        # strip quotes
                        message_context = message_context[1:-1]
                        yield (
                            lineno,
                            'pgettext',
                            [smart_text(message_context), smart_text(g)],
                            [],
                        )
                        message_context = None
                    else:
                        yield lineno, None, smart_text(g), []
                elif bmatch:
                    # Opening tag of a translation block.
                    if bmatch.group(2):
                        # Drop the quotes surrounding the context string.
                        message_context = bmatch.group(2)[1:-1]
                    for fmatch in constant_re.findall(t.contents):
                        stripped_fmatch = strip_quotes(fmatch)
                        yield lineno, None, smart_text(stripped_fmatch), []
                    intrans = True
                    inplural = False
                    trimmed = 'trimmed' in t.split_contents()
                    singular = []
                    plural = []
                elif cmatches:
                    # Any other block tag carrying translatable constants.
                    for cmatch in cmatches:
                        stripped_cmatch = strip_quotes(cmatch)
                        yield lineno, None, smart_text(stripped_cmatch), []
            elif t.token_type == TOKEN_VAR:
                # parts[0] is the variable expression; the remaining parts
                # are the filters applied to it.
                parts = t.contents.split('|')
                cmatch = constant_re.match(parts[0])
                if cmatch:
                    stripped_cmatch = strip_quotes(cmatch.group(1))
                    yield lineno, None, smart_text(stripped_cmatch), []
                for p in parts[1:]:
                    if ':_(' in p:
                        # Filter argument of the form ``name:_("text")``:
                        # peel off ``_``, the parentheses and the quotes.
                        p1 = p.split(':', 1)[1]
                        if p1[0] == '_':
                            p1 = p1[1:]
                        if p1[0] == '(':
                            p1 = p1.strip('()')
                        p1 = strip_quotes(p1)
                        yield lineno, None, smart_text(p1), []