blob: fa7fffa870f9edae113c61b6cc246aaf1e29e8ff [file] [log] [blame]
# ext/extract.py
# Copyright 2006-2023 the Mako authors and contributors <see AUTHORS file>
#
# This module is part of Mako and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
from io import BytesIO
from io import StringIO
import re
from mako import lexer
from mako import parsetree
class MessageExtractor:
use_bytes = True
def process_file(self, fileobj):
template_node = lexer.Lexer(
fileobj.read(), input_encoding=self.config["encoding"]
).parse()
yield from self.extract_nodes(template_node.get_children())
def extract_nodes(self, nodes):
translator_comments = []
in_translator_comments = False
input_encoding = self.config["encoding"] or "ascii"
comment_tags = list(
filter(None, re.split(r"\s+", self.config["comment-tags"]))
)
for node in nodes:
child_nodes = None
if (
in_translator_comments
and isinstance(node, parsetree.Text)
and not node.content.strip()
):
# Ignore whitespace within translator comments
continue
if isinstance(node, parsetree.Comment):
value = node.text.strip()
if in_translator_comments:
translator_comments.extend(
self._split_comment(node.lineno, value)
)
continue
for comment_tag in comment_tags:
if value.startswith(comment_tag):
in_translator_comments = True
translator_comments.extend(
self._split_comment(node.lineno, value)
)
continue
if isinstance(node, parsetree.DefTag):
code = node.function_decl.code
child_nodes = node.nodes
elif isinstance(node, parsetree.BlockTag):
code = node.body_decl.code
child_nodes = node.nodes
elif isinstance(node, parsetree.CallTag):
code = node.code.code
child_nodes = node.nodes
elif isinstance(node, parsetree.PageTag):
code = node.body_decl.code
elif isinstance(node, parsetree.CallNamespaceTag):
code = node.expression
child_nodes = node.nodes
elif isinstance(node, parsetree.ControlLine):
if node.isend:
in_translator_comments = False
continue
code = node.text
elif isinstance(node, parsetree.Code):
in_translator_comments = False
code = node.code.code
elif isinstance(node, parsetree.Expression):
code = node.code.code
else:
continue
# Comments don't apply unless they immediately precede the message
if (
translator_comments
and translator_comments[-1][0] < node.lineno - 1
):
translator_comments = []
translator_strings = [
comment[1] for comment in translator_comments
]
if isinstance(code, str) and self.use_bytes:
code = code.encode(input_encoding, "backslashreplace")
used_translator_comments = False
# We add extra newline to work around a pybabel bug
# (see python-babel/babel#274, parse_encoding dies if the first
# input string of the input is non-ascii)
# Also, because we added it, we have to subtract one from
# node.lineno
if self.use_bytes:
code = BytesIO(b"\n" + code)
else:
code = StringIO("\n" + code)
for message in self.process_python(
code, node.lineno - 1, translator_strings
):
yield message
used_translator_comments = True
if used_translator_comments:
translator_comments = []
in_translator_comments = False
if child_nodes:
yield from self.extract_nodes(child_nodes)
@staticmethod
def _split_comment(lineno, comment):
"""Return the multiline comment at lineno split into a list of
comment line numbers and the accompanying comment line"""
return [
(lineno + index, line)
for index, line in enumerate(comment.splitlines())
]