dev-python/nbconvert: Support mistune-2

Signed-off-by: Michał Górny <mgorny@gentoo.org>
author: Michał Górny <mgorny@gentoo.org> 2022-05-27 06:54:04 +0200
committer: Michał Górny <mgorny@gentoo.org> 2022-05-27 08:10:27 +0200
commit: f66a4fc204598c2abfe388b680694e3ff0d2dbbb (patch)
tree: c01205c73cf867ab682b122b9b5ea0333ea0b38b /dev-python/nbconvert
parent: dev-python/nbconvert: Remove old (diff)
download: gentoo-f66a4fc204598c2abfe388b680694e3ff0d2dbbb.tar.gz
gentoo-f66a4fc204598c2abfe388b680694e3ff0d2dbbb.tar.bz2
gentoo-f66a4fc204598c2abfe388b680694e3ff0d2dbbb.zip
2 files changed, 421 insertions, 0 deletions
diff --git a/dev-python/nbconvert/files/nbconvert-6.5.0-mistune-2.patch b/dev-python/nbconvert/files/nbconvert-6.5.0-mistune-2.patch
new file mode 100644
index 000000000000..4a3f4731b32d
--- /dev/null
+++ b/dev-python/nbconvert/files/nbconvert-6.5.0-mistune-2.patch
@@ -0,0 +1,339 @@
+From 6e5ba41803cc8c3192f001b3ede9b74454220bda Mon Sep 17 00:00:00 2001
+From: Tiago de Paula <tiagodepalves@gmail.com>
+Date: Mon, 9 May 2022 09:39:31 -0300
+Subject: [PATCH] Update to Mistune 2.0.2 (#1764)
+
+Co-authored-by: Steven Silvester <steven.silvester@ieee.org>
+---
+ nbconvert/filters/markdown_mistune.py | 212 ++++++++++++++------------
+ setup.py                              |   2 +-
+ 2 files changed, 119 insertions(+), 95 deletions(-)
+
+diff --git a/nbconvert/filters/markdown_mistune.py b/nbconvert/filters/markdown_mistune.py
+index 382a5388..636e1e8c 100644
+--- a/nbconvert/filters/markdown_mistune.py
++++ b/nbconvert/filters/markdown_mistune.py
+@@ -21,7 +21,7 @@ except ImportError:
+     from cgi import escape as html_escape
+ 
+ import bs4
+-import mistune
++from mistune import BlockParser, HTMLRenderer, InlineParser, Markdown
+ from pygments import highlight
+ from pygments.formatters import HtmlFormatter
+ from pygments.lexers import get_lexer_by_name
+@@ -34,158 +34,183 @@ class InvalidNotebook(Exception):
+     pass
+ 
+ 
+-class MathBlockGrammar(mistune.BlockGrammar):
+-    """This defines a single regex comprised of the different patterns that
+-    identify math content spanning multiple lines. These are used by the
+-    MathBlockLexer.
++class MathBlockParser(BlockParser):
++    """This acts as a pass-through to the MathInlineParser. It is needed in
++    order to avoid other block level rules splitting math sections apart.
+     """
+ 
+-    multi_math_str = "|".join(
+-        [r"^\$\$.*?\$\$", r"^\\\\\[.*?\\\\\]", r"^\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}"]
++    MULTILINE_MATH = re.compile(
++        r"(?<!\\)[$]{2}.*?(?<!\\)[$]{2}|"
++        r"\\\\\[.*?\\\\\]|"
++        r"\\begin\{([a-z]*\*?)\}.*?\\end\{\1\}",
++        re.DOTALL,
+     )
+-    multiline_math = re.compile(multi_math_str, re.DOTALL)
+ 
++    RULE_NAMES = ("multiline_math",) + BlockParser.RULE_NAMES
+ 
+-class MathBlockLexer(mistune.BlockLexer):
+-    """This acts as a pass-through to the MathInlineLexer. It is needed in
+-    order to avoid other block level rules splitting math sections apart.
+-    """
++    # Regex for header that doesn't require space after '#'
++    AXT_HEADING = re.compile(r" {0,3}(#{1,6})(?!#+)\s*([^\n]*?)$")
+ 
+-    default_rules = ["multiline_math"] + mistune.BlockLexer.default_rules
++    def parse_multiline_math(self, m, state):
++        """Pass token through mutiline math."""
++        return {"type": "multiline_math", "text": m.group(0)}
+ 
+-    def __init__(self, rules=None, **kwargs):
+-        if rules is None:
+-            rules = MathBlockGrammar()
+-        super().__init__(rules, **kwargs)
+ 
+-    def parse_multiline_math(self, m):
+-        """Add token to pass through mutiline math."""
+-        self.tokens.append({"type": "multiline_math", "text": m.group(0)})
++def _dotall(pattern):
++    """Make the '.' special character match any character inside the pattern, including a newline.
+ 
+-
+-class MathInlineGrammar(mistune.InlineGrammar):
+-    """This defines different ways of declaring math objects that should be
+-    passed through to mathjax unaffected. These are used by the MathInlineLexer.
++    This is implemented with the inline flag `(?s:...)` and is equivalent to using `re.DOTALL` when
++    it is the only pattern used. It is necessary since `mistune>=2.0.0`, where the pattern is passed
++    to the undocumented `re.Scanner`.
+     """
+-
+-    inline_math = re.compile(r"^\$(.+?)\$|^\\\\\((.+?)\\\\\)", re.DOTALL)
+-    block_math = re.compile(r"^\$\$(.*?)\$\$|^\\\\\[(.*?)\\\\\]", re.DOTALL)
+-    latex_environment = re.compile(r"^\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}", re.DOTALL)
+-    text = re.compile(r"^[\s\S]+?(?=[\\<!\[_*`~$]|https?://| {2,}\n|$)")
++    return f"(?s:{pattern})"
+ 
+ 
+-class MathInlineLexer(mistune.InlineLexer):
+-    r"""This interprets the content of LaTeX style math objects using the rules
+-    defined by the MathInlineGrammar.
++class MathInlineParser(InlineParser):
++    r"""This interprets the content of LaTeX style math objects.
+ 
+     In particular this grabs ``$$...$$``, ``\\[...\\]``, ``\\(...\\)``, ``$...$``,
+     and ``\begin{foo}...\end{foo}`` styles for declaring mathematics. It strips
+     delimiters from all these varieties, and extracts the type of environment
+     in the last case (``foo`` in this example).
+     """
+-    default_rules = [
+-        "block_math",
+-        "inline_math",
++    BLOCK_MATH_TEX = _dotall(r"(?<!\\)\$\$(.*?)(?<!\\)\$\$")
++    BLOCK_MATH_LATEX = _dotall(r"(?<!\\)\\\\\[(.*?)(?<!\\)\\\\\]")
++    INLINE_MATH_TEX = _dotall(r"(?<![$\\])\$(.+?)(?<![$\\])\$")
++    INLINE_MATH_LATEX = _dotall(r"(?<!\\)\\\\\((.*?)(?<!\\)\\\\\)")
++    LATEX_ENVIRONMENT = _dotall(r"\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}")
++
++    # The order is important here
++    RULE_NAMES = (
++        "block_math_tex",
++        "block_math_latex",
++        "inline_math_tex",
++        "inline_math_latex",
+         "latex_environment",
+-    ] + mistune.InlineLexer.default_rules
+-
+-    def __init__(self, renderer, rules=None, **kwargs):
+-        if rules is None:
+-            rules = MathInlineGrammar()
+-        super().__init__(renderer, rules, **kwargs)
+-
+-    def output_inline_math(self, m):
+-        return self.renderer.inline_math(m.group(1) or m.group(2))
+-
+-    def output_block_math(self, m):
+-        return self.renderer.block_math(m.group(1) or m.group(2) or "")
+-
+-    def output_latex_environment(self, m):
+-        return self.renderer.latex_environment(m.group(1), m.group(2))
+-
+-
+-class MarkdownWithMath(mistune.Markdown):
+-    def __init__(self, renderer, **kwargs):
+-        if "inline" not in kwargs:
+-            kwargs["inline"] = MathInlineLexer
+-        if "block" not in kwargs:
+-            kwargs["block"] = MathBlockLexer
+-        super().__init__(renderer, **kwargs)
+-
+-    def output_multiline_math(self):
+-        return self.inline(self.token["text"])
+-
+-
+-class IPythonRenderer(mistune.Renderer):
+-    def block_code(self, code, lang):
+-        if lang:
++    ) + InlineParser.RULE_NAMES
++
++    def parse_block_math_tex(self, m, state):
++        # sometimes the Scanner keeps the final '$$', so we use the
++        # full matched string and remove the math markers
++        text = m.group(0)[2:-2]
++        return "block_math", text
++
++    def parse_block_math_latex(self, m, state):
++        text = m.group(1)
++        return "block_math", text
++
++    def parse_inline_math_tex(self, m, state):
++        text = m.group(1)
++        return "inline_math", text
++
++    def parse_inline_math_latex(self, m, state):
++        text = m.group(1)
++        return "inline_math", text
++
++    def parse_latex_environment(self, m, state):
++        name, text = m.group(1), m.group(2)
++        return "latex_environment", name, text
++
++
++class MarkdownWithMath(Markdown):
++    def __init__(self, renderer, block=None, inline=None, plugins=None):
++        if block is None:
++            block = MathBlockParser()
++        if inline is None:
++            inline = MathInlineParser(renderer, hard_wrap=False)
++        super().__init__(renderer, block, inline, plugins)
++
++    def render(self, s):
++        """Compatibility method with `mistune==0.8.4`."""
++        return self.parse(s)
++
++
++class IPythonRenderer(HTMLRenderer):
++    def __init__(
++        self,
++        escape=True,
++        allow_harmful_protocols=True,
++        embed_images=False,
++        exclude_anchor_links=False,
++        anchor_link_text="¶",
++        path="",
++        attachments=None,
++    ):
++        super().__init__(escape, allow_harmful_protocols)
++        self.embed_images = embed_images
++        self.exclude_anchor_links = exclude_anchor_links
++        self.anchor_link_text = anchor_link_text
++        self.path = path
++        if attachments is not None:
++            self.attachments = attachments
++        else:
++            self.attachments = {}
++
++    def block_code(self, code, info=None):
++        if info:
+             try:
++                lang = info.strip().split(None, 1)[0]
+                 lexer = get_lexer_by_name(lang, stripall=True)
+             except ClassNotFound:
+                 code = lang + "\n" + code
+                 lang = None
+ 
+         if not lang:
+-            return "\n<pre><code>%s</code></pre>\n" % mistune.escape(code)
++            return super().block_code(code)
+ 
+         formatter = HtmlFormatter()
+         return highlight(code, lexer, formatter)
+ 
+     def block_html(self, html):
+-        embed_images = self.options.get("embed_images", False)
+-
+-        if embed_images:
++        if self.embed_images:
+             html = self._html_embed_images(html)
+ 
+         return super().block_html(html)
+ 
+     def inline_html(self, html):
+-        embed_images = self.options.get("embed_images", False)
+-
+-        if embed_images:
++        if self.embed_images:
+             html = self._html_embed_images(html)
+ 
+         return super().inline_html(html)
+ 
+-    def header(self, text, level, raw=None):
+-        html = super().header(text, level, raw=raw)
+-        if self.options.get("exclude_anchor_links"):
++    def heading(self, text, level):
++        html = super().heading(text, level)
++        if self.exclude_anchor_links:
+             return html
+-        anchor_link_text = self.options.get("anchor_link_text", "¶")
+-        return add_anchor(html, anchor_link_text=anchor_link_text)
++        return add_anchor(html, anchor_link_text=self.anchor_link_text)
+ 
+     def escape_html(self, text):
+         return html_escape(text)
+ 
++    def multiline_math(self, text):
++        return text
++
+     def block_math(self, text):
+-        return "$$%s$$" % self.escape_html(text)
++        return f"$${self.escape_html(text)}$$"
+ 
+     def latex_environment(self, name, text):
+-        name = self.escape_html(name)
+-        text = self.escape_html(text)
+-        return rf"\begin{{{name}}}{text}\end{{{name}}}"
++        name, text = self.escape_html(name), self.escape_html(text)
++        return f"\\begin{{{name}}}{text}\\end{{{name}}}"
+ 
+     def inline_math(self, text):
+-        return "$%s$" % self.escape_html(text)
++        return f"${self.escape_html(text)}$"
+ 
+-    def image(self, src, title, text):
++    def image(self, src, text, title):
+         """Rendering a image with title and text.
+ 
+         :param src: source link of the image.
+-        :param title: title text of the image.
+         :param text: alt text of the image.
++        :param title: title text of the image.
+         """
+-        attachments = self.options.get("attachments", {})
+         attachment_prefix = "attachment:"
+-        embed_images = self.options.get("embed_images", False)
+ 
+         if src.startswith(attachment_prefix):
+             name = src[len(attachment_prefix) :]
+ 
+-            if name not in attachments:
++            if name not in self.attachments:
+                 raise InvalidNotebook(f"missing attachment: {name}")
+ 
+-            attachment = attachments[name]
++            attachment = self.attachments[name]
+             # we choose vector over raster, and lossless over lossy
+             preferred_mime_types = ["image/svg+xml", "image/png", "image/jpeg"]
+             for preferred_mime_type in preferred_mime_types:
+@@ -197,13 +222,13 @@ class IPythonRenderer(mistune.Renderer):
+             data = attachment[mime_type]
+             src = "data:" + mime_type + ";base64," + data
+ 
+-        elif embed_images:
++        elif self.embed_images:
+             base64_url = self._src_to_base64(src)
+ 
+             if base64_url is not None:
+                 src = base64_url
+ 
+-        return super().image(src, title, text)
++        return super().image(src, text, title)
+ 
+     def _src_to_base64(self, src):
+         """Turn the source file into a base64 url.
+@@ -211,8 +236,7 @@ class IPythonRenderer(mistune.Renderer):
+         :param src: source link of the file.
+         :return: the base64 url or None if the file was not found.
+         """
+-        path = self.options.get("path", "")
+-        src_path = os.path.join(path, src)
++        src_path = os.path.join(self.path, src)
+ 
+         if not os.path.exists(src_path):
+             return None
+diff --git a/setup.py b/setup.py
+index 7220a875..2dfa2534 100644
+--- a/setup.py
++++ b/setup.py
+@@ -245,7 +245,7 @@ setup_args["install_requires"] = [
+     "jupyter_core>=4.7",
+     "jupyterlab_pygments",
+     "MarkupSafe>=2.0",
+-    "mistune>=0.8.1,<2",
++    "mistune>=2.0.2",
+     "nbclient>=0.5.0",
+     "nbformat>=5.1",
+     "packaging",
+-- 
+2.35.1
+
diff --git a/dev-python/nbconvert/nbconvert-6.5.0-r1.ebuild b/dev-python/nbconvert/nbconvert-6.5.0-r1.ebuild
new file mode 100644
index 000000000000..39c667a2c576
--- /dev/null
+++ b/dev-python/nbconvert/nbconvert-6.5.0-r1.ebuild
@@ -0,0 +1,82 @@
+# Copyright 1999-2022 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+DISTUTILS_USE_PEP517=setuptools
+PYTHON_COMPAT=( python3_{8..10} )
+
+inherit distutils-r1
+
+DESCRIPTION="Converting Jupyter Notebooks"
+HOMEPAGE="
+	https://nbconvert.readthedocs.io/
+	https://github.com/jupyter/nbconvert/
+	https://pypi.org/project/nbconvert/
+"
+SRC_URI="
+	mirror://pypi/${PN:0:1}/${PN}/${P}.tar.gz
+"
+
+LICENSE="BSD"
+SLOT="0"
+KEYWORDS="~amd64 ~arm ~arm64 ~hppa ~ia64 ~ppc ~ppc64 ~riscv ~s390 ~sparc ~x86"
+
+RDEPEND="
+	dev-python/beautifulsoup4[${PYTHON_USEDEP}]
+	dev-python/bleach[${PYTHON_USEDEP}]
+	dev-python/defusedxml[${PYTHON_USEDEP}]
+	>=dev-python/entrypoints-0.2.2[${PYTHON_USEDEP}]
+	dev-python/jinja[${PYTHON_USEDEP}]
+	dev-python/jupyter_core[${PYTHON_USEDEP}]
+	dev-python/jupyterlab_pygments[${PYTHON_USEDEP}]
+	>=dev-python/markupsafe-2.0[${PYTHON_USEDEP}]
+	>=dev-python/mistune-2.0.2[${PYTHON_USEDEP}]
+	dev-python/nbclient[${PYTHON_USEDEP}]
+	dev-python/nbformat[${PYTHON_USEDEP}]
+	>=dev-python/pandocfilters-1.4.1[${PYTHON_USEDEP}]
+	dev-python/pygments[${PYTHON_USEDEP}]
+	>=dev-python/traitlets-5.1.1[${PYTHON_USEDEP}]
+	dev-python/testpath[${PYTHON_USEDEP}]
+	www-servers/tornado[${PYTHON_USEDEP}]
+"
+BDEPEND="
+	test? (
+		dev-python/pebble[${PYTHON_USEDEP}]
+		dev-python/ipykernel[${PYTHON_USEDEP}]
+		dev-python/ipywidgets[${PYTHON_USEDEP}]
+		>=dev-python/jupyter_client-4.2[${PYTHON_USEDEP}]
+	)
+"
+
+distutils_enable_tests pytest
+
+PATCHES=(
+	"${FILESDIR}"/${P}-mistune-2.patch
+)
+
+src_test() {
+	mkdir -p "${HOME}/.local" || die
+	cp -r share "${HOME}/.local/" || die
+	distutils-r1_src_test
+}
+
+python_test() {
+	local EPYTEST_DESELECT=(
+		# Missing pyppeteer for now
+		# TODO: Doesn't skip?
+		nbconvert/exporters/tests/test_webpdf.py
+		# Needs pyppeteer too
+		'nbconvert/tests/test_nbconvertapp.py::TestNbConvertApp::test_webpdf_with_chromium'
+	)
+
+	epytest --pyargs nbconvert
+}
+
+pkg_postinst() {
+	if ! has_version app-text/pandoc ; then
+		einfo "Pandoc is required for converting to formats other than Python,"
+		einfo "HTML, and Markdown. If you need this functionality, install"
+		einfo "app-text/pandoc."
+	fi
+}
author	Michał Górny <mgorny@gentoo.org>	2022-05-27 06:54:04 +0200
committer	Michał Górny <mgorny@gentoo.org>	2022-05-27 08:10:27 +0200
commit	f66a4fc204598c2abfe388b680694e3ff0d2dbbb (patch)
tree	c01205c73cf867ab682b122b9b5ea0333ea0b38b /dev-python/nbconvert
parent	dev-python/nbconvert: Remove old (diff)
download	gentoo-f66a4fc204598c2abfe388b680694e3ff0d2dbbb.tar.gz gentoo-f66a4fc204598c2abfe388b680694e3ff0d2dbbb.tar.bz2 gentoo-f66a4fc204598c2abfe388b680694e3ff0d2dbbb.zip