Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lark_channel/channel/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ class StreamThrottleConfig:
class MarkdownConverter:
enabled: bool = True
table_mode: TableMode = "off"
tag_md_mode: TagMdMode = "structured"
tag_md_mode: TagMdMode = "native"


@dataclass
Expand Down
67 changes: 61 additions & 6 deletions lark_channel/channel/normalize/converters/post.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
"""Converter: PostContent → Markdown (headings / bold / italic / code / links)."""

import re
from typing import Any, Dict, List, Tuple

from ...types import PostContent, ResourceDescriptor

_AT_MENTION_RE = re.compile(r'<at(\s+)user_id(\s*)=(\s*)"(.*?)">(.*?)</at>')
_IMAGE_KEY_RE = re.compile(r"!\[(.*?)\]\(([^)]+)\)")


def convert(content: PostContent) -> Tuple[str, List[ResourceDescriptor]]:
md = _post_to_markdown(content.post) if content.post else content.text
md, md_resources = _post_to_markdown(content.post) if content.post else (content.text or "", [])
resources = _post_resources(content.post) if content.post else []
resources.extend(md_resources)
return md, resources


Expand All @@ -19,16 +24,25 @@ def _iter_documents(post: Dict[str, Any]) -> List[Dict[str, Any]]:
return [doc for doc in post.values() if isinstance(doc, dict)]


def _post_to_markdown(post: Dict[str, Any]) -> str:
def _post_to_markdown(post: Dict[str, Any]) -> Tuple[str, List[ResourceDescriptor]]:
docs = _iter_documents(post)
if not docs:
return ""
return "", []
locale = docs[0]

# Choose source paragraphs: prefer content_v2, fallback to content.
content_v2 = locale.get("content_v2")
if isinstance(content_v2, list) and len(content_v2) > 0:
source_paragraphs = content_v2
else:
source_paragraphs = locale.get("content") or []

lines: List[str] = []
resources: List[ResourceDescriptor] = []
title = locale.get("title")
if title:
lines.append(f"# {title}")
for para in locale.get("content") or []:
for para in source_paragraphs:
chunks: List[str] = []
for el in para or []:
if not isinstance(el, dict):
Expand Down Expand Up @@ -64,11 +78,13 @@ def _post_to_markdown(post: Dict[str, Any]) -> str:
elif tag == "hr":
chunks.append("---")
elif tag == "md":
chunks.append(el.get("text") or "")
text, res = _process_md_text(el.get("text") or "")
chunks.append(text)
resources.extend(res)
line = "".join(chunks)
if line:
lines.append(line)
return "\n\n".join(lines).strip()
return "\n\n".join(lines).strip(), resources


def _post_resources(post: Dict[str, Any]) -> List[ResourceDescriptor]:
Expand Down Expand Up @@ -105,3 +121,42 @@ def add(kind: str, key: Any, *, file_name: Any = None) -> None:
elif tag == "file":
add("file", el.get("file_key"), file_name=el.get("file_name"))
return resources


def _process_md_text(text: str) -> Tuple[str, List[ResourceDescriptor]]:
    """Post-process raw markdown text from an "md" element.

    The text is split on fenced-code delimiters (```). Segments outside a
    properly paired fence receive two treatments:

    * ``<at user_id="...">name</at>`` tags are rewritten to ``@name``
      (``@all`` for the ``all`` / ``all_members`` ids, ``@<user_id>`` when
      the tag carries no inner text);
    * every ``![alt](key)`` match is reported as an image resource; the
      markdown itself is left untouched.

    Segments inside a paired fence are preserved verbatim. An unclosed
    trailing fence does not open a code block, so the text following it is
    processed like ordinary text.

    Args:
        text: Raw markdown carried by the "md" element.

    Returns:
        A tuple of the rewritten markdown and the image resources found
        outside code blocks, in order of appearance.
    """

    # Defined once per call rather than once per segment: the replacement
    # logic does not depend on the segment being processed.
    def _replace_at(m: re.Match) -> str:
        user_id = m.group(4)
        name = m.group(5)
        if user_id in ("all", "all_members"):
            return "@all"
        return f"@{name}" if name else f"@{user_id}"

    resources: List[ResourceDescriptor] = []
    segments = text.split("```")
    total = len(segments)
    # An even segment count means an odd number of fences, i.e. the last
    # fence is never closed; the trailing segment is then NOT code.
    has_unclosed_fence = total % 2 == 0

    for index, segment in enumerate(segments):
        inside_code = index % 2 == 1 and not (
            has_unclosed_fence and index == total - 1
        )
        if inside_code:
            # Inside a paired code block: preserve as-is.
            continue

        # Outside code block: apply at-mention replacement.
        rewritten = _AT_MENTION_RE.sub(_replace_at, segment)
        segments[index] = rewritten

        # Extract image keys from ![...](key) patterns.
        for _alt, img_key in _IMAGE_KEY_RE.findall(rewritten):
            if img_key:
                resources.append(ResourceDescriptor(
                    type="image",  # type: ignore[arg-type]
                    file_key=img_key,
                ))

    return "```".join(segments), resources
13 changes: 7 additions & 6 deletions lark_channel/channel/outbound/markdown/to_post.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,21 +91,22 @@ def markdown_to_post_ast(
locale: str = "zh_cn",
mentions: "list[Identity] | None" = None,
table_mode: str = "off",
tag_md_mode: str = "structured",
tag_md_mode: str = "native",
) -> Dict[str, Any]:
"""Produce a Lark post AST (`{locale: {title, content: [[...]]}}`) from Markdown.

Mentions supplied via `mentions` are appended (inline @tags) to the first
paragraph so the recipient actually gets notified.

``tag_md_mode``:
- ``"structured"`` (default): parse Markdown into explicit post nodes
- ``"native"`` (default): wrap the raw markdown into one or more
``tag:md`` rows (split at code-fence boundaries) and let the Feishu
client's own markdown parser render natively. Renders headers /
blockquotes / lists with native styling, but rendering depends on
Feishu client version.
- ``"structured"``: parse Markdown into explicit post nodes
(``tag:text`` with style attributes, ``tag:a`` for links,
``tag:code_block`` for fenced code, etc). Cross-client deterministic.
- ``"native"``: wrap the raw markdown into one or more ``tag:md`` rows
(split at code-fence boundaries) and let the Feishu client's own
markdown parser render natively. Renders headers/blockquotes/lists
with native styling, but rendering depends on Feishu client version.
"""
if tag_md_mode == "native":
return _build_native_md_ast(md, title=title, locale=locale, mentions=mentions)
Expand Down
150 changes: 150 additions & 0 deletions lark_channel/channel/tests/test_flatten.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,156 @@ def test_post_direct_document_shape_flattens_text_and_resources():
assert r[0].file_key == "img_direct"


def test_post_content_v2_md_preferred_and_post_processed():
    """content_v2 takes precedence over content, and its md text is post-processed."""
    md_text = (
        'hello <at user_id="ou_1">Alice</at> '
        'and <at user_id="all">All</at> '
        "![diagram](img_v2)\n\n"
        "```text\n"
        '<at user_id="ou_code">Code</at> ![ignored](img_code)\n'
        "```"
    )
    document = {
        "title": "V2",
        "content": [[{"tag": "text", "text": "legacy content"}]],
        "content_v2": [[{"tag": "md", "text": md_text}]],
    }

    text, resources = flatten(PostContent(post={"zh_cn": document}))

    # Title is rendered and the legacy paragraphs are ignored.
    assert "# V2" in text
    assert "legacy content" not in text
    # Outside the fence: mentions rewritten, image markdown kept.
    assert "hello @Alice and @all ![diagram](img_v2)" in text
    # Inside the fence: everything preserved verbatim.
    assert '<at user_id="ou_code">Code</at> ![ignored](img_code)' in text
    assert [(res.type, res.file_key) for res in resources] == [("image", "img_v2")]


def test_post_content_v2_empty_falls_back_to_content():
    """When content_v2 is an empty list, the legacy content paragraphs are used."""
    document = {
        "title": "Fallback",
        "content_v2": [],
        "content": [[{"tag": "text", "text": "from legacy"}]],
    }

    text, resources = flatten(PostContent(post={"zh_cn": document}))

    assert "# Fallback" in text
    assert "from legacy" in text
    assert resources == []


def test_post_content_v2_non_list_falls_back_to_content():
    """When content_v2 is malformed (not a list), the legacy content is used."""
    document = {
        "title": "Bad",
        "content_v2": "not-a-list",
        "content": [[{"tag": "text", "text": "still works"}]],
    }

    text, _resources = flatten(PostContent(post={"zh_cn": document}))

    assert "still works" in text


def test_post_md_text_at_all_members_alias_and_unnamed_at():
    """`all_members` maps to @all; an <at> tag with no inner text uses its user_id."""
    md_text = (
        'hi <at user_id="all_members"></at> '
        'and <at user_id="ou_42"></at> done'
    )
    md_element = {"tag": "md", "text": md_text}
    post = {"zh_cn": {"content_v2": [[md_element]]}}

    text, resources = flatten(PostContent(post=post))

    assert "hi @all and @ou_42 done" in text
    assert resources == []


def test_post_md_text_unclosed_fence_is_treated_as_outside():
    """Text after a fence that never closes still gets mentions and image keys processed."""
    md_text = (
        'before <at user_id="ou_1">Alice</at>\n'
        "```python\n"
        "still no close fence ![pic](img_unclosed)\n"
        '<at user_id="ou_2">Bob</at>'
    )
    md_element = {"tag": "md", "text": md_text}
    post = {"zh_cn": {"content_v2": [[md_element]]}}

    text, resources = flatten(PostContent(post=post))

    # Mentions on both sides of the dangling fence are rewritten.
    assert "before @Alice" in text
    assert "@Bob" in text
    # The image after the dangling fence is still extracted.
    expected_resources = [("image", "img_unclosed")]
    assert [(res.type, res.file_key) for res in resources] == expected_resources


def test_post_md_text_multiple_paired_fences_protect_inner_blocks():
    """With several complete fence pairs, only text outside the fences is transformed."""
    md_text = (
        "outer1 ![a](img_a)\n"
        "```\nblock1 <at user_id=\"x\">X</at>\n```\n"
        "outer2 ![b](img_b)\n"
        "```\nblock2 ![c](img_c)\n```\n"
        "outer3"
    )
    md_element = {"tag": "md", "text": md_text}
    post = {"zh_cn": {"content_v2": [[md_element]]}}

    text, resources = flatten(PostContent(post=post))

    # Fenced content comes through verbatim.
    assert 'block1 <at user_id="x">X</at>' in text
    assert "block2 ![c](img_c)" in text
    # Images are collected only from outside the fences; img_c is skipped.
    expected_resources = [("image", "img_a"), ("image", "img_b")]
    assert [(res.type, res.file_key) for res in resources] == expected_resources


def test_merge_forward_flatten_recursive():
child = TextContent(text="child content")
item = MergeForwardItem(
Expand Down
26 changes: 15 additions & 11 deletions lark_channel/channel/tests/test_markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,26 @@ def _zh(ast):
return ast["zh_cn"]


def _structured(md, **kwargs):
    """Call ``markdown_to_post_ast`` with ``tag_md_mode="structured"`` pinned.

    Any extra keyword arguments are forwarded unchanged.
    """
    post_ast = markdown_to_post_ast(md, tag_md_mode="structured", **kwargs)
    return post_ast


def test_plain_paragraph():
ast = markdown_to_post_ast("hello world")
ast = _structured("hello world")
assert _zh(ast)["title"] == ""
paras = _zh(ast)["content"]
assert paras == [[{"tag": "text", "text": "hello world"}]]


def test_heading_becomes_bold_text():
ast = markdown_to_post_ast("# Title\n\nbody")
ast = _structured("# Title\n\nbody")
paras = _zh(ast)["content"]
assert paras[0][0]["style"] == ["bold"] and paras[0][0]["text"] == "Title"
assert paras[1] == [{"tag": "text", "text": "body"}]


def test_bold_italic_code_inline():
ast = markdown_to_post_ast("**bold** and *it* and `code`")
ast = _structured("**bold** and *it* and `code`")
paras = _zh(ast)["content"]
runs = paras[0]
styles = [(r.get("text"), r.get("style", [])) for r in runs if r["tag"] == "text"]
Expand All @@ -33,15 +37,15 @@ def test_bold_italic_code_inline():


def test_link_emits_a_tag():
ast = markdown_to_post_ast("see [docs](https://x.example)")
ast = _structured("see [docs](https://x.example)")
runs = _zh(ast)["content"][0]
a_tag = next(r for r in runs if r["tag"] == "a")
assert a_tag["text"] == "docs"
assert a_tag["href"] == "https://x.example"


def test_code_block_fenced():
ast = markdown_to_post_ast("```python\nprint(1)\n```")
ast = _structured("```python\nprint(1)\n```")
paras = _zh(ast)["content"]
cb = paras[0][0]
assert cb["tag"] == "code_block"
Expand All @@ -50,26 +54,26 @@ def test_code_block_fenced():


def test_bullet_list_each_paragraph():
ast = markdown_to_post_ast("- one\n- two\n- three")
ast = _structured("- one\n- two\n- three")
paras = _zh(ast)["content"]
assert len(paras) == 3
assert paras[0][0]["text"].startswith("• one")


def test_hr():
ast = markdown_to_post_ast("top\n\n---\n\nbot")
ast = _structured("top\n\n---\n\nbot")
paras = _zh(ast)["content"]
assert any(p == [{"tag": "hr"}] for p in paras)


def test_blockquote_marker():
ast = markdown_to_post_ast("> quoted line")
ast = _structured("> quoted line")
first = _zh(ast)["content"][0]
assert first[0] == {"tag": "text", "text": "│ "}


def test_mentions_injected():
ast = markdown_to_post_ast(
ast = _structured(
"hi",
mentions=[Identity(open_id="ou_1", display_name="Alice")],
)
Expand All @@ -82,12 +86,12 @@ def test_mentions_injected():

def test_table_mode_bullets():
md = "| name | age |\n|---|---|\n| Alice | 30 |\n| Bob | 25 |"
ast = markdown_to_post_ast(md, table_mode="bullets")
ast = _structured(md, table_mode="bullets")
paras = _zh(ast)["content"]
assert paras[0][0]["text"].startswith("• name: Alice")
assert paras[1][0]["text"].startswith("• name: Bob")


def test_empty_input_yields_empty_paragraph():
ast = markdown_to_post_ast("")
ast = _structured("")
assert _zh(ast)["content"] == [[]] or _zh(ast)["content"] == [[{"tag": "text", "text": ""}]]
Loading
Loading