diff --git a/lark_channel/channel/config.py b/lark_channel/channel/config.py
index 4d799e7..969ff6b 100644
--- a/lark_channel/channel/config.py
+++ b/lark_channel/channel/config.py
@@ -243,7 +243,7 @@ class StreamThrottleConfig:
class MarkdownConverter:
enabled: bool = True
table_mode: TableMode = "off"
- tag_md_mode: TagMdMode = "structured"
+ tag_md_mode: TagMdMode = "native"
@dataclass
diff --git a/lark_channel/channel/normalize/converters/post.py b/lark_channel/channel/normalize/converters/post.py
index 153bc7c..3479568 100644
--- a/lark_channel/channel/normalize/converters/post.py
+++ b/lark_channel/channel/normalize/converters/post.py
@@ -1,13 +1,18 @@
"""Converter: PostContent → Markdown (headings / bold / italic / code / links)."""
+import re
from typing import Any, Dict, List, Tuple
from ...types import PostContent, ResourceDescriptor
+_AT_MENTION_RE = re.compile(r'<at\s+(user_id|open_id|id)=(\\?)(["\'])(.*?)\2\3\s*>(.*?)</at>')
+_IMAGE_KEY_RE = re.compile(r"!\[(.*?)\]\(([^)]+)\)")
+
def convert(content: PostContent) -> Tuple[str, List[ResourceDescriptor]]:
- md = _post_to_markdown(content.post) if content.post else content.text
+ md, md_resources = _post_to_markdown(content.post) if content.post else (content.text or "", [])
resources = _post_resources(content.post) if content.post else []
+ resources.extend(md_resources)
return md, resources
@@ -19,16 +24,25 @@ def _iter_documents(post: Dict[str, Any]) -> List[Dict[str, Any]]:
return [doc for doc in post.values() if isinstance(doc, dict)]
-def _post_to_markdown(post: Dict[str, Any]) -> str:
+def _post_to_markdown(post: Dict[str, Any]) -> Tuple[str, List[ResourceDescriptor]]:
docs = _iter_documents(post)
if not docs:
- return ""
+ return "", []
locale = docs[0]
+
+ # Choose source paragraphs: prefer content_v2, fallback to content.
+ content_v2 = locale.get("content_v2")
+ if isinstance(content_v2, list) and len(content_v2) > 0:
+ source_paragraphs = content_v2
+ else:
+ source_paragraphs = locale.get("content") or []
+
lines: List[str] = []
+ resources: List[ResourceDescriptor] = []
title = locale.get("title")
if title:
lines.append(f"# {title}")
- for para in locale.get("content") or []:
+ for para in source_paragraphs:
chunks: List[str] = []
for el in para or []:
if not isinstance(el, dict):
@@ -64,11 +78,13 @@ def _post_to_markdown(post: Dict[str, Any]) -> str:
elif tag == "hr":
chunks.append("---")
elif tag == "md":
- chunks.append(el.get("text") or "")
+ text, res = _process_md_text(el.get("text") or "")
+ chunks.append(text)
+ resources.extend(res)
line = "".join(chunks)
if line:
lines.append(line)
- return "\n\n".join(lines).strip()
+ return "\n\n".join(lines).strip(), resources
def _post_resources(post: Dict[str, Any]) -> List[ResourceDescriptor]:
@@ -105,3 +121,42 @@ def add(kind: str, key: Any, *, file_name: Any = None) -> None:
elif tag == "file":
add("file", el.get("file_key"), file_name=el.get("file_name"))
return resources
+
+
+def _process_md_text(text: str) -> Tuple[str, List[ResourceDescriptor]]:
+ """Post-process raw markdown text from an "md" element.
+
+ Splits by fenced code block delimiters (```) and only applies
+ transformations (at-mention replacement, image key extraction)
+ to text outside of properly paired code blocks. Unclosed fences
+ are treated as outside-code-block text.
+ """
+ resources: List[ResourceDescriptor] = []
+ parts = text.split("```")
+ total = len(parts)
+ for i, part in enumerate(parts):
+ # Odd-index segments are inside code blocks, UNLESS it's the last
+ # segment of an even-length split (unclosed fence).
+ is_inside = (i % 2 == 1)
+ if is_inside and total % 2 == 0 and i == total - 1:
+ is_inside = False
+ if not is_inside:
+ # Outside code block: apply at-mention replacement.
+ def _replace_at(m: re.Match) -> str:
+ user_id = m.group(4)
+ name = m.group(5)
+ if user_id in ("all", "all_members"):
+ return "@all"
+ return f"@{name}" if name else f"@{user_id}"
+
+ parts[i] = _AT_MENTION_RE.sub(_replace_at, part)
+
+ # Extract image keys from `![alt](image_key)` patterns.
+ for _alt, img_key in _IMAGE_KEY_RE.findall(parts[i]):
+ if img_key:
+ resources.append(ResourceDescriptor(
+ type="image", # type: ignore[arg-type]
+ file_key=img_key,
+ ))
+ # Inside code block: preserve as-is.
+ return "```".join(parts), resources
diff --git a/lark_channel/channel/outbound/markdown/to_post.py b/lark_channel/channel/outbound/markdown/to_post.py
index f8f05ff..6fb8738 100644
--- a/lark_channel/channel/outbound/markdown/to_post.py
+++ b/lark_channel/channel/outbound/markdown/to_post.py
@@ -91,7 +91,7 @@ def markdown_to_post_ast(
locale: str = "zh_cn",
mentions: "list[Identity] | None" = None,
table_mode: str = "off",
- tag_md_mode: str = "structured",
+ tag_md_mode: str = "native",
) -> Dict[str, Any]:
"""Produce a Lark post AST (`{locale: {title, content: [[...]]}}`) from Markdown.
@@ -99,13 +99,14 @@ def markdown_to_post_ast(
paragraph so the recipient actually gets notified.
``tag_md_mode``:
- - ``"structured"`` (default): parse Markdown into explicit post nodes
+ - ``"native"`` (default): wrap the raw markdown into one or more
+ ``tag:md`` rows (split at code-fence boundaries) and let the Feishu
+ client's own markdown parser render natively. Renders headers /
+ blockquotes / lists with native styling, but rendering depends on
+ Feishu client version.
+ - ``"structured"``: parse Markdown into explicit post nodes
(``tag:text`` with style attributes, ``tag:a`` for links,
``tag:code_block`` for fenced code, etc). Cross-client deterministic.
- - ``"native"``: wrap the raw markdown into one or more ``tag:md`` rows
- (split at code-fence boundaries) and let the Feishu client's own
- markdown parser render natively. Renders headers/blockquotes/lists
- with native styling, but rendering depends on Feishu client version.
"""
if tag_md_mode == "native":
return _build_native_md_ast(md, title=title, locale=locale, mentions=mentions)
diff --git a/lark_channel/channel/tests/test_flatten.py b/lark_channel/channel/tests/test_flatten.py
index 9806b28..6da5e29 100644
--- a/lark_channel/channel/tests/test_flatten.py
+++ b/lark_channel/channel/tests/test_flatten.py
@@ -153,6 +153,156 @@ def test_post_direct_document_shape_flattens_text_and_resources():
assert r[0].file_key == "img_direct"
+def test_post_content_v2_md_preferred_and_post_processed():
+ post = {
+ "zh_cn": {
+ "title": "V2",
+ "content": [[{"tag": "text", "text": "legacy content"}]],
+ "content_v2": [
+ [
+ {
+ "tag": "md",
+ "text": (
+ 'hello <at user_id="ou_1">Alice</at> '
+ 'and <at user_id="all">All</at> ![pic](img_v2)'
+ "\n\n"
+ "```text\n"
+ 'Code \n'
+ "```"
+ ),
+ }
+ ]
+ ],
+ }
+ }
+
+ t, r = flatten(PostContent(post=post))
+
+ assert "# V2" in t
+ assert "legacy content" not in t
+ assert "hello @Alice and @all " in t
+ assert 'Code ' in t
+ assert [(x.type, x.file_key) for x in r] == [("image", "img_v2")]
+
+
+def test_post_content_v2_empty_falls_back_to_content():
+ """An empty content_v2 list must fall back to legacy content paragraphs."""
+ post = {
+ "zh_cn": {
+ "title": "Fallback",
+ "content_v2": [],
+ "content": [[{"tag": "text", "text": "from legacy"}]],
+ }
+ }
+
+ t, r = flatten(PostContent(post=post))
+
+ assert "# Fallback" in t
+ assert "from legacy" in t
+ assert r == []
+
+
+def test_post_content_v2_non_list_falls_back_to_content():
+ """A non-list content_v2 (malformed) must fall back to legacy content."""
+ post = {
+ "zh_cn": {
+ "title": "Bad",
+ "content_v2": "not-a-list",
+ "content": [[{"tag": "text", "text": "still works"}]],
+ }
+ }
+
+ t, _ = flatten(PostContent(post=post))
+
+ assert "still works" in t
+
+
+def test_post_md_text_at_all_members_alias_and_unnamed_at():
+ """`all_members` resolves to @all; without inner text falls back to user_id."""
+ post = {
+ "zh_cn": {
+ "content_v2": [
+ [
+ {
+ "tag": "md",
+ "text": (
+ 'hi <at user_id="all_members"></at> '
+ 'and <at user_id="ou_42"></at> done'
+ ),
+ }
+ ]
+ ],
+ }
+ }
+
+ t, r = flatten(PostContent(post=post))
+
+ assert "hi @all and @ou_42 done" in t
+ assert r == []
+
+
+def test_post_md_text_unclosed_fence_is_treated_as_outside():
+ """An unclosed code fence must not protect at-mentions / image keys after it."""
+ post = {
+ "zh_cn": {
+ "content_v2": [
+ [
+ {
+ "tag": "md",
+ "text": (
+ 'before <at user_id="ou_1">Alice</at>\n'
+ "```python\n"
+ "still no close fence ![pic](img_unclosed)\n"
+ '<at user_id="ou_2">Bob</at>'
+ ),
+ }
+ ]
+ ],
+ }
+ }
+
+ t, r = flatten(PostContent(post=post))
+
+ assert "before @Alice" in t
+ assert "@Bob" in t
+ assert [(x.type, x.file_key) for x in r] == [
+ ("image", "img_unclosed"),
+ ]
+
+
+def test_post_md_text_multiple_paired_fences_protect_inner_blocks():
+ """Multiple complete fence pairs: only outside-of-fence transformations apply."""
+ post = {
+ "zh_cn": {
+ "content_v2": [
+ [
+ {
+ "tag": "md",
+ "text": (
+ "outer1 ![a](img_a)\n"
+ "```\nblock1 X\n```\n"
+ "outer2 ![b](img_b)\n"
+ "```\nblock2 ![c](img_c)\n```\n"
+ "outer3"
+ ),
+ }
+ ]
+ ],
+ }
+ }
+
+ t, r = flatten(PostContent(post=post))
+
+ # Inside-fence content preserved verbatim; outside-fence transformed.
+ assert 'block1 X' in t
+ assert "block2 " in t
+ # Only outside-fence images extracted (img_a, img_b), inner img_c skipped.
+ assert [(x.type, x.file_key) for x in r] == [
+ ("image", "img_a"),
+ ("image", "img_b"),
+ ]
+
+
def test_merge_forward_flatten_recursive():
child = TextContent(text="child content")
item = MergeForwardItem(
diff --git a/lark_channel/channel/tests/test_markdown.py b/lark_channel/channel/tests/test_markdown.py
index b06fa73..0d732a5 100644
--- a/lark_channel/channel/tests/test_markdown.py
+++ b/lark_channel/channel/tests/test_markdown.py
@@ -8,22 +8,26 @@ def _zh(ast):
return ast["zh_cn"]
+def _structured(md, **kwargs):
+ return markdown_to_post_ast(md, tag_md_mode="structured", **kwargs)
+
+
def test_plain_paragraph():
- ast = markdown_to_post_ast("hello world")
+ ast = _structured("hello world")
assert _zh(ast)["title"] == ""
paras = _zh(ast)["content"]
assert paras == [[{"tag": "text", "text": "hello world"}]]
def test_heading_becomes_bold_text():
- ast = markdown_to_post_ast("# Title\n\nbody")
+ ast = _structured("# Title\n\nbody")
paras = _zh(ast)["content"]
assert paras[0][0]["style"] == ["bold"] and paras[0][0]["text"] == "Title"
assert paras[1] == [{"tag": "text", "text": "body"}]
def test_bold_italic_code_inline():
- ast = markdown_to_post_ast("**bold** and *it* and `code`")
+ ast = _structured("**bold** and *it* and `code`")
paras = _zh(ast)["content"]
runs = paras[0]
styles = [(r.get("text"), r.get("style", [])) for r in runs if r["tag"] == "text"]
@@ -33,7 +37,7 @@ def test_bold_italic_code_inline():
def test_link_emits_a_tag():
- ast = markdown_to_post_ast("see [docs](https://x.example)")
+ ast = _structured("see [docs](https://x.example)")
runs = _zh(ast)["content"][0]
a_tag = next(r for r in runs if r["tag"] == "a")
assert a_tag["text"] == "docs"
@@ -41,7 +45,7 @@ def test_link_emits_a_tag():
def test_code_block_fenced():
- ast = markdown_to_post_ast("```python\nprint(1)\n```")
+ ast = _structured("```python\nprint(1)\n```")
paras = _zh(ast)["content"]
cb = paras[0][0]
assert cb["tag"] == "code_block"
@@ -50,26 +54,26 @@ def test_code_block_fenced():
def test_bullet_list_each_paragraph():
- ast = markdown_to_post_ast("- one\n- two\n- three")
+ ast = _structured("- one\n- two\n- three")
paras = _zh(ast)["content"]
assert len(paras) == 3
assert paras[0][0]["text"].startswith("• one")
def test_hr():
- ast = markdown_to_post_ast("top\n\n---\n\nbot")
+ ast = _structured("top\n\n---\n\nbot")
paras = _zh(ast)["content"]
assert any(p == [{"tag": "hr"}] for p in paras)
def test_blockquote_marker():
- ast = markdown_to_post_ast("> quoted line")
+ ast = _structured("> quoted line")
first = _zh(ast)["content"][0]
assert first[0] == {"tag": "text", "text": "│ "}
def test_mentions_injected():
- ast = markdown_to_post_ast(
+ ast = _structured(
"hi",
mentions=[Identity(open_id="ou_1", display_name="Alice")],
)
@@ -82,12 +86,12 @@ def test_mentions_injected():
def test_table_mode_bullets():
md = "| name | age |\n|---|---|\n| Alice | 30 |\n| Bob | 25 |"
- ast = markdown_to_post_ast(md, table_mode="bullets")
+ ast = _structured(md, table_mode="bullets")
paras = _zh(ast)["content"]
assert paras[0][0]["text"].startswith("• name: Alice")
assert paras[1][0]["text"].startswith("• name: Bob")
def test_empty_input_yields_empty_paragraph():
- ast = markdown_to_post_ast("")
+ ast = _structured("")
assert _zh(ast)["content"] == [[]] or _zh(ast)["content"] == [[{"tag": "text", "text": ""}]]
diff --git a/lark_channel/channel/tests/test_markdown_native_mode.py b/lark_channel/channel/tests/test_markdown_native_mode.py
index ea016dd..3b4b13f 100644
--- a/lark_channel/channel/tests/test_markdown_native_mode.py
+++ b/lark_channel/channel/tests/test_markdown_native_mode.py
@@ -41,7 +41,7 @@
def test_markdown_converter_has_tag_md_mode_field_with_structured_default():
conv = MarkdownConverter()
- assert conv.tag_md_mode == "structured"
+ assert conv.tag_md_mode == "native"
def test_markdown_converter_accepts_native_tag_md_mode():
@@ -108,6 +108,10 @@ def test_two_fences_separated_by_prose(self):
class TestNativeMode:
+ def test_default_mode_is_native(self):
+ out = markdown_to_post_ast("# Hello")
+ assert out["zh_cn"]["content"] == [[{"tag": "md", "text": "# Hello"}]]
+
def test_plain_text_native_returns_single_md_node(self):
out = markdown_to_post_ast("hello world", tag_md_mode="native")
assert out == {
@@ -170,21 +174,21 @@ def _load_snapshot():
path = Path(__file__).parent / "snapshots" / "markdown_structured.json"
return json.loads(path.read_text(encoding="utf-8"))
- def test_structured_default_kwarg_equals_snapshot(self):
+ def test_structured_explicit_kwarg_equals_snapshot(self):
snapshot = self._load_snapshot()
for label, text in FIXTURES.items():
- actual = markdown_to_post_ast(text)
+ actual = markdown_to_post_ast(text, tag_md_mode="structured")
assert actual == snapshot[label], (
f"structured-mode regression for {label!r}\n"
f"expected: {json.dumps(snapshot[label], ensure_ascii=False)}\n"
f"actual: {json.dumps(actual, ensure_ascii=False)}"
)
- def test_structured_explicit_kwarg_equals_default(self):
+ def test_native_default_matches_explicit_native(self):
for label, text in FIXTURES.items():
assert markdown_to_post_ast(text) == markdown_to_post_ast(
- text, tag_md_mode="structured"
- ), f"explicit and default disagree for {label!r}"
+ text, tag_md_mode="native"
+ ), f"native explicit and default disagree for {label!r}"
class TestSenderBuildPost:
@@ -291,12 +295,12 @@ async def test_outbound_post_default_still_produces_structured_payload(self):
from lark_channel.channel.types import OutboundPost
d, calls = make_driver()
- s = OutboundSender(d) # default OutboundConfig: structured
+ s = OutboundSender(d) # default OutboundConfig: native
await s.send(OutboundPost(markdown="# Hello"), receive_id="oc_x")
content = json.loads(calls[0]["content"])
node = content["zh_cn"]["content"][0][0]
- assert node["tag"] == "text"
- assert "bold" in node.get("style", [])
+ assert node["tag"] == "md"
+ assert node["text"] == "# Hello"
@pytest.mark.asyncio
async def test_outbound_post_native_with_code_fence_produces_multi_row(self):
diff --git a/lark_channel/channel/tests/test_media_caption.py b/lark_channel/channel/tests/test_media_caption.py
index a070c75..d963903 100644
--- a/lark_channel/channel/tests/test_media_caption.py
+++ b/lark_channel/channel/tests/test_media_caption.py
@@ -180,7 +180,8 @@ async def test_video_caption_native_post_body_uses_media_tag():
@pytest.mark.asyncio
async def test_image_caption_structured_adds_media_as_final_row():
d, calls = make_caption_driver(image_key="img_structured")
- s = OutboundSender(d)
+ cfg = OutboundConfig(markdown_converter=MarkdownConverter(tag_md_mode="structured"))
+ s = OutboundSender(d, cfg)
await s.send(
OutboundImage(source=MediaSource(kind="buffer", buffer=b"png"), caption="**bold**"),
receive_id="oc_1",
diff --git a/lark_channel/channel/tests/test_sender.py b/lark_channel/channel/tests/test_sender.py
index 0101eae..365faf5 100644
--- a/lark_channel/channel/tests/test_sender.py
+++ b/lark_channel/channel/tests/test_sender.py
@@ -104,7 +104,9 @@ async def test_post_from_markdown_emits_post_msg():
# Wrapping with "post" causes server error 230001 (invalid message content).
assert "post" not in content
zh = content["zh_cn"]
- assert zh["content"][0][0]["text"] == "bold"
+ # Default tag_md_mode is now "native", so markdown is preserved as raw md node.
+ assert zh["content"][0][0]["tag"] == "md"
+ assert zh["content"][0][0]["text"] == "**bold**"
@pytest.mark.asyncio
diff --git a/lark_channel/channel/tests/test_sender_extras.py b/lark_channel/channel/tests/test_sender_extras.py
index 1cddf76..f156033 100644
--- a/lark_channel/channel/tests/test_sender_extras.py
+++ b/lark_channel/channel/tests/test_sender_extras.py
@@ -58,7 +58,7 @@ async def noop(**kwargs):
s = OutboundSender(
SendDriver(create_message=create_message, reply_message=noop),
- OutboundConfig(markdown_converter=MarkdownConverter(table_mode="bullets")),
+ OutboundConfig(markdown_converter=MarkdownConverter(table_mode="bullets", tag_md_mode="structured")),
)
md = "| name | age |\n|---|---|\n| Alice | 30 |"
await s.send(OutboundPost(markdown=md), receive_id="oc_1")