".format(self.config["group"]),
+ self._sub_msg_link, str(s))
+ return result
+
+ def _sub_msg_link(self, match):  # passed as an argument at the call site above (presumably re.sub's repl callback -- confirm); returns the text to use for one match
+ if self.page_ids.get(int(match.group(2))) is None:  # group(2) is converted to int and used as the key into self.page_ids
+ return match.group(0)  # no usable entry for that key: hand back the matched text unchanged
+ return match.group(0).replace(match.group(1), self.page_ids[int(match.group(2))] + "#")  # str.replace swaps every occurrence of group(1) inside the match for the page_ids value followed by "#"
def _create_publish_dir(self):
pubdir = self.config["publish_dir"]
diff --git a/tgarchive/example/config.yaml b/tgarchive/example/config.yaml
index 4f7a695..77310b7 100644
--- a/tgarchive/example/config.yaml
+++ b/tgarchive/example/config.yaml
@@ -19,6 +19,9 @@ media_dir: "media"
# If left empty, files of all types are downloaded.
media_mime_types: []
+# Preserve formatting in messages (inline links, bold, italic, underline, etc.).
+html_messages: True
+
# Takeout mode allows you to fetch messages at a higher rate than the standard mode.
# It is the method used in the desktop client to export data.
# You can use a larger fetch_batch_size. Set this as False to use the standard mode.
diff --git a/tgarchive/example/rss_template.html b/tgarchive/example/rss_template.html
index 867dace..55bb202 100644
--- a/tgarchive/example/rss_template.html
+++ b/tgarchive/example/rss_template.html
@@ -31,7 +31,7 @@
{% if m.type == "message" %}
- {{ nl2br(m.content | escape) | safe | urlize }}
+ {{ nl2br(urlize(m.content)) }}
{% else %}
{% if m.type == "user_joined" %}
Joined.
diff --git a/tgarchive/example/template.html b/tgarchive/example/template.html
index a77233c..7fe35c3 100644
--- a/tgarchive/example/template.html
+++ b/tgarchive/example/template.html
@@ -123,7 +123,7 @@
{% if m.type == "message" %}
- {{ nl2br(m.content | escape) | safe | urlize }}
+ {{ nl2br(urlize(m.content)) }}
{% else %}
{% if m.type == "user_joined" %}
Joined.
diff --git a/tgarchive/sync.py b/tgarchive/sync.py
index e014485..a695e93 100644
--- a/tgarchive/sync.py
+++ b/tgarchive/sync.py
@@ -97,6 +97,7 @@ def sync(self, ids=None, from_id=None):
def new_client(self, session, config):
client = TelegramClient(session, config["api_id"], config["api_hash"])
client.start()
+ client.parse_mode = 'html'
if config.get("use_takeout", False):
for retry in range(3):
try:
@@ -124,6 +125,7 @@ def finish_takeout(self):
self.client.__exit__(None, None, None)
def _get_messages(self, group, offset_id, ids=None) -> Message:
+ msg_text_type = "text" if self.config.get("html_messages") else "raw_text"
messages = self._fetch_messages(group, offset_id, ids)
# https://docs.telethon.dev/en/latest/quick-references/objects-reference.html#message
for m in messages:
@@ -160,7 +162,7 @@ def _get_messages(self, group, offset_id, ids=None) -> Message:
id=m.id,
date=m.date,
edit_date=m.edit_date,
- content=sticker if sticker else m.raw_text,
+ content=sticker if sticker else getattr(m, msg_text_type),
reply_to=m.reply_to_msg_id if m.reply_to and m.reply_to.reply_to_msg_id else None,
user=self._get_user(m.sender),
media=med