Skip to main content

hashiverse_lib/tools/
plain_text_post.rs

1//! # Plain-text → hashiverse HTML conversion
2//!
3//! Hashiverse posts are stored and transmitted as a constrained subset of HTML (so that
4//! rich posts from the web client, API clients, and plain-text API clients are all the
5//! same format on the wire). This module provides the one-way convenience path for
6//! callers that have nothing but a string of text — mainly the Python client, plain-text
7//! API integrations, and quick CLI posts.
8//!
9//! The output is the same HTML shape produced by the Tiptap editor in the web client:
10//! HTML-escaped body, `#hashtag` tokens rewritten as `<hashtag>` elements, `@<64-hex-id>`
11//! mentions rewritten as `<mention>` elements, and literal newlines turned into `<br>`.
12//! `submit_post()` then parses the result into the canonical on-wire representation.
13
14/// Converts a plain-text post into well-formed HTML that `submit_post()` can parse.
15///
16/// - HTML-escapes `<`, `>`, `&`, `"` in the input to prevent injection
17/// - Converts `#hashtag` patterns into `<hashtag hashtag="...">` elements
18/// - Converts `@<64-hex-char-id>` patterns into `<mention client_id="...">` elements
19/// - Converts newlines into `<br>` tags
20pub fn convert_text_to_hashiverse_html(text: &str) -> String {
21    let escaped = html_escape(text);
22    let chars: Vec<char> = escaped.chars().collect();
23    let len = chars.len();
24    let mut output = String::with_capacity(escaped.len() * 2);
25    let mut i = 0;
26
27    while i < len {
28        match chars[i] {
29            '#' => {
30                let start = i + 1;
31                let mut end = start;
32                while end < len && chars[end].is_alphanumeric() {
33                    end += 1;
34                }
35                if end > start {
36                    let hashtag_text: String = chars[start..end].iter().collect();
37                    output.push_str(&convert_text_to_hashiverse_html_x_hashtag(&hashtag_text));
38                    i = end;
39                } else {
40                    output.push('#');
41                    i += 1;
42                }
43            }
44            '@' => {
45                let start = i + 1;
46                let mut end = start;
47                while end < len && end - start < 64 && is_hex_char(chars[end]) {
48                    end += 1;
49                }
50                let hex_len = end - start;
51                // Must be exactly 64 hex chars, and the next char (if any) must NOT be hex
52                // to avoid matching a prefix of a longer hex string
53                if hex_len == 64 && (end >= len || !is_hex_char(chars[end])) {
54                    let hex_string: String = chars[start..end].iter().collect();
55                    output.push_str(&convert_text_to_hashiverse_html_x_mention(&hex_string));
56                    i = end;
57                } else {
58                    output.push('@');
59                    i += 1;
60                }
61            }
62            '\n' => {
63                output.push_str("<br>");
64                i += 1;
65            }
66            '\r' => {
67                // Skip carriage returns — \r\n is handled by skipping \r and emitting <br> on \n
68                i += 1;
69            }
70            c => {
71                output.push(c);
72                i += 1;
73            }
74        }
75    }
76
77    output
78}
79
/// Builds the canonical `<hashtag>` element for a single tag.
///
/// The tag may be given with or without one leading `#` (`"rust"` and
/// `"#rust"` are equivalent). After removing that optional prefix the
/// remainder must be non-empty and consist solely of alphanumeric characters;
/// when it is not, the original `hashtag` argument is returned unchanged so
/// that invalid input never ends up inside an element.
///
/// For valid input the `hashtag` attribute carries the lower-cased tag, while
/// the visible span keeps the caller's casing.
pub fn convert_text_to_hashiverse_html_x_hashtag(hashtag: &str) -> String {
    let label = match hashtag.strip_prefix('#') {
        Some(without_hash) => without_hash,
        None => hashtag,
    };
    let is_valid = !label.is_empty() && label.chars().all(|c| c.is_alphanumeric());

    if is_valid {
        format!(
            "<hashtag hashtag=\"{}\"><span class=\"plugin-hashtag-left\">#</span><span class=\"plugin-hashtag-right\">{}</span></hashtag>",
            label.to_lowercase(),
            label,
        )
    } else {
        hashtag.to_owned()
    }
}
100
/// Render a client id as a `<mention client_id="..."></mention>` element.
///
/// The id is expected to be a 64-character hex string, but its length and
/// alphabet are not validated here — that remains the caller's job. To make
/// sure an unexpected value can never break out of the double-quoted
/// attribute, `&`, `<`, `>` and `"` are replaced by their HTML entities.
/// A well-formed hex id contains none of those characters, so it is emitted
/// verbatim.
pub fn convert_text_to_hashiverse_html_x_mention(client_id: &str) -> String {
    let mut attribute_value = String::with_capacity(client_id.len());
    for c in client_id.chars() {
        match c {
            '&' => attribute_value.push_str("&amp;"),
            '<' => attribute_value.push_str("&lt;"),
            '>' => attribute_value.push_str("&gt;"),
            '"' => attribute_value.push_str("&quot;"),
            other => attribute_value.push(other),
        }
    }
    format!("<mention client_id=\"{}\"></mention>", attribute_value)
}
106
107/// Render the canonical URL preview card. Same shape as the web client's
108/// `build_card_dom` (hashiverse-client-web/src/tabs/compose/UrlPreviewExtension.ts):
109///
110/// - With `image_url`: a `card-image-container` wraps the `<img>` and the
111///   domain label, sibling to a `card-inner` column holding the title link
112///   (and optional description).
113/// - Without `image_url`: no image container; the domain label moves *inside*
114///   `card-inner`, above the title link.
115///
116/// The description div is omitted entirely when `description` is empty.
117/// Domain is derived from `url` internally. All field values are HTML-escaped.
118pub fn convert_text_to_hashiverse_html_x_url_preview(
119    title: &str,
120    description: &str,
121    image_url: &str,
122    url: &str,
123) -> String {
124    let domain = extract_host_or_url(url);
125    let mut out = String::with_capacity(512);
126    out.push_str("<div class=\"plugin-urlpreview-card\">");
127    if !image_url.is_empty() {
128        out.push_str("<div class=\"plugin-urlpreview-card-image-container\">");
129        out.push_str(&format!(
130            "<img src=\"{}\" alt=\"\" class=\"plugin-urlpreview-card-image unblur-image\">",
131            html_escape(image_url),
132        ));
133        out.push_str(&format!(
134            "<div class=\"plugin-urlpreview-card-domain\">{}</div>",
135            html_escape(domain),
136        ));
137        out.push_str("</div>");
138    }
139    out.push_str("<div class=\"plugin-urlpreview-card-inner\">");
140    if image_url.is_empty() {
141        out.push_str(&format!(
142            "<div class=\"plugin-urlpreview-card-domain\">{}</div>",
143            html_escape(domain),
144        ));
145    }
146    out.push_str(&format!(
147        "<a class=\"plugin-urlpreview-card-title\" href=\"{}\" rel=\"noopener noreferrer nofollow\">{}</a>",
148        html_escape(url),
149        html_escape(title),
150    ));
151    if !description.is_empty() {
152        out.push_str(&format!(
153            "<div class=\"plugin-urlpreview-card-description\">{}</div>",
154            html_escape(description),
155        ));
156    }
157    out.push_str("</div>");
158    out.push_str("</div>");
159    out
160}
161
/// Returns the `host[:port]` portion of `url` for display, or `url` itself
/// when no host can be determined.
///
/// - Without a `://` separator the input is returned unchanged.
/// - The authority is whatever follows `://` up to the first `/`, `?` or `#`.
/// - Any userinfo (`user:pass@`) is removed from the authority so that
///   credentials are not shown and a URL such as
///   `https://trusted.com@evil.example/` is labelled with its real host,
///   `evil.example`.
/// - If the resulting host is empty the full `url` is returned.
fn extract_host_or_url(url: &str) -> &str {
    let after_scheme = match url.split_once("://") {
        Some((_, rest)) => rest,
        None => return url,
    };
    // `split` always yields at least one item; the fallback is never taken.
    let authority = after_scheme.split(['/', '?', '#']).next().unwrap_or("");
    // The host starts after the LAST '@' of the authority (RFC 3986 §3.2).
    let host = match authority.rsplit_once('@') {
        Some((_userinfo, host)) => host,
        None => authority,
    };
    if host.is_empty() {
        url
    } else {
        host
    }
}
172
/// Replaces `&`, `<`, `>` and `"` with `&amp;`, `&lt;`, `&gt;` and `&quot;`;
/// every other character is copied through unchanged.
fn html_escape(text: &str) -> String {
    // Over-allocate by ~10% so a handful of entities do not force a regrow.
    let mut out = String::with_capacity(11 * text.len() / 10);
    for ch in text.chars() {
        let entity = match ch {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            '"' => "&quot;",
            _ => {
                out.push(ch);
                continue;
            }
        };
        out.push_str(entity);
    }
    out
}
187
/// True for the ASCII hexadecimal digits `0-9`, `a-f` and `A-F` only.
fn is_hex_char(c: char) -> bool {
    matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F')
}
191
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the plain-text conversion under test.
    fn render(text: &str) -> String {
        convert_text_to_hashiverse_html(text)
    }

    /// The exact `<mention>` markup expected for `client_id`.
    fn mention_element(client_id: &str) -> String {
        format!("<mention client_id=\"{}\"></mention>", client_id)
    }

    /// Parses `fragment` with `scraper` and collects the `attribute` value of
    /// every `tag` element, in the order `select` yields them.
    fn attribute_values(fragment: &str, tag: &str, attribute: &str) -> Vec<String> {
        let document = scraper::Html::parse_fragment(fragment);
        let selector = scraper::Selector::parse(tag).unwrap();
        document
            .select(&selector)
            .filter_map(|element| element.attr(attribute))
            .map(str::to_owned)
            .collect()
    }

    // --- Hashtag tests ---

    #[test]
    fn test_hashtag_at_start() {
        let html = render("#rust is great");
        assert!(html.contains("<hashtag hashtag=\"rust\">"));
        assert!(html.contains("<span class=\"plugin-hashtag-right\">rust</span>"));
        assert!(html.ends_with(" is great"));
    }

    #[test]
    fn test_hashtag_at_end() {
        let html = render("hello #rust");
        assert!(html.starts_with("hello "));
        assert!(html.contains("<hashtag hashtag=\"rust\">"));
    }

    #[test]
    fn test_hashtag_in_middle() {
        let html = render("I love #rust programming");
        assert!(html.contains("<hashtag hashtag=\"rust\">"));
        assert!(html.contains(" programming"));
    }

    #[test]
    fn test_multiple_hashtags() {
        let html = render("#rust and #golang");
        assert!(html.contains("<hashtag hashtag=\"rust\">"));
        assert!(html.contains("<hashtag hashtag=\"golang\">"));
    }

    #[test]
    fn test_adjacent_hashtags() {
        let html = render("#rust#golang");
        assert!(html.contains("<hashtag hashtag=\"rust\">"));
        assert!(html.contains("<hashtag hashtag=\"golang\">"));
    }

    #[test]
    fn test_hashtag_case_lowered_in_attribute() {
        let html = render("#RuStLang");
        assert!(html.contains("hashtag=\"rustlang\""));
        // The visible text keeps the casing the author typed.
        assert!(html.contains("<span class=\"plugin-hashtag-right\">RuStLang</span>"));
    }

    #[test]
    fn test_bare_hash_no_alphanumeric() {
        assert_eq!(render("# alone"), "# alone");
    }

    #[test]
    fn test_hash_at_end_of_string() {
        assert_eq!(render("test #"), "test #");
    }

    #[test]
    fn test_unicode_hashtag() {
        let html = render("#日本語");
        assert!(html.contains("hashtag=\"日本語\""));
        assert!(html.contains("<span class=\"plugin-hashtag-right\">日本語</span>"));
    }

    #[test]
    fn test_hashtag_with_numbers() {
        let html = render("#web3");
        assert!(html.contains("hashtag=\"web3\""));
    }

    #[test]
    fn test_hashtag_terminated_by_punctuation() {
        let html = render("#rust, nice");
        assert!(html.contains("<hashtag hashtag=\"rust\">"));
        assert!(html.contains("</hashtag>, nice"));
    }

    // --- Mention tests ---

    #[test]
    fn test_valid_mention() {
        let client_id = "a".repeat(64);
        let html = render(&format!("hello @{} world", client_id));
        assert!(html.contains(&mention_element(&client_id)));
        assert!(html.starts_with("hello "));
        assert!(html.ends_with(" world"));
    }

    #[test]
    fn test_mention_mixed_case_hex() {
        let client_id = "aAbBcCdDeEfF0011223344556677889900112233445566778899aAbBcCdDeEfF";
        assert_eq!(client_id.len(), 64);
        let html = render(&format!("@{}", client_id));
        assert!(html.contains(&mention_element(client_id)));
    }

    #[test]
    fn test_mention_too_short() {
        let html = render("@abcdef");
        assert_eq!(html, "@abcdef");
        assert!(!html.contains("<mention"));
    }

    #[test]
    fn test_mention_non_hex_after_at() {
        assert_eq!(render("@hello"), "@hello");
    }

    #[test]
    fn test_bare_at() {
        assert_eq!(render("@"), "@");
    }

    #[test]
    fn test_at_end_of_string() {
        assert_eq!(render("test @"), "test @");
    }

    #[test]
    fn test_mention_65_hex_chars_not_matched() {
        // 65 hex chars: the 64-char prefix must not be taken as a mention,
        // because the character right after it is hex too.
        let too_long = "a".repeat(65);
        let html = render(&format!("@{}", too_long));
        assert!(!html.contains("<mention"));
    }

    #[test]
    fn test_mention_64_hex_then_non_hex() {
        let client_id = "b".repeat(64);
        let html = render(&format!("@{}xyz", client_id));
        assert!(html.contains(&mention_element(&client_id)));
        assert!(html.ends_with("xyz"));
    }

    // --- HTML escaping tests ---

    #[test]
    fn test_html_injection_escaped() {
        let html = render("<script>alert(1)</script>");
        assert!(html.contains("&lt;script&gt;"));
        assert!(!html.contains("<script>"));
    }

    #[test]
    fn test_ampersand_escaped() {
        assert_eq!(render("AT&T"), "AT&amp;T");
    }

    #[test]
    fn test_quotes_escaped() {
        let html = render("he said \"hello\"");
        assert!(html.contains("&quot;"));
    }

    // --- Newline tests ---

    #[test]
    fn test_newline_to_br() {
        assert_eq!(render("line1\nline2"), "line1<br>line2");
    }

    #[test]
    fn test_crlf_to_br() {
        assert_eq!(render("line1\r\nline2"), "line1<br>line2");
    }

    #[test]
    fn test_bare_cr_skipped() {
        assert_eq!(render("line1\rline2"), "line1line2");
    }

    // --- Combined tests ---

    #[test]
    fn test_combined_post() {
        let client_id = "c".repeat(64);
        let html = render(&format!(
            "Hello #hashiverse from @{}!\nGreat to be here.",
            client_id
        ));
        assert!(html.contains("<hashtag hashtag=\"hashiverse\">"));
        assert!(html.contains(&mention_element(&client_id)));
        assert!(html.contains("<br>"));
        assert!(html.contains("Great to be here."));
    }

    #[test]
    fn test_empty_string() {
        assert_eq!(render(""), "");
    }

    #[test]
    fn test_plain_text_no_specials() {
        assert_eq!(render("just a normal post"), "just a normal post");
    }

    // --- Round-trip tests: scraper must be able to read back what we emit ---

    #[test]
    fn test_round_trip_hashtag_extraction() {
        let html = render("I love #Rust and #golang");
        let hashtags = attribute_values(&html, "hashtag", "hashtag");
        assert_eq!(hashtags, vec!["rust", "golang"]);
    }

    #[test]
    fn test_round_trip_mention_extraction() {
        let client_id = "d".repeat(64);
        let html = render(&format!("hello @{}", client_id));
        let client_ids = attribute_values(&html, "mention", "client_id");
        assert_eq!(client_ids, vec![client_id.as_str()]);
    }

    #[test]
    fn test_round_trip_combined() {
        let client_id = "e".repeat(64);
        let html = render(&format!("#hashiverse post by @{} about #Rust", client_id));

        let hashtags = attribute_values(&html, "hashtag", "hashtag");
        assert_eq!(hashtags, vec!["hashiverse", "rust"]);

        let client_ids = attribute_values(&html, "mention", "client_id");
        assert_eq!(client_ids, vec![client_id.as_str()]);
    }

    // --- Fragment helpers: _x_hashtag, _x_mention, _x_url_preview ---

    #[test]
    fn test_x_hashtag_lowercases_attribute_preserves_span_text() {
        let element = convert_text_to_hashiverse_html_x_hashtag("RuStLang");
        assert!(element.contains("hashtag=\"rustlang\""));
        assert!(element.contains("<span class=\"plugin-hashtag-right\">RuStLang</span>"));
        assert!(element.contains("<span class=\"plugin-hashtag-left\">#</span>"));
    }

    #[test]
    fn test_x_hashtag_handles_unicode() {
        let element = convert_text_to_hashiverse_html_x_hashtag("日本語");
        assert!(element.contains("hashtag=\"日本語\""));
        assert!(element.contains("<span class=\"plugin-hashtag-right\">日本語</span>"));
    }

    #[test]
    fn test_x_hashtag_non_alphanumeric_returns_input_untouched() {
        // Invalid input must come back exactly as given instead of being
        // wrapped in a <hashtag> element.
        for invalid in ["a<b", "a\"b", "a b", "a&b", "##rust", "#a<b"] {
            assert_eq!(convert_text_to_hashiverse_html_x_hashtag(invalid), invalid);
        }
    }

    #[test]
    fn test_x_hashtag_empty_returns_empty() {
        assert_eq!(convert_text_to_hashiverse_html_x_hashtag(""), "");
    }

    #[test]
    fn test_x_hashtag_lone_hash_returns_lone_hash() {
        // Removing the '#' leaves nothing, validation fails, input is returned.
        assert_eq!(convert_text_to_hashiverse_html_x_hashtag("#"), "#");
    }

    #[test]
    fn test_x_hashtag_strips_leading_hash_if_provided() {
        let prefixed = convert_text_to_hashiverse_html_x_hashtag("#rust");
        let bare = convert_text_to_hashiverse_html_x_hashtag("rust");
        assert_eq!(prefixed, bare);
        assert!(prefixed.contains("hashtag=\"rust\""));
        assert!(prefixed.contains("<span class=\"plugin-hashtag-right\">rust</span>"));
    }

    #[test]
    fn test_x_hashtag_no_op_does_not_emit_hashtag_element() {
        // Whatever invalid value a caller passes, no hashtag markup may appear.
        for bad in ["a<b", "a\"b", "a b", "", "##rust", "#a<b"] {
            let result = convert_text_to_hashiverse_html_x_hashtag(bad);
            assert!(!result.contains("<hashtag"), "unexpected element for {bad:?}: {result:?}");
            assert!(!result.contains("plugin-hashtag-left"), "unexpected span for {bad:?}: {result:?}");
        }
    }

    #[test]
    fn test_x_mention_emits_64hex_client_id() {
        let client_id = "f".repeat(64);
        assert_eq!(
            convert_text_to_hashiverse_html_x_mention(&client_id),
            mention_element(&client_id)
        );
    }

    #[test]
    fn test_x_url_preview_with_image_renders_image_container() {
        let card = convert_text_to_hashiverse_html_x_url_preview(
            "Title",
            "Desc",
            "https://img.example/x.png",
            "https://example.com/path",
        );
        assert!(card.starts_with("<div class=\"plugin-urlpreview-card\"><div class=\"plugin-urlpreview-card-image-container\">"));
        assert!(card.contains("<img src=\"https://img.example/x.png\" alt=\"\" class=\"plugin-urlpreview-card-image unblur-image\">"));
        assert!(card.contains("<div class=\"plugin-urlpreview-card-domain\">example.com</div>"));
        assert!(card.contains("<a class=\"plugin-urlpreview-card-title\" href=\"https://example.com/path\" rel=\"noopener noreferrer nofollow\">Title</a>"));
        assert!(card.contains("<div class=\"plugin-urlpreview-card-description\">Desc</div>"));
    }

    #[test]
    fn test_x_url_preview_without_image_moves_domain_into_inner() {
        let card = convert_text_to_hashiverse_html_x_url_preview(
            "Title",
            "Desc",
            "",
            "https://example.com/path",
        );
        assert!(!card.contains("plugin-urlpreview-card-image-container"));
        assert!(!card.contains("<img "));
        // The domain div must sit inside card-inner and precede the title link.
        let inner_at = card.find("plugin-urlpreview-card-inner").unwrap();
        let domain_at = card.find("plugin-urlpreview-card-domain").unwrap();
        let title_at = card.find("plugin-urlpreview-card-title").unwrap();
        assert!(inner_at < domain_at && domain_at < title_at);
    }

    #[test]
    fn test_x_url_preview_omits_description_when_blank() {
        let card = convert_text_to_hashiverse_html_x_url_preview(
            "Title",
            "",
            "https://img.example/x.png",
            "https://example.com/",
        );
        assert!(!card.contains("plugin-urlpreview-card-description"));
    }

    #[test]
    fn test_x_url_preview_extracts_domain_from_https_url() {
        let card = convert_text_to_hashiverse_html_x_url_preview(
            "T",
            "",
            "",
            "https://sub.example.com:8443/path?q=1#frag",
        );
        assert!(card.contains("<div class=\"plugin-urlpreview-card-domain\">sub.example.com:8443</div>"));
    }

    #[test]
    fn test_x_url_preview_falls_back_to_full_url_when_no_scheme() {
        let card = convert_text_to_hashiverse_html_x_url_preview("T", "", "", "not-a-url");
        assert!(card.contains("<div class=\"plugin-urlpreview-card-domain\">not-a-url</div>"));
    }

    #[test]
    fn test_x_url_preview_html_escapes_attribute_and_text_values() {
        let card = convert_text_to_hashiverse_html_x_url_preview(
            "Title with <script> & \"quotes\"",
            "Desc with <html> & \"chars\"",
            "https://i/x?a=1&b=2",
            "https://example.com/?q=1&r=2",
        );
        // Title text inside the <a>:
        assert!(card.contains(">Title with &lt;script&gt; &amp; &quot;quotes&quot;</a>"));
        // Description text inside its div:
        assert!(card.contains(">Desc with &lt;html&gt; &amp; &quot;chars&quot;</div>"));
        // URLs placed in attributes: & has to be written as &amp;
        assert!(card.contains("src=\"https://i/x?a=1&amp;b=2\""));
        assert!(card.contains("href=\"https://example.com/?q=1&amp;r=2\""));
    }

    #[test]
    fn test_existing_convert_text_to_hashiverse_html_unchanged_after_refactor() {
        // Pins the full output for a representative input: moving the inline
        // format!s into helpers must not have altered a single byte.
        let client_id = "a".repeat(64);
        let html = render(&format!("Hi #Rust @{} bye", client_id));
        let expected = format!(
            "Hi <hashtag hashtag=\"rust\"><span class=\"plugin-hashtag-left\">#</span><span class=\"plugin-hashtag-right\">Rust</span></hashtag> <mention client_id=\"{}\"></mention> bye",
            client_id
        );
        assert_eq!(html, expected);
    }
}