>()[..], &chars[replace_newlines_to..]].concat();
} else if tag.starts_with("") {
replace_newlines_to = index;
}
let tag: Vec = tag.chars().collect();
chars = [chars[..index].to_vec(), tag.clone(), chars[end..].to_vec()].concat();
let offset: isize = index as isize - end as isize + tag.len() as isize;
replace_newlines_to = if offset > 0 {
replace_newlines_to + offset as usize
} else {
replace_newlines_to - offset.abs() as usize
};
}
let substring = chars[..replace_newlines_to].into_iter().collect::();
let text = [substring.replace('\n', "
"), chars[replace_newlines_to..].into_iter().collect::()].concat();
Ok(text)
}
slidge-style-parser/src/parser.rs 0000664 0000000 0000000 00000020634 15156463707 0017431 0 ustar 00root root 0000000 0000000 const KEYWORDS: [char; 4] = ['*', '_', '~', '`'];
// Style markers whose contents are not parsed for further styles by `parse_with_limits`.
const NO_SUB_PARSING_KEYWORDS: [char; 1] = ['`'];
// Characters that `parse_with_limits` and its helpers treat as quote markers.
const QUOTE_KEYWORDS: [char; 1] = ['>'];
pub fn parse_with_limits(chars: &Vec, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize, usize, usize)> {
let mut styles = Vec::new();
let mut index = start;
let end = end.min(chars.len() - 1);
while index <= end {
let c = chars[index];
if c == '\\' {
styles.push(("\\".to_owned(), index, index + 1, index + 1, index + 1));
index += 2;
continue;
}
if QUOTE_KEYWORDS.contains(&c) {
if is_quote_start(chars, index, depth) {
let to = seek_end_of_quote(chars, index, end, depth);
styles.push((">".to_owned(), index, index + 1, to, to));
styles.append(&mut parse_with_limits(chars, index + 1, to, depth + 1));
index = to;
continue;
} else if is_nested_quote(chars, index, depth) {
styles.push((">>".to_owned(), index, index + 1, index + 1, index + 1));
} else {
styles.push((">".to_owned(), index, index + 1, index + 1, index + 1));
}
index += 1;
continue;
}
if c == '<' {
styles.push(("<".to_owned(), index, index + 1, index + 1, index + 1));
index += 1;
continue;
}
if c == '`' && is_char_repeating(chars, c, 2, index + 1, end) {
let end_of_line = seek_end_of_line(chars, index + 1, end);
if end_of_line == end {
index += 3;
continue;
}
match seek_end_block(chars, c, end_of_line, end, depth) {
Some(to) => {
if to != end_of_line && is_quote_start(chars, index, depth) {
let keyword = if end_of_line == index + 3 {
"```".to_owned()
} else {
"```language".to_owned()
};
let remove_end = if depth > 0 && (to == end || to == chars.len()) {
to
} else {
to + 4 + depth
};
styles.push((keyword, index, end_of_line + 1, to, remove_end));
styles.append(&mut parse_quotes_in_code_block(chars, index + 3, to, depth));
index = to;
}
}
None => ()
}
index += 3;
continue;
}
if !preceeded_by_whitespace(chars, index, start) || followed_by_whitespace(chars, index, end) {
index += 1;
continue;
}
if c == '|' && is_char_repeating(chars, c, 1, index + 1, end) {
match seek_end(chars, c, index + 2, 1, end) {
Some(to) => {
if to != index + 2 {
let keyword = "||".to_owned();
styles.push((keyword, index, index + 2, to, to + 2));
styles.append(&mut parse_with_limits(chars, index + 2, to - 1, depth));
}
index = to + 2;
continue;
}
None => ()
}
index += 2;
continue;
}
if !KEYWORDS.contains(&c) {
index += 1;
continue;
}
match seek_end(chars, c, index + 1, 0, end) {
Some (to) => {
if to != index + 1 {
styles.push((c.to_string(), index, index + 1, to, to + 1));
if !NO_SUB_PARSING_KEYWORDS.contains(&c) {
styles.append(&mut parse_with_limits(chars, index + 1, to - 1, depth));
}
}
index = to + 1;
}
None => ()
}
index += 1;
}
styles
}
fn parse_quotes_in_code_block(chars: &Vec, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize, usize, usize)> {
let mut quotes = Vec::new();
let mut index = start;
let end = end.min(chars.len() - 1);
if depth < 1 {
return quotes;
}
while index <= end {
let c = chars[index];
if QUOTE_KEYWORDS.contains(&c) {
if is_nested_quote(chars, index, depth) {
quotes.push(("```>".to_owned(), index, index + 1, index + 1, index + 1));
}
index += 1;
continue;
}
index += 1;
}
quotes
}
fn is_nested_quote(chars: &Vec, start: usize, depth: usize) -> bool {
let mut index = start;
let mut count = 0;
while index > 0 {
if chars[index] == '\n' {
return true;
}
if !QUOTE_KEYWORDS.contains(&chars[index]) {
return false;
}
count += 1;
if count > depth {
return false;
}
index -= 1;
}
true
}
/// Returns `true` when `keyword` occupies `repetitions` consecutive positions starting at
/// `index`, none of them past `end`.
///
/// Zero repetitions is trivially `true`. Positions outside `chars` never match, so an
/// `end` beyond the text cannot cause a panic.
fn is_char_repeating(chars: &Vec<char>, keyword: char, repetitions: usize, index: usize, end: usize) -> bool {
    (0..repetitions).all(|i| index + i <= end && chars.get(index + i) == Some(&keyword))
}
/// Returns `true` when `index` is the start of the scanned range or the character just
/// before it is whitespace.
///
/// A position with no preceding character (other than `start` itself) returns `false`.
fn preceeded_by_whitespace(chars: &Vec<char>, index: usize, start: usize) -> bool {
    index == start
        || index
            .checked_sub(1)
            .and_then(|previous| chars.get(previous))
            .map_or(false, |c| c.is_whitespace())
}
/// Returns `true` when `index` is at or past `end`, or the next character is whitespace.
///
/// Running off the end of `chars` is treated the same as reaching `end`.
fn followed_by_whitespace(chars: &Vec<char>, index: usize, end: usize) -> bool {
    index >= end || chars.get(index + 1).map_or(true, |c| c.is_whitespace())
}
fn seek_end(chars: &Vec, keyword: char, start: usize, repetitions: usize, end: usize) -> Option {
for i in start..=end {
let c = chars[i];
if c == '\n' {
return None;
}
if c == keyword
&& !chars[i - 1].is_whitespace()
&& is_char_repeating(chars, keyword, repetitions, i + 1, end)
{
match seek_higher_order_end(chars, c, i + 1, end) {
Some(higher_order_i) => {
return Some(higher_order_i);
}
None => {
return Some(i);
}
}
}
}
None
}
fn seek_higher_order_end(chars: &Vec, keyword: char, start: usize, end: usize) -> Option {
let mut skip = true;
for i in start..=end {
let c = chars[i];
if c == '\n' {
return None;
}
if c != keyword {
skip = false;
continue;
}
if chars[i - 1].is_whitespace() && !followed_by_whitespace(chars, i, end) {
return None; // "*bold* *<--- beginning of new bold>*"
}
if followed_by_whitespace(chars, i, end) && !skip {
return Some(i);
}
}
None
}
/// Returns the index of the first newline in `chars[start..=end]`, or `end + 1` when the
/// range contains none.
fn seek_end_of_line(chars: &Vec<char>, start: usize, end: usize) -> usize {
    chars[start..=end]
        .iter()
        .position(|&c| c == '\n')
        .map_or(end + 1, |offset| start + offset)
}
fn seek_end_of_quote(chars: &Vec, start: usize, end: usize, depth: usize) -> usize {
for i in start..=end {
if chars[i] == '\n' {
if i + 2 + depth > chars.len() {
return i;
}
if chars[i + 1..=i + 1 + depth].iter().any(|&c| !QUOTE_KEYWORDS.contains(&c)) {
return i;
}
}
}
end + 1
}
fn seek_end_block(chars: &Vec, keyword: char, start: usize, end: usize, depth: usize) -> Option {
for i in start..=end {
if chars[i] == '\n' {
if i + depth == end && chars[i + 1..i + 1 + depth].iter().all(|&c| QUOTE_KEYWORDS.contains(&c)) {
continue;
}
if i + 1 + depth > end {
return Some(i);
}
if seek_end_of_line(chars, i + 1, end) == i + depth + 4
&& chars[i + 1..i + 1 + depth].iter().all(|&c| QUOTE_KEYWORDS.contains(&c))
&& chars[i + 1 + depth] == keyword
&& is_char_repeating(chars, keyword, 2, i + 1 + depth, end)
{
return Some(i);
}
}
}
if end == chars.len() - 1 {
if depth == 0 {
return None;
}
return Some(chars.len());
}
Some(end)
}
/// Returns `true` when `index` sits exactly `depth` characters after the start of a line,
/// i.e. `index - depth` is 0 or the character just before that position is a newline.
///
/// An `index` smaller than `depth` cannot be that far into any line and returns `false`
/// instead of underflowing.
fn is_quote_start(chars: &Vec<char>, index: usize, depth: usize) -> bool {
    match index.checked_sub(depth) {
        None => false,
        Some(0) => true,
        Some(line_start) => chars.get(line_start - 1) == Some(&'\n'),
    }
}
slidge-style-parser/src/telegram.rs 0000664 0000000 0000000 00000010265 15156463707 0017734 0 ustar 00root root 0000000 0000000 use pyo3::prelude::*;
use crate::parser::parse_with_limits;
// Parser keywords handled by `format_for_telegram`, paired with the format name reported
// for each of them.
const TELEGRAM_STYLES: &[(&'static str, &'static str)] = &[
("_", "italics"),
("*", "bold"),
("~", "strikethrough"),
("||", "spoiler"),
("`", "code"),
("```language", "pre"),
("```", "pre")
];
// Strips style markers from `body` and reports the styled ranges separately.
//
// `body` is parsed with `parse_with_limits`. Markers of the styles listed in
// `TELEGRAM_STYLES`, plus "```>" and "\\" entries, are removed from the text. `mentions`
// is an optional list of tuples whose second and third items are used as start and end
// character indexes into `body`; the first item is not used here.
//
// Returns the stripped text and one `(format, offset, length, language)` tuple per style
// or mention. Offsets and lengths are looked up in the table built by
// `utf8_to_utf16_length`; `language` is non-empty only for "```language" entries.
#[pyfunction]
#[pyo3(signature = (body, mentions=None))]
pub fn format_for_telegram(body: String, mentions: Option>) -> PyResult<(String, Vec<(String, usize, usize, String)>)> {
let mut chars: Vec = body.chars().collect();
// An empty body is returned unchanged with no entities.
if chars.len() < 1 {
return Ok((body, Vec::with_capacity(0)));
}
let mentions = mentions.unwrap_or(Vec::with_capacity(0));
let styles: Vec<(String, usize, usize, usize, usize)> = parse_with_limits(&chars, 0, chars.len() - 1, 0);
// Character ranges (from, to) that will be deleted from the text.
let mut remove_tags: Vec<(usize, usize)> = Vec::with_capacity(styles.len() * 2);
for (keyword, start, remove_start, end, remove_end) in &styles {
if TELEGRAM_STYLES.iter().any(|&(k, _)| k == keyword) {
// Both the opening and the closing marker are removed.
remove_tags.push((*start, *remove_start));
remove_tags.push((*end, *remove_end));
} else if keyword == "```>" || keyword == "\\" {
// Single-marker entries: only the marker itself is removed.
remove_tags.push((*start, *remove_start));
}
}
// is_start (*<-- start, end -->*), index of all_indexes, format, index of tag, language of codeblock
let mut message_entities: Vec<(bool, usize, String, usize, String)> = Vec::with_capacity(styles.len() * 2);
// One entry per style or mention: [start, opening marker length, end, closing marker length].
let mut all_indexes: Vec> = Vec::with_capacity(styles.len());
for (keyword, start, remove_start, end, remove_end) in &styles {
if TELEGRAM_STYLES.iter().any(|&(k, _)| k == keyword) {
// The language tag is the text between the opening fence and the end of its line.
let language = if keyword == "```language" {
chars[start+3..remove_start-1]
.into_iter()
.collect::()
} else {
String::new()
};
all_indexes.push(vec![*start, *remove_start - *start, *end, *remove_end - *end]);
let last_index = all_indexes.len() - 1;
message_entities.push((true, last_index, TELEGRAM_STYLES.iter().find(|&&(k, _)| k == keyword).unwrap().1.to_owned(), *start, language));
message_entities.push((false, last_index, String::new(), *end, String::new()));
} else if keyword == "```>" || keyword == "\\" {
// Only an end-type event is pushed: it adds one removed character to the running
// offset below and is filtered out of the returned list.
all_indexes.push(vec![0, 0, *start, 1]);
message_entities.push((false, all_indexes.len() - 1, String::new(), *start, String::new()));
}
}
// Mentions remove nothing from the text, so their marker lengths are zero.
for (_name, start, end) in mentions {
all_indexes.push(vec![start, 0, end, 0]);
let last_index = all_indexes.len() - 1;
message_entities.push((true, last_index, "mention".to_owned(), start, String::new()));
message_entities.push((false, last_index, String::new(), end, String::new()));
}
// Events are processed in order of their position in the original text.
message_entities.sort_by(|a, b| a.3.cmp(&b.3));
// Ranges are deleted from the highest start index downwards, so the indexes of ranges
// still to be deleted stay valid.
remove_tags.sort_by(|a, b| b.0.cmp(&a.0));
for (index, end) in remove_tags {
chars = [chars[..index].to_vec(), chars[end..].to_vec()].concat();
}
let formatted_text = chars.into_iter().collect::();
let utf16_lengths: Vec = utf8_to_utf16_length(&formatted_text);
// Shift every recorded position left by the number of characters removed before it.
let mut offset = 0;
for (is_start, index, _, _, _) in &message_entities {
let indexes = &mut all_indexes[*index];
if *is_start {
indexes[0] -= offset;
offset += indexes[1];
} else {
indexes[2] -= offset;
offset += indexes[3];
}
}
// Only start events become output entries; positions are translated through the
// `utf16_lengths` table.
Ok((
formatted_text,
message_entities.into_iter()
.filter(|(is_start, _, _, _, _)| { *is_start } )
.map(|(_, index, format, _, language)| { (format, utf16_lengths[all_indexes[index][0]], utf16_lengths[all_indexes[index][2]] - utf16_lengths[all_indexes[index][0]], language) })
.collect()
))
}
/// Builds a lookup table from character index to UTF-16 offset.
///
/// Entry `i` is the number of UTF-16 code units taken by the first `i` characters of
/// `utf8_str`, so the table has one more entry than the string has characters and always
/// starts with 0.
fn utf8_to_utf16_length(utf8_str: &str) -> Vec<usize> {
    // The byte length bounds the character count; +1 for the leading zero entry.
    let mut utf16_lengths = Vec::with_capacity(utf8_str.len() + 1);
    let mut length = 0;
    utf16_lengths.push(length);
    for c in utf8_str.chars() {
        // 2 for characters outside the Basic Multilingual Plane, 1 otherwise.
        length += c.len_utf16();
        utf16_lengths.push(length);
    }
    utf16_lengths
}
slidge-style-parser/tests/ 0000775 0000000 0000000 00000000000 15156463707 0016135 5 ustar 00root root 0000000 0000000 slidge-style-parser/tests/test_matrix.py 0000664 0000000 0000000 00000032155 15156463707 0021060 0 ustar 00root root 0000000 0000000 from slidge_style_parser import format_for_matrix
# Checks format_for_matrix on one input per basic style: underline, bold, strikethrough,
# code span, fenced code block (with and without a language tag) and spoiler.
# NOTE(review): the expected strings look like they have lost markup and some of them span
# several lines -- confirm against the upstream test file.
def test_basic():
test = "_underline_"
formatted_body = "underline"
assert(format_for_matrix(test) == formatted_body)
test = "*bold*"
formatted_body = "bold"
assert(format_for_matrix(test) == formatted_body)
test = "~strikethrough~"
formatted_body = "strikethrough"
assert(format_for_matrix(test) == formatted_body)
test = "`code span`"
formatted_body = "code span"
assert(format_for_matrix(test) == formatted_body)
test = """
```python
def test_basic():
test = "_underline_"
formatted_body = "underline"
assert(format_for_matrix(test) == formatted_body)
```
"""
formatted_body = '
def test_basic():\n test = "_underline_"\n formatted_body = "underline"\n assert(format_for_matrix(test) == formatted_body)
'
assert(format_for_matrix(test) == formatted_body)
test = "```\ncode block\n```"
formatted_body = "code block
"
assert(format_for_matrix(test) == formatted_body)
test = "||this message contains a spoiler||"
formatted_body = "this message contains a spoiler"
assert(format_for_matrix(test) == formatted_body)
# Checks format_for_matrix when a mentions list of (id, start, end) tuples is passed
# alongside a styled body, with the mention before and after the styled word.
# NOTE(review): the expected strings look like they have lost markup -- confirm upstream.
def test_basic_mention():
test = "SavagePeanut _underline_"
formatted_body = "SavagePeanut underline"
assert(format_for_matrix(test, [("@SavagePeanut:example.org", 0, 12)]) == formatted_body)
test = "*bold* SavagePeanut"
formatted_body = "bold SavagePeanut"
assert(format_for_matrix(test, [("@SavagePeanut:example.org", 7, 19)]) == formatted_body)
# Checks format_for_matrix on markers with nothing between them: empty inline pairs,
# empty fenced blocks and a bare run of underscores.
# NOTE(review): some expected strings span several lines here -- confirm upstream.
def test_empty():
test = "__ ** ~~ ``"
formatted_body = "__ ** ~~ ``"
assert(format_for_matrix(test) == formatted_body)
test = "```\n```"
formatted_body = "```
```"
assert(format_for_matrix(test) == formatted_body)
test = "```python\n```"
formatted_body = "```python
```"
assert(format_for_matrix(test) == formatted_body)
test = "_____"
formatted_body = "_____"
assert(format_for_matrix(test) == formatted_body)
# Checks format_for_matrix on lines starting with one or more '>' characters: single,
# grouped, doubled and mixed-depth quotes, plus a line that starts with "&>".
# NOTE(review): the expected strings look like they have lost markup and span several
# lines -- confirm against the upstream test file.
def test_quotes():
test = ">single"
formatted_body = "single
"
assert(format_for_matrix(test) == formatted_body)
test = ">single arrow ->"
formatted_body = "single arrow ->
"
assert(format_for_matrix(test) == formatted_body)
test = ">single\n>grouped"
formatted_body = "single
grouped
"
assert(format_for_matrix(test) == formatted_body)
test = ">>double"
formatted_body = "double
"
assert(format_for_matrix(test) == formatted_body)
test = ">>double\n>>double"
formatted_body = "double
double
"
assert(format_for_matrix(test) == formatted_body)
test = ">>double\n&>not quote"
formatted_body = "double
&>not quote"
assert(format_for_matrix(test) == formatted_body)
test = ">>double\n>grouped single"
formatted_body = "double
grouped single
"
assert(format_for_matrix(test) == formatted_body)
test = ">>>tripple\n>single\n>>double"
formatted_body = "tripple
single
double
"
assert(format_for_matrix(test) == formatted_body)
# Checks format_for_matrix on fenced code blocks: with and without a language tag,
# followed by normal text, inside single and double quotes, left unterminated inside a
# quote, and with a fence that is wrapped in underscores.
# NOTE(review): the expected strings look like they have lost markup and span several
# lines -- confirm against the upstream test file.
def test_code_blocks():
test = "```\nhacker\ncode\n```"
formatted_body = "hacker\ncode
"
assert(format_for_matrix(test) == formatted_body)
test = "```python\nhacker code\n```"
formatted_body = "hacker code
"
assert(format_for_matrix(test) == formatted_body)
test = "```pythonaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\nhacker code\n```"
formatted_body = "hacker code
"
assert(format_for_matrix(test) == formatted_body)
test = "```python\nhacker code\n```\nnormal text"
formatted_body = "hacker code
normal text"
assert(format_for_matrix(test) == formatted_body)
test = "```python\nhacker code\n```\nnormal text\n```java\npublic static void main(String [])\n```"
formatted_body = "hacker code
normal text
public static void main(String [])
"
assert(format_for_matrix(test) == formatted_body)
test = ">```java\n>why are you quoting a code block\n>```"
formatted_body = "why are you quoting a code block
"
assert(format_for_matrix(test) == formatted_body)
test = ">>```\n>>double quote code block\n>single quote not in code block\nnormal text"
formatted_body = "double quote code block
single quote not in code block
normal text"
assert(format_for_matrix(test) == formatted_body)
test = ">```\n>please stop trying to break my parser ;-;"
formatted_body = "please stop trying to break my parser ;-;
"
assert(format_for_matrix(test) == formatted_body)
test = ">>```\n>>>>double quote code block\n>single quote not in code block\nnormal text"
formatted_body = ">>double quote code block
single quote not in code block
normal text"
assert(format_for_matrix(test) == formatted_body)
test = "_```_ignored\ninvalid code block\n```"
formatted_body = "```ignored
invalid code block
```"
assert(format_for_matrix(test) == formatted_body)
# Checks format_for_matrix on backslash escapes: an escaped underscore, a doubled
# backslash, and an escaped '>' between quoted lines.
# NOTE(review): the last expected string spans several lines and looks like it has lost
# markup -- confirm against the upstream test file.
def test_escaped():
test = "\\_no underline_"
formatted_body = "_no underline_"
assert(format_for_matrix(test) == formatted_body)
test = "\\\\_no underline_"
formatted_body = "\\_no underline_"
assert(format_for_matrix(test) == formatted_body)
test = ">>>tripple\n\\>none\n>>double"
formatted_body = "tripple
>none
double
"
assert(format_for_matrix(test) == formatted_body)
# Checks format_for_matrix on markers nested inside each other: styles inside a code
# span, a code span inside styles, the same inside a quote, markers next to '>' and '<',
# crossed markers, and doubled underscores.
# NOTE(review): the expected strings look like they have lost markup -- confirm upstream.
def test_nested():
test = "`*~_code span_~*`"
formatted_body = "*~_code span_~*"
assert(format_for_matrix(test) == formatted_body)
test = "*_~`code span`~_*"
formatted_body = "code span"
assert(format_for_matrix(test) == formatted_body)
test = ">*_~`code span`~_*"
formatted_body = "code span
"
assert(format_for_matrix(test) == formatted_body)
test = "*bold star >*< star bold*"
formatted_body = "bold star >*< star bold"
assert(format_for_matrix(test) == formatted_body)
test = "*_bold*_"
formatted_body = "_bold_"
assert(format_for_matrix(test) == formatted_body)
test = "__underlined__"
formatted_body = "underlined"
assert(format_for_matrix(test) == formatted_body)
# Checks format_for_matrix on inputs that contain marker characters in positions where
# they are not expected to form a style: empty string, empty pairs, unclosed markers,
# markers split across lines and malformed code fences.
# NOTE(review): several expected strings span several lines here -- confirm upstream.
def test_no_changes():
test = ""
formatted_body = ""
assert(format_for_matrix(test) == formatted_body)
test = "~~ empty `````` styles **"
formatted_body = "~~ empty `````` styles **"
assert(format_for_matrix(test) == formatted_body)
test = "this is not an empty string"
formatted_body = "this is not an empty string"
assert(format_for_matrix(test) == formatted_body)
test = "arrow ->"
formatted_body = "arrow ->"
assert(format_for_matrix(test) == formatted_body)
test = " > no quote"
formatted_body = " > no quote"
assert(format_for_matrix(test) == formatted_body)
test = "_not underlined"
formatted_body = "_not underlined"
assert(format_for_matrix(test) == formatted_body)
test = "|not a spoiler|"
formatted_body = "|not a spoiler|"
assert(format_for_matrix(test) == formatted_body)
test = "||\nalso\nnot\na\nspoiler||"
formatted_body = "||
also
not
a
spoiler||"
assert(format_for_matrix(test) == formatted_body)
test = "`no code\nblock here`"
formatted_body = "`no code
block here`"
assert(format_for_matrix(test) == formatted_body)
test = "invalid ```\ncode block\n```"
formatted_body = "invalid ```
code block
```"
assert(format_for_matrix(test) == formatted_body)
test = "```\ncode block\ninvalid```"
formatted_body = "```
code block
invalid```"
assert(format_for_matrix(test) == formatted_body)
test = "```\ncode block\n```invalid"
formatted_body = "```
code block
```invalid"
assert(format_for_matrix(test) == formatted_body)
# Checks format_for_matrix on a mix of inputs: a lone newline, styles at the end and in
# the middle of a line, one long message combining several styles and quotes, two code
# blocks in a row, and quoted code blocks with unusual endings.
# NOTE(review): the expected strings look like they have lost markup and span several
# lines -- confirm against the upstream test file.
def test_assorted():
test = "\n"
formatted_body = "
"
assert(format_for_matrix(test) == formatted_body)
test = "at the ||end||"
formatted_body = "at the end"
assert(format_for_matrix(test) == formatted_body)
test = "in the ~middle~ here"
formatted_body = "in the middle here"
assert(format_for_matrix(test) == formatted_body)
test = "_underline_ *bold* ~strikethrough~ >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||~_*```four```*_~||"
formatted_body = "underline bold strikethrough >not quote spoiler
quote
nothing
nothing
another quote with ```four```
"
assert(format_for_matrix(test) == formatted_body)
test = "```\nhacker\ncode\n```\n\n```\nhacker\ncode\n```"
formatted_body = "hacker\ncode
hacker\ncode
"
assert(format_for_matrix(test) == formatted_body)
test = ">```\n>do be do be dooo ba do be do be do ba\n>>>"
formatted_body = "do be do be dooo ba do be do be do ba\n>>
"
assert(format_for_matrix(test) == formatted_body)
test = "\n\n>```\n>do be do be dooo ba do be do be do ba\na\n\n\naoeu\n"
formatted_body = "
do be do be dooo ba do be do be do ba
a
aoeu
"
assert(format_for_matrix(test) == formatted_body)
test = ">```\n>do be do be dooo ba do be do be do ba\n>\n>\n>aoeu"
formatted_body = "do be do be dooo ba do be do be do ba\n\n\naoeu
"
assert(format_for_matrix(test) == formatted_body)
test = ">```\n>code block\n>```invalid end\n"
formatted_body = "code block\n```invalid end
"
assert(format_for_matrix(test) == formatted_body)
test = "invalid ```\ncode block\n*bold*\n```"
formatted_body = "invalid ```
code block
bold
```"
assert(format_for_matrix(test) == formatted_body)
def test_weird_utf8():
test = "β€οΈππππ ||πππππ€|| πππβ£οΈ"
formatted_body = "β€οΈππππ πππππ€ πππβ£οΈ"
assert(format_for_matrix(test) == formatted_body)
test = "π¨βπ©βπ§βπ§ _underline_π©βπ©βπ¦βπ§"
formatted_body = "π¨βπ©βπ§βπ§ underlineπ©βπ©βπ¦βπ§"
assert(format_for_matrix(test) == formatted_body)
test = "\u202eRight to left"
formatted_body = "\u202eRight to left"
assert(format_for_matrix(test) == formatted_body)
test = ">\u202eRight to left quote?"
formatted_body = "\u202eRight to left quote?
"
assert(format_for_matrix(test) == formatted_body)
test = "_Invisible\u200bseparator_"
formatted_body = "Invisible\u200bseparator"
assert(format_for_matrix(test) == formatted_body)
test = "~\u200b~"
formatted_body = "\u200b"
assert(format_for_matrix(test) == formatted_body)
test = ""
formatted_body = "<element>"
assert(format_for_matrix(test) == formatted_body)
test = "< element >"
formatted_body = "< element >"
assert(format_for_matrix(test) == formatted_body)
test = "< element>"
formatted_body = "< element>"
assert(format_for_matrix(test) == formatted_body)
test = ""
formatted_body = "<element >"
assert(format_for_matrix(test) == formatted_body)
test = " malicious script "
formatted_body = "<element> malicious script </element>"
assert(format_for_matrix(test) == formatted_body)
slidge-style-parser/tests/test_telegram.py 0000664 0000000 0000000 00000025216 15156463707 0021354 0 ustar 00root root 0000000 0000000 from slidge_style_parser import format_for_telegram
def test_basic():
test = "_underline_"
formatted_body = "underline"
styles = [('italics', 0, 9, '')]
assert(format_for_telegram(test) == (formatted_body, styles))
test = "*bold*"
formatted_body = "bold"
styles = [('bold', 0, 4, '')]
assert(format_for_telegram(test) == (formatted_body, styles))
test = "~strikethrough~"
formatted_body = "strikethrough"
styles = [('strikethrough', 0, 13, '')]
assert(format_for_telegram(test) == (formatted_body, styles))
test = "`code span`"
formatted_body = "code span"
styles = [('code', 0, 9, '')]
assert(format_for_telegram(test) == (formatted_body, styles))
test = """
```python
def test_basic():
test = "_underline_"
formatted_body = "underline"
assert(format_for_telegram(test)[0] == formatted_body)
```
"""
formatted_body = '\n def test_basic():\n test = "_underline_"\n formatted_body = "underline"\n assert(format_for_telegram(test)[0] == formatted_body)\n'
styles = [('pre', 1, 150, 'python')]
assert(format_for_telegram(test) == (formatted_body, styles))
test = "```\ncode block\n```"
formatted_body = "code block"
styles = [('pre', 0, 10, '')]
assert(format_for_telegram(test) == (formatted_body, styles))
test = "||this message contains a spoiler||"
formatted_body = "this message contains a spoiler"
styles = [('spoiler', 0, 31, '')]
assert(format_for_telegram(test) == (formatted_body, styles))
test = "β€οΈππππ ||πππππ€|| πππβ£οΈ"
formatted_body = "β€οΈππππ πππππ€ πππβ£οΈ"
styles = [('spoiler', 11, 10, '')]
assert(format_for_telegram(test) == (formatted_body, styles))
# Checks that a mention passed as (name, start, end) is returned as a 'mention' entry
# next to the style entry, in order of position, and that positions after a removed
# marker are shifted (the mention at 7 in the second input is reported at 5).
def test_basic_mention():
test = "SavagePeanut _underline_"
formatted_body = "SavagePeanut underline"
styles = [('mention', 0, 12, ''), ('italics', 13, 9, '')]
assert(format_for_telegram(test, [("SavagePeanut", 0, 12)]) == (formatted_body, styles))
test = "*bold* SavagePeanut"
formatted_body = "bold SavagePeanut"
styles = [('bold', 0, 4, ''), ('mention', 5, 12, '')]
assert(format_for_telegram(test, [("SavagePeanut", 7, 19)]) == (formatted_body, styles))
# Checks that the text returned by format_for_telegram keeps quote markers exactly as
# written: every expected string here equals its input. Only the text (element 0 of the
# result) is compared.
def test_quotes():
test = ">single"
formatted_body = ">single"
assert(format_for_telegram(test)[0] == formatted_body)
test = ">single arrow ->"
formatted_body = ">single arrow ->"
assert(format_for_telegram(test)[0] == formatted_body)
test = ">single\n>grouped"
formatted_body = ">single\n>grouped"
assert(format_for_telegram(test)[0] == formatted_body)
test = ">>double"
formatted_body = ">>double"
assert(format_for_telegram(test)[0] == formatted_body)
test = ">>double\n>>double"
formatted_body = ">>double\n>>double"
assert(format_for_telegram(test)[0] == formatted_body)
test = ">>double\n&>not quote"
formatted_body = ">>double\n&>not quote"
assert(format_for_telegram(test)[0] == formatted_body)
test = ">>double\n>grouped single"
formatted_body = ">>double\n>grouped single"
assert(format_for_telegram(test)[0] == formatted_body)
test = ">>>tripple\n>single\n>>double"
formatted_body = ">>>tripple\n>single\n>>double"
assert(format_for_telegram(test)[0] == formatted_body)
# Checks the text returned by format_for_telegram for fenced code blocks: the fences and
# any language tag are expected to be removed, quote markers in front of quoted blocks
# are expected to stay, and a fence wrapped in underscores is not treated as a block.
# Only the text (element 0 of the result) is compared.
def test_code_blocks():
test = "```\nhacker\ncode\n```"
formatted_body = "hacker\ncode"
assert(format_for_telegram(test)[0] == formatted_body)
test = "```python\nhacker code\n```"
formatted_body = "hacker code"
assert(format_for_telegram(test)[0] == formatted_body)
test = "```pythonaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\nhacker code\n```"
formatted_body = "hacker code"
assert(format_for_telegram(test)[0] == formatted_body)
test = "```python\nhacker code\n```\nnormal text"
formatted_body = "hacker code\nnormal text"
assert(format_for_telegram(test)[0] == formatted_body)
test = "```python\nhacker code\n```\nnormal text\n```java\npublic static void main(String [])\n```"
formatted_body = "hacker code\nnormal text\npublic static void main(String [])"
assert(format_for_telegram(test)[0] == formatted_body)
test = ">```java\n>why are you quoting a code block\n>```"
formatted_body = ">why are you quoting a code block"
assert(format_for_telegram(test)[0] == formatted_body)
test = ">>```\n>>double quote code block\n>single quote not in code block\nnormal text"
formatted_body = ">>double quote code block\n>single quote not in code block\nnormal text"
assert(format_for_telegram(test)[0] == formatted_body)
test = ">```\n>please stop trying to break my parser ;-;"
formatted_body = ">please stop trying to break my parser ;-;"
assert(format_for_telegram(test)[0] == formatted_body)
test = ">>```\n>>>>double quote code block\n>single quote not in code block\nnormal text"
formatted_body = ">>>>double quote code block\n>single quote not in code block\nnormal text"
assert(format_for_telegram(test)[0] == formatted_body)
test = "_```_ignored\ninvalid code block\n```"
formatted_body = "```ignored\ninvalid code block\n```"
assert(format_for_telegram(test)[0] == formatted_body)
# Checks the text returned by format_for_telegram for backslash escapes: the escaping
# backslash is expected to be removed and the escaped character kept as plain text.
# Only the text (element 0 of the result) is compared.
def test_escaped():
test = "\\_no underline_"
formatted_body = "_no underline_"
assert(format_for_telegram(test)[0] == formatted_body)
test = "\\\\_no underline_"
formatted_body = "\\_no underline_"
assert(format_for_telegram(test)[0] == formatted_body)
test = ">>>tripple\n\\>none\n>>double"
formatted_body = ">>>tripple\n>none\n>>double"
assert(format_for_telegram(test)[0] == formatted_body)
# Checks the text returned by format_for_telegram for nested markers: markers inside a
# code span are expected to stay, markers around a code span are expected to be removed,
# and crossed markers ("*_bold*_") only lose the pair that closes properly.
# Only the text (element 0 of the result) is compared.
def test_nested():
test = "`*~_code span_~*`"
formatted_body = "*~_code span_~*"
assert(format_for_telegram(test)[0] == formatted_body)
test = "*_~`code span`~_*"
formatted_body = "code span"
assert(format_for_telegram(test)[0] == formatted_body)
test = ">*_~`code span`~_*"
formatted_body = ">code span"
assert(format_for_telegram(test)[0] == formatted_body)
test = "*bold star >*< star bold*"
formatted_body = "bold star >*< star bold"
assert(format_for_telegram(test)[0] == formatted_body)
test = "*_bold*_"
formatted_body = "_bold_"
assert(format_for_telegram(test)[0] == formatted_body)
test = "__underlined__"
formatted_body = "underlined"
assert(format_for_telegram(test)[0] == formatted_body)
# Checks that format_for_telegram returns the text unchanged when marker characters do
# not form a complete style: every expected string here equals its input.
# Only the text (element 0 of the result) is compared.
def test_no_changes():
test = ""
formatted_body = ""
assert(format_for_telegram(test)[0] == formatted_body)
test = "~~ empty `````` styles **"
formatted_body = "~~ empty `````` styles **"
assert(format_for_telegram(test)[0] == formatted_body)
test = "this is not an empty string"
formatted_body = "this is not an empty string"
assert(format_for_telegram(test)[0] == formatted_body)
test = "arrow ->"
formatted_body = "arrow ->"
assert(format_for_telegram(test)[0] == formatted_body)
test = " > no quote"
formatted_body = " > no quote"
assert(format_for_telegram(test)[0] == formatted_body)
test = "_not underlined"
formatted_body = "_not underlined"
assert(format_for_telegram(test)[0] == formatted_body)
test = "|not a spoiler|"
formatted_body = "|not a spoiler|"
assert(format_for_telegram(test)[0] == formatted_body)
test = "||\nalso\nnot\na\nspoiler||"
formatted_body = "||\nalso\nnot\na\nspoiler||"
assert(format_for_telegram(test)[0] == formatted_body)
test = "`no code\nblock here`"
formatted_body = "`no code\nblock here`"
assert(format_for_telegram(test)[0] == formatted_body)
test = "invalid ```\ncode block\n```"
formatted_body = "invalid ```\ncode block\n```"
assert(format_for_telegram(test)[0] == formatted_body)
test = "```\ncode block\ninvalid```"
formatted_body = "```\ncode block\ninvalid```"
assert(format_for_telegram(test)[0] == formatted_body)
test = "```\ncode block\n```invalid"
formatted_body = "```\ncode block\n```invalid"
assert(format_for_telegram(test)[0] == formatted_body)
# Checks the text returned by format_for_telegram for a mix of inputs: a lone newline,
# styles at the end and in the middle of a line, one long message combining several
# styles and quotes, and quoted code blocks with unusual endings.
# Only the text (element 0 of the result) is compared.
def test_assorted():
test = "\n"
formatted_body = "\n"
assert(format_for_telegram(test)[0] == formatted_body)
test = "at the ||end||"
formatted_body = "at the end"
assert(format_for_telegram(test)[0] == formatted_body)
test = "in the ~middle~ here"
formatted_body = "in the middle here"
assert(format_for_telegram(test)[0] == formatted_body)
test = "_underline_ *bold* ~strikethrough~ >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||~_*```four```*_~||"
formatted_body = "underline bold strikethrough >not quote spoiler\n>quote\nnothing\nnothing\n>>>>another quote with ```four```"
assert(format_for_telegram(test)[0] == formatted_body)
test = ">```\n>do be do be dooo ba do be do be do ba\n>>>"
formatted_body = ">do be do be dooo ba do be do be do ba\n>>"
assert(format_for_telegram(test)[0] == formatted_body)
test = "\n\n>```\n>do be do be dooo ba do be do be do ba\na\n\n\naoeu\n"
formatted_body = "\n\n>do be do be dooo ba do be do be do ba\na\n\n\naoeu\n"
assert(format_for_telegram(test)[0] == formatted_body)
test = ">```\n>do be do be dooo ba do be do be do ba\n>\n>\n>aoeu"
formatted_body = ">do be do be dooo ba do be do be do ba\n\n\naoeu"
assert(format_for_telegram(test)[0] == formatted_body)
test = ">```\n>code block\n>```invalid end\n"
formatted_body = ">code block\n```invalid end\n"
assert(format_for_telegram(test)[0] == formatted_body)
test = "invalid ```\ncode block\n*bold*\n```"
formatted_body = "invalid ```\ncode block\nbold\n```"
assert(format_for_telegram(test)[0] == formatted_body)
def test_weird_utf8():
test = "β€οΈππππ ||πππππ€|| πππβ£οΈ"
formatted_body = "β€οΈππππ πππππ€ πππβ£οΈ"
assert(format_for_telegram(test)[0] == formatted_body)
test = "π¨βπ©βπ§βπ§ _underline_π©βπ©βπ¦βπ§"
formatted_body = "π¨βπ©βπ§βπ§ underlineπ©βπ©βπ¦βπ§"
assert(format_for_telegram(test)[0] == formatted_body)
test = "\u202eRight to left"
formatted_body = "\u202eRight to left"
assert(format_for_telegram(test)[0] == formatted_body)
test = ">\u202eRight to left quote?"
formatted_body = ">\u202eRight to left quote?"
assert(format_for_telegram(test)[0] == formatted_body)
test = "_Invisible\u200bseparator_"
formatted_body = "Invisible\u200bseparator"
assert(format_for_telegram(test)[0] == formatted_body)
test = "~\u200b~"
formatted_body = "\u200b"
assert(format_for_telegram(test)[0] == formatted_body)