diff options
Diffstat (limited to 'src/new/format.py')
-rw-r--r-- | src/new/format.py | 76 |
1 files changed, 76 insertions, 0 deletions
"""Text helpers: list formatting, URL-safe path parts and typographic cleanup."""

import io
import os
import pathlib
import re
import shutil
import subprocess
import sys
import unicodedata
import urllib.request

import PIL.Image
import requests

# Non-breaking ("insecable") space.  Spelled as an escape so it cannot be
# confused with, or silently converted to, an ordinary space.
_NBSP = "\u00a0"

# Opening/closing quotation marks per supported language.
_QUOTE_PAIRS = {
    "fr": ("«", "»"),
    "en": ("“", "”"),
}

# French spacing rules, compiled once at import time.
_SPACE_BEFORE_MARK = re.compile(r" ([:?!»])")
_MARK_WITHOUT_NBSP = re.compile(f"([^{_NBSP}])([:?!»])")
_OPENING_WITHOUT_NBSP = re.compile(f"«([^{_NBSP}])")


def format_list(xs):
    """Render *xs* as ``[ "a", "b" ]``, each item wrapped in double quotes.

    Items are interpolated with ``str()`` semantics and are not escaped.
    An empty iterable yields ``'[  ]'``.
    """
    items = ", ".join(f'"{x}"' for x in xs)
    return f"[ {items} ]"


def path_part(name: str) -> str:
    """Turn *name* into a lowercase, unaccented, dash-separated path part.

    Every character that is not alphanumeric becomes a word separator and
    runs of separators collapse into a single ``-``.
    """
    simplified = "".join(alnum_or_space(c) for c in unaccent(name.lower()))
    return "-".join(simplified.split())


def unaccent(s: str) -> str:
    """Return *s* with combining marks (Unicode category ``Mn``) removed.

    The string is decomposed with NFD first, so precomposed characters such
    as ``é`` lose their accent and become ``e``.
    """
    decomposed = unicodedata.normalize("NFD", s)
    return "".join(c for c in decomposed if unicodedata.category(c) != "Mn")


def alnum_or_space(c: str) -> str:
    """Return *c* if it is alphanumeric, otherwise a single space."""
    return c if c.isalnum() else " "


def extension(path) -> str:
    """Return the final suffix of *path* including the dot, or ``''``."""
    return pathlib.Path(path).suffix


def cleanup_text(s: str, lang: str) -> str:
    """Apply typographic cleanup to *s* for language *lang*.

    For every language: straight apostrophes become ``’`` and three dots
    (``...`` or ``. . .``) become ``…``; straight double quotes are paired
    by ``cleanup_quotes``.

    ``lang == 'fr'``: curly quotes become guillemets, and a non-breaking
    space is placed before ``: ? ! »`` and after ``«``.
    ``lang == 'en'``: guillemets become curly quotes.
    Any other *lang* gets only the language-independent cleanup.
    """
    # Fixed-string replacements: no regex needed (and no escaping pitfalls).
    s = s.replace("'", "’")
    s = s.replace("...", "…")
    s = s.replace(". . .", "…")
    s = cleanup_quotes(s, lang)

    if lang == "fr":
        s = s.replace("“", "«").replace("”", "»")

        # Turn an ordinary space next to these marks into a non-breaking one.
        s = _SPACE_BEFORE_MARK.sub(_NBSP + r"\1", s)
        s = s.replace("« ", "«" + _NBSP)

        # Insert the non-breaking space where there was no space at all.
        # NOTE(review): matches do not overlap, so in a run such as `!»`
        # only the first mark receives a space.
        s = _MARK_WITHOUT_NBSP.sub(r"\1" + _NBSP + r"\2", s)
        s = _OPENING_WITHOUT_NBSP.sub("«" + _NBSP + r"\1", s)

    elif lang == "en":
        s = s.replace("«", "“").replace("»", "”")

    return s


def cleanup_quotes(s: str, lang: str) -> str:
    """Replace straight double quotes in *s* by paired typographic quotes.

    Quotes alternate opening/closing in order of appearance.  ``'fr'`` uses
    ``« »`` and ``'en'`` uses ``“ ”``.  For any other *lang* the straight
    quote is kept as is rather than being dropped from the text.
    """
    opening, closing = _QUOTE_PAIRS.get(lang, ('"', '"'))
    parts = []
    quoted = False
    for c in s:
        if c == '"':
            parts.append(closing if quoted else opening)
            quoted = not quoted
        else:
            parts.append(c)
    return "".join(parts)