#!/usr/bin/python
# Repair text files whose UTF-8 bytes were mis-decoded with a legacy codec
# (mojibake), optionally stripping diacritics from the repaired text.
#
# usage: utf8fixer.py PATH [codec] [normalize]
#   PATH       a file, or a directory whose direct child files are converted
#   codec      codec the text was wrongly decoded with (default: iso8859_2)
#   normalize  literal word; additionally strip combining marks
from __future__ import print_function

import unicodedata
from codecs import open as copen
from os import listdir, path
from sys import argv

PATH = argv[1] if len(argv) > 1 else ""
NORMALIZE = False
ENCODING = None
DEFAULT_ENCODING = "iso8859_2"  # iso8859_2 a.k.a latin2

# Every argument after PATH is either the literal word "normalize"
# (case-insensitive) or a codec name; the last codec name given wins.
for arg in argv[2:]:
    if arg.lower() == "normalize":
        NORMALIZE = True
    else:
        ENCODING = arg


def convert_file(file_path):
    """Rewrite *file_path* in place with its encoding repaired.

    The file is read as UTF-8 (undecodable bytes are dropped), passed through
    ``fix_encoding`` using the module-level ``ENCODING`` and ``NORMALIZE``
    settings, and written back as UTF-8.

    Args:
        file_path: Path of the file to convert.

    Raises:
        IOError/OSError: If the file cannot be read or written.
    """
    # codecs.open is kept (rather than the builtin open) because it works on
    # both Python 2 and 3 and performs no newline translation.
    with copen(file_path, "r", "utf8", errors="ignore") as source:
        content = source.read()

    ccontent = fix_encoding(content, ENCODING, NORMALIZE, True)

    with copen(file_path, "w", "utf8") as target:
        target.write(ccontent)

    # Report only once the file has actually been rewritten.
    print("[*]", file_path, "fixed!")


def normalize_str(text):
    """Return *text* decomposed with NFKD and with nonspacing marks removed.

    Characters whose Unicode category is ``Mn`` (e.g. combining accents left
    over after decomposition) are dropped, so ``u"\\u0159"`` becomes ``"r"``.
    """
    return ''.join(
        c for c in unicodedata.normalize('NFKD', text)
        if unicodedata.category(c) != 'Mn'
    )


def fix_encoding(content, encoding=None, norm=False, verbose=False):
    """Undo a wrong decode by re-encoding with *encoding* and decoding as UTF-8.

    Args:
        content: Text that may contain mojibake.
        encoding: Codec the text was wrongly decoded with; falls back to
            ``DEFAULT_ENCODING`` when falsy.
        norm: When true, pass the result through ``normalize_str``.
        verbose: When true, print a message if the text cannot be repaired.

    Returns:
        The repaired text, or *content* unchanged when the round-trip fails
        (characters outside *encoding*, bytes that are not valid UTF-8, or an
        unknown codec name) -- normalized in either case if *norm* is set.
    """
    encoding = encoding or DEFAULT_ENCODING

    try:
        fixed = content.encode(encoding).decode("utf8")
    except (UnicodeError, LookupError):
        # UnicodeError: the text does not round-trip through the codecs.
        # LookupError: the codec name is unknown. Anything else (including
        # KeyboardInterrupt) is deliberately allowed to propagate.
        fixed = content
        if verbose:
            print("[*] error: can't fix the encoding. mixed encoding?")

    if norm:
        return normalize_str(fixed)
    return fixed


def main():
    """Convert PATH if it is a file, or every regular file directly inside it
    if it is a directory; otherwise print the usage message."""
    if path.isfile(PATH):
        convert_file(PATH)

    elif path.isdir(PATH):
        # Sorted so that processing order (and output) is deterministic.
        for ffile in sorted(listdir(PATH)):
            file_path = path.join(PATH, ffile)

            if path.isfile(file_path):
                convert_file(file_path)
    else:
        print(
            "[*] error: "
            "usage: %s FILE_OR_DIR_PATH [codec] [normalize]"
            %
            argv[0]
        )


if __name__ == "__main__":
    main()