I have thousands of HTML files inside a folder. I want to update the file names that are referenced inside other files. For example, if the file name 'abcd1234.html' is found inside another file, say file2.html, then I want to remove the last 4 digits of the file name and shorten the extension, i.e. 'abcd1234.html' => 'abcd.htm'.
I have tried the script below, and your suggestions on it are welcome. Regards, Kunal
"""Shorten references to numbered HTML files inside a tree of HTML files.

Every collected file name whose stem ends in a numeric-looking suffix
(for example ``abcd1234.html``) is looked up in the text of every collected
file; each reference is rewritten to the shortened name (``abcd.htm``).
Only the *references* are rewritten; the files on disk are not renamed.
"""
import os
import re
from typing import Optional

# Root folder scanned when script_to_create_folder() gets no argument.
DEFAULT_ROOT = r'D:\macrocodesrequired\Testing_Script\Real_testing_\New folder\brpt'

# Folders skipped entirely (substring match on the path *below* the root).
EXCLUDED_DIR_WORDS = (
    'About', 'Community', 'support', 'home', 'Products', 'service',
    'solutions', 'training', 'wheretobuy',
)

# Files whose name contains one of these words are neither searched for
# nor rewritten.
EXCLUDED_NAME_WORDS = (
    'default', 'index', 'category', 'Category', 'Default', 'Index',
    'home', 'support',
)

# Suffix shapes tried in order against the stem; the first match wins.
# Raw strings keep the regex escapes away from Python's own escape handling.
SUFFIX_PATTERNS = (
    r'\d+$', r'\d+\w$', r'\d+-\d$', r'\w\d+$', r'\d\w\d\w', r'\w\d\w\d',
)


def _stem(file_name: str) -> str:
    """Return the part of *file_name* before its first dot."""
    return file_name.split('.')[0]


def _suffix_length(file_name: str, pattern: str) -> Optional[int]:
    """Return minus the length of the *pattern* match in the stem, or None."""
    match = re.search(pattern, _stem(file_name))
    if match is None:
        return None
    return -len(match.group())


def _renamed(filename: str, last_digits_to_replace: Optional[int]) -> Optional[str]:
    """Return the shortened name for *filename*, or None if nothing changes."""
    if not last_digits_to_replace:
        return None
    stem, dot, rest = filename.partition('.')
    new_stem = stem[:last_digits_to_replace]
    if not new_stem:
        # Stripping would leave a nameless file such as '.htm'; leave it alone.
        return None
    new_name = new_stem + dot + rest
    if new_name.endswith('.html'):
        new_name = new_name[:-1]  # '.html' -> '.htm'
    return new_name


def _rewrite_references(data: str, file_name: str) -> str:
    """Return *data* with every reference to *file_name* shortened."""
    if file_name not in data:
        return data
    measure = filename_with_hypen if '-' in file_name else filename_without_hypen
    for pattern in SUFFIX_PATTERNS:
        length = measure(file_name, pattern)
        if length:
            return replace_oldstring_newstring(data, file_name, length)
    return data


def script_to_create_folder(path: str = DEFAULT_ROOT) -> int:
    """Rewrite file-name references in every eligible file under *path*.

    Despite its name this function creates no folder: it walks *path*,
    collects the eligible files, and shortens references to any collected
    file name found inside any collected file.

    Args:
        path: Root folder to walk. Defaults to ``DEFAULT_ROOT``.

    Returns:
        The number of files that were rewritten.
    """
    path_list = []
    filename_list = []
    seen_names = set()
    for root, _dirs, names in os.walk(path):
        # Test only the part below the root so that words occurring in the
        # base path itself cannot exclude the whole tree.
        relative_root = os.path.relpath(root, path)
        if any(word in relative_root for word in EXCLUDED_DIR_WORDS):
            continue
        for name in names:
            if any(word in name for word in EXCLUDED_NAME_WORDS):
                continue
            path_list.append(os.path.join(root, name))
            if name not in seen_names:
                seen_names.add(name)
                filename_list.append(name)

    changed = 0
    for file_path in path_list:
        # Read each file once and apply every rename in memory instead of
        # reopening the file for every candidate name.
        with open(file_path, 'r', encoding='utf-8', newline='') as file_handle:
            original = file_handle.read()
        updated = original
        for name in filename_list:
            updated = _rewrite_references(updated, name)
        if updated != original:
            print(file_path)
            with open(file_path, 'w', encoding='utf-8', newline='') as file_out:
                file_out.write(updated)
            changed += 1
    return changed


def find_filename_inside_files(file_name: str, dir_path: str) -> bool:
    """Shorten references to *file_name* inside the file at *dir_path*.

    Args:
        file_name: File name to look for, e.g. ``'abcd1234.html'``.
        dir_path: Path of the file to scan (a file, despite the name).

    Returns:
        True if the file was rewritten, False if it was left untouched.
    """
    # newline='' keeps the file's own line endings intact on both passes.
    with open(dir_path, 'r', encoding='utf-8', newline='') as file_handle:
        data = file_handle.read()
    updated = _rewrite_references(data, file_name)
    if updated == data:
        return False
    print(file_name)
    print(dir_path)
    # The read handle is closed before the same file is opened for writing.
    with open(dir_path, 'w', encoding='utf-8', newline='') as file_out:
        file_out.write(updated)
    return True


def filename_without_hypen(file_name: str, pattern: str) -> Optional[int]:
    """Return minus the length of the *pattern* match in the stem.

    Returns None when the pattern does not match or matches nothing.
    """
    return _suffix_length(file_name, pattern) or None


def filename_with_hypen(file_name: str, pattern: str) -> Optional[int]:
    """Return minus the length of the *pattern* match in the stem.

    Returns None when the pattern does not match.
    """
    return _suffix_length(file_name, pattern)


def replace_oldstring_newstring(data: str, filename: str,
                                last_digits_to_replace: Optional[int]) -> str:
    """Return *data* with each occurrence of *filename* shortened.

    The last ``abs(last_digits_to_replace)`` characters of the stem are
    dropped and a trailing ``.html`` becomes ``.htm``. Only occurrences of
    *filename* itself are touched; other digits and other ``.html`` links
    in *data* stay as they are.

    NOTE(review): the cut is taken from the end of the stem even when the
    pattern that produced the length matched elsewhere in the stem (two of
    the patterns are not anchored with ``$``) - confirm this is intended.

    Args:
        data: Text to rewrite.
        filename: File name to replace, e.g. ``'abcd1234.html'``.
        last_digits_to_replace: Negative suffix length such as ``-4``.

    Returns:
        The rewritten text, or *data* unchanged when *filename* does not
        occur in it or there is nothing to strip.
    """
    new_name = _renamed(filename, last_digits_to_replace)
    if new_name is None or filename not in data:
        return data
    return data.replace(filename, new_name)


def main() -> None:
    """Run the rewrite over the default root folder."""
    script_to_create_folder()


if __name__ == '__main__':
    main()
-- https://mail.python.org/mailman/listinfo/python-list