On Mon, Apr 29, 2024 at 1:17 PM Corey Huinker <corey.huin...@gmail.com> wrote: >> >> I've split it into 7 patches. >> Each patch splits one <sect1> into a separate new file. > > > Seems like a good start. Looking at the diffs of these, I wonder if we would > be better off with a func/ directory, where each function gets its own file in that > dir, and either these files above include the individual files, or the > original func.sgml just becomes the organizer of all the functions. That > would allow us to do future reorganizations with minimal churn, make > validation of this patch a bit more straightforward, and make it easier for > future editors to find the function they need to edit.
Looking back, the patch is big and there is no convenient way to review/validate it, so I created a Python script to automate it. We can review the Python script instead. (I was just googling around; I know little about Python.) * Create new files for holding the content: func-string.sgml func-matching.sgml func-datetime.sgml func-json.sgml func-aggregate.sgml func-info.sgml func-admin.sgml * Locate the parts that need to be copied to a newly created file, based on line number. The line number pattern is mentioned here: http://postgr.es/m/CACJufxEcMjjn-m6fpC2wXHsQbE5nyd%3Dxt6k-jDizBVUKK6O4KQ%40mail.gmail.com * Insert placeholder strings in func.sgml: &func-string; &func-matching; &func-datetime; &func-json; &func-aggregate; &func-info; &func-admin; * Copy the parts to the new files. * Validate each newly created file (it must have exactly 2 occurrences of "sect1"). * Delete the parts from func.sgml, since they have already been copied to the new files: sed --in-place "2408,4180d ; 5330,7760d ; 8942,11127d ; 15502,19436d ; 21567,22985d ; 24346,28017d ; 28020,30714d " func.sgml * Manually change doc/src/sgml/filelist.sgml: <!ENTITY func SYSTEM "func.sgml"> +<!ENTITY func-string SYSTEM "func-string.sgml"> +<!ENTITY func-matching SYSTEM "func-matching.sgml"> +<!ENTITY func-datetime SYSTEM "func-datetime.sgml"> +<!ENTITY func-json SYSTEM "func-json.sgml"> +<!ENTITY func-aggregate SYSTEM "func-aggregate.sgml"> +<!ENTITY func-info SYSTEM "func-info.sgml"> +<!ENTITY func-admin SYSTEM "func-admin.sgml"> There are 2 requirements: 1. Manually change doc/src/sgml/filelist.sgml as mentioned before; 2. In the Python script, at line 35, I use "os.chdir("/home/jian/Desktop/pg_src/src7/postgres/doc/src/sgml")"; you need to change it to your "doc/src/sgml" directory accordingly.
"""Split seven large <sect1> sections of func.sgml into separate SGML files.

The script performs these steps inside the doc/src/sgml directory:

1. Locate the begin/end line numbers of each section in func.sgml.
2. Insert an entity placeholder (for example ``&func-string;``) on the line
   just before each section.
3. Copy each section into its own file with ``sed -n``.
4. Validate that every new file contains exactly two "sect1" occurrences.
5. Delete the copied line ranges from func.sgml with ``sed --in-place``.

doc/src/sgml/filelist.sgml is not touched here and must be updated by hand.

Usage: run the script with an optional single argument naming the
doc/src/sgml directory; without an argument DEFAULT_SGML_DIR is used.
"""

import os
import re
import subprocess
import sys

# Entity references written into func.sgml in place of each moved section.
func_string_place_holder = "&func-string;\n"
func_matching_place_holder = "&func-matching;\n"
func_datetime_place_holder = "&func-datetime;\n"
func_json_place_holder = "&func-json;\n"
func_aggregate_place_holder = "&func-aggregate;\n"
func_info_place_holder = "&func-info;\n"
func_admin_place_holder = "&func-admin;\n"

# Files that receive the moved sections.
func_string = "func-string.sgml"
func_matching = "func-matching.sgml"
func_datetime = "func-datetime.sgml"
func_json = "func-json.sgml"
func_aggregate = "func-aggregate.sgml"
func_info = "func-info.sgml"
func_admin = "func-admin.sgml"

# 1-based line numbers of each section in the file most recently scanned by
# get_line_number(); -1 means "not found yet".
func_string_line_begin_lineno = -1
func_string_line_end_lineno = -1
func_matching_begin_lineno = -1
func_matching_end_lineno = -1
func_datetime_begin_lineno = -1
func_datetime_end_lineno = -1
func_json_begin_lineno = -1
func_json_end_lineno = -1
func_aggregate_begin = -1
func_aggregate_end = -1
func_info_begin_lineno = -1
func_info_end_lineno = -1
func_admin_begin_lineno = -1
func_admin_end_lineno = -1

# Directory used when no directory is given on the command line.
DEFAULT_SGML_DIR = "/home/jian/Desktop/pg_src/src7/postgres/doc/src/sgml"
target_file = "func.sgml"

NEW_FILES = (
    func_string,
    func_matching,
    func_datetime,
    func_json,
    func_aggregate,
    func_info,
    func_admin,
)

# "sed -n" extraction commands; filled in by main() once line numbers are known.
sed_extract_commands = []


def printall():
    """Print every recorded begin/end line number, one per line."""
    print(f'func_string_line_begin_lineno:{func_string_line_begin_lineno}')
    print(f'func_string_line_end_lineno:{func_string_line_end_lineno}')
    print(f'func_matching_begin_lineno:{func_matching_begin_lineno}')
    print(f'func_matching_end_lineno:{func_matching_end_lineno}')
    print(f'func_datetime_begin_lineno:{func_datetime_begin_lineno}')
    print(f'func_datetime_end_lineno:{func_datetime_end_lineno}')
    print(f'func_json_begin_lineno:{func_json_begin_lineno}')
    print(f'func_json_end_lineno:{func_json_end_lineno}')
    print(f'func_aggregate_begin:{func_aggregate_begin}')
    print(f'func_aggregate_end:{func_aggregate_end}')
    print(f'func_info_begin_lineno:{func_info_begin_lineno}')
    print(f'func_info_end_lineno:{func_info_end_lineno}')
    print(f'func_admin_begin_lineno:{func_admin_begin_lineno}')
    print(f'func_admin_end_lineno:{func_admin_end_lineno}')


def get_line_number(file_name: str):
    """Scan file_name and record section boundaries in the module globals.

    A section begins on its own ``<sect1 id=...>`` line and ends on the line
    before the ``<sect1>`` line of the section that follows it.  Markers that
    do not occur in file_name leave the corresponding globals unchanged.
    """
    global func_string_line_begin_lineno
    global func_string_line_end_lineno
    global func_matching_begin_lineno
    global func_matching_end_lineno
    global func_datetime_begin_lineno
    global func_datetime_end_lineno
    global func_json_begin_lineno
    global func_json_end_lineno
    global func_aggregate_begin
    global func_aggregate_end
    global func_info_begin_lineno
    global func_info_end_lineno
    global func_admin_begin_lineno
    global func_admin_end_lineno
    # Read-only scan: plain "r" avoids requiring write permission.
    with open(file_name, 'r', encoding='utf-8') as f:
        for i, line in enumerate(f, 1):
            if r'<sect1 id="functions-string">' in line:
                func_string_line_begin_lineno = i
            elif r'<sect1 id="functions-binarystring">' in line:
                func_string_line_end_lineno = i - 1
            elif r'<sect1 id="functions-matching">' in line:
                func_matching_begin_lineno = i
            elif r'<sect1 id="functions-formatting">' in line:
                func_matching_end_lineno = i - 1
            elif r'<sect1 id="functions-datetime">' in line:
                func_datetime_begin_lineno = i
            elif r'<sect1 id="functions-enum">' in line:
                func_datetime_end_lineno = i - 1
            elif r'<sect1 id="functions-json">' in line:
                func_json_begin_lineno = i
            elif r'<sect1 id="functions-sequence">' in line:
                func_json_end_lineno = i - 1
            elif r'<sect1 id="functions-aggregate">' in line:
                func_aggregate_begin = i
            elif r'<sect1 id="functions-window">' in line:
                func_aggregate_end = i - 1
            elif r'<sect1 id="functions-info">' in line:
                func_info_begin_lineno = i
            elif r'<sect1 id="functions-admin">' in line:
                # The admin section directly follows the info section, so
                # this one marker closes info and opens admin.
                func_admin_begin_lineno = i
                func_info_end_lineno = i - 1
            elif r'<sect1 id="functions-trigger">' in line:
                func_admin_end_lineno = i - 1


def precheck_line_info():
    """Exit with a non-zero status unless every begin and end line is known.

    Raises:
        SystemExit: if any recorded begin or end line number is negative.
    """
    boundaries = (
        func_string_line_begin_lineno,
        func_string_line_end_lineno,
        func_matching_begin_lineno,
        func_matching_end_lineno,
        func_datetime_begin_lineno,
        func_datetime_end_lineno,
        func_json_begin_lineno,
        func_json_end_lineno,
        func_aggregate_begin,
        func_aggregate_end,
        func_info_begin_lineno,
        func_info_end_lineno,
        func_admin_begin_lineno,
        func_admin_end_lineno,
    )
    if any(lineno < 0 for lineno in boundaries):
        # sys.exit(str) writes the message to stderr and exits with status 1,
        # so callers and shells can tell that the run failed.
        sys.exit("quitting: don't have related file "
                 "(a section begin/end line was not found)")


def precheck_line_begining_info():
    """Exit with a non-zero status unless every begin line is known.

    Raises:
        SystemExit: if any recorded begin line number is negative.
    """
    begins = (
        func_string_line_begin_lineno,
        func_matching_begin_lineno,
        func_datetime_begin_lineno,
        func_json_begin_lineno,
        func_aggregate_begin,
        func_info_begin_lineno,
        func_admin_begin_lineno,
    )
    if any(lineno < 0 for lineno in begins):
        sys.exit("quitting: don't have related file "
                 "(a section begin line was not found)")


def validate_new_file(file_name: str, pattern: str):
    """Check that file_name contains exactly two matches of regex pattern.

    Matches are counted line by line with re.findall.

    Raises:
        SystemExit: with a non-zero status if the count is not 2.
    """
    with open(file_name, 'r', encoding='utf-8') as f:
        match_count = sum(len(re.findall(pattern, line)) for line in f)
    if match_count != 2:
        sys.exit(f'{file_name} should only have 2 \"{pattern}\" attribute')


def get_line_number_speical(file_name: str):
    """Recompute func_info_end_lineno from the &func-admin; placeholder line.

    Once the placeholders are inserted, the line before the admin section is
    the &func-admin; placeholder itself, so the generic "next begin - 1" rule
    would pull it into the info section.  The info section is instead ended
    two lines above the placeholder.
    """
    global func_info_end_lineno
    with open(file_name, 'r', encoding='utf-8') as f:
        for i, line in enumerate(f, 1):
            if func_admin_place_holder in line:
                func_info_end_lineno = i - 2


def write_line(file_name: str, line: int, content: str):
    """Append content to the end of 1-based line `line` of file_name.

    Because content ends with a newline in this script, it shows up as a new
    line directly after line `line`.

    Raises:
        IndexError: if `line` is not within 1..number of lines in the file.
    """
    with open(file_name, "r", encoding="utf-8") as file:
        lines = file.readlines()
    # Reject 0 and negatives explicitly: Python would otherwise treat them
    # as indexes from the end of the list and silently edit the wrong line.
    if not 1 <= line <= len(lines):
        raise IndexError(
            f"line {line} is outside 1..{len(lines)} of {file_name}")
    lines[line - 1] += content
    with open(file_name, "w", encoding="utf-8") as file:
        file.writelines(lines)


def build_sed_extract_commands():
    """Return the "sed -n" commands copying each section to its new file."""
    ranges = (
        (func_string_line_begin_lineno, func_string_line_end_lineno, func_string),
        (func_matching_begin_lineno, func_matching_end_lineno, func_matching),
        (func_datetime_begin_lineno, func_datetime_end_lineno, func_datetime),
        (func_json_begin_lineno, func_json_end_lineno, func_json),
        (func_aggregate_begin, func_aggregate_end, func_aggregate),
        (func_info_begin_lineno, func_info_end_lineno, func_info),
        (func_admin_begin_lineno, func_admin_end_lineno, func_admin),
    )
    return [
        f'sed -n {begin},{end}p {target_file} > {new_file}'
        for begin, end, new_file in ranges
    ]


def build_sed_delete_command():
    """Return the "sed --in-place" command deleting all copied ranges."""
    ranges = (
        (func_string_line_begin_lineno, func_string_line_end_lineno),
        (func_matching_begin_lineno, func_matching_end_lineno),
        (func_datetime_begin_lineno, func_datetime_end_lineno),
        (func_json_begin_lineno, func_json_end_lineno),
        (func_aggregate_begin, func_aggregate_end),
        (func_info_begin_lineno, func_info_end_lineno),
        (func_admin_begin_lineno, func_admin_end_lineno),
    )
    deletions = ' ; '.join(f'{begin},{end}d' for begin, end in ranges)
    return 'sed --in-place "' + deletions + ' " ' + target_file


def print_sed_command():
    """Print the extraction commands prepared by main()."""
    for command in sed_extract_commands:
        print(command)


def _run_shell(command):
    """Run a shell command line and fail loudly if it exits non-zero."""
    subprocess.run(command, shell=True, check=True)


def main(argv=None):
    """Run the whole split procedure.

    Args:
        argv: command-line arguments without the program name; defaults to
            sys.argv[1:].  The first argument, if present, is the
            doc/src/sgml directory to work in.
    """
    args = sys.argv[1:] if argv is None else argv
    os.chdir(args[0] if args else DEFAULT_SGML_DIR)

    for new_file in NEW_FILES:
        subprocess.run(["touch", new_file], check=True)

    # step1: get the line number info and validate it.
    get_line_number(target_file)
    precheck_line_info()

    # step2: write the placeholder strings.  Every insertion pushes the later
    # sections down by one line, hence the growing offsets; each placeholder
    # ends up on the line directly before its section.
    write_line(target_file, func_string_line_begin_lineno - 1, func_string_place_holder)
    write_line(target_file, func_matching_begin_lineno, func_matching_place_holder)
    write_line(target_file, func_datetime_begin_lineno + 1, func_datetime_place_holder)
    write_line(target_file, func_json_begin_lineno + 2, func_json_place_holder)
    write_line(target_file, func_aggregate_begin + 3, func_aggregate_place_holder)
    write_line(target_file, func_info_begin_lineno + 4, func_info_place_holder)
    write_line(target_file, func_admin_begin_lineno + 5, func_admin_place_holder)

    # Re-evaluate the line numbers now that the file has changed.
    get_line_number(target_file)
    get_line_number_speical(target_file)
    precheck_line_info()

    # step3: construct the sed extraction commands and execute them.
    sed_extract_commands[:] = build_sed_extract_commands()
    print_sed_command()
    for command in sed_extract_commands:
        _run_shell(command)

    # step4: validate the content of the new files.
    for new_file in NEW_FILES:
        validate_new_file(new_file, "sect1")
    for new_file in NEW_FILES:
        get_line_number(new_file)
    precheck_line_begining_info()

    # step5: recompute the ranges in func.sgml and delete the copied parts.
    get_line_number(target_file)
    get_line_number_speical(target_file)
    precheck_line_info()
    printall()

    sed_in_place_delete = build_sed_delete_command()
    print(sed_in_place_delete)
    _run_shell(sed_in_place_delete)


if __name__ == "__main__":
    main()