On Mon, Apr 29, 2024 at 1:17 PM Corey Huinker <corey.huin...@gmail.com> wrote:
>>
>> I've split it into 7 patches.
>> Each patch splits one <sect1> out into a separate new file.
>
>
> Seems like a good start. Looking at the diffs of these, I wonder if we would 
> be better off with a func/ directory, each function gets its own file in that 
> dir, and either these files above include the individual files, or the 
> original func.sgml just becomes the organizer of all the functions. That 
> would allow us to do future reorganizations with minimal churn, make 
> validation of this patch a bit more straightforward, and make it easier for 
> future editors to find the function they need to edit.

Looking back, the patch is big, and there is no convenient way to review or validate it.
So I created a Python script to automate the split;
we can review the Python script instead.
(I just googled around; I know little about Python.)

* create new files for holding the content.
func-string.sgml
func-matching.sgml
func-datetime.sgml
func-json.sgml
func-aggregate.sgml
func-info.sgml
func-admin.sgml

* locate the parts that need to be copied into the newly created files,
based on line numbers.
The line number pattern is mentioned here:
http://postgr.es/m/CACJufxEcMjjn-m6fpC2wXHsQbE5nyd%3Dxt6k-jDizBVUKK6O4KQ%40mail.gmail.com

* insert placeholder string in func.sgml:
&func-string;
&func-matching;
&func-datetime;
&func-json;
&func-aggregate;
&func-info;
&func-admin;

* copy the parts to new files.

* validate newly created file. (must only have 2 occurrences of "sect1").

* delete the parts from func.sgml, since they have already been copied to the new files.
sed --in-place "2408,4180d ; 5330,7760d ; 8942,11127d ; 15502,19436d ;
21567,22985d ; 24346,28017d ; 28020,30714d " func.sgml

* manually change doc/src/sgml/filelist.sgml
 <!ENTITY func       SYSTEM "func.sgml">
+<!ENTITY func-string       SYSTEM "func-string.sgml">
+<!ENTITY func-matching       SYSTEM "func-matching.sgml">
+<!ENTITY func-datetime       SYSTEM "func-datetime.sgml">
+<!ENTITY func-json       SYSTEM "func-json.sgml">
+<!ENTITY func-aggregate       SYSTEM "func-aggregate.sgml">
+<!ENTITY func-info       SYSTEM "func-info.sgml">
+<!ENTITY func-admin     SYSTEM "func-admin.sgml">



There are 2 requirements:
1. manually change doc/src/sgml/filelist.sgml as mentioned before;
2. in the Python script, at line 35, I use
"os.chdir("/home/jian/Desktop/pg_src/src7/postgres/doc/src/sgml")"
you need to change it to your own "doc/src/sgml" directory accordingly.
import subprocess
import os
import re
# Entity references written into func.sgml in place of each extracted section.
func_string_place_holder = "&func-string;\n"
func_matching_place_holder = "&func-matching;\n"
func_datetime_place_holder = "&func-datetime;\n"
func_json_place_holder = "&func-json;\n"
func_aggregate_place_holder = "&func-aggregate;\n"
func_info_place_holder = "&func-info;\n"
func_admin_place_holder = "&func-admin;\n"

# Names of the new files that receive the extracted sections.
func_string = "func-string.sgml"
func_matching = "func-matching.sgml"
func_datetime = "func-datetime.sgml"
func_json = "func-json.sgml"
func_aggregate = "func-aggregate.sgml"
func_info = "func-info.sgml"
func_admin = "func-admin.sgml"

# First/last line number of every section; -1 means "not located yet".
func_string_line_begin_lineno = func_string_line_end_lineno = -1
func_matching_begin_lineno = func_matching_end_lineno = -1
func_datetime_begin_lineno = func_datetime_end_lineno = -1
func_json_begin_lineno = func_json_end_lineno = -1
func_aggregate_begin = func_aggregate_end = -1
func_info_begin_lineno = func_info_end_lineno = -1
func_admin_begin_lineno = func_admin_end_lineno = -1

# Work inside the SGML documentation directory.  Set the PG_SGML_DIR
# environment variable to point at your own "doc/src/sgml"; without it the
# author's original checkout location is used.
_DEFAULT_SGML_DIR = "/home/jian/Desktop/pg_src/src7/postgres/doc/src/sgml"
os.chdir(os.environ.get("PG_SGML_DIR", _DEFAULT_SGML_DIR))
target_file = "func.sgml"

# Make sure every output file exists.  Append mode creates a missing file
# without truncating an existing one and does not depend on an external
# "touch" binary whose exit status would otherwise have to be checked.
for _new_file in (func_string, func_matching, func_datetime, func_json,
                  func_aggregate, func_info, func_admin):
    with open(_new_file, "a"):
        pass

def printall():
    """Print every tracked section boundary as ``name:value``, one per line."""
    report = (
        ("func_string_line_begin_lineno", func_string_line_begin_lineno),
        ("func_string_line_end_lineno", func_string_line_end_lineno),
        ("func_matching_begin_lineno", func_matching_begin_lineno),
        ("func_matching_end_lineno", func_matching_end_lineno),
        ("func_datetime_begin_lineno", func_datetime_begin_lineno),
        ("func_datetime_end_lineno", func_datetime_end_lineno),
        ("func_json_begin_lineno", func_json_begin_lineno),
        ("func_json_end_lineno", func_json_end_lineno),
        ("func_aggregate_begin", func_aggregate_begin),
        ("func_aggregate_end", func_aggregate_end),
        ("func_info_begin_lineno", func_info_begin_lineno),
        ("func_info_end_lineno", func_info_end_lineno),
        ("func_admin_begin_lineno", func_admin_begin_lineno),
        ("func_admin_end_lineno", func_admin_end_lineno),
    )
    for label, value in report:
        print(f"{label}:{value}")

def get_line_number(file_name: str):
    """Scan *file_name* and record section boundaries in the module globals.

    Every line is checked for the opening ``<sect1 id="...">`` tag of a
    section of interest.  A section's *begin* is the line holding its own
    opening tag; its *end* is the line just before the opening tag of the
    section that follows it.  The "functions-admin" tag does double duty: it
    starts the admin section and closes the info section.

    Only the globals whose marker is found are assigned; the others keep
    their previous value.  If a marker occurs more than once, the last
    occurrence wins.

    Args:
        file_name: path of the SGML file to scan (opened read-only).
    """
    # (marker text, ((global to set, offset added to the line number), ...))
    markers = (
        ('<sect1 id="functions-string">',
         (("func_string_line_begin_lineno", 0),)),
        ('<sect1 id="functions-binarystring">',
         (("func_string_line_end_lineno", -1),)),
        ('<sect1 id="functions-matching">',
         (("func_matching_begin_lineno", 0),)),
        ('<sect1 id="functions-formatting">',
         (("func_matching_end_lineno", -1),)),
        ('<sect1 id="functions-datetime">',
         (("func_datetime_begin_lineno", 0),)),
        ('<sect1 id="functions-enum">',
         (("func_datetime_end_lineno", -1),)),
        ('<sect1 id="functions-json">',
         (("func_json_begin_lineno", 0),)),
        ('<sect1 id="functions-sequence">',
         (("func_json_end_lineno", -1),)),
        ('<sect1 id="functions-aggregate">',
         (("func_aggregate_begin", 0),)),
        ('<sect1 id="functions-window">',
         (("func_aggregate_end", -1),)),
        ('<sect1 id="functions-info">',
         (("func_info_begin_lineno", 0),)),
        ('<sect1 id="functions-admin">',
         (("func_admin_begin_lineno", 0), ("func_info_end_lineno", -1))),
        ('<sect1 id="functions-trigger">',
         (("func_admin_end_lineno", -1),)),
    )
    module_vars = globals()
    # Read-only mode: the scan never writes, so it must also work on files
    # the user cannot modify.
    with open(file_name, 'r') as f:
        for lineno, line in enumerate(f, 1):
            for marker, targets in markers:
                if marker in line:
                    for name, offset in targets:
                        module_vars[name] = lineno + offset
                    # At most one marker is honoured per line.
                    break

# Line numbers drive every later step, so check all begin and end values.
def precheck_line_info():
    """Abort the script unless every section begin/end line number is set.

    A negative value means the corresponding marker has not been located
    (the globals start out as -1).

    Raises:
        SystemExit: with the message "don't have related file" when any of
            the fourteen line numbers is negative.
    """
    line_numbers = (
        func_string_line_begin_lineno, func_string_line_end_lineno,
        func_matching_begin_lineno, func_matching_end_lineno,
        func_datetime_begin_lineno, func_datetime_end_lineno,
        func_json_begin_lineno, func_json_end_lineno,
        func_aggregate_begin, func_aggregate_end,
        func_info_begin_lineno, func_info_end_lineno,
        func_admin_begin_lineno, func_admin_end_lineno,
    )
    if any(number < 0 for number in line_numbers):
        print("quiting")
        # A string argument makes the interpreter print the message to
        # stderr and exit with status 1, so the failure is visible to the
        # user and to calling shells.
        raise SystemExit("don't have related file")

# Line numbers drive every later step; this variant checks only the begins.
def precheck_line_begining_info():
    """Abort the script unless every section *begin* line number is set.

    A negative value means the corresponding opening marker has not been
    located (the globals start out as -1).  End line numbers are not checked.

    Raises:
        SystemExit: with the message "don't have related file" when any of
            the seven begin line numbers is negative.
    """
    begin_line_numbers = (
        func_string_line_begin_lineno,
        func_matching_begin_lineno,
        func_datetime_begin_lineno,
        func_json_begin_lineno,
        func_aggregate_begin,
        func_info_begin_lineno,
        func_admin_begin_lineno,
    )
    if any(number < 0 for number in begin_line_numbers):
        print("quiting")
        # A string argument makes the interpreter print the message to
        # stderr and exit with status 1, so the failure is visible to the
        # user and to calling shells.
        raise SystemExit("don't have related file")

# Validate a newly created file.
def validate_new_file(file_name: str, pattern: str):
    """Abort the script unless *file_name* holds exactly two *pattern* matches.

    Matches are counted line by line with ``re.findall``, so *pattern* is a
    regular expression and several matches on one line all count.

    Args:
        file_name: path of the file to check (opened read-only).
        pattern: regular expression to count.

    Raises:
        SystemExit: with status 1, after printing a message, when the number
            of matches is not exactly 2.
    """
    with open(file_name, 'r') as f:
        match_count = sum(len(re.findall(pattern, line)) for line in f)
    if match_count != 2:
        print(f'{file_name} should only have 2 \"{pattern}\" attribute')
        # Non-zero status so that a failed validation is not mistaken for
        # a successful run.
        raise SystemExit(1)

# The &func-info; and &func-admin; sections are adjacent, so the end of the
# info section needs special care.
def get_line_number_speical(file_name: str):
    """Recompute ``func_info_end_lineno`` from the admin placeholder line.

    Looks for lines containing ``func_admin_place_holder`` and sets the
    global ``func_info_end_lineno`` to two lines above such a line.  If the
    placeholder occurs more than once the last occurrence wins; if it never
    occurs the global is left unchanged.

    NOTE(review): the offset of 2 presumably skips one separator line that
    sits between the end of the info section and the placeholder - confirm
    against func.sgml.

    Args:
        file_name: path of the SGML file to scan (opened read-only).
    """
    global func_info_end_lineno
    # Read-only mode: nothing is written here.
    with open(file_name, 'r') as f:
        for lineno, line in enumerate(f, 1):
            if func_admin_place_holder in line:
                func_info_end_lineno = lineno - 2

# Insert content directly after line X (1-based).
def write_line(file_name:str, line:int, content:str):
    """Insert *content* immediately after line *line* of *file_name*.

    The text is concatenated onto the end of the given line, so when that
    line ends with a newline and *content* does too, *content* becomes a new
    line right below it.  The whole file is read and then rewritten.

    Args:
        file_name: path of the file to modify.
        line: 1-based number of the line to append to.
        content: text to add after that line.

    Raises:
        IndexError: if *line* is not within 1..number of lines.  Values
            below 1 are rejected explicitly because a zero or negative list
            index would silently modify a line counted from the end.
    """
    with open(file_name, "r") as file:
        lines = file.readlines()
    if not 1 <= line <= len(lines):
        raise IndexError(
            f"line {line} is outside 1..{len(lines)} in {file_name}")
    # Lists are 0-based while line numbers are 1-based.
    lines[line - 1] += content
    with open(file_name, "w") as file:
        file.writelines(lines)

#--------------------------step1. locate every section boundary and validate.
get_line_number(target_file)
precheck_line_info()


#--------------------------step2. write the placeholder strings into the file.
# write_line() appends after the line it is given, so "begin - 1" puts a
# placeholder directly in front of its <sect1> line.  Every placeholder
# already written pushes the remaining sections one line further down, hence
# the offset that grows by one per insertion.  This relies on the sections
# appearing in the file in the order listed here.
_placeholder_targets = (
    (func_string_line_begin_lineno, func_string_place_holder),
    (func_matching_begin_lineno, func_matching_place_holder),
    (func_datetime_begin_lineno, func_datetime_place_holder),
    (func_json_begin_lineno, func_json_place_holder),
    (func_aggregate_begin, func_aggregate_place_holder),
    (func_info_begin_lineno, func_info_place_holder),
    (func_admin_begin_lineno, func_admin_place_holder),
)
for _already_inserted, (_begin, _placeholder) in enumerate(_placeholder_targets):
    write_line(target_file, _begin - 1 + _already_inserted, _placeholder)

# The insertions moved every section, so evaluate the line numbers again.
get_line_number(target_file)
get_line_number_speical(target_file)
precheck_line_info()

#--------------------------step3 construct sed command and execute it
# "sed -n BEGIN,ENDp" prints only that line range; the shell redirection
# stores it in the section's new file.
_SED_EXTRACT = 'sed -n {0},{1}p {2} > {3}'
sed_command_string = _SED_EXTRACT.format(
    func_string_line_begin_lineno, func_string_line_end_lineno,
    target_file, func_string)
sed_command_matching = _SED_EXTRACT.format(
    func_matching_begin_lineno, func_matching_end_lineno,
    target_file, func_matching)
sed_command_datetime = _SED_EXTRACT.format(
    func_datetime_begin_lineno, func_datetime_end_lineno,
    target_file, func_datetime)
sed_command_json = _SED_EXTRACT.format(
    func_json_begin_lineno, func_json_end_lineno,
    target_file, func_json)
sed_command_aggregate = _SED_EXTRACT.format(
    func_aggregate_begin, func_aggregate_end,
    target_file, func_aggregate)
sed_command_func_info = _SED_EXTRACT.format(
    func_info_begin_lineno, func_info_end_lineno,
    target_file, func_info)
sed_command_func_admin = _SED_EXTRACT.format(
    func_admin_begin_lineno, func_admin_end_lineno,
    target_file, func_admin)
def print_sed_command():
    """Print the seven sed extraction commands, one per line."""
    commands = (
        sed_command_string,
        sed_command_matching,
        sed_command_datetime,
        sed_command_json,
        sed_command_aggregate,
        sed_command_func_info,
        sed_command_func_admin,
    )
    for command in commands:
        print(command)

print_sed_command()
# The commands rely on ">" redirection, so they have to go through the shell;
# with shell=True the command is passed as a plain string.  check=True stops
# the script at the first failing sed instead of carrying on with a missing
# or truncated section file.
for _command in (
    sed_command_string,
    sed_command_matching,
    sed_command_datetime,
    sed_command_json,
    sed_command_aggregate,
    sed_command_func_info,
    sed_command_func_admin,
):
    subprocess.run(_command, shell=True, check=True)

#--------------------------step4 validate new file's content.
_new_files = (func_string, func_matching, func_datetime, func_json,
              func_aggregate, func_info, func_admin)
# Each new file must contain "sect1" exactly twice (opening and closing tag).
for _new_file in _new_files:
    validate_new_file(_new_file, "sect1")

# Forget the begin positions found earlier in func.sgml.  get_line_number()
# only assigns a value when it sees a marker, so without this reset the old
# non-negative numbers would let the check below pass even if a new file
# lacked its opening <sect1> tag.
func_string_line_begin_lineno = -1
func_matching_begin_lineno = -1
func_datetime_begin_lineno = -1
func_json_begin_lineno = -1
func_aggregate_begin = -1
func_info_begin_lineno = -1
func_admin_begin_lineno = -1
for _new_file in _new_files:
    get_line_number(_new_file)
precheck_line_begining_info()

#--------------------------step5 delete the extracted sections from func.sgml
# The globals currently hold positions inside the new files, so scan
# func.sgml once more before building the delete command.
get_line_number(target_file)
get_line_number_speical(target_file)
precheck_line_info()
printall()

# All ranges go into a single sed invocation: sed addresses count input
# lines, so deleting an earlier range does not shift the later ones.
_delete_ranges = (
    (func_string_line_begin_lineno, func_string_line_end_lineno),
    (func_matching_begin_lineno, func_matching_end_lineno),
    (func_datetime_begin_lineno, func_datetime_end_lineno),
    (func_json_begin_lineno, func_json_end_lineno),
    (func_aggregate_begin, func_aggregate_end),
    (func_info_begin_lineno, func_info_end_lineno),
    (func_admin_begin_lineno, func_admin_end_lineno),
)
sed_in_place_delete = 'sed --in-place "{0} " {1}'.format(
    " ; ".join(f"{begin},{end}d" for begin, end in _delete_ranges),
    target_file,
)
print(sed_in_place_delete)
# check=True: a failed in-place edit must not pass silently, because
# func.sgml would then still contain the sections that were copied out.
subprocess.run(sed_in_place_delete, shell=True, check=True)

Reply via email to