Re: [PR] Migrate datafusion/sql tests to insta, part7 [datafusion]

via GitHub Thu, 10 Apr 2025 12:14:53 -0700


qstommyshu commented on PR #15621:
URL: https://github.com/apache/datafusion/pull/15621#issuecomment-2784515058


   > > I wrote a script to migrate the test
   > 
   > Maybe you could attach your script here? So we can use it in the future if 
we have a similar task again?
   
   Not sure if it will be helpful. I wrote this script specifically for 
migrating this one test case, as it has a very unusual structure. It would 
probably need some modification if we want to use it for other tests.
   
   Here is the script:
   
   ```Python
   import re
   
   
   def process_block(block_lines, test_index):
       """Turn one ``TestStatementWithDialect { ... },`` literal into a Rust test.

       Args:
           block_lines: The raw source lines of a single struct literal, from
               the opening ``TestStatementWithDialect {`` line through the
               closing ``},`` line.
           test_index: Number used as the suffix of the generated test name.

       Returns:
           The text of a ``#[test]`` function (no trailing newline) that calls
           ``roundtrip_statement_with_dialect_helper!`` with the ``sql``,
           ``parser_dialect``, ``unparser_dialect`` and ``expected`` values
           found in the block.
       """
       fields = {
           "sql": [],
           "expected": [],
           "parser_dialect": [],
           "unparser_dialect": [],
       }
       field_pattern = re.compile(
           r'^\s*(sql|expected|parser_dialect|unparser_dialect)\s*:\s*(.*)'
       )
       closing_brace = re.compile(r'^\s*\},\s*$')

       # Collect the raw text of each field. A line that does not start a new
       # field is a continuation of the field seen most recently.
       current_field = None
       for line in block_lines:
           m = field_pattern.match(line)
           if m:
               current_field = m.group(1)
               fields[current_field].append(m.group(2))
           elif current_field is not None:
               # The struct's closing "}," is not part of any field.
               if closing_brace.match(line):
                   continue
               fields[current_field].append(line.rstrip("\n"))

       # Collapse each field to one string and drop the trailing struct comma.
       values = {}
       for key, parts in fields.items():
           combined = "\n".join(parts).strip()
           if combined.endswith(','):
               combined = combined[:-1].rstrip()
           values[key] = combined

       # The sql field is emitted exactly as written.
       sql_value = values["sql"]

       # Split the expected field into its leading `//` comments and the
       # string literal that follows them.
       expected_comments = []
       expected_string_lines = []
       in_string = False
       for line in values["expected"].splitlines():
           stripped = line.strip()
           if not in_string and stripped.startswith("//"):
               # Use the stripped text so every comment is re-emitted at the
               # same indentation, whatever it had in the source.
               expected_comments.append(stripped)
           else:
               # Everything from the first non-comment line on is the string.
               in_string = True
               expected_string_lines.append(line.rstrip())
       expected_string = "\n".join(expected_string_lines).strip()
       if expected_string.endswith(','):
           expected_string = expected_string[:-1].rstrip()

       def process_dialect(value):
           """Return the argument of ``Box::new(...)``, or *value* unchanged."""
           value = value.strip()
           m = re.match(r'Box::new\((.*)\)', value)
           return m.group(1).strip() if m else value

       parser_dialect_value = process_dialect(values["parser_dialect"])
       unparser_dialect_value = process_dialect(values["unparser_dialect"])

       # Assemble the new test function.
       new_test = [
           "#[test]",
           f"fn roundtrip_statement_with_dialect_{test_index}() "
           "-> Result<(), DataFusionError> {",
           "    roundtrip_statement_with_dialect_helper!(",
           f"        sql: {sql_value},",
           f"        parser_dialect: {parser_dialect_value},",
           f"        unparser_dialect: {unparser_dialect_value},",
       ]
       # Put the comments back directly above the expected value.
       new_test.extend(f"        {comment}" for comment in expected_comments)
       new_test.extend([
           f"        expected: @{expected_string},",
           "    );",
           "    Ok(())",
           "}",
       ])

       return "\n".join(new_test)
   
   
   def transform_file(filename):
       """Rewrite *filename* in place, converting each struct literal to a test.

       A block starts at a line containing ``TestStatementWithDialect {`` and
       ends at the next line that consists only of ``},``. Each complete block
       is replaced by the output of ``process_block``; blocks are numbered
       from 1 in file order. All other lines are copied through unchanged.

       Args:
           filename: Path of the UTF-8 text file to read and then overwrite.
       """
       with open(filename, "r", encoding="utf-8") as f:
           lines = f.readlines()

       block_end = re.compile(r'^\s*\},\s*$')

       output_lines = []
       block_lines = []
       in_block = False
       test_index = 1

       for line in lines:
           if not in_block:
               if "TestStatementWithDialect {" in line:
                   # Start buffering a new block.
                   in_block = True
                   block_lines = [line]
               else:
                   output_lines.append(line)
               continue

           block_lines.append(line)
           if block_end.match(line):
               transformed_block = process_block(block_lines, test_index)
               test_index += 1
               output_lines.append(transformed_block + "\n")
               in_block = False
               block_lines = []

       # A block that never reached its closing "}," cannot be converted.
       # Write it back untouched: the file is overwritten below, so dropping
       # the buffered lines would silently lose them.
       if in_block:
           output_lines.extend(block_lines)

       # Write back to the original file.
       with open(filename, "w", encoding="utf-8") as f:
           f.write("".join(output_lines))
   
   
   if __name__ == "__main__":
       import sys

       # Rewrite the file named on the command line; with no argument, fall
       # back to the original hard-coded "macro.txt".
       transform_file(sys.argv[1] if len(sys.argv) > 1 else "macro.txt")
   
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Re: [PR] Migrate datafusion/sql tests to insta, part7 [datafusion]

Reply via email to