Re: convert html table to Lyx

Angus Leeming Wed, 29 Sep 2004 14:44:16 -0700

Jeremy C. Reed wrote:

> I made a table in Lyx (cvs version).
> 
> It starts with:
> 
> \begin_inset Tabular
> <lyxtabular version="3" rows="9" columns="3">
> <features>
> <column alignment="center" valignment="top" leftline="true" width="0">
> <column alignment="center" valignment="top" leftline="true" width="0">
> <column alignment="center" valignment="top" leftline="true" rightline="true" width="0">
> <row topline="true" bottomline="true">
> <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
> \begin_inset Text
> 
> 
> My problem is that HTML tables (by default) do not know how many columns
> or rows they may have.
> 
> For example, I may have some:
> 
> <table>
> <tr>
> <td>
> data
> </td>
> <td>
> data
> </td>
> </tr>
> <tr>
> <td>
> data
> </td>
> <td>
> data
> </td>
> </tr>
> </table>
> 
> Is there any way in lyx to have tables without defining how many rows or
> columns ahead of time?


No.

I'd agree that LyX's table format sucks, but the data is there in your html
file. Why not just extract it?

Attached is a python script that does it for you.

Regards,
Angus

#! /usr/bin/env python

import re, string, sys

def usage(prog_name):
    """Print the command-line synopsis to stdout and return exit status 1.

    prog_name is the name the script was invoked as; it is interpolated
    into the usage line.
    """
    # A single parenthesised argument is valid both as a Python 2 print
    # statement and as a Python 3 print() call, and emits identical output
    # (the usage line followed by one blank line).
    print("Usage: %s <data file>\n" % prog_name)
    return 1

def error(message):
    """Report message on stderr, newline-terminated, and exit with status 1."""
    line = message + '\n'
    sys.stderr.write(line)
    # Raising SystemExit directly is what sys.exit(1) does under the hood.
    raise SystemExit(1)

def read_file(data_file):
    """Return the contents of data_file flattened onto a single line.

    Every line is stripped of leading and trailing whitespace and prefixed
    with one space, so a non-empty file yields a string that begins with a
    space and an empty file yields "".

    If the file cannot be opened or read, error() is called, which writes
    a message to stderr and terminates the program.
    """
    try:
        # The context manager closes the handle even if reading fails.
        with open(data_file, 'r') as handle:
            # join() produces the same " line1 line2 ..." layout as repeated
            # concatenation without the quadratic copying; str.strip()
            # replaces string.strip(), which no longer exists in Python 3.
            return ''.join([' ' + line.strip() for line in handle])
    except (IOError, OSError):
        # Only I/O failures are reported as "unable to open"; anything else
        # (e.g. KeyboardInterrupt) propagates instead of being masked.
        error("Unable to open " + data_file)


def delimit_table(data, startpos):
    """Locate the first <table ...> ... </table> span at or after startpos.

    Returns (start, end) such that data[start:end] runs from the opening
    "<table" through the closing "</table>" inclusive, or (None, None) when
    either tag cannot be found.

    Matching is a plain, case-sensitive substring search: the first
    "</table>" after the opening tag ends the span, so nested tables are
    not recognised.
    """
    closing = "</table>"
    # str.find replaces the deprecated string.find() module function,
    # which was removed in Python 3; the method works on Python 2 as well.
    table_start = data.find("<table", startpos)
    if table_start == -1:
        return None, None
    table_end = data.find(closing, table_start)
    if table_end == -1:
        return None, None
    return table_start, table_end + len(closing)


def delimit_row(data, startpos):
    """Locate the first <tr ...> ... </tr> span at or after startpos.

    Returns (start, end) such that data[start:end] runs from the opening
    "<tr" through the closing "</tr>" inclusive, or (None, None) when
    either tag cannot be found. Matching is a plain, case-sensitive
    substring search.
    """
    closing = "</tr>"
    # str.find replaces the deprecated string.find() module function,
    # which was removed in Python 3; the method works on Python 2 as well.
    row_start = data.find("<tr", startpos)
    if row_start == -1:
        return None, None
    row_end = data.find(closing, row_start)
    if row_end == -1:
        return None, None
    return row_start, row_end + len(closing)
    

def delimit_col(data, startpos):
    """Locate the first <td ...> ... </td> span at or after startpos.

    Returns (start, end) such that data[start:end] runs from the opening
    "<td" through the closing "</td>" inclusive, or (None, None) when
    either tag cannot be found. Matching is a plain, case-sensitive
    substring search for "<td", so header cells written as <th> are not
    matched.
    """
    closing = "</td>"
    # str.find replaces the deprecated string.find() module function,
    # which was removed in Python 3; the method works on Python 2 as well.
    col_start = data.find("<td", startpos)
    if col_start == -1:
        return None, None
    col_end = data.find(closing, col_start)
    if col_end == -1:
        return None, None
    return col_start, col_end + len(closing)
    

def extract_rows_cols(data, start, end):
    """Count the rows, and the cell count of the widest row, of one table.

    data is the flattened document text; start and end are the offsets of
    the table span as returned by delimit_table(). Rows are found with
    delimit_row() and cells with delimit_col(). Scanning stops at the first
    row whose closing tag lies beyond end, and cell counting for a row
    stops at the first cell whose closing tag lies beyond that row's end.

    Returns a (rows, cols) tuple, where cols is the largest per-row cell
    count seen.
    """
    row_count = 0
    widest = 0
    row_pos = start
    while True:
        row_pos, row_stop = delimit_row(data, row_pos)
        # No further row, or the row found belongs to text past this table.
        if row_pos is None or row_stop > end:
            break
        row_count += 1

        cells = 0
        cell_pos = row_pos
        while True:
            cell_pos, cell_stop = delimit_col(data, cell_pos)
            # No further cell, or the cell found lies outside this row.
            if cell_pos is None or cell_stop > row_stop:
                break
            cells += 1
            cell_pos = cell_stop

        if cells > widest:
            widest = cells

        # Resume the row search immediately after this row's closing tag.
        row_pos = row_stop
    return row_count, widest
   

def main(argv):
    """Print each HTML table found in the file argv[1], with its dimensions.

    argv is the full argument vector (program name first). For every
    <table>...</table> span located by delimit_table(), the table text is
    printed followed by a line giving its row and column counts as computed
    by extract_rows_cols().

    Returns the result of usage() (1) when the argument count is wrong,
    otherwise 0.
    """
    if len(argv) != 2:
        return usage(argv[0])

    data = read_file(argv[1])

    start = 0
    while True:
        start, end = delimit_table(data, start)
        if start is None:
            break
        rows, cols = extract_rows_cols(data, start, end)

        # Single-argument parenthesised print is valid on both Python 2 and
        # Python 3. The format string reproduces the spacing the original
        # comma-separated print statement emitted.
        print(data[start:end])
        print("Table has  %s  rows and  %s cols." % (rows, cols))

        # Continue searching just past the table that was reported.
        start = end

    return 0

if __name__ == "__main__":
    # Hand main()'s return value to sys.exit so that a usage error (status 1)
    # is visible to the calling shell; a None return still exits with 0.
    sys.exit(main(sys.argv))

Re: convert html table to Lyx

Reply via email to