Attached is a new CSV2lyx importer written mainly by Hartmut Haase.
It uses Python's built-in CSV reader, which has been available since Python 2.3. I added a method to
automatically detect the correct column separator.
There are only minor tweaks left to do, I think. I'm sending it so that the Python masters can have a look
at the basic design - I'm sure you will find some optimizations ;-)
Attached are some small test files made with OpenOffice, which allows you to specify the column
separator when creating CSV files.
regards Uwe
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# file csv2lyx.py
# This file is part of LyX, the document processor.
# Licence details can be found in the file COPYING.
# author Hartmut Haase
# author Uwe Stöhr
# Full author contact details are available in file CREDITS
# This script reads a csv-table (file name.csv) and converts it into
# a LyX-table for versions 1.5.0 and higher (LyX table format 276).
# It uses Python's csv module for parsing.
# The original csv2lyx was written by Antonio Gulino <[EMAIL PROTECTED]>
# in Perl for LyX 1.x and modified for LyX table format 276 by the author.
#
import csv, os, re, string, sys, unicodedata
def error(message):
    """Report a fatal error and terminate the script.

    Writes ``message`` followed by a newline to standard error and then
    exits the interpreter with status 1.
    """
    text = message + '\n'
    sys.stderr.write(text)
    # sys.exit(1) does exactly this: it raises SystemExit with code 1.
    raise SystemExit(1)
# processing command line options
# Show the usage text when the script is called without arguments or with
# --help as first argument, then stop with exit status 0.
show_usage = len(sys.argv) == 1 or sys.argv[1] == '--help'
if show_usage:
    # The text plus one trailing newline, exactly what a print statement
    # would send to standard output.
    sys.stdout.write('''Usage:
csv2lyx [options] mycsvfile mytmptable.lyx
This script creates a LyX document containing a table
from a comma-separated-value file. The LyX file has format 276
and can be opened with LyX 1.5.0 and newer.
Options:
-e 'character' Excel type, default is 'n'
= 'e': Excel-generated CSV file
= 't': Excel-generated TAB-delimited CSV file
-s 'character' column separator, default is ','
--help usage instructions
Remarks:
If your .csv file contains special characters (e. g. umlauts,
accented letters, etc.) make sure it is coded in UTF-8 (unicode).
Else LyX will loose some cell contents.
If your .csv file was not written according to the
"Common Format and MIME Type for Comma-Separated Values (CSV) Files"
(http://tools.ietf.org/html/rfc4180)
there may be unexpected results.''' + '\n')
    sys.exit(0)
# Defaults that apply when the corresponding option is not given.
excel = 'n'
infile = ""
# the default column separator for CSV is of course the comma
column_sep = ','
dia_excel = 'none'

# Expected command line:  [-s SEP] [-e TYPE] infile outfile
# i.e. zero or more "option value" pairs followed by exactly two file
# names.  A valid argument list therefore has an even length of at least 2
# (not counting the script name).
args = sys.argv[1:]
if len(args) < 2 or len(args) % 2 != 0:
    # Previously such calls fell through with an empty infile and an
    # unassigned outfile, producing the misleading 'File "" not found.'.
    error('Wrong number of arguments. Run "csv2lyx --help" for usage.')

infile = args[-2]
outfile = args[-1]

# Walk the option/value pairs in order; a later occurrence of the same
# option overrides an earlier one.
options = args[:-2]
for pos in range(0, len(options), 2):
    flag = options[pos]
    value = options[pos + 1]
    if flag == '-s':
        column_sep = value
    elif flag == '-e':
        excel = value
    else:
        # Unknown options are still ignored, but no longer silently.
        sys.stderr.write('Ignoring unknown option "%s".\n' % flag)

if not os.path.exists(infile):
    error('File "%s" not found.' % infile)
# look for dialects
# Translate the value of the -e option into the name of a csv dialect:
# 'e' selects 'excel', 't' selects 'excel-tab'; for any other value
# dia_excel keeps the value it already has.
dia_excel = {'e': 'excel', 't': 'excel-tab'}.get(excel, dia_excel)
# When the column separator is still the default comma, try to detect the
# real one:
# 1. open the file as standard text file
# 2. for every candidate delimiter count the lines that contain it
# 3. a candidate that occurs in every line becomes the delimiter
# The comma itself is not counted; it simply remains the fallback.
# NOTE(review): the byte-wise search assumes an ASCII-compatible encoding
# (e.g. UTF-8 or Latin-1) -- confirm no UTF-16 input is expected.
# NOTE(review): an explicit "-s ," cannot be told apart from the default
# here, so detection runs in that case too.
if column_sep == ",":
    # Candidates in increasing priority: when several of them occur in
    # every line, the one listed last wins (tab < colon < semicolon < space).
    candidates = ["\t", ":", ";", " "]
    hits = {}
    for sep in candidates:
        hits[sep] = 0
    counter = 0  # number of text lines read

    textfile = open(infile)
    try:
        for line in textfile:
            counter += 1
            for sep in candidates:
                if sep in line:
                    hits[sep] += 1
    finally:
        # close the text file even if reading failed
        textfile.close()

    # set the delimiter according to this rule:
    # When a delimiter character appears in at least as many lines as the
    # file has, it is the delimiter character.
    # An empty file (counter == 0) would satisfy this for every candidate,
    # so it keeps the default comma.
    if counter > 0:
        for sep in candidates:
            if hits[sep] >= counter:
                column_sep = sep
# read input
# Parse the whole CSV file into ``rows`` (a list of lists of cell strings)
# and determine the table dimensions ``num_rows`` and ``num_cols``.
# The file is opened in binary mode as before.
csvfile = open(infile, "rb")
try:
    # NOTE(review): an explicit delimiter= overrides the delimiter of the
    # chosen dialect, so with "-e t" the file is only read as tab-delimited
    # when column_sep is a tab -- confirm this is intended.
    if dia_excel == 'none':
        reader = csv.reader(csvfile, delimiter=column_sep)
    else:
        reader = csv.reader(csvfile, dialect=dia_excel,
                            delimiter=column_sep)
    rows = []
    for row in reader:
        rows.append(row)
finally:
    # close the input even if parsing failed
    csvfile.close()

# Widest row determines the column count; an empty file still yields one
# column.
num_cols = 1
for row in rows:
    num_cols = max(num_cols, len(row))

# Count the parsed records.  reader.line_num counts physical lines
# instead, which is larger than len(rows) when a quoted field contains a
# line break and would make the writer index past the end of ``rows``.
num_rows = len(rows)
# create a LyX file
# Writes a complete LyX document (format 276) to ``outfile`` containing one
# table with ``num_rows`` rows and ``num_cols`` columns built from ``rows``.
fout = open(outfile, 'w')
try:
    #####################
    # write first part
    ####################
    # "\\lyxformat" is spelled with an escaped backslash; the text written
    # to the file is the same as with the former "\lyxformat".
    fout.write("""#csv2lyx created this file
\\lyxformat 276
\\begin_document
\\begin_header
\\textclass article
\\inputencoding auto
\\font_roman default
\\font_sans default
\\font_typewriter default
\\font_default_family default
\\font_sc false
\\font_osf false
\\font_sf_scale 100
\\font_tt_scale 100
\\graphics default
\\paperfontsize default
\\papersize default
\\use_geometry false
\\use_amsmath 1
\\use_esint 0
\\cite_engine basic
\\use_bibtopic false
\\paperorientation portrait
\\secnumdepth 3
\\tocdepth 3
\\paragraph_separation indent
\\defskip medskip
\\papercolumns 1
\\papersides 1
\\paperpagestyle default
\\tracking_changes false
\\output_changes false
\\end_header
\\begin_body
\\begin_layout Standard
\\align left
\\begin_inset Tabular
""")
    fout.write('<lyxtabular version="3" rows="' + str(num_rows)
               + '" columns="' + str(num_cols) + '">\n')
    fout.write('<features>\n')

    #####################
    # write table
    ####################
    # one column declaration per table column
    for i in range(num_cols):
        fout.write('<column alignment="left" valignment="top" width="0pt">\n')

    # text written before and after the contents of every cell
    cell_begin = """<cell alignment="left" valignment="top"
usebox="none">
\\begin_inset Text
\\begin_layout Standard\n"""
    cell_end = '\n\\end_layout\n\n\\end_inset\n</cell>\n'

    for j in range(num_rows):
        row = rows[j]
        fout.write('<row>\n')
        ############################
        # write contents of one line
        ############################
        for text in row:
            fout.write(cell_begin)
            # A raw backslash would start a LyX command; write it as the
            # \backslash token so it shows up as a literal character.
            fout.write(text.replace('\\', '\\backslash\n'))
            fout.write(cell_end)
        # If the row has fewer columns than num_cols, pad it with cells
        # that contain a single space.
        for i in range(len(row), num_cols):
            fout.write(cell_begin)
            fout.write(' ')
            fout.write(cell_end)
        fout.write('</row>\n')

    #####################
    # write last part
    ####################
    fout.write("""</lyxtabular>
\\end_inset
\\end_layout
\\end_body
\\end_document\n""")
finally:
    # close the LyX file even if writing failed
    fout.close()
"asda" "asdasd"
"asda" "asdasd"
3456 36
4.56 478
58 568
56,78 568
568 568
"asda","asdasd"
"asda","asdasd"
3456,36
4.56,478
58,568
56,78,568
568,568
"asda";"asdasd"
"asda";"asdasd"
3456;36
4.56;478
58;568
56,78;568
568;568
"asda":"asdasd"
"asda":"asdasd"
3456:36
4.56:478
58:568
56,78:568
568:568
"asda" "asdasd"
"asda" "asdasd"
3456 36
4.56 478
58 568
56,78 568
568 568