Package: flex
Version: 2.5.35-2
Severity: normal

On ia64, flex appears to generate completely different (and broken) output depending on whether the input file is supplied via shell redirection or via a pipe.
For example, compare:
merulo% flex -t < input.txt | head
#line 3 "<stdout>"
#define YY_INT_ALIGNED short int
/* A lexical scanner generated by flex */
#define FLEX_SCANNER
#define YY_FLEX_MAJOR_VERSION 2
#define YY_FLEX_MINOR_VERSION 5
with:
merulo% cat input.txt | flex -t | head
{
bufferAppend(buffer, yytext);
yy_push_state(C24);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\object" {
bufferAppend(buffer, yytext);
yy_push_state(C25);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\docvar" {
bufferAppend(buffer, yytext);
This latter output is clearly broken.
This appears to be the cause of #500171, which is in turn blocking the
transition of a version of apertium fixing an RC bug, #496395. (In case it
isn't obvious, "cat" is being used as minimal testcase - see #500171 for the
full log.)
The offending input.txt is attached.
Regards,
--
,''`.
: :' : Chris Lamb
`. `'` [EMAIL PROTECTED]
`-
%{
#include <cstdlib>
#include <iostream>
#include <map>
#include <vector>
#include <regex.h>
#include <string>
#include <lttoolbox/lt_locale.h>
#include <lttoolbox/ltstr.h>
using namespace std;
// Wide text accumulated by bufferAppend(); printBuffer() writes it out and clears it.
wstring buffer;
// Bytes that could not yet be converted to a wide character; bufferAppend()
// and the catch-all rule keep the unconverted tail here between calls.
string symbuf = "";
// isDot: set by some rules and at end of input; printBuffer() emits ".[]" and clears it.
// hasWrite_dot / hasWrite_white: set to true whenever a rule writes a character
// to yyout; never read in the code visible here.
bool isDot, hasWrite_dot, hasWrite_white;
// Not referenced in the code visible here.
FILE *formatfile;
// Kind of the most recently handled text; the rules visible here only ever
// store "buffer", and one rule tests for "open_tag".
string last;
// Reset to 0 in main(); not otherwise used in the code visible here.
int current;
// Incremented once per wide character that a rule writes directly to yyout.
long int offset;
// Not referenced in the code visible here.
vector<long int> offsets;
// Only touched by the whitespace rule, when last == "open_tag".
vector<wstring> tags;
// Not referenced in the code visible here.
vector<int> orders;
// Compiled by init_escape(), matched by escape(string const &).
regex_t escape_chars;
// Compiled by init_tagNames(), matched by get_tagName().
regex_t names_regexp;
/**
 * Decode multibyte text and append the resulting wide characters to `buf`.
 *
 * The bytes of `str` are first appended to the global `symbuf`, so a
 * multibyte sequence that was cut off at the end of a previous call is
 * completed by the bytes arriving now.
 *
 * @param buf  wide string receiving the decoded characters
 * @param str  bytes in the current locale's multibyte encoding
 *
 * An undecodable byte that is followed by more than MB_CUR_MAX further bytes
 * is replaced by L'?'.  An undecodable tail of at most MB_CUR_MAX bytes is
 * kept in `symbuf` for the next call.
 */
void bufferAppend(wstring &buf, string const &str)
{
  symbuf.append(str);
  size_t const limit = symbuf.size();
  size_t i = 0;
  while(i < limit)
  {
    wchar_t symbol;
    int gap = mbtowc(&symbol, symbuf.c_str() + i, MB_CUR_MAX);
    if(gap == -1)
    {
      // The conversion state is unspecified after a failure; reset it so the
      // next call starts from the initial shift state.
      mbtowc(NULL, NULL, 0);
      if(i + MB_CUR_MAX < limit)
      {
        // Enough bytes follow that this cannot be a truncated sequence.
        buf += L'?';
        gap = 1;
      }
      else
      {
        // Possibly incomplete: keep the tail and wait for more input.
        symbuf = symbuf.substr(i);
        return;
      }
    }
    else
    {
      buf += symbol;
      if(gap == 0)
      {
        // mbtowc() reports an embedded NUL byte with 0; step over it
        // explicitly, otherwise the loop would never advance.
        gap = 1;
      }
    }
    i += gap;
  }
  symbuf = "";
}
/**
 * Compile the pattern matching a backslash or one of ] [ < > @ ^ $ / into
 * the global `escape_chars`.  Prints an error and terminates the process
 * with EXIT_FAILURE if regcomp() rejects the pattern.
 */
void init_escape()
{
  int const status = regcomp(&escape_chars, "\\\\|[][<>@^$/]", REG_EXTENDED);
  if(status == 0)
  {
    return;
  }
  cerr << "ERROR: Illegal regular expression for escape characters" << endl;
  exit(EXIT_FAILURE);
}
/**
 * Compile the tag-name pattern (an empty pattern in this file) into the
 * global `names_regexp`.  Prints an error and terminates the process with
 * EXIT_FAILURE if regcomp() rejects the pattern.
 */
void init_tagNames()
{
  int const status = regcomp(&names_regexp, "", REG_EXTENDED);
  if(status == 0)
  {
    return;
  }
  cerr << "ERROR: Illegal regular expression for tag-names" << endl;
  exit(EXIT_FAILURE);
}
/**
 * Return a copy of `str` in which every backslash is doubled.
 *
 * @param str  input text
 * @return     `str` with each '\' replaced by two backslashes; all other
 *             characters are copied unchanged
 */
string backslash(string const &str)
{
  string doubled;
  for(string::const_iterator it = str.begin(); it != str.end(); ++it)
  {
    // A backslash is emitted twice, anything else once.
    doubled.append(*it == '\\' ? 2 : 1, *it);
  }
  return doubled;
}
/**
 * Convert `str` to a wide string, putting a backslash in front of every
 * character matched by the global `escape_chars` pattern.
 *
 * The text between matches is decoded with bufferAppend().  Terminates the
 * process with EXIT_FAILURE if a matched character cannot be decoded.
 *
 * @param str  multibyte text to escape
 * @return     escaped wide-character text
 */
wstring escape(string const &str)
{
  char const *const text = str.c_str();
  wstring escaped;
  int consumed = 0;   // offset in `text` where the next search starts
  regmatch_t hit;     // offsets in `hit` are relative to text + consumed
  for(;;)
  {
    if(regexec(&escape_chars, text + consumed, 1, &hit, 0) != 0)
    {
      break;
    }
    // Copy the unescaped run that precedes the match.
    bufferAppend(escaped, str.substr(consumed, hit.rm_so));
    escaped += L'\\';
    wchar_t special;
    if(mbtowc(&special, text + consumed + hit.rm_so, MB_CUR_MAX) == -1)
    {
      wcerr << L"Uno" << endl;
      wcerr << L"Encoding error." << endl;
      exit(EXIT_FAILURE);
    }
    escaped += special;
    consumed += hit.rm_eo;
  }
  // Whatever follows the last match needs no escaping.
  bufferAppend(escaped, str.substr(consumed));
  return escaped;
}
/**
 * Escape a wide string: convert it to the locale's multibyte encoding and
 * pass the result to escape(string const &).
 *
 * A wide character that wctomb() cannot encode is replaced by '?'.
 *
 * @param str  wide text to escape
 * @return     escaped wide-character text
 */
wstring escape(wstring const &str)
{
  string dest = "";
  // One heap buffer reused for every character.  MB_CUR_MAX is not a
  // compile-time constant, so a plain array of that size is not standard C++.
  vector<char> symbol(MB_CUR_MAX + 1);
  for(size_t i = 0, limit = str.size(); i < limit; i++)
  {
    int pos = wctomb(&symbol[0], str[i]);
    if(pos == -1)
    {
      symbol[0] = '?';
      pos = 1;
    }
    symbol[pos] = 0;
    dest.append(&symbol[0]);
  }
  return escape(dest);
}
/**
 * Return the part of `tag` matched by the global `names_regexp` pattern.
 *
 * @param tag  text to search
 * @return     the first match, or an empty string when nothing matches
 */
string get_tagName(string tag){
  regmatch_t span;
  if(regexec(&names_regexp, tag.c_str(), 1, &span, 0) != 0)
  {
    return "";
  }
  return tag.substr(span.rm_so, span.rm_eo - span.rm_so);
}
map<string, wstring, Ltstr> S1_substitution;
/**
 * Fill S1_substitution with the replacements for RTF hexadecimal character
 * escapes.
 *
 * Each key is a backslash, an apostrophe and two lowercase hex digits,
 * optionally followed by one line ending ("\r\n", "\n" or "\r").  Each value
 * is the single wide character that the byte denotes; the byte values follow
 * the Windows-1250 code page.  Code points are written numerically so the
 * table does not depend on the encoding of this source file.
 */
void S1_init()
{
  struct Cp1250Entry
  {
    char const *hex;   // two lowercase hex digits of the byte
    wchar_t symbol;    // Unicode code point for that byte
  };
  static Cp1250Entry const table[] = {
    {"8a", 0x0160}, {"8c", 0x015A}, {"8d", 0x0164}, {"8e", 0x017D},
    {"8f", 0x0179}, {"9a", 0x0161}, {"9c", 0x015B}, {"9d", 0x0165},
    {"9e", 0x017E}, {"9f", 0x017A}, {"a3", 0x0141}, {"a5", 0x0104},
    {"aa", 0x015E}, {"af", 0x017B}, {"b3", 0x0142}, {"b5", 0x00B5},
    {"b9", 0x0105}, {"ba", 0x015F}, {"bc", 0x013D}, {"be", 0x013E},
    {"bf", 0x017C},
    {"c0", 0x0154}, {"c1", 0x00C1}, {"c2", 0x00C2}, {"c3", 0x0102},
    {"c4", 0x00C4}, {"c5", 0x0139}, {"c6", 0x0106}, {"c7", 0x00C7},
    {"c8", 0x010C}, {"c9", 0x00C9}, {"ca", 0x0118}, {"cb", 0x00CB},
    {"cc", 0x011A}, {"cd", 0x00CD}, {"ce", 0x00CE}, {"cf", 0x010E},
    {"d0", 0x0110}, {"d1", 0x0143}, {"d2", 0x0147}, {"d3", 0x00D3},
    {"d4", 0x00D4}, {"d5", 0x0150}, {"d6", 0x00D6}, {"d8", 0x0158},
    {"d9", 0x016E}, {"da", 0x00DA}, {"db", 0x0170}, {"dc", 0x00DC},
    {"dd", 0x00DD}, {"de", 0x0162}, {"df", 0x00DF},
    {"e0", 0x0155}, {"e1", 0x00E1}, {"e2", 0x00E2}, {"e3", 0x0103},
    {"e4", 0x00E4}, {"e5", 0x013A}, {"e6", 0x0107}, {"e7", 0x00E7},
    {"e8", 0x010D}, {"e9", 0x00E9}, {"ea", 0x0119}, {"eb", 0x00EB},
    {"ec", 0x011B}, {"ed", 0x00ED}, {"ee", 0x00EE}, {"ef", 0x010F},
    {"f0", 0x0111}, {"f1", 0x0144}, {"f2", 0x0148}, {"f3", 0x00F3},
    {"f4", 0x00F4}, {"f5", 0x0151}, {"f6", 0x00F6}, {"f8", 0x0159},
    {"f9", 0x016F}, {"fa", 0x00FA}, {"fb", 0x0171}, {"fc", 0x00FC},
    {"fd", 0x00FD}, {"fe", 0x0163}, {"ff", 0x02D9}
  };
  // The scanner rule accepts an optional line ending after the escape, so
  // every escape is registered once per accepted ending.
  static char const *const line_ends[] = {"", "\r\n", "\n", "\r"};

  size_t const n_entries = sizeof(table) / sizeof(table[0]);
  size_t const n_ends = sizeof(line_ends) / sizeof(line_ends[0]);
  for(size_t i = 0; i < n_entries; i++)
  {
    string const escape_seq = string("\\'") + table[i].hex;
    wstring const replacement(1, table[i].symbol);
    for(size_t j = 0; j < n_ends; j++)
    {
      S1_substitution[escape_seq + line_ends[j]] = replacement;
    }
  }
}
/**
 * Write the pending contents of the global `buffer` to yyout and clear it.
 *
 * - If `isDot` is set, ".[]" is written first and the flag is cleared.
 * - A buffer longer than 8192 wide characters is written to a temporary
 *   file, and "[@<file name>]" is written to yyout in its place.
 * - Any other buffer, except an empty one or a single space, is written
 *   between '[' and ']' after escape(); a leading '@' gets an extra
 *   backslash.
 * - An empty buffer or a single space is written as is.
 *
 * Terminates the process with EXIT_FAILURE when the temporary file cannot
 * be created or its name cannot be converted to wide characters.
 */
void printBuffer()
{
  if(isDot)
  {
    fputws_unlocked(L".[]", yyout);
    isDot = false;
  }
  if(buffer.size() > 8192)
  {
    // NOTE(review): tmpnam() is open to a race between choosing the name and
    // creating the file; kept so the file location stays where readers of
    // the "[@name]" marker expect it.
    char const *const tmp_name = tmpnam(NULL);
    if(tmp_name == NULL)
    {
      wcerr << L"ERROR: cannot generate a temporary file name" << endl;
      exit(EXIT_FAILURE);
    }
    string filename = tmp_name;
    FILE *largeblock = fopen(filename.c_str(), "w");
    if(largeblock == NULL)
    {
      wcerr << L"ERROR: cannot create temporary file" << endl;
      exit(EXIT_FAILURE);
    }
    fputws_unlocked(buffer.c_str(), largeblock);
    fclose(largeblock);
    fputwc_unlocked(L'[', yyout);
    fputwc_unlocked(L'@', yyout);
    // One extra element for the terminator: mbstowcs() may convert all
    // filename.size() characters, and the terminator is stored after them.
    vector<wchar_t> cad(filename.size() + 1);
    size_t pos = mbstowcs(&cad[0], filename.c_str(), filename.size());
    if(pos == (size_t) -1)
    {
      wcerr << L"Tres" << endl;
      wcerr << L"Encoding error." << endl;
      exit(EXIT_FAILURE);
    }
    cad[pos] = 0;
    fputws_unlocked(&cad[0], yyout);
    fputwc_unlocked(L']', yyout);
  }
  else if(buffer.size() > 1 || (buffer.size() == 1 && buffer[0] != L' '))
  {
    fputwc_unlocked(L'[', yyout);
    wstring const tmp = escape(buffer);
    if(tmp[0] == L'@')
    {
      // Keep a literal '@' from being read as the temporary-file marker.
      fputwc_unlocked(L'\\', yyout);
    }
    fputws_unlocked(tmp.c_str(), yyout);
    fputwc_unlocked(L']', yyout);
  }
  else
  {
    fputws_unlocked(buffer.c_str(), yyout);
  }
  buffer = L"";
}
%}
/* Exclusive start conditions, one per kind of RTF group that is copied to  */
/* the buffer without further scanning.  The list is split over two %x      */
/* directives because a continuation line starting in the first column      */
/* would be read as a name definition instead of more state names.          */
%x C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C12 C13 C14 C15 C16 C17 C18 C19 C20 C21
%x C22 C23 C24 C25 C26 C27 C28 C29 C30 C31 C32 C33
%option nounput
%option noyywrap
%option caseless
%option stack
%%
<C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15,C16,C18,C19,C20>{
";" {
  // States ended by a semicolon: keep the terminator in the buffer and
  // return to the state that was active before this one was pushed.
  last = "buffer";
  bufferAppend(buffer, yytext);
  yy_pop_state();
}
\n|. {
  // Anything else inside the state is copied to the buffer untouched.
  last = "buffer";
  bufferAppend(buffer, yytext);
}
}
<C17,C21,C22,C23,C24,C25,C26,C27,C28,C29,C30,C31,C32,C33>{
"}" {
  // States ended by a closing brace: keep the terminator in the buffer and
  // return to the state that was active before this one was pushed.
  last = "buffer";
  bufferAppend(buffer, yytext);
  yy_pop_state();
}
\n|. {
  // Anything else inside the state is copied to the buffer untouched.
  last = "buffer";
  bufferAppend(buffer, yytext);
}
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\snext" {
  // Every rule in this run has the same shape: an opening brace, optional
  // whitespace, any number of control words, then one specific keyword.
  // The matched text is appended to the buffer and the scanner pushes the
  // exclusive state named in the action; the rules of that state decide
  // where the copied region ends.
bufferAppend(buffer, yytext);
yy_push_state(C1);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\keycode" {
bufferAppend(buffer, yytext);
yy_push_state(C2);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fcharset" {
bufferAppend(buffer, yytext);
yy_push_state(C3);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fnil" {
bufferAppend(buffer, yytext);
yy_push_state(C4);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\froman" {
bufferAppend(buffer, yytext);
yy_push_state(C5);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fswiss" {
bufferAppend(buffer, yytext);
yy_push_state(C6);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fmodern" {
bufferAppend(buffer, yytext);
yy_push_state(C7);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fscript" {
bufferAppend(buffer, yytext);
yy_push_state(C8);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fdecor" {
bufferAppend(buffer, yytext);
yy_push_state(C9);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\ftech" {
bufferAppend(buffer, yytext);
yy_push_state(C10);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fbidi" {
bufferAppend(buffer, yytext);
yy_push_state(C11);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\falt" {
bufferAppend(buffer, yytext);
yy_push_state(C12);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fontfile" {
bufferAppend(buffer, yytext);
yy_push_state(C13);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fn" {
bufferAppend(buffer, yytext);
yy_push_state(C14);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\sbasedon" {
bufferAppend(buffer, yytext);
yy_push_state(C15);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\additive" {
bufferAppend(buffer, yytext);
yy_push_state(C16);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\file" {
bufferAppend(buffer, yytext);
yy_push_state(C17);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\listname" {
bufferAppend(buffer, yytext);
yy_push_state(C18);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\leveltext" {
bufferAppend(buffer, yytext);
yy_push_state(C19);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\levelnumbers" {
bufferAppend(buffer, yytext);
yy_push_state(C20);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\pict" {
bufferAppend(buffer, yytext);
yy_push_state(C21);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\sn" {
bufferAppend(buffer, yytext);
yy_push_state(C22);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\sv" {
bufferAppend(buffer, yytext);
yy_push_state(C23);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"{\\\*\\blipuid "[^ \n\r]+"}" {
  // Unlike its neighbours, this pattern ends with a complete nested group
  // (the blipuid keyword, its argument and the closing brace).
bufferAppend(buffer, yytext);
yy_push_state(C24);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\object" {
bufferAppend(buffer, yytext);
yy_push_state(C25);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\docvar" {
bufferAppend(buffer, yytext);
yy_push_state(C26);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\bkmkstart" {
bufferAppend(buffer, yytext);
yy_push_state(C27);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\bkmkend" {
bufferAppend(buffer, yytext);
yy_push_state(C28);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\rxe" {
bufferAppend(buffer, yytext);
yy_push_state(C29);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fldinst" {
bufferAppend(buffer, yytext);
yy_push_state(C30);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\fldrslt" {
bufferAppend(buffer, yytext);
yy_push_state(C31);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\pntxt" {
bufferAppend(buffer, yytext);
yy_push_state(C32);
}
"{"[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*"\\colortbl" {
bufferAppend(buffer, yytext);
yy_push_state(C33);
}
[a-f0-9]{20,} {
  // A run of twenty or more hex digits: buffered, and isDot is raised so
  // the next printBuffer() call writes its dot marker first.
isDot = true;
bufferAppend(buffer, yytext);
}
"\\par"|"\\row"|"\\cell" {
  // These three control words are buffered and also raise isDot.
isDot = true;
bufferAppend(buffer, yytext);
}
"{"[ \n\r]*\\[^'][^ \n\r\\]*[ \n\r]* {
  // Opening brace followed by a single control word: buffered as is.
bufferAppend(buffer, yytext);
}
\\[^'][^ \n\r\\]*[ \n\r]* {
  // Any other control word, with its trailing whitespace: buffered as is.
bufferAppend(buffer, yytext);
}
"}" {
  // A lone closing brace is buffered.
bufferAppend(buffer, yytext);
}
"{" {
  // A lone opening brace is buffered.
bufferAppend(buffer, yytext);
}
"\\'"[0-9a-fA-F][0-9a-fA-F](\r|\n|"\r\n")? {
  // Hexadecimal character escape, optionally followed by one line ending.
  // The table is searched once and the iterator reused; printBuffer() does
  // not modify the table, so the iterator stays valid across that call.
  map<string, wstring, Ltstr>::const_iterator const found = S1_substitution.find(yytext);
  if(found != S1_substitution.end())
  {
    // Known escape: flush pending markup, then write the replacement text.
    printBuffer();
    fputws_unlocked(found->second.c_str(), yyout);
    offset += found->second.size();
    hasWrite_dot = hasWrite_white = true;
  }
  else
  {
    // Unknown escape: treat it like any other markup.
    last = "buffer";
    bufferAppend(buffer, yytext);
  }
}
[ \n\t\r$*] {
  // Whitespace, dollar sign and asterisk are buffered.  The text goes to the
  // most recent entry of tags only when one exists; calling back() on an
  // empty vector is undefined, so the buffer is used otherwise.
  if (last == "open_tag" && !tags.empty())
    bufferAppend(tags.back(), yytext);
  else
    bufferAppend(buffer, yytext);
}
\\|[][<>@^$/] {
  // A reserved character in running text: flush pending markup, then write
  // the character with a backslash in front of it.
  printBuffer();
  fputwc_unlocked(L'\\', yyout);
  ++offset;
  wchar_t reserved;
  if(mbtowc(&reserved, yytext, MB_CUR_MAX) == -1)
  {
    wcerr << L"Cuatro" << endl;
    wcerr << L"Encoding error." << endl;
    exit(EXIT_FAILURE);
  }
  fputwc_unlocked(reserved, yyout);
  ++offset;
  hasWrite_dot = true;
  hasWrite_white = true;
}
. {
  // Any other byte is running text.  Bytes are collected in symbuf until
  // they decode to one wide character, which is then written to yyout.
  printBuffer();
  symbuf += yytext;
  wchar_t decoded;
  bool const failed = (mbtowc(&decoded, symbuf.c_str(), MB_CUR_MAX) == -1);
  // On failure keep collecting bytes until more than MB_CUR_MAX of them
  // are pending; only then give up and write a question mark instead.
  if(!failed || symbuf.size() > MB_CUR_MAX)
  {
    symbuf = "";
    fputwc_unlocked(failed ? L'?' : decoded, yyout);
    offset++;
    hasWrite_dot = hasWrite_white = true;
  }
}
<<EOF>> {
  // End of input: raise isDot so the final flush writes the dot marker,
  // flush the buffer and stop scanning.
  isDot = true;
  printBuffer();
  return 0;
}
%%
/**
 * Print the command-line synopsis to standard error and terminate the
 * process.  Note that the exit status is EXIT_SUCCESS.
 *
 * @param progname  program name shown in the synopsis
 */
void usage(string const &progname)
{
  cerr << "USAGE: " << progname << " [input_file [output_file]" << ']' << endl
       << "rtf format processor " << endl;
  exit(EXIT_SUCCESS);
}
/**
 * Entry point: optionally open the input file (argv[1]) and output file
 * (argv[2]), initialise the tables and global state, run the scanner and
 * close both streams.  More than two arguments, or a file that cannot be
 * opened, ends the program through usage().
 */
int main(int argc, char *argv[])
{
  LtLocale::tryToSetLocale();
  if(argc > 3)
  {
    usage(argv[0]);
  }

  // The output file is opened before the input file, as in the two-argument
  // form "input_file output_file".
  if(argc == 3)
  {
    yyout = fopen(argv[2], "w");
    if(yyout == NULL)
    {
      usage(argv[0]);
    }
  }
  if(argc == 2 || argc == 3)
  {
    yyin = fopen(argv[1], "r");
    if(yyin == NULL)
    {
      usage(argv[0]);
    }
  }

  // prevent warning message
  yy_push_state(1);
  yy_top_state();
  yy_pop_state();

  S1_init();

  last = "";
  buffer = L"";
  isDot = false;
  hasWrite_dot = false;
  hasWrite_white = false;
  current = 0;
  offset = 0;

  init_escape();
  init_tagNames();

  yylex();

  fclose(yyin);
  fclose(yyout);
}
signature.asc
Description: PGP signature

