On Tue, Jan 26, 2016 at 02:46:29PM +0100, Simon Ruderich wrote:
> Hi,
>
> I'm using GNU screen's hardcopy function to dump the current
> screen content to a file. However the resulting file is encoded
> in ISO-8859-1 although my current locale is UTF-8. This causes
> corruption for characters which are not representable in
> ISO-8859-1.

Hello again,

It's not actually using ISO-8859-1, but instead printing only the
lowest byte of ->image, which seems to be the Unicode code point.

image.h:
    /* structure representing single cell of terminal */
    struct mchar {
            uint32_t image;         /* actual letter like a, b, c ... */
            [...]
    };

fileio.c WriteFile():
    for (i = 0; i < fore->w_height; i++) {
            p = fore->w_mlines[i].image;
            for (k = fore->w_width - 1; k >= 0 && p[k] == ' '; k--) ;
            for (j = 0; j <= k; j++)
                    putc(p[j], f);
            putc('\n', f);
    }

This obviously doesn't work for characters > 255, which caused the
garbled output for me.


The attached patch should fix the issue. However somebody should
verify my assumptions:

I'm not 100% sure that ->image is actually the Unicode code
point.

Double-width characters are followed by a character with ->image
= 0xff and ->font = 0xff. I assumed that this means the character
is a filler character to handle the fixed screen width correctly,
but I'm not entirely sure. Is there a function/constant to check
for fillers like this? Hard-coding 0xff doesn't sound like a good
idea.

I don't know how the fontp parameter of EncodeChar() is used:

    int EncodeChar(char *bp, int c, int encoding, int *fontp)

Passing NULL seems to work though.

Regards
Simon

PS: The Git repository contains a lot of commits since the last
release. A new release of GNU Screen sounds like a good idea to
get those fixes/improvements distributed.
-- 
+ privacy is necessary
+ using gnupg http://gnupg.org
+ public key id: 0x92FEFDB7E44C32F9
From 1aa921e2e56867cc933973d96a786a83eca8bf3c Mon Sep 17 00:00:00 2001
From: Simon Ruderich <si...@ruderich.org>
Date: Sat, 6 Feb 2016 16:26:03 +0100
Subject: [PATCH] :hardcopy: handle encoding

Previously only the first byte of the Unicode code point was
written to the hardcopy file.
---
 src/fileio.c | 24 +++++++++++++++++++++---
 src/image.h  |  2 +-
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/src/fileio.c b/src/fileio.c
index 5dc1797..6cd21ac 100644
--- a/src/fileio.c
+++ b/src/fileio.c
@@ -42,6 +42,7 @@
 #include "misc.h"
 #include "process.h"
 #include "termcap.h"
+#include "encoding.h"
 
 static char *CatExtra(char *, char *);
 static char *findrcfile(char *);
@@ -304,6 +305,21 @@ void RcLine(char *ubuf, int ubufl)
 	EffectiveAclUser = 0;
 }
 
+
+static void putc_encoded(FILE *f, uint32_t c, uint32_t font, int encoding) {
+	/* Filler character which is used for double-width characters. */
+	if (c == 0xff && font == 0xff)
+		return;
+
+	char buf[10];
+	int length = EncodeChar(buf, c, encoding, NULL);
+	if (length < 0) {
+		return;
+	}
+	buf[length] = 0;
+	fputs(buf, f);
+}
+
 /*
  * needs display for copybuffer access and termcap dumping
  */
@@ -315,7 +331,6 @@ void WriteFile(struct acluser *user, char *fn, int dump)
 	 * dump==1:   scrollback,
 	 */
 	int i, j, k;
-	uint32_t *p;
 	char *c;
 	FILE *f;
 	char fnbuf[FILENAME_MAX];
@@ -384,6 +399,7 @@ void WriteFile(struct acluser *user, char *fn, int dump)
 		if (f == NULL) {
 			UserReturn(0);
 		} else {
+			uint32_t *p, *pf;
 			switch (dump) {
 			case DUMP_HARDCOPY:
 			case DUMP_SCROLLBACK:
@@ -398,17 +414,19 @@ void WriteFile(struct acluser *user, char *fn, int dump)
 				if (dump == DUMP_SCROLLBACK) {
 					for (i = 0; i < fore->w_histheight; i++) {
 						p = (WIN(i)->image);
+						pf = WIN(i)->font;
 						for (k = fore->w_width - 1; k >= 0 && p[k] == ' '; k--) ;
 						for (j = 0; j <= k; j++)
-							putc(p[j], f);
+							putc_encoded(f, p[j], pf[j], fore->w_encoding);
 						putc('\n', f);
 					}
 				}
 				for (i = 0; i < fore->w_height; i++) {
 					p = fore->w_mlines[i].image;
+					pf = fore->w_mlines[i].font;
 					for (k = fore->w_width - 1; k >= 0 && p[k] == ' '; k--) ;
 					for (j = 0; j <= k; j++)
-						putc(p[j], f);
+						putc_encoded(f, p[j], pf[j], fore->w_encoding);
 					putc('\n', f);
 				}
 				break;
diff --git a/src/image.h b/src/image.h
index 9ce9b0e..487dfc2 100644
--- a/src/image.h
+++ b/src/image.h
@@ -34,7 +34,7 @@
 
 /* structure representing single cell of terminal */
 struct mchar {
-	uint32_t image;		/* actual letter like a, b, c ... */
+	uint32_t image;		/* actual letter like a, b, c ... as Unicode code point */
 	uint32_t attr;		/* attributes - bold, standout etc. */
 	uint32_t font;		/* font :) */
 	uint32_t fontx; 	/* extended font; TODO: remove - merge with font */
-- 
2.7.0

Attachment: signature.asc
Description: PGP signature

Reply via email to