Changeset: 064ab92cd39d for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/064ab92cd39d
Modified Files:
        common/stream/text_stream.c
Branch: default
Log Message:

More efficient implementation of text_pump_in()


diffs (105 lines):

diff --git a/common/stream/text_stream.c b/common/stream/text_stream.c
--- a/common/stream/text_stream.c
+++ b/common/stream/text_stream.c
@@ -89,42 +89,69 @@ take_byte(inner_state_t *ist)
 static pump_result
 text_pump_in(inner_state_t *ist, pump_action action)
 {
-       bool crlf_pending = ist->crlf_pending;
+       assert(ist->dst_win.count > 0);
+       assert(ist->src_win.count > 0 || action == PUMP_FINISH);
 
-       while (ist->src_win.count > 0 && ist->dst_win.count > 0) {
-               char c = take_byte(ist);
-               switch (c) {
-                       case '\r':
-                               if (crlf_pending) {
-                                       // put the previous one, which is clearly not followed by an \n
-                                       put_byte(ist, '\r');
-                               }
-                               crlf_pending = true;
-                               continue;
-                       case '\n':
-                               put_byte(ist, c);
-                               crlf_pending = false;
-                               continue;
-                       default:
-                               if (crlf_pending) {
-                                       put_byte(ist, '\r');
-                                       crlf_pending = false;
-                                       // if dst_win.count was 1, there is no room for another put_byte().
-                                       if (ist->dst_win.count > 0) {
-                                               put_byte(ist, c);
-                                       } else {
-                                               // no room anymore for char c, put it back!
-                                               ist->src_win.start--;
-                                               ist->src_win.count++;
-                                       }
-                               } else {
-                                       put_byte(ist, c);
-                               }
-                               continue;
+       if (ist->crlf_pending) {
+               if (ist->src_win.count > 0) {
+                       if (ist->src_win.start[0] != '\n') {
+                               // CR not followed by a LF, emit it
+                               put_byte(ist, '\r');
+                       }
+               } else {
+                       assert(action == PUMP_FINISH);
+                       // CR followed by end of file, not LF, so emit it
+                       put_byte(ist, '\r');
                }
+               // in any case, the CR is no longer pending
+               ist->crlf_pending = false;
        }
 
-       ist->crlf_pending = crlf_pending;
+       while (1) {
+               size_t span = ist->src_win.count < ist->dst_win.count
+                                       ? ist->src_win.count
+                                       : ist->dst_win.count;
+               if (span == 0)
+                       break;
+
+               if (ist->src_win.start[0] == '\r') {
+                       // Looking at a CR. We'll handle just that, then make another round of the while loop
+                       if (ist->src_win.count == 1) {
+                               // Don't know what will follow, move it to the flag.
+                               // Then stop, as all available input has been consumed
+                               take_byte(ist);
+                               ist->crlf_pending = true;
+                               break;
+                       }
+                       assert(ist->src_win.count > 1); // We can safely look ahead
+                       if (ist->src_win.start[1] == '\n') {
+                               // Drop the CR, move the LF
+                               take_byte(ist);
+                               put_byte(ist, take_byte(ist));
+                       } else {
+                               // Move the CR
+                               put_byte(ist, take_byte(ist));
+                       }
+                       // progress has been made, consider the situation anew
+                       continue;
+               } else {
+                       // The remaining input data does not start with a CR.
+                       // Move all non-CR data to the output buffer
+                       char *cr = memchr(ist->src_win.start, '\r', span);
+                       if (cr != NULL) {
+                               span = cr - ist->src_win.start;
+                       }
+                       assert(span > 0);
+                       memcpy(ist->dst_win.start, ist->src_win.start, span);
+                       ist->src_win.start += span;
+                       ist->src_win.count -= span;
+                       ist->dst_win.start += span;
+                       ist->dst_win.count -= span;
+                       continue;
+               }
+               // Unreachable, all branches above explicitly break or continue
+               assert(0 && "UNREACHABLE");
+       }
 
        if (action == PUMP_FINISH) {
                if (ist->src_win.count > 0)
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to