pnowojski commented on a change in pull request #6417: [FLINK-9913][runtime] 
Improve output serialization only once in RecordWriter
URL: https://github.com/apache/flink/pull/6417#discussion_r215615035
 
 

 ##########
 File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/io/network/api/writer/RecordWriter.java
 ##########
 @@ -89,77 +88,101 @@ public RecordWriter(ResultPartitionWriter writer, 
ChannelSelector<T> channelSele
 
                this.numChannels = writer.getNumberOfSubpartitions();
 
-               /*
-                * The runtime exposes a channel abstraction for the produced 
results
-                * (see {@link ChannelSelector}). Every channel has an 
independent
-                * serializer.
-                */
-               this.serializers = new SpanningRecordSerializer[numChannels];
+               this.serializer = new SpanningRecordSerializer<T>();
                this.bufferBuilders = new Optional[numChannels];
+               this.broadcastChannels = new int[numChannels];
                for (int i = 0; i < numChannels; i++) {
-                       serializers[i] = new SpanningRecordSerializer<T>();
+                       broadcastChannels[i] = i;
                        bufferBuilders[i] = Optional.empty();
                }
        }
 
        public void emit(T record) throws IOException, InterruptedException {
-               for (int targetChannel : channelSelector.selectChannels(record, 
numChannels)) {
-                       sendToTarget(record, targetChannel);
-               }
+               emitToTargetChannels(record, 
channelSelector.selectChannels(record, numChannels));
        }
 
        /**
         * This is used to broadcast Streaming Watermarks in-band with records. 
This ignores
         * the {@link ChannelSelector}.
         */
        public void broadcastEmit(T record) throws IOException, 
InterruptedException {
-               for (int targetChannel = 0; targetChannel < numChannels; 
targetChannel++) {
-                       sendToTarget(record, targetChannel);
-               }
+               emitToTargetChannels(record, broadcastChannels);
        }
 
        /**
         * This is used to send LatencyMarks to a random target channel.
         */
        public void randomEmit(T record) throws IOException, 
InterruptedException {
-               sendToTarget(record, rng.nextInt(numChannels));
+               serializer.serializeRecord(record);
+
+               if (copyToTargetBuffers(rng.nextInt(numChannels))) {
+                       serializer.prune();
+               }
        }
 
-       private void sendToTarget(T record, int targetChannel) throws 
IOException, InterruptedException {
-               RecordSerializer<T> serializer = serializers[targetChannel];
+       private void emitToTargetChannels(T record, int[] targetChannels) 
throws IOException, InterruptedException {
+               serializer.serializeRecord(record);
+
+               boolean pruneAfterCopying = false;
+               for (int channel : targetChannels) {
+                       if (copyToTargetBuffers(channel)) {
+                               pruneAfterCopying = true;
+                       }
+               }
 
-               SerializationResult result = serializer.addRecord(record);
+               // Make sure we don't hold onto the large intermediate 
serialization buffer for too long
+               if (pruneAfterCopying) {
+                       serializer.prune();
+               }
+       }
 
+       /**
+        * Copies the intermediate serialization buffer to the BufferBuilder of 
the target channel, also
+        * checks to prune the intermediate buffer iif the target BufferBuilder 
is fulfilled and the record
+        * is full.
+        *
+        * @param targetChannel the target channel to get BufferBuilder
+        * @return <tt>true</tt> if the intermediate serialization buffer 
should be pruned
+        */
+       private boolean copyToTargetBuffers(int targetChannel) throws 
IOException, InterruptedException {
 
 Review comment:
   This is the third time I'm looking at this PR, and the third time I had to 
think for a minute about what this method does. I keep forgetting that 
`serializer` is a class field and that this method copies from it.
   
   Maybe rename to `copyFromSerializerToTargetChannel`? IMO, the rename would 
allow us to drop most of the Javadoc and simplify it to just:
    ```
        /**
         * @param targetChannel
         * @return <tt>true</tt> if the intermediate serialization buffer 
should be pruned
         */
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to