nit0906 commented on code in PR #2053: URL: https://github.com/apache/jackrabbit-oak/pull/2053#discussion_r1944427702
########## oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedTreeStoreTask.java: ########## @@ -230,4 +237,68 @@ private void sortAndSaveBatch(NodeStateEntryBatch nseb) throws Exception { } } + /** + * If there are any, remove properties of bundled nodes (jcr:content/...) from the JSON-encoded node. + * + * @param path the path + * @param value the JSON-encoded node + * @return the cleaned JSON + */ + public static String removePropertiesOfBundledNodes(String path, String value) { + if (value.indexOf("\"jcr:content/") < 0) { + return value; + } + // possibly the node contains a bundled property, but we are not sure + // try to de-serialize + NodeStateEntryReader nodeReader = new NodeStateEntryReader(new MemoryBlobStore()); + try { + // the following line will throw an exception if de-serialization fails + nodeReader.read(path + "|" + value); + // ok it did not: it was a false positive + return value; + } catch (Exception e) { + LOG.warn("Path {} value {}", path, value); + JsopReader reader = new JsopTokenizer(value); + JsopBuilder writer = new JsopBuilder(); + reader.read('{'); + writer.object(); + if (!reader.matches('}')) { + do { + String key = reader.readString(); + reader.read(':'); + // skip properties that contain "/" + boolean skip = key.indexOf('/') >= 0; + if (!skip) { + writer.key(key); + } + if (reader.matches('[')) { + if (!skip) { + writer.array(); + } + do { + String raw = reader.readRawValue(); + if (!skip) { + writer.encodedValue(raw); + } + } while (reader.matches(',')); + reader.read(']'); + if (!skip) { + writer.endArray(); + } + } else { + String raw = reader.readRawValue(); + if (!skip) { + writer.encodedValue(raw); + } + } + } while (reader.matches(',')); + } + reader.read('}'); + writer.endObject(); + String result = writer.toString(); + LOG.warn("Cleaned {} : {}", path, result); Review Comment: Same as above: can we make the message a bit more descriptive?
-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: oak-dev-unsubscr...@jackrabbit.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org