Copilot commented on code in PR #2375:
URL: https://github.com/apache/sedona/pull/2375#discussion_r2412068487
##########
spark/common/src/main/java/org/apache/sedona/sql/datasources/osmpbf/iterators/BlobIterator.java:
##########
@@ -23,128 +23,76 @@
import java.io.IOException;
import java.util.Iterator;
import java.util.zip.DataFormatException;
-import org.apache.sedona.sql.datasources.osmpbf.DenseNodeIterator;
import org.apache.sedona.sql.datasources.osmpbf.build.Fileformat.Blob;
import org.apache.sedona.sql.datasources.osmpbf.build.Osmformat;
import org.apache.sedona.sql.datasources.osmpbf.extractors.DenseNodeExtractor;
-import org.apache.sedona.sql.datasources.osmpbf.extractors.NodeExtractor;
-import org.apache.sedona.sql.datasources.osmpbf.extractors.RelationExtractor;
-import org.apache.sedona.sql.datasources.osmpbf.extractors.WaysExtractor;
import org.apache.sedona.sql.datasources.osmpbf.model.OSMEntity;
-import org.apache.sedona.sql.datasources.osmpbf.model.OsmNode;
public class BlobIterator implements Iterator<OSMEntity> {
Blob blob;
Osmformat.PrimitiveBlock primitiveBlock;
int primitiveGroupIdx;
- int osmEntityIdx;
- DenseNodeIterator denseNodesIterator;
+ Iterator<OSMEntity> iterator;
Osmformat.PrimitiveGroup currentPrimitiveGroup;
public BlobIterator(Blob blob) throws DataFormatException, IOException {
primitiveBlock =
Osmformat.PrimitiveBlock.parseFrom(dataInputStreamBlob(blob));
primitiveGroupIdx = 0;
- osmEntityIdx = 0;
currentPrimitiveGroup =
primitiveBlock.getPrimitivegroup(primitiveGroupIdx);
+ iterator = resolveIterator();
this.blob = blob;
}
@Override
public boolean hasNext() {
- return primitiveBlock.getPrimitivegroupList().size() != primitiveGroupIdx;
+ return primitiveGroupIdx < primitiveBlock.getPrimitivegroupList().size() -
1
+ || iterator.hasNext();
}
@Override
public OSMEntity next() {
- if (currentPrimitiveGroup == null) {
- return null;
+ if (iterator.hasNext()) {
+ return iterator.next();
}
- if (!currentPrimitiveGroup.getRelationsList().isEmpty()) {
- return extractRelationPrimitiveGroup();
- }
-
- if (!currentPrimitiveGroup.getNodesList().isEmpty()) {
- return extractNodePrimitiveGroup();
- }
-
- if (!currentPrimitiveGroup.getWaysList().isEmpty()) {
- return extractWayPrimitiveGroup();
- }
-
- if (!currentPrimitiveGroup.getChangesetsList().isEmpty()) {
- return null;
- }
-
- if (currentPrimitiveGroup.getDense() != null) {
- return extractDenseNodePrimitiveGroup();
- }
-
- return null;
- }
+ primitiveGroupIdx += 1;
- private OSMEntity extractNodePrimitiveGroup() {
- osmEntityIdx += 1;
- if (currentPrimitiveGroup.getNodesList().size() == osmEntityIdx) {
- nextEntity();
- }
+ currentPrimitiveGroup =
primitiveBlock.getPrimitivegroup(primitiveGroupIdx);
- Osmformat.StringTable stringTable = primitiveBlock.getStringtable();
+ iterator = resolveIterator();
- return new NodeExtractor(currentPrimitiveGroup, primitiveBlock)
- .extract(osmEntityIdx, stringTable);
+ return iterator.next();
}
- public OSMEntity extractDenseNodePrimitiveGroup() {
- if (denseNodesIterator == null) {
- denseNodesIterator =
- new DenseNodeIterator(
- currentPrimitiveGroup.getDense().getIdCount(),
- primitiveBlock.getStringtable(),
- new DenseNodeExtractor(
- currentPrimitiveGroup.getDense(),
- primitiveBlock.getLatOffset(),
- primitiveBlock.getLonOffset(),
- primitiveBlock.getGranularity()));
+ Iterator<OSMEntity> resolveIterator() {
+ if (!currentPrimitiveGroup.getWaysList().isEmpty()) {
+ return new WayIterator(currentPrimitiveGroup.getWaysList(),
primitiveBlock.getStringtable());
}
- OsmNode node = denseNodesIterator.next();
-
- if (!denseNodesIterator.hasNext()) {
- denseNodesIterator = null;
- nextEntity();
+ if (!currentPrimitiveGroup.getRelationsList().isEmpty()) {
+ return new RelationIterator(
+ currentPrimitiveGroup.getRelationsList(),
primitiveBlock.getStringtable());
}
- return node;
- }
-
- public OSMEntity extractWayPrimitiveGroup() {
- osmEntityIdx += 1;
- if (currentPrimitiveGroup.getWaysList().size() == osmEntityIdx) {
- nextEntity();
+ if (!currentPrimitiveGroup.getNodesList().isEmpty()) {
+ return new NodeIterator(currentPrimitiveGroup.getNodesList(),
primitiveBlock);
}
- return new WaysExtractor(currentPrimitiveGroup,
primitiveBlock.getStringtable())
- .extract(osmEntityIdx);
- }
-
- public OSMEntity extractRelationPrimitiveGroup() {
- osmEntityIdx += 1;
- if (currentPrimitiveGroup.getRelationsList().size() == osmEntityIdx) {
- nextEntity();
+ if (currentPrimitiveGroup.getDense() != null) {
+ return new DenseNodeIterator(
+ currentPrimitiveGroup.getDense().getIdCount(),
+ primitiveBlock.getStringtable(),
+ new DenseNodeExtractor(
+ currentPrimitiveGroup.getDense(),
+ primitiveBlock.getLatOffset(),
+ primitiveBlock.getLonOffset(),
+ primitiveBlock.getGranularity()));
}
- Osmformat.StringTable stringTable = primitiveBlock.getStringtable();
-
- return new RelationExtractor(currentPrimitiveGroup,
stringTable).extract(osmEntityIdx);
- }
-
- public void nextEntity() {
- primitiveGroupIdx += 1;
- osmEntityIdx = 0;
+ return null;
Review Comment:
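The resolveIterator() method returns null when none of its branches match the
current primitive group (no ways, relations, or nodes, and getDense() is null).
Any later call to iterator.hasNext() or iterator.next() would then throw a
NullPointerException. Consider returning an empty iterator (for example,
Collections.emptyIterator()) instead.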
##########
spark/common/src/main/java/org/apache/sedona/sql/datasources/osmpbf/iterators/PbfIterator.java:
##########
@@ -56,7 +56,7 @@ public OSMEntity next() {
}
private BlobIterator readNextBlock() throws DataFormatException, IOException
{
- while (pmGroupIterator.hasNext()) {
+ if (pmGroupIterator.hasNext()) {
Review Comment:
Changing 'while' to 'if' breaks the iterator logic. If the next block is not
of type 'OSMData', the method will return null instead of continuing to search
for the next valid block. This should remain a 'while' loop so that it iterates
through the blocks until it finds an 'OSMData' block.
```suggestion
while (pmGroupIterator.hasNext()) {
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]