alexeykudinkin commented on a change in pull request #4866:
URL: https://github.com/apache/hudi/pull/4866#discussion_r811469702



##########
File path: 
hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCleansCommand.java
##########
@@ -83,7 +83,7 @@ public void init() throws Exception {
     String fileId1 = UUID.randomUUID().toString();
     String fileId2 = UUID.randomUUID().toString();
     FileSystem fs = FSUtils.getFs(basePath(), hadoopConf());
-    HoodieTestDataGenerator.writePartitionMetadata(fs, 
HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, tablePath);
+    HoodieTestDataGenerator.writePartitionMetadataDeprecated(fs, 
HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, tablePath);

Review comment:
       Nah, we can let those die down naturally

##########
File path: 
hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java
##########
@@ -140,35 +142,64 @@
   public static final TypeDescription ORC_TRIP_SCHEMA = 
AvroOrcUtils.createOrcSchema(new Schema.Parser().parse(TRIP_SCHEMA));
   public static final Schema FLATTENED_AVRO_SCHEMA = new 
Schema.Parser().parse(TRIP_FLATTENED_SCHEMA);
 
-  private static final Random RAND = new Random(46474747);
+  private final Random r;

Review comment:
       :-)
   
   Why do you think `rand` is better than `r`?

##########
File path: 
hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java
##########
@@ -865,4 +883,31 @@ public int getNumExistingKeys(String schemaStr) {
   public void close() {
     existingKeysBySchema.clear();
   }
+
+  private static long genRandomTimeMillis(Random r) {
+    // NOTE: {@code Date} expects a number representing a number of millis 
elapsed since epoch
+    //       (01/01/1970 00:00:00) until approx year 8099
+    //       This modulo represents approx 6000 years in millis, which should 
be sufficient to
+    //       cover all possible use-cases
+    return Math.abs(r.nextLong()) % 189216000000000L;
+  }
+
+  private static UUID genPseudoRandomUUID(Random r) {
+    byte[] bytes = new byte[16];
+    r.nextBytes(bytes);
+
+    bytes[6] &= 0x0f;
+    bytes[6] |= 0x40;
+    bytes[8] &= 0x3f;
+    bytes[8] |= 0x80;

Review comment:
       Some standard v4 bits clearing/setting

##########
File path: 
hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java
##########
@@ -140,35 +142,64 @@
   public static final TypeDescription ORC_TRIP_SCHEMA = 
AvroOrcUtils.createOrcSchema(new Schema.Parser().parse(TRIP_SCHEMA));
   public static final Schema FLATTENED_AVRO_SCHEMA = new 
Schema.Parser().parse(TRIP_FLATTENED_SCHEMA);
 
-  private static final Random RAND = new Random(46474747);
+  private final Random r;
 
   //Maintains all the existing keys schema wise
   private final Map<String, Map<Integer, KeyPartition>> existingKeysBySchema;
   private final String[] partitionPaths;
   //maintains the count of existing keys schema wise
   private Map<String, Integer> numKeysBySchema;
 
+  public HoodieTestDataGenerator(long seed) {
+    this(seed, DEFAULT_PARTITION_PATHS, new HashMap<>());
+  }
+
+  public HoodieTestDataGenerator(long seed, String[] partitionPaths, 
Map<Integer, KeyPartition> keyPartitionMap) {
+    this.r = new Random(seed);

Review comment:
       Good call!

##########
File path: 
hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java
##########
@@ -489,8 +507,8 @@ public static void createSavepointFile(String basePath, 
String instantTime, Conf
 
   public Stream<HoodieRecord> generateInsertsStream(String commitTime, Integer 
n, boolean isFlattened, String schemaStr, boolean containsAllPartitions) {
     return generateInsertsStream(commitTime, n, isFlattened, schemaStr, 
containsAllPartitions,
-        () -> partitionPaths[RAND.nextInt(partitionPaths.length)],
-        () -> UUID.randomUUID().toString());
+        () -> partitionPaths[r.nextInt(partitionPaths.length)],
+        () -> genPseudoRandomUUID(r).toString());

Review comment:
       Good catch! Weirdly my text search skipped through these

##########
File path: 
hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java
##########
@@ -140,35 +142,64 @@
   public static final TypeDescription ORC_TRIP_SCHEMA = 
AvroOrcUtils.createOrcSchema(new Schema.Parser().parse(TRIP_SCHEMA));
   public static final Schema FLATTENED_AVRO_SCHEMA = new 
Schema.Parser().parse(TRIP_FLATTENED_SCHEMA);
 
-  private static final Random RAND = new Random(46474747);

Review comment:
       Correct

##########
File path: 
hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java
##########
@@ -269,25 +300,25 @@ public static GenericRecord generateGenericRecord(String 
rowKey, String partitio
     rec.put("partition_path", partitionPath);
     rec.put("rider", riderName);
     rec.put("driver", driverName);
-    rec.put("begin_lat", RAND.nextDouble());
-    rec.put("begin_lon", RAND.nextDouble());
-    rec.put("end_lat", RAND.nextDouble());
-    rec.put("end_lon", RAND.nextDouble());
+    rec.put("begin_lat", r.nextDouble());
+    rec.put("begin_lon", r.nextDouble());
+    rec.put("end_lat", r.nextDouble());
+    rec.put("end_lon", r.nextDouble());
     if (isFlattened) {
-      rec.put("fare", RAND.nextDouble() * 100);
+      rec.put("fare", r.nextDouble() * 100);
       rec.put("currency", "USD");
     } else {
-      rec.put("distance_in_meters", RAND.nextInt());
-      rec.put("seconds_since_epoch", RAND.nextLong());
-      rec.put("weight", RAND.nextFloat());
+      rec.put("distance_in_meters", r.nextInt());
+      rec.put("seconds_since_epoch", r.nextLong());
+      rec.put("weight", r.nextFloat());
       byte[] bytes = "Canada".getBytes();
       rec.put("nation", ByteBuffer.wrap(bytes));
-      long currentTimeMillis = System.currentTimeMillis();
-      Date date = new Date(currentTimeMillis);
+      long randomMillis = genRandomTimeMillis(r);

Review comment:
       I don't think this is an assumption that tests should be able to make. 
At the end of the day there's no such contract provided by the Generator, and 
tests should not depend on such implementation detail.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to