#!/usr/bin/perl
#
# generate fake dataset for schema:
#
# CREATE TABLE files (
#    id SERIAL PRIMARY KEY,
#    filename TEXT NOT NULL,
#    state VARCHAR(20) NOT NULL,
#    filler TEXT
# );
#
#  to test query:
#
# SELECT id, filename
# FROM files
# WHERE state = 'waiting';   -- or: WHERE state = 'done';

use strict;
use warnings;

# Output directory and number of rows in the generated dataset.
my $dir  = '/data0/dump';
my $rows = 10000000;

# Possible values for the "state" column. Index 0 ('done') is the common
# value; the remaining entries are the rare ones.
my @states = ('done', 'waiting', 'assigning', 'processing');

# Short constant padding for the "filler" column. Substitute a much longer
# string here to generate wider rows.
my $filler = 'xxxxxxxxxxxxx';

print "generate files\n";

# Three-argument open with a lexical handle: the mode can never be confused
# with part of the path, and the handle does not leak into package scope.
my $path = "$dir/files.dat";
open(my $fh, '>', $path) or die "cannot open $path: $!";

# Emit one comma-separated line per row: id,filename,state,filler
for my $i (0 .. $rows - 1) {
    my $name = 'file' . $i;

    # About 5% of the rows get one of the non-'done' states, picked
    # uniformly from indices 1 .. $#states; all other rows are 'done'.
    my $stateidx = 0;
    if (rand() > 0.95) {
        # int() makes the truncation of the random float explicit instead
        # of relying on the array subscript to do it.
        $stateidx = 1 + int(rand(scalar(@states) - 1));
    }
    my $state = $states[$stateidx];

    print {$fh} "$i,$name,$state,$filler\n"
        or die "cannot write to $path: $!";
}

# Buffered write errors (e.g. a full disk) may only surface at close time,
# so the result of close must be checked as well.
close($fh) or die "cannot close $path: $!";
