I tested bulk loading in Cassandra with CQLSSTableWriter and sstableloader.

It turns out that writing 1 million rows with sstableloader took over twice as 
long as inserting regularly with batch CQL statements from Java 
(cassandra-driver-core, version 2.0.0). Specifically, the call to 
sstableloader shown below took just over 12 minutes, while inserting with Java 
batch statements took just over 5 minutes.

I checked this twice and the same thing happened both times.

Is this expected?

    Thanks, Don

Here's the code (slightly edited and abbreviated):

import org.apache.cassandra.exceptions.InvalidRequestException;
import org.apache.cassandra.io.sstable.CQLSSTableWriter;

import java.io.IOException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.Random;

// sstableloader -v -d 10.12.2.91,10.12.2.92,10.12.2.93 /tmp/test/test_table

public class CreateLoadableSSTableCQL {
        ......
        // ---------------------------------
        private static void create(int count) throws IOException, 
InvalidRequestException {
                String schema = "CREATE TABLE test.test_table (id text PRIMARY 
KEY,  value text)";
                String insert = "INSERT INTO test.test_table (id, value) VALUES 
(?, ?)";

                CQLSSTableWriter writer = 
CQLSSTableWriter.builder().inDirectory("/tmp/test/test_table").forTable(schema)
                                .using(insert).build();
                for(int i=0;i<count;i++) {
                        
writer.addRow(makeRandomString(32),makeRandomString(100));
                }
                writer.close();
        }
        // ------------------------------
        public static void main(String [] args) {
                int count=1000000; //12.1 minutes using sstableloader on qa.  
5.1 minutes using regular batched inserts
                if (args.length>0) {
                        count=Integer.parseInt(args[0]);
                }
                try {
                        create(count);
                } catch (InvalidRequestException e) {
                        e.printStackTrace();
                } catch (IOException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                }
        }
}


Reply via email to