I tested bulk loading in Cassandra with CQLSSTableWriter and sstableloader.
It turns out that writing 1 millions rows with sstableloader took over twice as long as inserting regularly with batch CQL statements from Java (cassandra-driver-core, version 2.0.0). Specifically, the call to sstableloader shown below took just over 12 minutes, while inserting with Java batch statements took just over 5 minutes. I checked this twice and the same thing happened both times. Is this expected? Thanks, Don Here's the code (slightly edited and abbreviated): import org.apache.cassandra.exceptions.InvalidRequestException; import org.apache.cassandra.io.sstable.CQLSSTableWriter; import java.io.IOException; import java.net.URL; import java.net.URLClassLoader; import java.util.Random; // sstableloader -v -d 10.12.2.91,10.12.2.92,10.12.2.93 /tmp/test/test_table public class CreateLoadableSSTableCQL { ...... // --------------------------------- private static void create(int count) throws IOException, InvalidRequestException { String schema = "CREATE TABLE test.test_table (id text PRIMARY KEY, value text)"; String insert = "INSERT INTO test.test_table (id, value) VALUES (?, ?)"; CQLSSTableWriter writer = CQLSSTableWriter.builder().inDirectory("/tmp/test/test_table").forTable(schema) .using(insert).build(); for(int i=0;i<count;i++) { writer.addRow(makeRandomString(32),makeRandomString(100)); } writer.close(); } // ------------------------------ public static void main(String [] args) { int count=1000000; //12.1 minutes using sstableloader on qa. 5.1 minutes using regular batched inserts if (args.length>0) { count=Integer.parseInt(args[0]); } try { create(count); } catch (InvalidRequestException e) { e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }