Dear team,

FYI:we have a 4 quad core intel processor on each server on 2 node cluster with 
more than 1 TB of storage
I Ihave constructed the  2 node physical machine riak  cluster with n_val 2 and 
my app.config ,vm.args are attached for your reference..

Please tell me where the bulk inserted data onto riak db gets stored on Local 
file system...its taking  huge time to load small size itself...how to tune it 
to perform to large scale since we deal wit hbigdata of in few hungred 
GB's?????????????????

Cmd used:time ./load_data1m Customercalls1m.csv

./load_data100m CustomerCalls100m(got this error so changed default config of 
app.config...from 8 MB to 3072 MB
escript: exception error: no match of right hand side value {error,enoent}


size

Load time

No of mappersonapp.config

Js-max-vm-mem on app.config

Js-thread-stack

100k(10,lakhrows)-5 MB

20m39.625 seconds

48

3 GB 3072MB(changedfromdefault 8MB)since i/p data is large)

3 GB 3072MB(changedfromdefault 8MB)since i/p data is large)

1millionrows---54 MB

198m42.375seconds

48

3 GB 3072MB(changedfromdefault 8MB)since i/p data is large)

3 GB 3072MB(changedfromdefault 8MB)since i/p data is large)

.


./load_data script used:

#!/usr/local/bin/escript
main([Filename]) ->
    {ok, Data} = file:read_file(Filename),
    Lines = tl(re:split(Data, "\r?\n", [{return, binary},trim])),
    lists:foreach(fun(L) -> LS = re:split(L, ","), format_and_insert(LS) end, 
Lines).

format_and_insert(Line) ->
    JSON = 
io_lib:format("{\"id\":\"~s\",\"phonenumber\":~s,\"callednumber\":~s,\"starttime\":~s,\"endtime\":~s,\"status\":~s}",
 Line),
    Command = io_lib:format("curl -X PUT 
http://10.232.5.169:8098/riak/CustomerCalls100k/~s -d '~s' -H 'content-type: 
application/json'", [hd(Line),JSON]),
    io:format("Inserting: ~s~n", [hd(Line)]),
    os:cmd(Command).



Thanks in advance!!!!!!!!!!waiting fr  the reply...plz anyone help..struck u 
pwit hbulk loading.....and make me clear how riak splits the data and gets 
loaded on cluster
Thanks & regards
sangeetha


This e-mail and any files transmitted with it are for the sole use of the 
intended recipient(s) and may contain confidential and privileged information. 
If you are not the intended recipient(s), please reply to the sender and 
destroy all copies of the original message. Any unauthorized review, use, 
disclosure, dissemination, forwarding, printing or copying of this email, 
and/or any action taken in reliance on the contents of this e-mail is strictly 
prohibited and may be unlawful.
%% -*- tab-width: 4;erlang-indent-level: 4;indent-tabs-mode: nil -*-
%% ex: ts=4 sw=4 et
[
 %% Riak Core config
 {riak_core, [
              %% Default location of ringstate
              {ring_state_dir, "/var/lib/riak/ring"},

              %% http is a list of IP addresses and TCP ports that the Riak
              %% HTTP interface will bind.
              {http, [ {"10.232.5.169", 8098 } ]},

              %% https is a list of IP addresses and TCP ports that the Riak
              %% HTTPS interface will bind.
              %{https, [{ "10.232.15.169", 8098 }]},

              %% Default cert and key locations for https can be overridden
              %% with the ssl config variable, for example:
              %{ssl, [
              %       {certfile, "/etc/riak/cert.pem"},
              %       {keyfile, "/etc/riak/key.pem"}
              %      ]},
              
              %% riak_handoff_port is the TCP port that Riak uses for
              %% intra-cluster data handoff.
              {handoff_port, 8099 },

              %% To encrypt riak_core intra-cluster data handoff traffic,
              %% uncomment the following line and edit its path to an
              %% appropriate certfile and keyfile.  (This example uses a
              %% single file with both items concatenated together.)
              %{handoff_ssl_options, [{certfile, "/tmp/erlserver.pem"}]},

              %% Platform-specific installation paths (substituted by rebar)
              {platform_bin_dir, "/usr/sbin"},
              {platform_data_dir, "/var/lib/riak"},
              {platform_etc_dir, "/etc/riak"},
              {platform_lib_dir, "/usr/lib64/riak"},
              {platform_log_dir, "/var/log/riak"},
                {target_n_val, 2}
             ]},

 %% Riak KV config
 {riak_kv, [
            %% Storage_backend specifies the Erlang module defining the storage
            %% mechanism that will be used on this node.
            {storage_backend, riak_kv_bitcask_backend},

            %% pb_ip is the IP address that the Riak Protocol Buffers interface
            %% will bind to.  If this is undefined, the interface will not run.
            {pb_ip,   "127.0.0.1" },

            %% pb_port is the TCP port that the Riak Protocol Buffers interface
            %% will bind to
            {pb_port, 8087 },

            %% pb_backlog is the maximum length to which the queue of pending
            %% connections may grow. If set, it must be an integer >= 0.
            %% By default the value is 5. If you anticipate a huge number of
            %% connections being initialised *simultaneously*, set this number
            %% higher.
            %% {pb_backlog, 64},

            %% raw_name is the first part of all URLS used by the Riak raw HTTP
            %% interface.  See riak_web.erl and raw_http_resource.erl for
            %% details.
            %{raw_name, "riak"},

            %% mapred_name is URL used to submit map/reduce requests to Riak.
            {mapred_name, "mapred"},

            %% mapred_system indicates which version of the MapReduce
            %% system should be used: 'pipe' means riak_pipe will
            %% power MapReduce queries, while 'legacy' means that luke
            %% will be used
            {mapred_system, pipe},

            %% directory used to store a transient queue for pending
            %% map tasks
            %% Only valid when mapred_system == legacy
            %% {mapred_queue_dir, "/var/lib/riak/mr_queue" },

            %% Each of the following entries control how many Javascript
            %% virtual machines are available for executing map, reduce,
            %% pre- and post-commit hook functionnaged chan map8red6 to 24,18.
           {map_js_vm_count, 48},
            {reduce_js_vm_count, 18 },
            {hook_js_vm_count, 2 },

            %% Number of items the mapper will fetch in one request.
            %% Larger values can impact read/write performance for
            %% non-MapReduce requests.
            %% Only valid when mapred_system == legacy
            %% {mapper_batch_size, 5},

            %% js_max_vm_mem is the maximum amount of memory, in megabytes,
            %% allocated to the Javascript VMs. If unset, the default is
            %% 8MB.
          {js_max_vm_mem, 3072},

            %% js_thread_stack is the maximum amount of thread stack, in 
megabyes,
            %% allocate to the Javascript VMs. If unset, the default is 16MB.
            %% NOTE: This is not the same as the C thread stack.
            {js_thread_stack, 3072},

            %% Number of objects held in the MapReduce cache. These will be
            %% ejected when the cache runs out of room or the bucket/key
            %% pair for that entry changes
            %% Only valid when mapred_system == legacy
            %% {map_cache_size, 10000},

            %% js_source_dir should point to a directory containing Javascript
            %% source files which will be loaded by Riak when it initializes
            %% Javascript VMs.
            %{js_source_dir, "/tmp/js_source"},

            %% http_url_encoding determines how Riak treats URL encoded
            %% buckets, keys, and links over the REST API. When set to 'on'
            %% Riak always decodes encoded values sent as URLs and Headers.
            %% Otherwise, Riak defaults to compatibility mode where links
            %% are decoded, but buckets and keys are not. The compatibility
            %% mode will be removed in a future release.
            {http_url_encoding, on},

            %% riak_stat enables the use of the "riak-admin status" command to
            %% retrieve information the Riak node for performance and debugging 
needs
            {riak_kv_stat, true},
            {legacy_stats, false},

            %% Switch to vnode-based vclocks rather than client ids.  This
            %% significantly reduces the number of vclock entries.
            %% Only set true if *all* nodes in the cluster are upgraded to 1.0
            {vnode_vclocks, true},

            %% This option enables compatability of bucket and key listing
            %% with 0.14 and earlier versions. Once a rolling upgrade to
            %% a version > 0.14 is completed for a cluster, this should be
            %% set to false for improved performance for bucket and key
            %% listing operations.
            {legacy_keylisting, false}
           ]},

 %% Riak Search Config
 {riak_search, [
                %% To enable Search functionality set this 'true'.
                {enabled, true}
               ]},

 %% Merge Index Config
 {merge_index, [
                %% The root dir to store search merge_index data
                {data_root, "/var/lib/riak/merge_index"},

                %% The root dir to store secondary index merge_index data
                {data_root_2i, "/var/lib/riak/merge_index_2i"},

                %% Size, in bytes, of the in-memory buffer.  When this
                %% threshold has been reached the data is transformed
                %% into a segment file which resides on disk.
                {buffer_rollover_size, 1048576},

                %% Overtime the segment files need to be compacted.
                %% This is the maximum number of segments that will be
                %% compacted at once.  A lower value will lead to
                %% quicker but more frequent compactions.
                {max_compact_segments, 20}
               ]},

 %% Bitcask Config
 {bitcask, [
             {data_root, "/var/lib/riak/bitcask"}
           ]},

 %% eLevelDB Config
 {eleveldb, [
             {data_root, "/var/lib/riak/leveldb"}
            ]},

 %% Luwak Config
 {luwak, [
             {enabled, false}
         ]},

{lager, [
            %% What handlers to install with what arguments
            {handlers, [
                {lager_console_backend, info},
                {lager_file_backend, [
                    {"/var/log/riak/error.log", error},
                    {"/var/log/riak/console.log", info}
                ]}
            ]},
            %% Whether to write a crash log, and where.
            %% Commented/omitted/undefined means no crash logger.
            {crash_log, "/var/log/riak/crash.log"},
            %% Maximum size in bytes of events in the crash log.
            %% Default is 64kb.
            {crash_log_size, 65536},
            %% Whether to redirect error_logger messages into lager - defaults 
to true
            {error_logger_redirect, true}
        ]},

 %% riak_sysmon config
 {riak_sysmon, [
         %% To disable forwarding events of a particular type, use a
         %% limit of 0.
         {process_limit, 30},
         {port_limit, 30},

         %% Finding reasonable limits for a given workload is a matter
         %% of experimentation.
         {gc_ms_limit, 50},
         {heap_word_limit, 10485760}
        ]},

 %% SASL config
 {sasl, [
         {sasl_error_logger, false}
        ]}
].
## Name of the riak node
-name riak@10.232.5.169

## Cookie for distributed erlang.  All nodes in the same cluster
## should use the same cookie or they will not be able to communicate.
-setcookie riak

## Heartbeat management; auto-restarts VM if it dies or becomes unresponsive
## (Disabled by default..use with caution!)
##-heart

## Enable kernel poll and a few async threads
+K true
+A 64

## Treat error_logger warnings as warnings
+W w

## Increase number of concurrent ports/sockets
-env ERL_MAX_PORTS 4096

## Tweak GC to run more often 
-env ERL_FULLSWEEP_AFTER 0

## Set the location of crash dumps
-env ERL_CRASH_DUMP /var/log/riak/erl_crash.dump

## Begin SSL distribution items, DO NOT DELETE OR EDIT THIS COMMENT

## To enable SSL encryption of the Erlang intra-cluster communication,
## un-comment the three lines below and make certain that the paths
## point to correct PEM data files.  See docs TODO for details.

## -proto_dist inet_ssl
## -ssl_dist_opt client_certfile "/etc/riak/erlclient.pem"
## -ssl_dist_opt server_certfile "/etc/riak/erlserver.pem"

## End SSL distribution items, DO NOT DELETE OR EDIT THIS COMMENT


_______________________________________________
riak-users mailing list
riak-users@lists.basho.com
http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com

Reply via email to