Hi, I am using the code below to trigger a Spark job from a remote JVM.
import org.apache.hadoop.conf.Configuration;
import org.apache.spark.SparkConf;
import org.apache.spark.deploy.yarn.Client;
import org.apache.spark.deploy.yarn.ClientArguments;

/**
 * @version 1.0, 15-Jul-2015
 * @author ankit
 */
public class QueryEngineImpl implements IQueryEngine {

    SparkSqlEngine sqlEngine;

    public QueryEngineImpl(SparkSqlEngine sparkSqlEngine) {
        this.sqlEngine = sparkSqlEngine;
    }

    @Override
    public void executeQuery(String query, String resultLocation, String... parquetFileLocation) {
        String[] args = new String[] {
            // the name of your application
            "--name", "RemoteJVM",
            // memory for the driver (optional)
            "--driver-memory", "1000M",
            // path to your application's JAR file (required in yarn-cluster mode)
            "--jar", "hdfs://52.24.76.10:9000/ankit/SPARKSQLPOC-0.0.1-SNAPSHOT-jar-with-dependencies.jar",
            // name of your application's main class (required)
            "--class", "SparkSqlEngine",
            // arguments passed to the Spark program
            "--arg", query,
            "--arg", resultLocation,
            "--arg", parquetFileLocation[0],
            "--arg", "yarn-cluster"
        };

        Configuration conf = new Configuration();
        conf.set("yarn.resourcemanager.address", "52.24.76.10:9022");
        conf.set("HADOOP_HOME", "/home/hadoop");
        System.setProperty("SPARK_YARN_MODE", "true");

        SparkConf sparkConf = new SparkConf();
        System.out.println("SPARK CONF " + sparkConf.toDebugString());

        // create ClientArguments, which will be passed to the YARN Client
        ClientArguments cArgs = new ClientArguments(args, sparkConf);

        // create an instance of the YARN Client and submit the application
        Client client = new Client(cArgs, conf, sparkConf);
        client.run();
    }

    public static void main(String[] args) {
        QueryEngineImpl impl = new QueryEngineImpl(null);
        impl.executeQuery("select count(*) from parquetTable",
                "/tmp/ankit.txt",
                "s3n://AKIAJPLOFN3DM27DIIUQ:zKsFTopwgmu4zNdAfZ5Xe+Qe0XtbegHLTgy629VB@hadoop-poc-ashish/parquet");
    }
}

But I am getting the exception below.

23:08:09.268 [main] WARN org.apache.hadoop.hdfs.DFSClient - Failed to connect to /172.31.24.27:9200 for block, add to deadNodes and continue. org.apache.hadoop.net.ConnectTimeoutException: 60000 millis timeout while waiting for channel to be ready for connect. ch : java.nio.channels.SocketChannel[connection-pending remote=/172.31.24.27:9200]
org.apache.hadoop.net.ConnectTimeoutException: 60000 millis timeout while waiting for channel to be ready for connect.
ch : java.nio.channels.SocketChannel[connection-pending remote=/172.31.24.27:9200]
    at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:532) ~[hadoop-common-2.2.0.jar:na]
    at org.apache.hadoop.hdfs.DFSInputStream.newTcpPeer(DFSInputStream.java:955)
23:08:09.269 [main] WARN org.apache.hadoop.hdfs.DFSClient - DFS Read
org.apache.hadoop.hdfs.BlockMissingException: Could not obtain block: BP-511626939-172.31.24.27-1436185102368:blk_1073741964_126634 file=/ankit/SPARKSQLPOC-0.0.1-SNAPSHOT-jar-with-dependencies.jar
    at org.apache.hadoop.hdfs.DFSInputStream.chooseDataNode(DFSInputStream.java:838) [hadoop-hdfs-2.2.0.jar:na]
    at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:526) [hadoop-hdfs-2.2.0.jar:na]
    at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:749) [hadoop-hdfs-2.2.0.jar:na]
    at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:793) [hadoop-hdfs-2.2.0.jar:na]
    at java.io.DataInputStream.read(DataInputStream.java:100) [na:1.7.0_80]

Could you please suggest whether there is any other way to trigger a job from a remote JVM?
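For reference, the kind of alternative I am wondering about is the SparkLauncher API (org.apache.spark.launcher.SparkLauncher, available since Spark 1.4 as far as I know), which spawns spark-submit as a child process instead of calling the YARN Client classes directly, so it needs a Spark installation on the submitting machine. This is only a rough sketch of what I have in mind, with a hypothetical SPARK_HOME of /usr/local/spark and the S3 parquet location elided:

import org.apache.spark.launcher.SparkLauncher;

public class LauncherSketch {
    public static void main(String[] args) throws Exception {
        // Rough sketch: SparkLauncher shells out to spark-submit, so the
        // machine running this JVM needs its own Spark installation.
        Process spark = new SparkLauncher()
                .setSparkHome("/usr/local/spark") // hypothetical local Spark install
                .setAppResource("hdfs://52.24.76.10:9000/ankit/SPARKSQLPOC-0.0.1-SNAPSHOT-jar-with-dependencies.jar")
                .setMainClass("SparkSqlEngine")
                .setMaster("yarn-cluster")
                .setConf(SparkLauncher.DRIVER_MEMORY, "1000M")
                .addAppArgs("select count(*) from parquetTable", "/tmp/ankit.txt", "s3n://.../parquet")
                .launch();
        int exitCode = spark.waitFor(); // block until spark-submit exits
        System.out.println("spark-submit exited with code " + exitCode);
    }
}

I have not verified this for my setup; I am mainly asking whether this (or something else) is the recommended way to submit from a remote JVM.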