Hi everyone — can anyone tell me whether there is anything wrong with my code flow below? For each element read from the text file, I would like to run a query against a Hive table and persist the results into another Hive table. I want this to run in parallel for every element in the file. I would appreciate any input on this.
// Input file — one data-element (column) name per line:
//   $ cat /home/ajay/flds.txt
//   PHARMY_NPI_ID
//   ALT_SUPPLIER_STORE_NBR
//   MAIL_SERV_NBR
//
// Launched with:
//   spark-shell --name hivePersistTest --master yarn --deploy-mode client

val dataElementsFile = "/home/ajay/flds.txt"

// Read the element names, then close the reader: Source.fromFile(...).getLines
// without a close() leaks the underlying file handle. Trim and drop blank
// lines so a trailing newline in the file cannot produce a broken query.
val source = Source.fromFile(dataElementsFile)
val dataElements: Array[String] =
  try source.getLines().map(_.trim).filter(_.nonEmpty).toArray
  finally source.close()

/** Builds the per-element aggregation DataFrame against SPRINT2_TEST2.
  *
  * @param de data-element (column) name read from the input file
  * @return lazy DataFrame; nothing executes until an action (the insert) runs
  *
  * NOTE(review): `de` is spliced into the SQL text. Column names cannot be
  * bind parameters, so interpolation is unavoidable here — but if the file
  * can ever be user-supplied, validate each name against the table schema
  * first to avoid SQL injection.
  */
def calculateQuery(de: String): DataFrame =
  sqlContext.sql(
    s"""select 'UDA' as ds_nm, cyc_dt, supplier_proc_i as supplier_proc_id,
       |  '$de' as data_elm, $de as data_elm_val,
       |  count(1) as derx_val_cnt, current_timestamp as load_dt
       |from SPRINT2_TEST2
       |group by 'UDA', cyc_dt, supplier_proc_i, '$de', $de""".stripMargin)

/** Appends one element's result set into the target Hive table. Blocking:
  * returns only after the Spark job backing the insert has finished.
  */
def persistResults(calculatedQuery: DataFrame): Unit =
  calculatedQuery.write.insertInto("sprint2_stp1_test2")

// The original `dataElements.map(...).foreach(...)` ran the jobs one after
// another: each insertInto is a blocking action on the driver. `.par` gives
// each element its own driver thread so the Spark jobs are submitted
// concurrently and the cluster scheduler can interleave them. For fair
// sharing across the concurrent jobs, consider enabling the FAIR scheduler
// (spark.scheduler.mode=FAIR).
dataElements.par.foreach(de => persistResults(calculateQuery(de)))