I generally have to put them in my Hadoop classpath or the hive-0.8.0/auxlib folder. I never quite understood why, but the classpath that reads the data seems different from the classpath that processes the data. Hence the distinction between add jar and auxlib.
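Roughly what I do (off the top of my head, so treat it as a sketch -- I'm reusing the jar path from your session and assuming HIVE_HOME points at your hive-0.8.0 install):

    # drop the custom InputFormat jar where every Hive/Hadoop JVM will pick it up
    cp /home/biadmin/hiveudf/myFileFormat.jar $HIVE_HOME/auxlib/

    # or point the CLI at it explicitly when you start it
    hive --auxpath /home/biadmin/hiveudf/myFileFormat.jar

    # or set it in the environment before launching hive
    export HIVE_AUX_JARS_PATH=/home/biadmin/hiveudf/myFileFormat.jar

Any one of those should be enough to get past the "Cannot create an instance of InputFormat" error, then restart the CLI and re-run the select.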
Edward

On Tue, Jan 17, 2012 at 6:39 AM, Bing Li <smallpu...@gmail.com> wrote:
> My Steps:
> I define a class "public class myInputFormat extends TextInputFormat
> implements JobConfigurable" to specify input format.
>
> hive> add jar /home/biadmin/hiveudf/myFileFormat.jar;
> Added /home/biadmin/hiveudf/myFileFormat.jar to class path
> Added resource: /home/biadmin/hiveudf/myFileFormat.jar
>
> hive> list jars;
> /home/biadmin/hiveudf/myFileFormat.jar
>
> hive> create table IOtable(str1 string, str2 string, str3 string) stored
> as INPUTFORMAT 'com.mytest.fileformat.myInputFormat' OUTPUTFORMAT
> 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat' ;
> OK
> Time taken: 0.081 seconds
>
> hive> load data local inpath '/home/biadmin/hivetbl/IOtable_data.txt' into
> table IOtable;
> Copying data from file:/home/biadmin/hivetbl/IOtable_data.txt
> Copying file: file:/home/biadmin/hivetbl/IOtable_data.txt
> Loading data to table default.iotable
> OK
> Time taken: 0.147 seconds
>
> hive> select * from IOtable;
> OK
> Failed with exception java.io.IOException:java.io.IOException: Cannot
> create an instance of InputFormat class com.mytest.fileformat.myInputFormat
> as specified in mapredWork!
> Time taken: 0.059 seconds
>
> *Here is my source code :*
> ===============================
> package com.mytest.fileformat;
>
> import java.io.IOException;
>
> import org.apache.hadoop.io.LongWritable;
> import org.apache.hadoop.io.Text;
> import org.apache.commons.logging.Log;
> import org.apache.commons.logging.LogFactory;
> import org.apache.hadoop.mapred.FileSplit;
> import org.apache.hadoop.mapred.InputSplit;
> import org.apache.hadoop.mapred.JobConf;
> import org.apache.hadoop.mapred.JobConfigurable;
> import org.apache.hadoop.mapred.LineRecordReader;
> import org.apache.hadoop.mapred.RecordReader;
> import org.apache.hadoop.mapred.Reporter;
> import org.apache.hadoop.mapred.InputFormat;
> import org.apache.hadoop.mapred.TextInputFormat;
>
> @SuppressWarnings("deprecation")
> public class myInputFormat extends TextInputFormat implements JobConfigurable {
>     TextInputFormat format;
>     JobConf job;
>
>     public myInputFormat() {
>         format = new TextInputFormat();
>     }
>
>     @Override
>     public void configure(JobConf job) {
>         this.job = job;
>         format.configure(job);
>     }
>
>     public RecordReader<LongWritable, Text> getRecordReader(
>             InputSplit genericSplit, JobConf job, Reporter reporter)
>             throws IOException {
>         reporter.setStatus(genericSplit.toString());
>         return new myLineRecordReader(job, (FileSplit) genericSplit);
>     }
>
>     public static class myLineRecordReader implements
>             RecordReader<LongWritable, Text> {
>         LineRecordReader lineReader;
>         LongWritable lineKey;
>         Text lineValue;
>
>         public myLineRecordReader(JobConf job, FileSplit split) throws IOException {
>             lineReader = new LineRecordReader(job, split);
>             lineKey = lineReader.createKey();
>             lineValue = lineReader.createValue();
>         }
>
>         public boolean next(LongWritable key, Text value) throws IOException {
>             while (lineReader.next(lineKey, lineValue)) {
>                 String strReplace = lineValue.toString().toLowerCase().replace("$$$$", "\001");
>                 Text txtReplace = new Text();
>                 txtReplace.set(strReplace);
>                 value.set(txtReplace.getBytes(), 0, txtReplace.getLength());
>                 return true;
>             }
>             // no more data
>             return false;
>         } /** end next **/
>
>         public LongWritable createKey() {
>             return lineReader.createKey();
>         }
>
>         public Text createValue() {
>             return lineReader.createValue();
>         }
>
>         public long getPos() throws IOException {
>             return lineReader.getPos();
>         }
>
>         public float getProgress() throws IOException {
>             return lineReader.getProgress();
>         }
>
>         public void close() throws IOException {
>             lineReader.close();
>         }
>     } /** end class myLineRecordReader **/
> }