Modified HadoopWriteOperatorDescriptor. The operator previously created SequenceFiles by opening an FSDataOutputStream directly. Although that produced the sequence file correctly, it is better to obtain the writer from the configured OutputFormat, which is what the operator now does.

git-svn-id: https://hyracks.googlecode.com/svn/trunk@202 123451ca-8445-de46-9d55-352943316053
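
For context, the pattern the operator now follows can be sketched with the old mapred API as below. This is a minimal sketch, not part of the patch: the class name, output path, and key/value types are illustrative assumptions, and the Progressable and close(null) usage mirror what the diff itself does.

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileOutputFormat;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.RecordWriter;
    import org.apache.hadoop.mapred.SequenceFileOutputFormat;
    import org.apache.hadoop.util.Progressable;

    public class OutputFormatWriterSketch {
        @SuppressWarnings("unchecked")
        public static void main(String[] args) throws Exception {
            JobConf conf = new JobConf();
            // Illustrative configuration; a real job would already carry these settings.
            conf.setOutputFormat(SequenceFileOutputFormat.class);
            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(IntWritable.class);
            FileOutputFormat.setOutputPath(conf, new Path("/tmp/sketch-output"));

            FileSystem fs = FileSystem.get(conf);
            // Ask the configured OutputFormat for a writer instead of opening an
            // FSDataOutputStream and building a SequenceFile.Writer by hand; the
            // OutputFormat then controls the on-disk format and compression.
            RecordWriter writer = conf.getOutputFormat().getRecordWriter(
                    fs, conf, "part-00000", new Progressable() {
                        @Override
                        public void progress() { /* no-op, as in the patch */ }
                    });
            writer.write(new Text("key"), new IntWritable(1));
            writer.close(null); // the old mapred API tolerates a null Reporter here
        }
    }

The benefit is that format-specific concerns (sequence-file headers, codecs, sync markers) stay inside the OutputFormat rather than being re-implemented by the operator.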
diff --git a/hyracks/hyracks-dataflow-hadoop/src/main/java/edu/uci/ics/hyracks/dataflow/hadoop/HadoopWriteOperatorDescriptor.java b/hyracks/hyracks-dataflow-hadoop/src/main/java/edu/uci/ics/hyracks/dataflow/hadoop/HadoopWriteOperatorDescriptor.java
index d9b41ce..95a4647 100644
--- a/hyracks/hyracks-dataflow-hadoop/src/main/java/edu/uci/ics/hyracks/dataflow/hadoop/HadoopWriteOperatorDescriptor.java
+++ b/hyracks/hyracks-dataflow-hadoop/src/main/java/edu/uci/ics/hyracks/dataflow/hadoop/HadoopWriteOperatorDescriptor.java
@@ -16,18 +16,13 @@
 
 import java.io.File;
 import java.io.IOException;
-import java.io.OutputStream;
 import java.util.HashMap;
 import java.util.Map;
 
-import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.SequenceFile.Writer;
-import org.apache.hadoop.io.compress.CompressionCodec;
-import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.hadoop.mapred.FileOutputFormat;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
@@ -41,23 +36,18 @@
 import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.ReflectionUtils;
 
-import edu.uci.ics.hyracks.api.exceptions.HyracksException;
 import edu.uci.ics.hyracks.api.job.JobSpecification;
 import edu.uci.ics.hyracks.dataflow.hadoop.util.DatatypeHelper;
 import edu.uci.ics.hyracks.dataflow.std.file.AbstractFileWriteOperatorDescriptor;
 import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
 import edu.uci.ics.hyracks.dataflow.std.file.IRecordWriter;
-import edu.uci.ics.hyracks.dataflow.std.file.RecordWriter;
 
 public class HadoopWriteOperatorDescriptor extends AbstractFileWriteOperatorDescriptor {
 
-    private static String nullWritableClassName = NullWritable.class.getName();
-
     private  class HadoopFileWriter implements IRecordWriter {
 
         Object recordWriter;
         JobConf conf;
-        final boolean useNewMapReduceLib;
         Path finalOutputFile;
         Path tempOutputFile;
       
@@ -67,12 +57,11 @@
             this.conf = conf;
             this.finalOutputFile = outputFile;
             this.tempOutputFile = tempOutputFile;
-            useNewMapReduceLib = conf.getUseNewMapper();
         }
 
         @Override
         public void write(Object[] record) throws Exception {
-            if (useNewMapReduceLib){
+            if (conf.getUseNewMapper()){
                 ((org.apache.hadoop.mapreduce.RecordWriter)recordWriter).write(record[0], record[1]);
             } else {
                 ((org.apache.hadoop.mapred.RecordWriter)recordWriter).write(record[0], record[1]);
@@ -82,12 +71,12 @@
         @Override
         public void close() {
             try {
-                if (useNewMapReduceLib){
+                if (conf.getUseNewMapper()){
                    ((org.apache.hadoop.mapreduce.RecordWriter)recordWriter).close(new TaskAttemptContext(conf, new TaskAttemptID()));
                 } else {
                     ((org.apache.hadoop.mapred.RecordWriter)recordWriter).close(null);
-                    FileSystem.get(conf).rename( tempOutputFile, finalOutputFile);
                 }
+                FileSystem.get(conf).rename( tempOutputFile, finalOutputFile);
             } catch (Exception e) {
                 e.printStackTrace();
             }
@@ -137,46 +126,34 @@
         Path finalOutputFile = null;
         checkIfCanWriteToHDFS(new FileSplit[] { fileSplit });
         Object recordWriter  = null;
-        boolean sequenceFileOutput = false;
         Object outputFormat = null;
+        String taskAttemptId = new TaskAttemptID().toString();
+        conf.set("mapred.task.id", taskAttemptId);
+        outputPath = new Path(conf.get("mapred.output.dir"));
+        outputTempPath = new Path(outputPath,"_temporary");
+        if(outputPath != null && !fileSystem.exists(outputPath)) {
+            fileSystem.mkdirs(outputTempPath);
+        }
+        String suffix = "part-r-00000";
+        suffix = suffix.substring(0, suffix.length() - ("" + index).length());
+        suffix = suffix + index;
+        tempOutputFile = new Path(outputTempPath, "_" + taskAttemptId + "/" + suffix);
+        if (conf.getNumReduceTasks() == 0) {
+            suffix = suffix.replace("-r-", "-m-");
+        }
+        finalOutputFile = new Path(outputPath,suffix);
         if(conf.getUseNewMapper()){
             org.apache.hadoop.mapreduce.OutputFormat newOutputFormat = (org.apache.hadoop.mapreduce.OutputFormat)ReflectionUtils.newInstance((new JobContext(conf,null)).getOutputFormatClass(),conf);
-            if(newOutputFormat instanceof org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat) {
-                 sequenceFileOutput = true;
-            }else{
                 recordWriter = newOutputFormat.getRecordWriter(new TaskAttemptContext(conf, new TaskAttemptID()));
-            }    
         }else {
-            if(conf.getOutputFormat() instanceof SequenceFileOutputFormat) {
-                sequenceFileOutput = true;
-            } else {
-                String taskAttempId = new TaskAttemptID().toString();
-                conf.set("mapred.task.id",taskAttempId);
-                outputPath = new Path(conf.get("mapred.output.dir"));
-                outputTempPath = new Path(outputPath,"_temporary");
-                if(outputPath != null && !fileSystem.exists(outputPath)) {
-                    fileSystem.mkdirs(outputTempPath);
-                }
-                String suffix = new String("part-00000");
-                suffix = new String(suffix.substring(0, suffix.length() - ("" + index).length()));
-                suffix = suffix + index;
-                tempOutputFile = new Path(outputTempPath,"_" + taskAttempId + "/" + suffix);
-                finalOutputFile = new Path(outputPath,suffix);
-                recordWriter = conf.getOutputFormat().getRecordWriter(fileSystem, conf,suffix, new Progressable() {
-                @Override
-                public void progress() {}
-                });
-            }
+            recordWriter = conf.getOutputFormat().getRecordWriter(fileSystem, conf, suffix, new Progressable() {
+                @Override
+                public void progress() {}
+            });
         }
-        if(!sequenceFileOutput) {
-            return new HadoopFileWriter(recordWriter,tempOutputFile,finalOutputFile,conf);
-        } else { 
-            Class keyClass = Class.forName(conf.getOutputKeyClass().getName());
-            Class valueClass = Class.forName(conf.getOutputValueClass().getName());
-            conf.setClass("mapred.output.compression.codec", GzipCodec.class, CompressionCodec.class);
-            Writer writer = SequenceFile.createWriter(fileSystem, conf, path, keyClass, valueClass);
-            return new HadoopSequenceWriter(writer);    
-        }   
+        return new HadoopFileWriter(recordWriter, tempOutputFile, finalOutputFile, conf);
     }
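
For reference, the temporary-file protocol the patch follows (write under _temporary/_<attempt-id>/, then rename into place when the writer is closed) can be sketched as below. The path names mirror the patch, but the surrounding class and output directory are illustrative assumptions.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.TaskAttemptID;

    public class TempOutputRenameSketch {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(conf);

            // Mirror the patch: side files go under <output>/_temporary/_<attempt-id>/
            // and are renamed to their final name only after the writer is closed,
            // so readers never observe a partially written output file.
            Path outputPath = new Path("/tmp/sketch-output");   // illustrative output dir
            String attemptId = new TaskAttemptID().toString();
            Path tempFile = new Path(outputPath, "_temporary/_" + attemptId + "/part-r-00000");
            Path finalFile = new Path(outputPath, "part-r-00000");

            fs.mkdirs(tempFile.getParent());
            fs.create(tempFile).close();        // stand-in for the record writer's output
            fs.rename(tempFile, finalFile);     // publish the file, as in close()
        }
    }

Note that the patch hoists the rename out of the old-API branch of close(), so both the mapred and mapreduce paths now publish their output the same way.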