Merged -r 438:524 from trunk into branch
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_indexes@525 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-hadoop-compat/pom.xml b/hyracks-hadoop-compat/pom.xml
index 26b7bd6..44eb842 100644
--- a/hyracks-hadoop-compat/pom.xml
+++ b/hyracks-hadoop-compat/pom.xml
@@ -2,12 +2,12 @@
<modelVersion>4.0.0</modelVersion>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-hadoop-compat</artifactId>
- <version>0.1.5</version>
+ <version>0.1.7-SNAPSHOT</version>
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.1.5</version>
+ <version>0.1.7-SNAPSHOT</version>
</parent>
<build>
@@ -79,7 +79,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-hadoop</artifactId>
- <version>0.1.5</version>
+ <version>0.1.7-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks-hadoop-compat/src/main/java/edu/uci/ics/hyracks/hadoop/compat/client/HyracksClient.java b/hyracks-hadoop-compat/src/main/java/edu/uci/ics/hyracks/hadoop/compat/client/HyracksClient.java
index e4daf0b..a363221 100644
--- a/hyracks-hadoop-compat/src/main/java/edu/uci/ics/hyracks/hadoop/compat/client/HyracksClient.java
+++ b/hyracks-hadoop-compat/src/main/java/edu/uci/ics/hyracks/hadoop/compat/client/HyracksClient.java
@@ -1,93 +1,91 @@
package edu.uci.ics.hyracks.hadoop.compat.client;
-import java.io.File;
-import java.util.List;
+import java.util.EnumSet;
+import java.util.HashSet;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
-import org.apache.hadoop.mapred.JobConf;
-
-import edu.uci.ics.hyracks.hadoop.compat.util.ConfigurationConstants;
-import edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter;
-import edu.uci.ics.hyracks.hadoop.compat.util.Utilities;
-import edu.uci.ics.hyracks.hadoop.compat.client.HyracksRunningJob;
import edu.uci.ics.hyracks.api.client.HyracksRMIConnection;
+import edu.uci.ics.hyracks.api.job.JobFlag;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.api.job.JobStatus;
+import edu.uci.ics.hyracks.hadoop.compat.util.ConfigurationConstants;
+import edu.uci.ics.hyracks.hadoop.compat.util.Utilities;
public class HyracksClient {
- private HadoopAdapter hadoopAdapter;
- private static HyracksRMIConnection connection;
- private static final String applicationName = "CompatibilityLayer";
+ private static HyracksRMIConnection connection;
+ private static final String jobProfilingKey = "jobProfilingKey";
+ Set<String> systemLibs;
- public HyracksClient(String clusterConf) throws Exception {
- Properties properties = Utilities.getProperties(clusterConf, '=');
- String clusterController = (String) properties.get(ConfigurationConstants.clusterControllerHost);
- String fileSystem = (String) properties.get(ConfigurationConstants.namenodeURL);
- initialize(clusterController, fileSystem);
- }
+ public HyracksClient(Properties clusterProperties) throws Exception {
+ initialize(clusterProperties);
+ }
- public HyracksClient(String clusterControllerAddr, String fileSystem) throws Exception {
- initialize(clusterControllerAddr, fileSystem);
- }
+ private void initialize(Properties properties) throws Exception {
+ String clusterController = (String) properties
+ .get(ConfigurationConstants.clusterControllerHost);
+ connection = new HyracksRMIConnection(clusterController, 1099);
+ systemLibs = new HashSet<String>();
+ for (String systemLib : ConfigurationConstants.systemLibs) {
+ String systemLibPath = properties.getProperty(systemLib);
+ if (systemLibPath != null) {
+ systemLibs.add(systemLibPath);
+ }
+ }
+ }
- private void initialize(String clusterControllerAddr, String namenodeUrl) throws Exception {
- connection = new HyracksRMIConnection(clusterControllerAddr, 1099);
- connection.destroyApplication(applicationName);
- hadoopAdapter = new HadoopAdapter(namenodeUrl);
- }
+ public HyracksClient(String clusterConf, char delimiter) throws Exception {
+ Properties properties = Utilities.getProperties(clusterConf, delimiter);
+ initialize(properties);
+ }
- public HyracksRunningJob submitJobs(List<JobConf> confs, Set<String> requiredLibs) throws Exception {
- JobSpecification spec = hadoopAdapter.getJobSpecification(confs);
- String appName = getApplicationNameHadoopJob(confs.get(0));
- return submitJob(appName,spec, requiredLibs);
- }
+ private Set<String> getRequiredLibs(Set<String> userLibs) {
+ Set<String> requiredLibs = new HashSet<String>();
+ for (String systemLib : systemLibs) {
+ requiredLibs.add(systemLib);
+ }
+ for (String userLib : userLibs) {
+ requiredLibs.add(userLib);
+ }
+ return requiredLibs;
+ }
- private String getApplicationNameHadoopJob(JobConf jobConf) {
- String jar = jobConf.getJar();
- if( jar != null){
- return jar.substring(jar.lastIndexOf("/") >=0 ? jar.lastIndexOf("/") +1 : 0);
- }else {
- return "" + System.currentTimeMillis();
- }
- }
-
- public HyracksRunningJob submitJob(JobConf conf, Set<String> requiredLibs) throws Exception {
- JobSpecification spec = hadoopAdapter.getJobSpecification(conf);
- String appName = getApplicationNameHadoopJob(conf);
- return submitJob(appName, spec, requiredLibs);
- }
+ public JobStatus getJobStatus(UUID jobId) throws Exception {
+ return connection.getJobStatus(jobId);
+ }
- public JobStatus getJobStatus(UUID jobId) throws Exception {
- return connection.getJobStatus(jobId);
- }
+ private void createApplication(String applicationName, Set<String> userLibs)
+ throws Exception {
+ connection.createApplication(applicationName, Utilities
+ .getHyracksArchive(applicationName, getRequiredLibs(userLibs)));
+ }
- public HyracksRunningJob submitJob(String applicationName, JobSpecification spec, Set<String> requiredLibs) throws Exception {
- UUID jobId = null;
- try {
- jobId = connection.createJob(applicationName, spec);
- } catch (Exception e){
- System.out.println(" application not found, creating application" + applicationName);
- connection.createApplication(applicationName, Utilities.getHyracksArchive(applicationName, requiredLibs));
- jobId = connection.createJob(applicationName, spec);
- }
- connection.start(jobId);
- HyracksRunningJob runningJob = new HyracksRunningJob(jobId, spec, this);
- return runningJob;
- }
+ public HyracksRunningJob submitJob(String applicationName,
+ JobSpecification spec) throws Exception {
+ String jobProfilingVal = System.getenv(jobProfilingKey);
+ boolean doProfiling = ("true".equalsIgnoreCase(jobProfilingVal));
+ UUID jobId;
+ if (doProfiling) {
+ System.out.println("PROFILING");
+ jobId = connection.createJob(applicationName, spec, EnumSet
+ .of(JobFlag.PROFILE_RUNTIME));
+ } else {
+ jobId = connection.createJob(applicationName, spec);
+ }
+ connection.start(jobId);
+ HyracksRunningJob runningJob = new HyracksRunningJob(jobId, spec, this);
+ return runningJob;
+ }
- public HadoopAdapter getHadoopAdapter() {
- return hadoopAdapter;
- }
+ public HyracksRunningJob submitJob(String applicationName,
+ JobSpecification spec, Set<String> userLibs) throws Exception {
+ createApplication(applicationName, userLibs);
+ return submitJob(applicationName, spec);
+ }
- public void setHadoopAdapter(HadoopAdapter hadoopAdapter) {
- this.hadoopAdapter = hadoopAdapter;
- }
-
- public void waitForCompleton(UUID jobId) throws Exception {
- connection.waitForCompletion(jobId);
- }
-
+ public void waitForCompleton(UUID jobId) throws Exception {
+ connection.waitForCompletion(jobId);
+ }
}
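
Note: a minimal usage sketch of the reworked client follows. It assumes a cluster properties file whose keys match ConfigurationConstants (cluster controller host plus optional system-library paths); the file path, application name, jar path and the buildSpec() helper are placeholders. Per the code above, exporting the environment variable jobProfilingKey=true makes submitJob request JobFlag.PROFILE_RUNTIME.

    import java.util.HashSet;
    import java.util.Properties;
    import java.util.Set;

    import edu.uci.ics.hyracks.api.job.JobSpecification;
    import edu.uci.ics.hyracks.hadoop.compat.client.HyracksClient;
    import edu.uci.ics.hyracks.hadoop.compat.client.HyracksRunningJob;
    import edu.uci.ics.hyracks.hadoop.compat.util.Utilities;

    public class HyracksClientSketch {
        public static void main(String[] args) throws Exception {
            // Placeholder path; '=' matches the delimiter the driver uses.
            Properties clusterProps = Utilities.getProperties("conf/cluster.properties", '=');
            HyracksClient client = new HyracksClient(clusterProps);

            Set<String> userLibs = new HashSet<String>();
            userLibs.add("/path/to/user-udfs.jar"); // hypothetical user jar

            // submitJob(appName, spec, userLibs) bundles system + user libs into
            // an application archive, deploys it, then creates and starts the job.
            JobSpecification spec = buildSpec(); // placeholder: supply a real spec
            HyracksRunningJob running = client.submitJob("myApp", spec, userLibs);
            client.waitForCompleton(running.getJobId());
        }

        private static JobSpecification buildSpec() {
            // Placeholder; e.g. HadoopAdapter.getJobSpecification(...) builds one.
            throw new UnsupportedOperationException("provide a JobSpecification");
        }
    }
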
diff --git a/hyracks-hadoop-compat/src/main/java/edu/uci/ics/hyracks/hadoop/compat/driver/CompatibilityLayer.java b/hyracks-hadoop-compat/src/main/java/edu/uci/ics/hyracks/hadoop/compat/driver/CompatibilityLayer.java
index 0b96041..37f4d34 100644
--- a/hyracks-hadoop-compat/src/main/java/edu/uci/ics/hyracks/hadoop/compat/driver/CompatibilityLayer.java
+++ b/hyracks-hadoop-compat/src/main/java/edu/uci/ics/hyracks/hadoop/compat/driver/CompatibilityLayer.java
@@ -1,5 +1,6 @@
package edu.uci.ics.hyracks.hadoop.compat.driver;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -25,175 +26,183 @@
public class CompatibilityLayer {
- HyracksClient hyracksClient;
- DCacheHandler dCacheHander = null;
- Properties clusterConf;
- Set<String> systemLibs;
+ HyracksClient hyracksClient;
+ DCacheHandler dCacheHander = null;
+ Properties clusterConf;
+ HadoopAdapter hadoopAdapter;
- private static char configurationFileDelimiter = '=';
- private static final String dacheKeyPrefix = "dcache.key";
+ private static char configurationFileDelimiter = '=';
+ private static final String dacheKeyPrefix = "dcache.key";
- public CompatibilityLayer(CompatibilityConfig clConfig) throws Exception {
- initialize(clConfig);
- }
+ public CompatibilityLayer(CompatibilityConfig clConfig) throws Exception {
+ initialize(clConfig);
+ }
- public HyracksRunningJob submitJobs(String[] jobFiles, Set<String> userLibs) throws Exception {
- Set<String> requiredLibs = getRequiredLibs(userLibs);
- List<JobConf> jobConfs = constructHadoopJobConfs(jobFiles);
- Map<String, String> dcacheTasks = preparePreLaunchDCacheTasks(jobFiles[0]);
- String tempDir = "/tmp";
- if (dcacheTasks.size() > 0) {
- HadoopAdapter hadoopAdapter = hyracksClient.getHadoopAdapter();
- for (String key : dcacheTasks.keySet()) {
- String destPath = tempDir + "/" + key + System.currentTimeMillis();
- hadoopAdapter.getHDFSClient().copyToLocalFile(new Path(dcacheTasks.get(key)), new Path(destPath));
- System.out.println(" source :" + dcacheTasks.get(key));
- System.out.println(" dest :" + destPath);
- System.out.println(" key :" + key);
- System.out.println(" value :" + destPath);
- dCacheHander.put(key, destPath);
- }
- }
- HyracksRunningJob hyraxRunningJob = hyracksClient.submitJobs(jobConfs, requiredLibs);
- return hyraxRunningJob;
- }
+ private void initialize(CompatibilityConfig clConfig) throws Exception {
+ clusterConf = Utilities.getProperties(clConfig.clusterConf,
+ configurationFileDelimiter);
+ hadoopAdapter = new HadoopAdapter(clusterConf
+ .getProperty(ConfigurationConstants.namenodeURL));
+ hyracksClient = new HyracksClient(clusterConf);
+ dCacheHander = new DCacheHandler(clusterConf
+ .getProperty(ConfigurationConstants.dcacheServerConfiguration));
+ }
- private Set<String> getRequiredLibs(Set<String> userLibs) {
- Set<String> requiredLibs = new HashSet<String>();
- for (String systemLib : systemLibs) {
- requiredLibs.add(systemLib);
- }
- for (String userLib : userLibs) {
- requiredLibs.add(userLib);
- }
- return requiredLibs;
- }
+ public HyracksRunningJob submitJob(JobConf conf,Set<String> userLibs) throws Exception {
+ List<JobConf> jobConfs = new ArrayList<JobConf>();
+ jobConfs.add(conf);
+ String applicationName = conf.getJobName() + System.currentTimeMillis();
+ JobSpecification spec = hadoopAdapter.getJobSpecification(jobConfs);
+ HyracksRunningJob hyracksRunningJob = hyracksClient.submitJob(
+ applicationName, spec, userLibs);
+ return hyracksRunningJob;
+ }
+
+ public HyracksRunningJob submitJobs(String applicationName,
+ String[] jobFiles, Set<String> userLibs) throws Exception {
+ List<JobConf> jobConfs = constructHadoopJobConfs(jobFiles);
+ populateDCache(jobFiles[0]);
+ JobSpecification spec = hadoopAdapter.getJobSpecification(jobConfs);
+ HyracksRunningJob hyracksRunningJob = hyracksClient.submitJob(
+ applicationName, spec, userLibs);
+ return hyracksRunningJob;
+ }
- private void initialize(CompatibilityConfig clConfig) throws Exception {
- clusterConf = Utilities.getProperties(clConfig.clusterConf, configurationFileDelimiter);
- systemLibs = new HashSet<String>();
- for (String systemLib : ConfigurationConstants.systemLibs) {
- String systemLibPath = clusterConf.getProperty(systemLib);
- if (systemLibPath != null) {
- systemLibs.add(systemLibPath);
- }
- }
- String clusterControllerHost = clusterConf.getProperty(ConfigurationConstants.clusterControllerHost);
- String dacheServerConfiguration = clusterConf.getProperty(ConfigurationConstants.dcacheServerConfiguration);
- String fileSystem = clusterConf.getProperty(ConfigurationConstants.namenodeURL);
- hyracksClient = new HyracksClient(clusterControllerHost, fileSystem);
- try {
- dCacheHander = new DCacheHandler(dacheServerConfiguration);
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
+ public HyracksRunningJob submitJobs(String applicationName,
+ String[] jobFiles) throws Exception {
+ List<JobConf> jobConfs = constructHadoopJobConfs(jobFiles);
+ populateDCache(jobFiles[0]);
+ JobSpecification spec = hadoopAdapter.getJobSpecification(jobConfs);
+ HyracksRunningJob hyracksRunningJob = hyracksClient.submitJob(
+ applicationName, spec);
+ return hyracksRunningJob;
+ }
- private Map<String, String> initializeCustomProperties(Properties properties, String prefix) {
- Map<String, String> foundProperties = new HashMap<String, String>();
- Set<Entry<Object, Object>> entrySet = properties.entrySet();
- for (Entry entry : entrySet) {
- String key = (String) entry.getKey();
- String value = (String) entry.getValue();
- if ((key.startsWith(prefix))) {
- String actualKey = key.substring(prefix.length() + 1); // "cut off '<prefix>.' from the beginning"
- foundProperties.put(actualKey, value);
- }
- }
- return foundProperties;
- }
+ private void populateDCache(String jobFile) throws IOException {
+ Map<String, String> dcacheTasks = preparePreLaunchDCacheTasks(jobFile);
+ String tempDir = "/tmp";
+ if (dcacheTasks.size() > 0) {
+ for (String key : dcacheTasks.keySet()) {
+ String destPath = tempDir + "/" + key
+ + System.currentTimeMillis();
+ hadoopAdapter.getHDFSClient().copyToLocalFile(
+ new Path(dcacheTasks.get(key)), new Path(destPath));
+ System.out.println(" source :" + dcacheTasks.get(key));
+ System.out.println(" dest :" + destPath);
+ System.out.println(" key :" + key);
+ System.out.println(" value :" + destPath);
+ dCacheHander.put(key, destPath);
+ }
+ }
+ }
- public Map<String, String> preparePreLaunchDCacheTasks(String jobFile) {
- Properties jobProperties = Utilities.getProperties(jobFile, ',');
- Map<String, String> dcacheTasks = new HashMap<String, String>();
- Map<String, String> dcacheKeys = initializeCustomProperties(jobProperties, dacheKeyPrefix);
- for (String key : dcacheKeys.keySet()) {
- String sourcePath = dcacheKeys.get(key);
- if (sourcePath != null) {
- dcacheTasks.put(key, sourcePath);
- }
- }
- return dcacheTasks;
- }
+ private String getApplicationNameForHadoopJob(JobConf jobConf) {
+ String jar = jobConf.getJar();
+ if (jar != null) {
+ return jar.substring(jar.lastIndexOf("/") >= 0 ? jar
+ .lastIndexOf("/") + 1 : 0);
+ } else {
+ return "" + System.currentTimeMillis();
+ }
+ }
- public void waitForCompletion(UUID jobId) throws Exception {
- hyracksClient.waitForCompleton(jobId);
- }
+ private Map<String, String> initializeCustomProperties(
+ Properties properties, String prefix) {
+ Map<String, String> foundProperties = new HashMap<String, String>();
+ Set<Entry<Object, Object>> entrySet = properties.entrySet();
+ for (Entry entry : entrySet) {
+ String key = (String) entry.getKey();
+ String value = (String) entry.getValue();
+ if ((key.startsWith(prefix))) {
+ String actualKey = key.substring(prefix.length() + 1); // "cut off '<prefix>.' from the beginning"
+ foundProperties.put(actualKey, value);
+ }
+ }
+ return foundProperties;
+ }
- public HyracksRunningJob submitHadoopJobToHyrax(JobConf jobConf, Set<String> userLibs) {
- HyracksRunningJob hyraxRunningJob = null;
- List<JobConf> jobConfs = new ArrayList<JobConf>();
- jobConfs.add(jobConf);
- try {
- hyraxRunningJob = hyracksClient.submitJobs(jobConfs, getRequiredLibs(userLibs));
- System.out.println(" Result in " + jobConf.get("mapred.output.dir"));
- } catch (Exception e) {
- e.printStackTrace();
- }
- return hyraxRunningJob;
- }
+ public Map<String, String> preparePreLaunchDCacheTasks(String jobFile) {
+ Properties jobProperties = Utilities.getProperties(jobFile, ',');
+ Map<String, String> dcacheTasks = new HashMap<String, String>();
+ Map<String, String> dcacheKeys = initializeCustomProperties(
+ jobProperties, dacheKeyPrefix);
+ for (String key : dcacheKeys.keySet()) {
+ String sourcePath = dcacheKeys.get(key);
+ if (sourcePath != null) {
+ dcacheTasks.put(key, sourcePath);
+ }
+ }
+ return dcacheTasks;
+ }
- public HyracksRunningJob submitJob(String appName, JobSpecification jobSpec, Set<String> userLibs) {
- HyracksRunningJob hyraxRunningJob = null;
- try {
- hyraxRunningJob = hyracksClient.submitJob(appName, jobSpec, getRequiredLibs(userLibs));
- } catch (Exception e) {
- e.printStackTrace();
- }
- return hyraxRunningJob;
- }
+ public void waitForCompletion(UUID jobId) throws Exception {
+ hyracksClient.waitForCompleton(jobId);
+ }
- private List<JobConf> constructHadoopJobConfs(String[] jobFiles) throws Exception {
- List<JobConf> jobConfs = new ArrayList<JobConf>();
- for (String jobFile : jobFiles) {
- jobConfs.add(constructHadoopJobConf(jobFile));
- }
- return jobConfs;
- }
+ private List<JobConf> constructHadoopJobConfs(String[] jobFiles)
+ throws Exception {
+ List<JobConf> jobConfs = new ArrayList<JobConf>();
+ for (String jobFile : jobFiles) {
+ jobConfs.add(constructHadoopJobConf(jobFile));
+ }
+ return jobConfs;
+ }
- private JobConf constructHadoopJobConf(String jobFile) {
- Properties jobProperties = Utilities.getProperties(jobFile, '=');
- JobConf conf = hyracksClient.getHadoopAdapter().getConf();
- for (Entry entry : jobProperties.entrySet()) {
- conf.set((String) entry.getKey(), (String) entry.getValue());
- System.out.println((String) entry.getKey() + " : " + (String) entry.getValue());
- }
- return conf;
- }
+ private JobConf constructHadoopJobConf(String jobFile) {
+ Properties jobProperties = Utilities.getProperties(jobFile, '=');
+ JobConf conf = new JobConf(hadoopAdapter.getConf());
+ for (Entry entry : jobProperties.entrySet()) {
+ conf.set((String) entry.getKey(), (String) entry.getValue());
+ System.out.println((String) entry.getKey() + " : "
+ + (String) entry.getValue());
+ }
+ return conf;
+ }
- private String[] getJobs(CompatibilityConfig clConfig) {
- return clConfig.jobFiles == null ? new String[0] : clConfig.jobFiles.split(",");
- }
+ private String[] getJobs(CompatibilityConfig clConfig) {
+ return clConfig.jobFiles == null ? new String[0] : clConfig.jobFiles
+ .split(",");
+ }
- public static void main(String args[]) throws Exception {
- long startTime = System.nanoTime();
- CompatibilityConfig clConfig = new CompatibilityConfig();
- CmdLineParser cp = new CmdLineParser(clConfig);
- try {
- cp.parseArgument(args);
- } catch (Exception e) {
- System.err.println(e.getMessage());
- cp.printUsage(System.err);
- return;
- }
- CompatibilityLayer compatLayer = new CompatibilityLayer(clConfig);
- String[] jobFiles = compatLayer.getJobs(clConfig);
- String[] tempUserLibs = clConfig.userLibs == null ? new String[0] : clConfig.userLibs.split(",");
- Set<String> userLibs = new HashSet<String>();
- for(String userLib : tempUserLibs) {
- userLibs.add(userLib);
- }
- HyracksRunningJob hyraxRunningJob = null;
- try {
- hyraxRunningJob = compatLayer.submitJobs(jobFiles, userLibs);
- compatLayer.waitForCompletion(hyraxRunningJob.getJobId());
- } catch (Exception e) {
- e.printStackTrace();
- }
- hyraxRunningJob.waitForCompletion();
- long end_time = System.nanoTime();
- System.out.println("TOTAL TIME (from Launch to Completion):" + ((end_time - startTime) / (float) 1000000000.0)
- + " seconds.");
- }
-
+ public static void main(String args[]) throws Exception {
+ long startTime = System.nanoTime();
+ CompatibilityConfig clConfig = new CompatibilityConfig();
+ CmdLineParser cp = new CmdLineParser(clConfig);
+ try {
+ cp.parseArgument(args);
+ } catch (Exception e) {
+ System.err.println(e.getMessage());
+ cp.printUsage(System.err);
+ return;
+ }
+ CompatibilityLayer compatLayer = new CompatibilityLayer(clConfig);
+ String applicationName = clConfig.applicationName;
+ String[] jobFiles = compatLayer.getJobs(clConfig);
+ String[] userLibraries = null;
+ if (clConfig.userLibs != null) {
+ userLibraries = clConfig.userLibs.split(",");
+ }
+ try {
+ HyracksRunningJob hyraxRunningJob = null;
+ if (userLibraries != null) {
+ Set<String> userLibs = new HashSet<String>();
+ for (String userLib : userLibraries) {
+ userLibs.add(userLib);
+ }
+ hyraxRunningJob = compatLayer.submitJobs(applicationName,
+ jobFiles, userLibs);
+ } else {
+ hyraxRunningJob = compatLayer.submitJobs(applicationName,
+ jobFiles);
+ }
+ compatLayer.waitForCompletion(hyraxRunningJob.getJobId());
+ long end_time = System.nanoTime();
+ System.out.println("TOTAL TIME (from Launch to Completion):"
+ + ((end_time - startTime) / (float) 1000000000.0)
+ + " seconds.");
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw e;
+ }
+ }
}
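
Note: a sketch of driving the compatibility layer programmatically (the same path main() takes after option parsing); every path and name below is a placeholder.

    import java.util.HashSet;
    import java.util.Set;

    import edu.uci.ics.hyracks.hadoop.compat.client.HyracksRunningJob;
    import edu.uci.ics.hyracks.hadoop.compat.driver.CompatibilityLayer;
    import edu.uci.ics.hyracks.hadoop.compat.util.CompatibilityConfig;

    public class CompatibilityLayerSketch {
        public static void main(String[] args) throws Exception {
            CompatibilityConfig clConfig = new CompatibilityConfig();
            clConfig.clusterConf = "conf/cluster.properties"; // placeholder path
            CompatibilityLayer layer = new CompatibilityLayer(clConfig);

            // Each job file is a key=value properties file folded into a JobConf
            // cloned from the HadoopAdapter's configuration; dcache.key-prefixed
            // entries in the first file are copied to /tmp and registered with
            // the DCacheHandler before submission.
            String[] jobFiles = new String[] { "conf/job1.properties" }; // placeholder

            Set<String> userLibs = new HashSet<String>();
            userLibs.add("/path/to/user-udfs.jar"); // hypothetical user jar

            HyracksRunningJob running = layer.submitJobs("myHadoopApp", jobFiles, userLibs);
            layer.waitForCompletion(running.getJobId());
        }
    }
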
diff --git a/hyracks-hadoop-compat/src/main/java/edu/uci/ics/hyracks/hadoop/compat/util/CompatibilityConfig.java b/hyracks-hadoop-compat/src/main/java/edu/uci/ics/hyracks/hadoop/compat/util/CompatibilityConfig.java
index 1dd266f..6d94bc7 100644
--- a/hyracks-hadoop-compat/src/main/java/edu/uci/ics/hyracks/hadoop/compat/util/CompatibilityConfig.java
+++ b/hyracks-hadoop-compat/src/main/java/edu/uci/ics/hyracks/hadoop/compat/util/CompatibilityConfig.java
@@ -4,17 +4,20 @@
public class CompatibilityConfig {
- @Option(name = "-cluster", required = true, usage = "Defines the path to the configuration file that provides the following info: +"
- + " (1) Address of HyracksClusterController service" + " (2) Address of Hadoop namenode service")
- public String clusterConf;
+ @Option(name = "-cluster", required = true, usage = "Defines the path to the configuration file that provides the following info: +"
+ + " (1) Address of HyracksClusterController service"
+ + " (2) Address of Hadoop namenode service")
+ public String clusterConf;
- @Option(name = "-jobFiles", usage = "Comma separated list of jobFiles. "
- + "Each job file defines the hadoop job + " + "The order in the list defines the sequence in which"
- + "the jobs are to be executed")
- public String jobFiles;
+ @Option(name = "-jobFiles", usage = "Comma separated list of jobFiles. "
+ + "Each job file defines the hadoop job + "
+ + "The order in the list defines the sequence in which"
+ + "the jobs are to be executed")
+ public String jobFiles;
- @Option(name = "-userLibs", usage = " A comma separated list of jar files that are required to be addedd to classpath when running "
- + " mappers/reducers etc ")
- public String userLibs;
+ @Option(name = "-applicationName", usage = " The application as part of which the job executes")
+ public String applicationName;
+ @Option(name = "-userLibs", usage = " A comma separated list of jar files that are required to be addedd to classpath when running ")
+ public String userLibs;
}
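
Note: the command line that the driver now expects can be mocked up as below; a sketch assuming the args4j CmdLineParser already used in CompatibilityLayer.main(), with placeholder argument values (only -cluster is marked required).

    import org.kohsuke.args4j.CmdLineParser;

    import edu.uci.ics.hyracks.hadoop.compat.util.CompatibilityConfig;

    public class CompatibilityOptionsSketch {
        public static void main(String[] args) throws Exception {
            CompatibilityConfig clConfig = new CompatibilityConfig();
            CmdLineParser parser = new CmdLineParser(clConfig);
            // Placeholder values; -jobFiles, -applicationName and -userLibs are optional.
            parser.parseArgument(new String[] {
                    "-cluster", "conf/cluster.properties",
                    "-jobFiles", "conf/job1.properties,conf/job2.properties",
                    "-applicationName", "myHadoopApp",
                    "-userLibs", "/path/to/user-udfs.jar" });
            System.out.println("application: " + clConfig.applicationName
                    + ", jobs: " + clConfig.jobFiles);
        }
    }
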
diff --git a/hyracks-hadoop-compat/src/main/java/edu/uci/ics/hyracks/hadoop/compat/util/HadoopAdapter.java b/hyracks-hadoop-compat/src/main/java/edu/uci/ics/hyracks/hadoop/compat/util/HadoopAdapter.java
index d0df7f1..f2f7d03 100644
--- a/hyracks-hadoop-compat/src/main/java/edu/uci/ics/hyracks/hadoop/compat/util/HadoopAdapter.java
+++ b/hyracks-hadoop-compat/src/main/java/edu/uci/ics/hyracks/hadoop/compat/util/HadoopAdapter.java
@@ -47,310 +47,360 @@
public class HadoopAdapter {
- public static final String FS_DEFAULT_NAME = "fs.default.name";
- private JobConf jobConf;
- private Map<OperatorDescriptorId,Integer> operatorInstanceCount = new HashMap<OperatorDescriptorId,Integer>();
- public static final String HYRACKS_EX_SORT_FRAME_LIMIT = "HYRACKS_EX_SORT_FRAME_LIMIT";
- public static final int DEFAULT_EX_SORT_FRAME_LIMIT = 4096;
- public static final int DEFAULT_MAX_MAPPERS = 40;
- public static final int DEFAULT_MAX_REDUCERS= 40;
- public static final String MAX_MAPPERS_KEY = "maxMappers";
- public static final String MAX_REDUCERS_KEY = "maxReducers";
- public static final String EX_SORT_FRAME_LIMIT_KEY = "sortFrameLimit";
-
- private int maxMappers = DEFAULT_MAX_MAPPERS;
- private int maxReducers = DEFAULT_MAX_REDUCERS;
- private int exSortFrame = DEFAULT_EX_SORT_FRAME_LIMIT;
-
- class NewHadoopConstants {
- public static final String INPUT_FORMAT_CLASS_ATTR = "mapreduce.inputformat.class";
- public static final String MAP_CLASS_ATTR = "mapreduce.map.class";
- public static final String COMBINE_CLASS_ATTR = "mapreduce.combine.class";
- public static final String REDUCE_CLASS_ATTR = "mapreduce.reduce.class";
- public static final String OUTPUT_FORMAT_CLASS_ATTR = "mapreduce.outputformat.class";
- public static final String PARTITIONER_CLASS_ATTR = "mapreduce.partitioner.class";
- }
-
- public HadoopAdapter(String namenodeUrl) {
- jobConf = new JobConf(true);
- jobConf.set(FS_DEFAULT_NAME, namenodeUrl);
- if(System.getenv(MAX_MAPPERS_KEY) != null) {
- maxMappers = Integer.parseInt(System.getenv(MAX_MAPPERS_KEY));
- }
- if(System.getenv(MAX_REDUCERS_KEY) != null) {
- maxReducers= Integer.parseInt(System.getenv(MAX_REDUCERS_KEY));
- }
- if(System.getenv(EX_SORT_FRAME_LIMIT_KEY) != null) {
- exSortFrame= Integer.parseInt(System.getenv(EX_SORT_FRAME_LIMIT_KEY));
- }
- }
+ public static final String FS_DEFAULT_NAME = "fs.default.name";
+ private JobConf jobConf;
+ private Map<OperatorDescriptorId, Integer> operatorInstanceCount = new HashMap<OperatorDescriptorId, Integer>();
+ public static final String HYRACKS_EX_SORT_FRAME_LIMIT = "HYRACKS_EX_SORT_FRAME_LIMIT";
+ public static final int DEFAULT_EX_SORT_FRAME_LIMIT = 4096;
+ public static final int DEFAULT_MAX_MAPPERS = 40;
+ public static final int DEFAULT_MAX_REDUCERS = 40;
+ public static final String MAX_MAPPERS_KEY = "maxMappers";
+ public static final String MAX_REDUCERS_KEY = "maxReducers";
+ public static final String EX_SORT_FRAME_LIMIT_KEY = "sortFrameLimit";
- private String getEnvironmentVariable(String key, String def) {
- String ret = System.getenv(key);
- return ret != null ? ret : def;
- }
-
- public JobConf getConf() {
- return jobConf;
- }
+ private int maxMappers = DEFAULT_MAX_MAPPERS;
+ private int maxReducers = DEFAULT_MAX_REDUCERS;
+ private int exSortFrame = DEFAULT_EX_SORT_FRAME_LIMIT;
- public static VersionedProtocol getProtocol(Class protocolClass, InetSocketAddress inetAddress, JobConf jobConf)
- throws IOException {
- VersionedProtocol versionedProtocol = RPC.getProxy(protocolClass, ClientProtocol.versionID, inetAddress,
- jobConf);
- return versionedProtocol;
- }
+ class NewHadoopConstants {
+ public static final String INPUT_FORMAT_CLASS_ATTR = "mapreduce.inputformat.class";
+ public static final String MAP_CLASS_ATTR = "mapreduce.map.class";
+ public static final String COMBINE_CLASS_ATTR = "mapreduce.combine.class";
+ public static final String REDUCE_CLASS_ATTR = "mapreduce.reduce.class";
+ public static final String OUTPUT_FORMAT_CLASS_ATTR = "mapreduce.outputformat.class";
+ public static final String PARTITIONER_CLASS_ATTR = "mapreduce.partitioner.class";
+ }
- private static RecordDescriptor getHadoopRecordDescriptor(String className1, String className2) {
- RecordDescriptor recordDescriptor = null;
- try {
- recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor((Class<? extends Writable>) Class
- .forName(className1), (Class<? extends Writable>) Class.forName(className2));
- } catch (ClassNotFoundException cnfe) {
- cnfe.printStackTrace();
- }
- return recordDescriptor;
- }
+ public HadoopAdapter(String namenodeUrl) {
+ jobConf = new JobConf(true);
+ jobConf.set(FS_DEFAULT_NAME, namenodeUrl);
+ if (System.getenv(MAX_MAPPERS_KEY) != null) {
+ maxMappers = Integer.parseInt(System.getenv(MAX_MAPPERS_KEY));
+ }
+ if (System.getenv(MAX_REDUCERS_KEY) != null) {
+ maxReducers = Integer.parseInt(System.getenv(MAX_REDUCERS_KEY));
+ }
+ if (System.getenv(EX_SORT_FRAME_LIMIT_KEY) != null) {
+ exSortFrame = Integer.parseInt(System
+ .getenv(EX_SORT_FRAME_LIMIT_KEY));
+ }
+ }
- private Object[] getInputSplits(JobConf conf) throws IOException, ClassNotFoundException, InterruptedException {
- if (conf.getUseNewMapper()) {
- return getNewInputSplits(conf);
- } else {
- return getOldInputSplits(conf);
- }
- }
-
- private org.apache.hadoop.mapreduce.InputSplit[] getNewInputSplits(JobConf conf) throws ClassNotFoundException, IOException, InterruptedException {
- org.apache.hadoop.mapreduce.InputSplit[] splits = null;
- JobContext context = new JobContext(conf,null);
- org.apache.hadoop.mapreduce.InputFormat inputFormat = ReflectionUtils.newInstance(context.getInputFormatClass(),conf);
- List<org.apache.hadoop.mapreduce.InputSplit> inputSplits = inputFormat.getSplits(context);
- return inputSplits.toArray(new org.apache.hadoop.mapreduce.InputSplit[]{});
- }
-
- private InputSplit[] getOldInputSplits(JobConf conf) throws IOException {
- InputFormat inputFormat = conf.getInputFormat();
- return inputFormat.getSplits(conf, conf.getNumMapTasks());
- }
-
- private void configurePartitionCountConstraint(JobSpecification spec, IOperatorDescriptor operator,int instanceCount){
- PartitionConstraintHelper.addPartitionCountConstraint(spec, operator, instanceCount);
- operatorInstanceCount.put(operator.getOperatorId(),instanceCount);
- }
+ private String getEnvironmentVariable(String key, String def) {
+ String ret = System.getenv(key);
+ return ret != null ? ret : def;
+ }
- public HadoopMapperOperatorDescriptor getMapper(JobConf conf,JobSpecification spec, IOperatorDescriptor previousOp)
- throws Exception {
- boolean selfRead = previousOp == null;
- IHadoopClassFactory classFactory = new ClasspathBasedHadoopClassFactory();
- HadoopMapperOperatorDescriptor mapOp = null;
- if(selfRead) {
- Object [] splits = getInputSplits(conf,maxMappers);
- mapOp = new HadoopMapperOperatorDescriptor(spec, conf, splits,classFactory);
- configurePartitionCountConstraint(spec,mapOp,splits.length);
- System.out.println("No of mappers :" + splits.length);
- } else {
- configurePartitionCountConstraint(spec,mapOp,getInstanceCount(previousOp));
- mapOp = new HadoopMapperOperatorDescriptor(spec,conf,classFactory);
- spec.connect(new OneToOneConnectorDescriptor(spec), previousOp, 0, mapOp, 0);
- }
- return mapOp;
- }
+ public JobConf getConf() {
+ return jobConf;
+ }
- public HadoopReducerOperatorDescriptor getReducer(JobConf conf, JobSpecification spec) {
- HadoopReducerOperatorDescriptor reduceOp = new HadoopReducerOperatorDescriptor(spec, conf, null,
- new ClasspathBasedHadoopClassFactory());
- return reduceOp;
- }
+ public static VersionedProtocol getProtocol(Class protocolClass,
+ InetSocketAddress inetAddress, JobConf jobConf) throws IOException {
+ VersionedProtocol versionedProtocol = RPC.getProxy(protocolClass,
+ ClientProtocol.versionID, inetAddress, jobConf);
+ return versionedProtocol;
+ }
- public FileSystem getHDFSClient() {
- FileSystem fileSystem = null;
- try {
- fileSystem = FileSystem.get(jobConf);
- } catch (IOException ioe) {
- ioe.printStackTrace();
- }
- return fileSystem;
- }
+ private static RecordDescriptor getHadoopRecordDescriptor(
+ String className1, String className2) {
+ RecordDescriptor recordDescriptor = null;
+ try {
+ recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
+ (Class<? extends Writable>) Class.forName(className1),
+ (Class<? extends Writable>) Class.forName(className2));
+ } catch (ClassNotFoundException cnfe) {
+ cnfe.printStackTrace();
+ }
+ return recordDescriptor;
+ }
- public JobSpecification getJobSpecification(List<JobConf> jobConfs) throws Exception {
- JobSpecification spec = null;
- if (jobConfs.size() == 1) {
- spec = getJobSpecification(jobConfs.get(0));
- } else {
- spec = getPipelinedSpec(jobConfs);
- }
- return spec;
- }
+ private Object[] getInputSplits(JobConf conf) throws IOException,
+ ClassNotFoundException, InterruptedException {
+ if (conf.getUseNewMapper()) {
+ return getNewInputSplits(conf);
+ } else {
+ return getOldInputSplits(conf);
+ }
+ }
- private IOperatorDescriptor configureOutput( IOperatorDescriptor previousOperator, JobConf conf,
- JobSpecification spec) throws Exception {
- int instanceCountPreviousOperator = operatorInstanceCount.get(previousOperator.getOperatorId());
- int numOutputters = conf.getNumReduceTasks() != 0 ? conf.getNumReduceTasks() : instanceCountPreviousOperator;
- HadoopWriteOperatorDescriptor writer = null;
- writer = new HadoopWriteOperatorDescriptor(spec, conf, numOutputters);
- configurePartitionCountConstraint(spec,writer,numOutputters);
- spec.connect(new OneToOneConnectorDescriptor(spec), previousOperator, 0, writer, 0);
- return writer;
- }
+ private org.apache.hadoop.mapreduce.InputSplit[] getNewInputSplits(
+ JobConf conf) throws ClassNotFoundException, IOException,
+ InterruptedException {
+ org.apache.hadoop.mapreduce.InputSplit[] splits = null;
+ JobContext context = new JobContext(conf, null);
+ org.apache.hadoop.mapreduce.InputFormat inputFormat = ReflectionUtils
+ .newInstance(context.getInputFormatClass(), conf);
+ List<org.apache.hadoop.mapreduce.InputSplit> inputSplits = inputFormat
+ .getSplits(context);
+ return inputSplits
+ .toArray(new org.apache.hadoop.mapreduce.InputSplit[] {});
+ }
+ private InputSplit[] getOldInputSplits(JobConf conf) throws IOException {
+ InputFormat inputFormat = conf.getInputFormat();
+ return inputFormat.getSplits(conf, conf.getNumMapTasks());
+ }
- private int getInstanceCount(IOperatorDescriptor operator) {
- return operatorInstanceCount.get(operator.getOperatorId());
- }
+ private void configurePartitionCountConstraint(JobSpecification spec,
+ IOperatorDescriptor operator, int instanceCount) {
+ PartitionConstraintHelper.addPartitionCountConstraint(spec, operator,
+ instanceCount);
+ operatorInstanceCount.put(operator.getOperatorId(), instanceCount);
+ }
- private IOperatorDescriptor addCombiner(IOperatorDescriptor previousOperator, JobConf jobConf,
- JobSpecification spec) throws Exception {
- boolean useCombiner = (jobConf.getCombinerClass() != null);
- IOperatorDescriptor mapSideOutputOp = previousOperator;
- if (useCombiner) {
- System.out.println("Using Combiner:" + jobConf.getCombinerClass().getName());
- IOperatorDescriptor mapSideCombineSortOp = getExternalSorter(jobConf, spec);
- configurePartitionCountConstraint(spec,mapSideCombineSortOp,getInstanceCount(previousOperator));
-
- HadoopReducerOperatorDescriptor mapSideCombineReduceOp = getReducer(jobConf, spec);
- configurePartitionCountConstraint(spec,mapSideCombineReduceOp,getInstanceCount(previousOperator));
- spec.connect(new OneToOneConnectorDescriptor(spec), previousOperator, 0, mapSideCombineSortOp, 0);
- spec.connect(new OneToOneConnectorDescriptor(spec), mapSideCombineSortOp, 0, mapSideCombineReduceOp, 0);
- mapSideOutputOp = mapSideCombineSortOp;
- }
- return mapSideOutputOp;
- }
-
- private int getNumReduceTasks(JobConf jobConf) {
- int numReduceTasks = Math.min(maxReducers,jobConf.getNumReduceTasks());
- return numReduceTasks;
- }
-
- private IOperatorDescriptor addReducer(IOperatorDescriptor previousOperator, JobConf jobConf,
- JobSpecification spec) throws Exception {
- IOperatorDescriptor mrOutputOperator = previousOperator;
- if (jobConf.getNumReduceTasks() != 0) {
- IOperatorDescriptor sorter = getExternalSorter(jobConf, spec);
- HadoopReducerOperatorDescriptor reducer = getReducer(jobConf, spec);
- int numReduceTasks = getNumReduceTasks(jobConf);
- System.out.println("No of Reducers :" + numReduceTasks);
- configurePartitionCountConstraint(spec,sorter,numReduceTasks);
- configurePartitionCountConstraint(spec,reducer,numReduceTasks);
-
- IConnectorDescriptor mToNConnectorDescriptor = getMtoNHashPartitioningConnector(jobConf, spec);
- spec.connect(mToNConnectorDescriptor, previousOperator, 0, sorter, 0);
- spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, reducer, 0);
- mrOutputOperator = reducer;
- }
- return mrOutputOperator;
- }
-
- private long getInputSize(Object[] splits,JobConf conf) throws IOException, InterruptedException {
- long totalInputSize =0;
- if(conf.getUseNewMapper()) {
- for (org.apache.hadoop.mapreduce.InputSplit split : (org.apache.hadoop.mapreduce.InputSplit[])splits) {
- totalInputSize += split.getLength();
- }
- } else {
- for (InputSplit split : (InputSplit[])splits) {
- totalInputSize += split.getLength();
- }
- }
- return totalInputSize;
- }
-
- private Object[] getInputSplits(JobConf conf, int desiredMaxMappers) throws Exception {
- Object[] splits = getInputSplits(conf);
- System.out.println(" initial split count :" + splits.length);
- System.out.println(" desired mappers :" + desiredMaxMappers);
- if (splits.length > desiredMaxMappers) {
- long totalInputSize = getInputSize(splits,conf);
- long goalSize = (totalInputSize/desiredMaxMappers);
- System.out.println(" total input length :" + totalInputSize);
- System.out.println(" goal size :" + goalSize);
- conf.setLong("mapred.min.split.size", goalSize);
- conf.setNumMapTasks(desiredMaxMappers);
- splits = getInputSplits(conf);
- System.out.println(" revised split count :" + splits.length);
- }
- return splits;
- }
-
- public JobSpecification getPipelinedSpec(List<JobConf> jobConfs) throws Exception {
- JobSpecification spec = new JobSpecification();
- Iterator<JobConf> iterator = jobConfs.iterator();
- JobConf firstMR = iterator.next();
- IOperatorDescriptor mrOutputOp = configureMapReduce(null, spec,firstMR);
- while (iterator.hasNext())
- for (JobConf currentJobConf : jobConfs) {
- mrOutputOp = configureMapReduce(mrOutputOp, spec , currentJobConf);
- }
- configureOutput(mrOutputOp, jobConfs.get(jobConfs.size() - 1), spec);
- return spec;
- }
+ public HadoopMapperOperatorDescriptor getMapper(JobConf conf,
+ JobSpecification spec, IOperatorDescriptor previousOp)
+ throws Exception {
+ boolean selfRead = previousOp == null;
+ IHadoopClassFactory classFactory = new ClasspathBasedHadoopClassFactory();
+ HadoopMapperOperatorDescriptor mapOp = null;
+ if (selfRead) {
+ Object[] splits = getInputSplits(conf, maxMappers);
+ mapOp = new HadoopMapperOperatorDescriptor(spec, conf, splits,
+ classFactory);
+ configurePartitionCountConstraint(spec, mapOp, splits.length);
+ } else {
+ configurePartitionCountConstraint(spec, mapOp,
+ getInstanceCount(previousOp));
+ mapOp = new HadoopMapperOperatorDescriptor(spec, conf, classFactory);
+ spec.connect(new OneToOneConnectorDescriptor(spec), previousOp, 0,
+ mapOp, 0);
+ }
+ return mapOp;
+ }
- public JobSpecification getJobSpecification(JobConf conf) throws Exception {
- JobSpecification spec = new JobSpecification();
- IOperatorDescriptor mrOutput = configureMapReduce(null,spec, conf);
- IOperatorDescriptor printer = configureOutput(mrOutput, conf, spec);
- spec.addRoot(printer);
- System.out.println(spec);
- return spec;
- }
-
- private IOperatorDescriptor configureMapReduce(IOperatorDescriptor previousOuputOp, JobSpecification spec, JobConf conf) throws Exception {
- IOperatorDescriptor mapper = getMapper(conf,spec,previousOuputOp);
- IOperatorDescriptor mapSideOutputOp = addCombiner(mapper,conf,spec);
- IOperatorDescriptor reducer = addReducer(mapSideOutputOp, conf, spec);
- return reducer;
- }
+ public HadoopReducerOperatorDescriptor getReducer(JobConf conf,
+ JobSpecification spec, boolean useAsCombiner) {
+ HadoopReducerOperatorDescriptor reduceOp = new HadoopReducerOperatorDescriptor(
+ spec, conf, null, new ClasspathBasedHadoopClassFactory(),
+ useAsCombiner);
+ return reduceOp;
+ }
- public static InMemorySortOperatorDescriptor getInMemorySorter(JobConf conf, JobSpecification spec) {
- InMemorySortOperatorDescriptor inMemorySortOp = null;
- RecordDescriptor recordDescriptor = getHadoopRecordDescriptor(conf.getMapOutputKeyClass().getName(), conf
- .getMapOutputValueClass().getName());
- Class<? extends RawComparator> rawComparatorClass = null;
- WritableComparator writableComparator = WritableComparator.get(conf.getMapOutputKeyClass().asSubclass(
- WritableComparable.class));
- WritableComparingBinaryComparatorFactory comparatorFactory = new WritableComparingBinaryComparatorFactory(
- writableComparator.getClass());
- inMemorySortOp = new InMemorySortOperatorDescriptor(spec, new int[] { 0 },
- new IBinaryComparatorFactory[] { comparatorFactory }, recordDescriptor);
- return inMemorySortOp;
- }
+ public FileSystem getHDFSClient() {
+ FileSystem fileSystem = null;
+ try {
+ fileSystem = FileSystem.get(jobConf);
+ } catch (IOException ioe) {
+ ioe.printStackTrace();
+ }
+ return fileSystem;
+ }
- public static ExternalSortOperatorDescriptor getExternalSorter(JobConf conf, JobSpecification spec) {
- ExternalSortOperatorDescriptor externalSortOp = null;
- RecordDescriptor recordDescriptor = getHadoopRecordDescriptor(conf.getMapOutputKeyClass().getName(), conf
- .getMapOutputValueClass().getName());
- Class<? extends RawComparator> rawComparatorClass = null;
- WritableComparator writableComparator = WritableComparator.get(conf.getMapOutputKeyClass().asSubclass(
- WritableComparable.class));
- WritableComparingBinaryComparatorFactory comparatorFactory = new WritableComparingBinaryComparatorFactory(
- writableComparator.getClass());
- externalSortOp = new ExternalSortOperatorDescriptor(spec,conf.getInt(HYRACKS_EX_SORT_FRAME_LIMIT,DEFAULT_EX_SORT_FRAME_LIMIT),new int[] { 0 },
- new IBinaryComparatorFactory[] { comparatorFactory }, recordDescriptor);
- return externalSortOp;
- }
-
- public static MToNHashPartitioningConnectorDescriptor getMtoNHashPartitioningConnector(JobConf conf,
- JobSpecification spec) {
+ public JobSpecification getJobSpecification(List<JobConf> jobConfs)
+ throws Exception {
+ JobSpecification spec = null;
+ if (jobConfs.size() == 1) {
+ spec = getJobSpecification(jobConfs.get(0));
+ } else {
+ spec = getPipelinedSpec(jobConfs);
+ }
+ return spec;
+ }
- Class mapOutputKeyClass = conf.getMapOutputKeyClass();
- Class mapOutputValueClass = conf.getMapOutputValueClass();
+ private IOperatorDescriptor configureOutput(
+ IOperatorDescriptor previousOperator, JobConf conf,
+ JobSpecification spec) throws Exception {
+ int instanceCountPreviousOperator = operatorInstanceCount
+ .get(previousOperator.getOperatorId());
+ int numOutputters = conf.getNumReduceTasks() != 0 ? conf
+ .getNumReduceTasks() : instanceCountPreviousOperator;
+ HadoopWriteOperatorDescriptor writer = null;
+ writer = new HadoopWriteOperatorDescriptor(spec, conf, numOutputters);
+ configurePartitionCountConstraint(spec, writer, numOutputters);
+ spec.connect(new OneToOneConnectorDescriptor(spec), previousOperator,
+ 0, writer, 0);
+ return writer;
+ }
- MToNHashPartitioningConnectorDescriptor connectorDescriptor = null;
- ITuplePartitionComputerFactory factory = null;
- conf.getMapOutputKeyClass();
- if (conf.getPartitionerClass() != null && !conf.getPartitionerClass().getName().startsWith("org.apache.hadoop")) {
- Class<? extends Partitioner> partitioner = conf.getPartitionerClass();
- factory = new HadoopPartitionerTuplePartitionComputerFactory(partitioner, DatatypeHelper
- .createSerializerDeserializer(mapOutputKeyClass), DatatypeHelper
- .createSerializerDeserializer(mapOutputValueClass));
- } else {
- RecordDescriptor recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(mapOutputKeyClass,
- mapOutputValueClass);
- ISerializerDeserializer mapOutputKeySerializerDerserializer = DatatypeHelper
- .createSerializerDeserializer(mapOutputKeyClass);
- factory = new HadoopHashTuplePartitionComputerFactory(mapOutputKeySerializerDerserializer);
- }
- connectorDescriptor = new MToNHashPartitioningConnectorDescriptor(spec, factory);
- return connectorDescriptor;
- }
+ private int getInstanceCount(IOperatorDescriptor operator) {
+ return operatorInstanceCount.get(operator.getOperatorId());
+ }
+
+ private IOperatorDescriptor addCombiner(
+ IOperatorDescriptor previousOperator, JobConf jobConf,
+ JobSpecification spec) throws Exception {
+ boolean useCombiner = (jobConf.getCombinerClass() != null);
+ IOperatorDescriptor mapSideOutputOp = previousOperator;
+ if (useCombiner) {
+ System.out.println("Using Combiner:"
+ + jobConf.getCombinerClass().getName());
+ IOperatorDescriptor mapSideCombineSortOp = getExternalSorter(
+ jobConf, spec);
+ configurePartitionCountConstraint(spec, mapSideCombineSortOp,
+ getInstanceCount(previousOperator));
+
+ HadoopReducerOperatorDescriptor mapSideCombineReduceOp = getReducer(
+ jobConf, spec, true);
+ configurePartitionCountConstraint(spec, mapSideCombineReduceOp,
+ getInstanceCount(previousOperator));
+ spec.connect(new OneToOneConnectorDescriptor(spec),
+ previousOperator, 0, mapSideCombineSortOp, 0);
+ spec.connect(new OneToOneConnectorDescriptor(spec),
+ mapSideCombineSortOp, 0, mapSideCombineReduceOp, 0);
+ mapSideOutputOp = mapSideCombineReduceOp;
+ }
+ return mapSideOutputOp;
+ }
+
+ private int getNumReduceTasks(JobConf jobConf) {
+ int numReduceTasks = Math.min(maxReducers, jobConf.getNumReduceTasks());
+ return numReduceTasks;
+ }
+
+ private IOperatorDescriptor addReducer(
+ IOperatorDescriptor previousOperator, JobConf jobConf,
+ JobSpecification spec) throws Exception {
+ IOperatorDescriptor mrOutputOperator = previousOperator;
+ if (jobConf.getNumReduceTasks() != 0) {
+ IOperatorDescriptor sorter = getExternalSorter(jobConf, spec);
+ HadoopReducerOperatorDescriptor reducer = getReducer(jobConf, spec,
+ false);
+ int numReduceTasks = getNumReduceTasks(jobConf);
+ configurePartitionCountConstraint(spec, sorter, numReduceTasks);
+ configurePartitionCountConstraint(spec, reducer, numReduceTasks);
+
+ IConnectorDescriptor mToNConnectorDescriptor = getMtoNHashPartitioningConnector(
+ jobConf, spec);
+ spec.connect(mToNConnectorDescriptor, previousOperator, 0, sorter,
+ 0);
+ spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0,
+ reducer, 0);
+ mrOutputOperator = reducer;
+ }
+ return mrOutputOperator;
+ }
+
+ private long getInputSize(Object[] splits, JobConf conf)
+ throws IOException, InterruptedException {
+ long totalInputSize = 0;
+ if (conf.getUseNewMapper()) {
+ for (org.apache.hadoop.mapreduce.InputSplit split : (org.apache.hadoop.mapreduce.InputSplit[]) splits) {
+ totalInputSize += split.getLength();
+ }
+ } else {
+ for (InputSplit split : (InputSplit[]) splits) {
+ totalInputSize += split.getLength();
+ }
+ }
+ return totalInputSize;
+ }
+
+ private Object[] getInputSplits(JobConf conf, int desiredMaxMappers)
+ throws Exception {
+ Object[] splits = getInputSplits(conf);
+ if (splits.length > desiredMaxMappers) {
+ long totalInputSize = getInputSize(splits, conf);
+ long goalSize = (totalInputSize / desiredMaxMappers);
+ conf.setLong("mapred.min.split.size", goalSize);
+ conf.setNumMapTasks(desiredMaxMappers);
+ splits = getInputSplits(conf);
+ }
+ return splits;
+ }
+
+ public JobSpecification getPipelinedSpec(List<JobConf> jobConfs)
+ throws Exception {
+ JobSpecification spec = new JobSpecification();
+ Iterator<JobConf> iterator = jobConfs.iterator();
+ JobConf firstMR = iterator.next();
+ IOperatorDescriptor mrOutputOp = configureMapReduce(null, spec, firstMR);
+ while (iterator.hasNext())
+ for (JobConf currentJobConf : jobConfs) {
+ mrOutputOp = configureMapReduce(mrOutputOp, spec,
+ currentJobConf);
+ }
+ configureOutput(mrOutputOp, jobConfs.get(jobConfs.size() - 1), spec);
+ return spec;
+ }
+
+ public JobSpecification getJobSpecification(JobConf conf) throws Exception {
+ JobSpecification spec = new JobSpecification();
+ IOperatorDescriptor mrOutput = configureMapReduce(null, spec, conf);
+ IOperatorDescriptor printer = configureOutput(mrOutput, conf, spec);
+ spec.addRoot(printer);
+ System.out.println(spec);
+ return spec;
+ }
+
+ private IOperatorDescriptor configureMapReduce(
+ IOperatorDescriptor previousOuputOp, JobSpecification spec,
+ JobConf conf) throws Exception {
+ IOperatorDescriptor mapper = getMapper(conf, spec, previousOuputOp);
+ IOperatorDescriptor mapSideOutputOp = addCombiner(mapper, conf, spec);
+ IOperatorDescriptor reducer = addReducer(mapSideOutputOp, conf, spec);
+ return reducer;
+ }
+
+ public static InMemorySortOperatorDescriptor getInMemorySorter(
+ JobConf conf, JobSpecification spec) {
+ InMemorySortOperatorDescriptor inMemorySortOp = null;
+ RecordDescriptor recordDescriptor = getHadoopRecordDescriptor(conf
+ .getMapOutputKeyClass().getName(), conf
+ .getMapOutputValueClass().getName());
+ Class<? extends RawComparator> rawComparatorClass = null;
+ WritableComparator writableComparator = WritableComparator.get(conf
+ .getMapOutputKeyClass().asSubclass(WritableComparable.class));
+ WritableComparingBinaryComparatorFactory comparatorFactory = new WritableComparingBinaryComparatorFactory(
+ writableComparator.getClass());
+ inMemorySortOp = new InMemorySortOperatorDescriptor(spec,
+ new int[] { 0 },
+ new IBinaryComparatorFactory[] { comparatorFactory },
+ recordDescriptor);
+ return inMemorySortOp;
+ }
+
+ public static ExternalSortOperatorDescriptor getExternalSorter(
+ JobConf conf, JobSpecification spec) {
+ ExternalSortOperatorDescriptor externalSortOp = null;
+ RecordDescriptor recordDescriptor = getHadoopRecordDescriptor(conf
+ .getMapOutputKeyClass().getName(), conf
+ .getMapOutputValueClass().getName());
+ Class<? extends RawComparator> rawComparatorClass = null;
+ WritableComparator writableComparator = WritableComparator.get(conf
+ .getMapOutputKeyClass().asSubclass(WritableComparable.class));
+ WritableComparingBinaryComparatorFactory comparatorFactory = new WritableComparingBinaryComparatorFactory(
+ writableComparator.getClass());
+ externalSortOp = new ExternalSortOperatorDescriptor(spec, conf.getInt(
+ HYRACKS_EX_SORT_FRAME_LIMIT, DEFAULT_EX_SORT_FRAME_LIMIT),
+ new int[] { 0 },
+ new IBinaryComparatorFactory[] { comparatorFactory },
+ recordDescriptor);
+ return externalSortOp;
+ }
+
+ public static MToNHashPartitioningConnectorDescriptor getMtoNHashPartitioningConnector(
+ JobConf conf, JobSpecification spec) {
+
+ Class mapOutputKeyClass = conf.getMapOutputKeyClass();
+ Class mapOutputValueClass = conf.getMapOutputValueClass();
+
+ MToNHashPartitioningConnectorDescriptor connectorDescriptor = null;
+ ITuplePartitionComputerFactory factory = null;
+ conf.getMapOutputKeyClass();
+ if (conf.getPartitionerClass() != null
+ && !conf.getPartitionerClass().getName().startsWith(
+ "org.apache.hadoop")) {
+ Class<? extends Partitioner> partitioner = conf
+ .getPartitionerClass();
+ factory = new HadoopPartitionerTuplePartitionComputerFactory(
+ partitioner, DatatypeHelper
+ .createSerializerDeserializer(mapOutputKeyClass),
+ DatatypeHelper
+ .createSerializerDeserializer(mapOutputValueClass));
+ } else {
+ RecordDescriptor recordDescriptor = DatatypeHelper
+ .createKeyValueRecordDescriptor(mapOutputKeyClass,
+ mapOutputValueClass);
+ ISerializerDeserializer mapOutputKeySerializerDerserializer = DatatypeHelper
+ .createSerializerDeserializer(mapOutputKeyClass);
+ factory = new HadoopHashTuplePartitionComputerFactory(
+ mapOutputKeySerializerDerserializer);
+ }
+ connectorDescriptor = new MToNHashPartitioningConnectorDescriptor(spec,
+ factory);
+ return connectorDescriptor;
+ }
}
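
Note: a minimal sketch of building a (possibly pipelined) Hyracks job specification with the adapter; the namenode URL and JobConf settings are placeholders. The maxMappers, maxReducers and sortFrameLimit environment variables still cap the defaults of 40 mappers, 40 reducers and a 4096-frame external sort.

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.mapred.JobConf;

    import edu.uci.ics.hyracks.api.job.JobSpecification;
    import edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter;

    public class HadoopAdapterSketch {
        public static void main(String[] args) throws Exception {
            // Placeholder namenode URL; it is written into fs.default.name.
            HadoopAdapter adapter = new HadoopAdapter("hdfs://namenode:8020");

            JobConf stage1 = new JobConf(adapter.getConf());
            JobConf stage2 = new JobConf(adapter.getConf());
            // ... set input/output paths, mapper, combiner and reducer classes ...

            List<JobConf> stages = new ArrayList<JobConf>();
            stages.add(stage1);
            stages.add(stage2);

            // A single JobConf yields one map-(combine-)reduce spec; several are
            // chained by getPipelinedSpec(), and only the last stage is wired to
            // a HadoopWriteOperatorDescriptor for output.
            JobSpecification spec = adapter.getJobSpecification(stages);
            System.out.println(spec);
        }
    }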