fix maximal cliques and add two more tests

git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_dynamic_deployment@3279 123451ca-8445-de46-9d55-352943316053
diff --git a/pregelix/pregelix-example/data/clique2/clique.txt b/pregelix/pregelix-example/data/clique2/clique.txt
new file mode 100755
index 0000000..68fbaea
--- /dev/null
+++ b/pregelix/pregelix-example/data/clique2/clique.txt
@@ -0,0 +1,6 @@
+1 2 3 4
+2 1 3
+3 1 2 4 5
+4 1 3
+5 3 6
+6 5
\ No newline at end of file
diff --git a/pregelix/pregelix-example/data/clique3/clique.txt b/pregelix/pregelix-example/data/clique3/clique.txt
new file mode 100755
index 0000000..ed0b7c1
--- /dev/null
+++ b/pregelix/pregelix-example/data/clique3/clique.txt
@@ -0,0 +1,20 @@
+0 1 19
+1 2 3 4 5 6 7 8 9
+2 1 3 4 5 6 7 8 9
+3 1 2 4 5 6 7 8 9
+4 1 2 3 5 6 7 8 9
+5 1 2 3 4 6 7 8 9
+6 1 2 3 4 5 7 8 9
+7 1 2 3 4 5 6 8 9
+8 1 2 3 4 5 6 7 9
+9 1 2 3 4 5 6 7 8 10
+10 9 11
+11 10 12 13 14 15 16 17 18 19
+12 11 13 14 15 16 17 18 19
+13 11 12 14 15 16 17 18 19
+14 11 12 13 15 16 17 18 19
+15 11 12 13 14 16 17 18 19
+16 11 12 13 14 15 17 18 19
+17 11 12 13 14 15 16 18 19
+18 11 12 13 14 15 16 17 19
+19 0 11 12 13 14 15 16 17 18
\ No newline at end of file
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/maximalclique/CliquesWritable.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/maximalclique/CliquesWritable.java
index 0e22ea1..d2c5e6f 100644
--- a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/maximalclique/CliquesWritable.java
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/maximalclique/CliquesWritable.java
@@ -32,6 +32,7 @@
 
     private List<VLongWritable> cliques = new ArrayList<VLongWritable>();
     private int sizeOfClique = 0;
+    private VLongWritable srcId = new VLongWritable(0);
 
     public CliquesWritable(List<VLongWritable> cliques, int sizeOfClique) {
         this.cliques = cliques;
@@ -43,6 +44,16 @@
     }
 
     /**
+     * Set the srcId
+     * 
+     * @param srcId
+     *            the source vertex Id
+     */
+    public void setSrcId(VLongWritable srcId) {
+        this.srcId = srcId;
+    }
+
+    /**
      * Set the size of cliques.
      * 
      * @param sizeOfClique
@@ -103,6 +114,11 @@
                 cliques.add(vid);
             }
         }
+
+        if (srcId == null) {
+            srcId = new VLongWritable();
+        }
+        srcId.readFields(input);
     }
 
     @Override
@@ -117,6 +133,8 @@
         for (int i = 0; i < cliques.size(); i++) {
             cliques.get(i).write(output);
         }
+
+        srcId.write(output);
     }
 
     @Override
@@ -126,11 +144,14 @@
         StringBuffer sb = new StringBuffer();
         int numCliques = cliques.size() / sizeOfClique;
         for (int i = 0; i < numCliques; i++) {
+            sb.append(srcId);
+            sb.append(",");
+            int start = i * sizeOfClique;
             for (int j = 0; j < sizeOfClique - 1; j++) {
-                sb.append(cliques.get(j));
+                sb.append(cliques.get(start + j));
                 sb.append(",");
             }
-            sb.append(cliques.get(sizeOfClique - 1));
+            sb.append(cliques.get(start + sizeOfClique - 1));
             sb.append(";");
         }
         return sb.toString();
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/maximalclique/MaximalCliqueVertex.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/maximalclique/MaximalCliqueVertex.java
index 266feb7..85a139e 100644
--- a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/maximalclique/MaximalCliqueVertex.java
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/maximalclique/MaximalCliqueVertex.java
@@ -44,8 +44,8 @@
 
 /**
  * The maximal clique example -- find maximal cliques in an undirected graph.
- * The result cliques contains vertexes ordered by the vertex id ascendingly. The algorithm takes
- * advantage of that property to do effective pruning.
+ * The result cliques contains vertexes ordered by the vertex id ascendingly.
+ * The algorithm takes advantage of that property to do effective pruning.
  */
 public class MaximalCliqueVertex extends Vertex<VLongWritable, CliquesWritable, NullWritable, AdjacencyListWritable> {
 
@@ -55,7 +55,7 @@
     private int largestCliqueSizeSoFar = 0;
     private List<BitSet> currentMaximalCliques = new ArrayList<BitSet>();
     private CliquesWritable tmpValue = new CliquesWritable();
-    private List<VLongWritable> cliques = new ArrayList<VLongWritable>();
+    private List<VLongWritable> cliqueList = new ArrayList<VLongWritable>();
 
     /**
      * Update the current maximal cliques
@@ -68,7 +68,6 @@
         vertexList.clear();
         invertedMap.clear();
         currentMaximalCliques.clear();
-        cliques.clear();
         tmpValue.reset();
 
         // build the initial sub graph
@@ -76,8 +75,6 @@
             AdjacencyListWritable adj = values.next();
             map.put(adj.getSource(), adj);
         }
-        VLongWritable srcId = getVertexId();
-        map.put(srcId, new AdjacencyListWritable());
 
         // build the vertex list (vertex id in ascending order) and the inverted list of vertexes
         int i = 0;
@@ -86,12 +83,14 @@
             invertedMap.put(v, i++);
         }
 
-        //clean up adjacency list --- remove vertexes who are not neighbors of key
+        // clean up adjacency list --- remove vertexes who are not neighbors of
+        // key
         for (AdjacencyListWritable adj : map.values()) {
             adj.cleanNonMatch(vertexList);
         }
 
-        // get the h-index of the subgraph --- which is the maximum depth to explore
+        // get the h-index of the subgraph --- which is the maximum depth to
+        // explore
         int[] neighborCounts = new int[map.size()];
         i = 0;
         for (AdjacencyListWritable adj : map.values()) {
@@ -105,11 +104,13 @@
             }
             h++;
         }
-        if (h < largestCliqueSizeSoFar) {
+
+        // the clique size is upper-bounded by h+1
+        if (h + 1 < largestCliqueSizeSoFar) {
             return;
         }
 
-        //start depth-first search
+        // start depth-first search
         BitSet cliqueSoFar = new BitSet(h);
         for (VLongWritable v : vertexList) {
             cliqueSoFar.set(invertedMap.get(v));
@@ -117,16 +118,15 @@
             cliqueSoFar.clear();
         }
 
-        //output local maximal cliques
+        // output local maximal cliques
+        tmpValue.setSrcId(getVertexId());
         for (BitSet clique : currentMaximalCliques) {
-            int keyIndex = invertedMap.get(srcId);
-            clique.set(keyIndex);
             generateClique(clique);
-            tmpValue.addCliques(cliques);
+            tmpValue.addCliques(cliqueList);
             tmpValue.setCliqueSize(clique.cardinality());
         }
 
-        //update the vertex state
+        // update the vertex state
         setVertexValue(tmpValue);
     }
 
@@ -136,13 +136,15 @@
      * @param clique
      *            the bitmap representation of a clique
      */
-    private void generateClique(BitSet clique) {
+    private List<VLongWritable> generateClique(BitSet clique) {
+        cliqueList.clear();
         for (int j = 0; j < clique.length();) {
             j = clique.nextSetBit(j);
             VLongWritable v = vertexList.get(j);
-            cliques.add(v);
+            cliqueList.add(v);
             j++;
         }
+        return cliqueList;
     }
 
     /**
@@ -170,7 +172,7 @@
         while (neighbors.hasNext()) {
             VLongWritable neighbor = neighbors.next();
             if (!isTested(neighbor, cliqueSoFar) && isClique(neighbor, cliqueSoFar)) {
-                //snapshot the clique
+                // snapshot the clique
                 int cliqueLength = cliqueSoFar.length();
                 // expand the clique
                 cliqueSoFar.set(invertedMap.get(neighbor));
@@ -217,7 +219,8 @@
         int largestSetIndex = cliqueSoFar.length() - 1;
         if (index > largestSetIndex) {
             // we only return cliques with vertexes in the ascending order
-            // hence, the new vertex must be larger than the largesetSetIndex in the clique
+            // hence, the new vertex must be larger than the largesetSetIndex in
+            // the clique
             return false;
         } else {
             // otherwise, we think the vertex is "tested"
@@ -236,7 +239,8 @@
      */
     private boolean isClique(VLongWritable newVertex, BitSet cliqueSoFar) {
         AdjacencyListWritable adj = map.get(newVertex);
-        // check whether each existing vertex is in the neighbor set of newVertex
+        // check whether each existing vertex is in the neighbor set of
+        // newVertex
         for (int i = 0; i < cliqueSoFar.length();) {
             i = cliqueSoFar.nextSetBit(i);
             VLongWritable v = vertexList.get(i);
@@ -249,9 +253,8 @@
     }
 
     /**
-     * For superstep 1, send outgoing mesages.
-     * For superstep 2, calculate maximal cliques.
-     * otherwise, vote to halt.
+     * For superstep 1, send outgoing mesages. For superstep 2, calculate
+     * maximal cliques. otherwise, vote to halt.
      */
     @Override
     public void compute(Iterator<AdjacencyListWritable> msgIterator) {
@@ -300,9 +303,11 @@
     private void sendOutgoingMsgs(List<Edge<VLongWritable, NullWritable>> edges) {
         for (int i = 0; i < edges.size(); i++) {
             if (edges.get(i).getDestVertexId().get() < getVertexId().get()) {
-                // only add emit for the vertexes whose id is smaller than the vertex id 
+                // only add emit for the vertexes whose id is smaller than the
+                // vertex id
                 // to avoid the duplicate removal step,
-                // because all the resulting cliques will have vertexes in the ascending order.
+                // because all the resulting cliques will have vertexes in the
+                // ascending order.
                 AdjacencyListWritable msg = new AdjacencyListWritable();
                 msg.setSource(getVertexId());
                 for (int j = i + 1; j < edges.size(); j++) {
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobgen/JobGenerator.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobgen/JobGenerator.java
index ca5a1c4..0a5b214 100644
--- a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobgen/JobGenerator.java
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobgen/JobGenerator.java
@@ -55,6 +55,8 @@
     private static String HDFS_OUTPUTPAH2 = "/resultcomplex";
 
     private static String HDFS_INPUTPATH3 = "/clique";
+    private static String HDFS_INPUTPATH4 = "/clique2";
+    private static String HDFS_INPUTPATH5 = "/clique3";
     private static String HDFS_OUTPUTPAH3 = "/resultclique";
 
     private static void generatePageRankJobReal(String jobName, String outputPath) throws IOException {
@@ -218,6 +220,30 @@
         FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH3));
         job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
     }
+    
+    private static void generateMaximalCliqueJob2(String jobName, String outputPath) throws IOException {
+        PregelixJob job = new PregelixJob(jobName);
+        job.setVertexClass(MaximalCliqueVertex.class);
+        job.setGlobalAggregatorClass(MaximalCliqueAggregator.class);
+        job.setDynamicVertexValueSize(true);
+        job.setVertexInputFormatClass(TextMaximalCliqueInputFormat.class);
+        job.setVertexOutputFormatClass(MaximalCliqueVertexOutputFormat.class);
+        FileInputFormat.setInputPaths(job, HDFS_INPUTPATH4);
+        FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH3));
+        job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
+    }
+    
+    private static void generateMaximalCliqueJob3(String jobName, String outputPath) throws IOException {
+        PregelixJob job = new PregelixJob(jobName);
+        job.setVertexClass(MaximalCliqueVertex.class);
+        job.setGlobalAggregatorClass(MaximalCliqueAggregator.class);
+        job.setDynamicVertexValueSize(true);
+        job.setVertexInputFormatClass(TextMaximalCliqueInputFormat.class);
+        job.setVertexOutputFormatClass(MaximalCliqueVertexOutputFormat.class);
+        FileInputFormat.setInputPaths(job, HDFS_INPUTPATH5);
+        FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH3));
+        job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
+    }
 
     private static void generateGraphMutationJob(String jobName, String outputPath) throws IOException {
         PregelixJob job = new PregelixJob(jobName);
@@ -261,6 +287,8 @@
 
     private static void genMaximalClique() throws IOException {
         generateMaximalCliqueJob("Maximal Clique", outputBase + "MaximalClique.xml");
+        generateMaximalCliqueJob2("Maximal Clique 2", outputBase + "MaximalClique2.xml");
+        generateMaximalCliqueJob3("Maximal Clique 3", outputBase + "MaximalClique3.xml");
     }
 
     private static void genGraphMutation() throws IOException {
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobrun/RunJobTestCase.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobrun/RunJobTestCase.java
index 5a556fa..af797bf 100644
--- a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobrun/RunJobTestCase.java
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobrun/RunJobTestCase.java
@@ -48,6 +48,12 @@
     private static String HDFS_INPUTPATH3 = "/clique";
     private static String HDFS_OUTPUTPAH3 = "/resultclique";
 
+    private static String HDFS_INPUTPATH4 = "/clique2";
+    private static String HDFS_OUTPUTPAH4 = "/resultclique";
+
+    private static String HDFS_INPUTPATH5 = "/clique3";
+    private static String HDFS_OUTPUTPAH5 = "/resultclique";
+
     private final PregelixJob job;
     private JobGen[] giraphJobGens;
     private final String resultFileName;
@@ -68,9 +74,15 @@
         } else if (inputPaths[0].toString().endsWith(HDFS_INPUTPATH2)) {
             FileInputFormat.setInputPaths(job, HDFS_INPUTPATH2);
             FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH2));
-        } else {
+        } else if (inputPaths[0].toString().endsWith(HDFS_INPUTPATH3)) {
             FileInputFormat.setInputPaths(job, HDFS_INPUTPATH3);
             FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH3));
+        } else if (inputPaths[0].toString().endsWith(HDFS_INPUTPATH4)) {
+            FileInputFormat.setInputPaths(job, HDFS_INPUTPATH4);
+            FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH4));
+        } else if (inputPaths[0].toString().endsWith(HDFS_INPUTPATH5)) {
+            FileInputFormat.setInputPaths(job, HDFS_INPUTPATH5);
+            FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH5));
         }
         job.setJobName(jobName);
         this.resultFileName = resultFile;
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobrun/RunJobTestSuite.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobrun/RunJobTestSuite.java
index 4bf83e6..f415a03 100644
--- a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobrun/RunJobTestSuite.java
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobrun/RunJobTestSuite.java
@@ -61,6 +61,12 @@
 
     private static final String DATA_PATH3 = "data/clique/clique.txt";
     private static final String HDFS_PATH3 = "/clique/";
+    
+    private static final String DATA_PATH4 = "data/clique2/clique.txt";
+    private static final String HDFS_PATH4 = "/clique2/";
+    
+    private static final String DATA_PATH5 = "data/clique3/clique.txt";
+    private static final String HDFS_PATH5 = "/clique3/";
 
     private static final String HYRACKS_APP_NAME = "pregelix";
     private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
@@ -110,6 +116,16 @@
         dest = new Path(HDFS_PATH3);
         dfs.mkdirs(dest);
         dfs.copyFromLocalFile(src, dest);
+        
+        src = new Path(DATA_PATH4);
+        dest = new Path(HDFS_PATH4);
+        dfs.mkdirs(dest);
+        dfs.copyFromLocalFile(src, dest);
+        
+        src = new Path(DATA_PATH5);
+        dest = new Path(HDFS_PATH5);
+        dfs.mkdirs(dest);
+        dfs.copyFromLocalFile(src, dest);
 
         DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
         conf.writeXml(confOutput);
diff --git a/pregelix/pregelix-example/src/test/resources/expected/MaximalClique2.result b/pregelix/pregelix-example/src/test/resources/expected/MaximalClique2.result
new file mode 100644
index 0000000..afbffcd
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/MaximalClique2.result
@@ -0,0 +1,6 @@
+1 1,2,3;1,3,4;
+2 2,3;
+3 
+4 
+5 
+6 
diff --git a/pregelix/pregelix-example/src/test/resources/expected/MaximalClique3.result b/pregelix/pregelix-example/src/test/resources/expected/MaximalClique3.result
new file mode 100644
index 0000000..fa473e8
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/MaximalClique3.result
@@ -0,0 +1,20 @@
+0 0,19;
+1 1,2,3,4,5,6,7,8,9;
+2 2,3,4,5,6,7,8,9;
+3 
+4 
+5 
+6 
+7 
+8 
+9 
+10 
+11 11,12,13,14,15,16,17,18,19;
+12 12,13,14,15,16,17,18,19;
+13 
+14 
+15 
+16 
+17 
+18 
+19 
diff --git a/pregelix/pregelix-example/src/test/resources/jobs/MaximalClique2.xml b/pregelix/pregelix-example/src/test/resources/jobs/MaximalClique2.xml
new file mode 100644
index 0000000..5621259
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/jobs/MaximalClique2.xml
@@ -0,0 +1,142 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.input.dir</name><value>file:/clique2</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>fs.checkpoint.dir</name><value>${hadoop.tmp.dir}/dfs/namesecondary</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>mapred.output.dir</name><value>/resultclique</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.job.name</name><value>Maximal Clique 2</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>fs.default.name</name><value>file:///</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>mapred.reduce.tasks</name><value>1</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.ScriptBasedMapping</value></property>
+<property><name>pregelix.vertexClass</name><value>edu.uci.ics.pregelix.example.maximalclique.MaximalCliqueVertex</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>pregelix.vertexOutputFormatClass</name><value>edu.uci.ics.pregelix.example.maximalclique.MaximalCliqueVertex$MaximalCliqueVertexOutputFormat</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>pregelix.vertexInputFormatClass</name><value>edu.uci.ics.pregelix.example.maximalclique.TextMaximalCliqueInputFormat</value></property>
+<property><name>pregelix.aggregatorClass</name><value>edu.uci.ics.pregelix.example.maximalclique.MaximalCliqueAggregator</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>pregelix.incStateLength</name><value>true</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+</configuration>
\ No newline at end of file
diff --git a/pregelix/pregelix-example/src/test/resources/jobs/MaximalClique3.xml b/pregelix/pregelix-example/src/test/resources/jobs/MaximalClique3.xml
new file mode 100644
index 0000000..d4f81ba
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/jobs/MaximalClique3.xml
@@ -0,0 +1,142 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.input.dir</name><value>file:/clique3</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>fs.checkpoint.dir</name><value>${hadoop.tmp.dir}/dfs/namesecondary</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>mapred.output.dir</name><value>/resultclique</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.job.name</name><value>Maximal Clique 3</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>fs.default.name</name><value>file:///</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>mapred.reduce.tasks</name><value>1</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.ScriptBasedMapping</value></property>
+<property><name>pregelix.vertexClass</name><value>edu.uci.ics.pregelix.example.maximalclique.MaximalCliqueVertex</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>pregelix.vertexOutputFormatClass</name><value>edu.uci.ics.pregelix.example.maximalclique.MaximalCliqueVertex$MaximalCliqueVertexOutputFormat</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>pregelix.vertexInputFormatClass</name><value>edu.uci.ics.pregelix.example.maximalclique.TextMaximalCliqueInputFormat</value></property>
+<property><name>pregelix.aggregatorClass</name><value>edu.uci.ics.pregelix.example.maximalclique.MaximalCliqueAggregator</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>pregelix.incStateLength</name><value>true</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+</configuration>
\ No newline at end of file