- Fixed type casting issue
- Reorganized duplicated internal class in the DelimitedDataParser and DelimitedDataParserFactory
- Prevented a user from creating an inverted index on a dataset with a variable-length PK
- INT64 is now the default type
- Fixed issue 852

Change-Id: I2d71e8a21da4f709c3259a3d3f678c640f9e1160
Reviewed-on: http://fulliautomatix.ics.uci.edu:8443/192
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Till Westmann <westmann@gmail.com>
diff --git a/asterix-fuzzyjoin/pom.xml b/asterix-fuzzyjoin/pom.xml
index 2066ed0..8d19efbc 100644
--- a/asterix-fuzzyjoin/pom.xml
+++ b/asterix-fuzzyjoin/pom.xml
@@ -21,28 +21,28 @@
   <build>
     <plugins>
       <plugin>
-	<groupId>org.apache.maven.plugins</groupId>
-	<artifactId>maven-compiler-plugin</artifactId>
-	<version>2.3.2</version>
-	<configuration>
-	  <source>1.7</source>
-	  <target>1.7</target>
-	  <compilerArguments>
-	    <encoding>utf8</encoding>
-	  </compilerArguments>
-	</configuration>
+    <groupId>org.apache.maven.plugins</groupId>
+    <artifactId>maven-compiler-plugin</artifactId>
+    <version>2.3.2</version>
+    <configuration>
+      <source>1.7</source>
+      <target>1.7</target>
+      <compilerArguments>
+        <encoding>utf8</encoding>
+      </compilerArguments>
+    </configuration>
       </plugin>
       <plugin>
-	<groupId>org.apache.maven.plugins</groupId>
-	<artifactId>maven-jar-plugin</artifactId>
-	<version>2.4</version>
-	<executions>
-	  <execution>
-	    <goals>
-	      <goal>test-jar</goal>
-	    </goals>
+    <groupId>org.apache.maven.plugins</groupId>
+    <artifactId>maven-jar-plugin</artifactId>
+    <version>2.4</version>
+    <executions>
+      <execution>
+        <goals>
+          <goal>test-jar</goal>
+        </goals>
             <phase>test-compile</phase>
-          </execution>  
+          </execution>
         </executions>
         <configuration>
           <outputDirectory>${basedir}/target</outputDirectory>
@@ -57,6 +57,10 @@
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
     </dependency>
+    <dependency>
+        <groupId>edu.uci.ics.hyracks</groupId>
+        <artifactId>hyracks-api</artifactId>
+    </dependency>
   </dependencies>
 
 </project>
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IGenericSimilarityMetric.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IGenericSimilarityMetric.java
index 85d1785..50ba7df 100644
--- a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IGenericSimilarityMetric.java
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IGenericSimilarityMetric.java
@@ -13,17 +13,20 @@
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations under
  * the License.
- * 
+ *
  * Author: Alexander Behm <abehm (at) ics.uci.edu>
  */
 
 package edu.uci.ics.asterix.fuzzyjoin.similarity;
 
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
 public interface IGenericSimilarityMetric {
     // returns similarity
-    public float getSimilarity(IListIterator firstList, IListIterator secondList);
+    public float getSimilarity(IListIterator firstList, IListIterator secondList) throws HyracksDataException;
 
     // returns -1 if does not satisfy threshold
     // else returns similarity
-    public float getSimilarity(IListIterator firstList, IListIterator secondList, float simThresh);
+    public float getSimilarity(IListIterator firstList, IListIterator secondList, float simThresh)
+            throws HyracksDataException;
 }
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IListIterator.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IListIterator.java
index 647c35f..b4fbcef 100644
--- a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IListIterator.java
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IListIterator.java
@@ -13,14 +13,16 @@
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations under
  * the License.
- * 
+ *
  * Author: Alexander Behm <abehm (at) ics.uci.edu>
  */
 
 package edu.uci.ics.asterix.fuzzyjoin.similarity;
 
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
 public interface IListIterator {
-    public int compare(IListIterator cmpIter);
+    public int compare(IListIterator cmpIter) throws HyracksDataException;
 
     public byte[] getData();
 
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetric.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetric.java
index 415e785..a633e0e 100644
--- a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetric.java
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetric.java
@@ -13,17 +13,18 @@
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations under
  * the License.
- * 
+ *
  * Author: Rares Vernica <rares (at) ics.uci.edu>
  */
 
 package edu.uci.ics.asterix.fuzzyjoin.similarity;
 
 import edu.uci.ics.asterix.fuzzyjoin.tokenizer.Tokenizer;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
 
 public abstract class SimilarityMetric {
 
-    public static int getIntersectSize(IListIterator tokensX, IListIterator tokensY) {
+    public static int getIntersectSize(IListIterator tokensX, IListIterator tokensY) throws HyracksDataException {
         int intersectSize = 0;
         while (tokensX.hasNext() && tokensY.hasNext()) {
             int cmp = tokensX.compare(tokensY);
@@ -169,7 +170,7 @@
     // public abstract float getSimilarity(DataBag tokensX, int lengthX,
     // DataBag tokensY, int lengthY);
 
-    public float getSimilarity(IListIterator tokensX, IListIterator tokensY) {
+    public float getSimilarity(IListIterator tokensX, IListIterator tokensY) throws HyracksDataException {
         int intersectionSize = SimilarityMetric.getIntersectSize(tokensX, tokensY);
         int totalSize = tokensX.size() + tokensY.size();
 
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
index ab60b2c..f723998 100644
--- a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
@@ -13,7 +13,7 @@
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations under
  * the License.
- * 
+ *
  * Author: Alexander Behm <abehm (at) ics.uci.edu>
  */
 
@@ -22,6 +22,7 @@
 import java.util.Arrays;
 
 import edu.uci.ics.asterix.fuzzyjoin.tokenizer.StringUtils;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
 
 public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
 
@@ -42,7 +43,7 @@
     }
 
     @Override
-    public float getSimilarity(IListIterator firstList, IListIterator secondList) {
+    public float getSimilarity(IListIterator firstList, IListIterator secondList) throws HyracksDataException {
         int flLen = firstList.size();
         int slLen = secondList.size();
 
@@ -84,7 +85,8 @@
     }
 
     @Override
-    public float getSimilarity(IListIterator firstList, IListIterator secondList, float simThresh) {
+    public float getSimilarity(IListIterator firstList, IListIterator secondList, float simThresh)
+            throws HyracksDataException {
 
         int edThresh = (int) simThresh;
 
@@ -104,7 +106,8 @@
         }
     }
 
-    public int getSimilarityContains(IListIterator exprList, IListIterator patternList, int simThresh) {
+    public int getSimilarityContains(IListIterator exprList, IListIterator patternList, int simThresh)
+            throws HyracksDataException {
         int exprLen = exprList.size();
         int patternLen = patternList.size();
 
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java
index b0c0638..f8cd3ec 100644
--- a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java
@@ -13,7 +13,7 @@
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations under
  * the License.
- * 
+ *
  * Author: Rares Vernica <rares (at) ics.uci.edu>
  */
 
@@ -23,6 +23,7 @@
 import java.util.TreeSet;
 
 import edu.uci.ics.asterix.fuzzyjoin.tokenizer.Tokenizer;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
 
 public class SimilarityMetricJaccard extends SimilarityMetric implements IGenericSimilarityMetric {
 
@@ -60,7 +61,7 @@
     // }
 
     @Override
-    public float getSimilarity(IListIterator tokensX, IListIterator tokensY) {
+    public float getSimilarity(IListIterator tokensX, IListIterator tokensY) throws HyracksDataException {
         int intersectionSize = SimilarityMetric.getIntersectSize(tokensX, tokensY);
         int totalSize = tokensX.size() + tokensY.size();
 
@@ -68,7 +69,8 @@
     }
 
     @Override
-    public float getSimilarity(IListIterator firstList, IListIterator secondList, float simThresh) {
+    public float getSimilarity(IListIterator firstList, IListIterator secondList, float simThresh)
+            throws HyracksDataException {
 
         // apply length filter
         int lengthLowerBound = (int) Math.ceil(simThresh * firstList.size());