- Fixed type-casting issue
- Reorganized duplicated internal class in the DelimitedDataParser and DelimitedDataParserFactory
- Prevented a user from creating an inverted index on a dataset with a variable-length PK
- INT64 is now the default type
- Fixed issue 852
Change-Id: I2d71e8a21da4f709c3259a3d3f678c640f9e1160
Reviewed-on: http://fulliautomatix.ics.uci.edu:8443/192
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Till Westmann <westmann@gmail.com>
diff --git a/asterix-fuzzyjoin/pom.xml b/asterix-fuzzyjoin/pom.xml
index 2066ed0..8d19efbc 100644
--- a/asterix-fuzzyjoin/pom.xml
+++ b/asterix-fuzzyjoin/pom.xml
@@ -21,28 +21,28 @@
<build>
<plugins>
<plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-compiler-plugin</artifactId>
- <version>2.3.2</version>
- <configuration>
- <source>1.7</source>
- <target>1.7</target>
- <compilerArguments>
- <encoding>utf8</encoding>
- </compilerArguments>
- </configuration>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.3.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <compilerArguments>
+ <encoding>utf8</encoding>
+ </compilerArguments>
+ </configuration>
</plugin>
<plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <version>2.4</version>
- <executions>
- <execution>
- <goals>
- <goal>test-jar</goal>
- </goals>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <version>2.4</version>
+ <executions>
+ <execution>
+ <goals>
+ <goal>test-jar</goal>
+ </goals>
<phase>test-compile</phase>
- </execution>
+ </execution>
</executions>
<configuration>
<outputDirectory>${basedir}/target</outputDirectory>
@@ -57,6 +57,10 @@
<groupId>junit</groupId>
<artifactId>junit</artifactId>
</dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-api</artifactId>
+ </dependency>
</dependencies>
</project>
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IGenericSimilarityMetric.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IGenericSimilarityMetric.java
index 85d1785..50ba7df 100644
--- a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IGenericSimilarityMetric.java
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IGenericSimilarityMetric.java
@@ -13,17 +13,20 @@
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under
* the License.
- *
+ *
* Author: Alexander Behm <abehm (at) ics.uci.edu>
*/
package edu.uci.ics.asterix.fuzzyjoin.similarity;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
public interface IGenericSimilarityMetric {
// returns similarity
- public float getSimilarity(IListIterator firstList, IListIterator secondList);
+ public float getSimilarity(IListIterator firstList, IListIterator secondList) throws HyracksDataException;
// returns -1 if does not satisfy threshold
// else returns similarity
- public float getSimilarity(IListIterator firstList, IListIterator secondList, float simThresh);
+ public float getSimilarity(IListIterator firstList, IListIterator secondList, float simThresh)
+ throws HyracksDataException;
}
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IListIterator.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IListIterator.java
index 647c35f..b4fbcef 100644
--- a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IListIterator.java
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/IListIterator.java
@@ -13,14 +13,16 @@
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under
* the License.
- *
+ *
* Author: Alexander Behm <abehm (at) ics.uci.edu>
*/
package edu.uci.ics.asterix.fuzzyjoin.similarity;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
public interface IListIterator {
- public int compare(IListIterator cmpIter);
+ public int compare(IListIterator cmpIter) throws HyracksDataException;
public byte[] getData();
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetric.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetric.java
index 415e785..a633e0e 100644
--- a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetric.java
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetric.java
@@ -13,17 +13,18 @@
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under
* the License.
- *
+ *
* Author: Rares Vernica <rares (at) ics.uci.edu>
*/
package edu.uci.ics.asterix.fuzzyjoin.similarity;
import edu.uci.ics.asterix.fuzzyjoin.tokenizer.Tokenizer;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
public abstract class SimilarityMetric {
- public static int getIntersectSize(IListIterator tokensX, IListIterator tokensY) {
+ public static int getIntersectSize(IListIterator tokensX, IListIterator tokensY) throws HyracksDataException {
int intersectSize = 0;
while (tokensX.hasNext() && tokensY.hasNext()) {
int cmp = tokensX.compare(tokensY);
@@ -169,7 +170,7 @@
// public abstract float getSimilarity(DataBag tokensX, int lengthX,
// DataBag tokensY, int lengthY);
- public float getSimilarity(IListIterator tokensX, IListIterator tokensY) {
+ public float getSimilarity(IListIterator tokensX, IListIterator tokensY) throws HyracksDataException {
int intersectionSize = SimilarityMetric.getIntersectSize(tokensX, tokensY);
int totalSize = tokensX.size() + tokensY.size();
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
index ab60b2c..f723998 100644
--- a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
@@ -13,7 +13,7 @@
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under
* the License.
- *
+ *
* Author: Alexander Behm <abehm (at) ics.uci.edu>
*/
@@ -22,6 +22,7 @@
import java.util.Arrays;
import edu.uci.ics.asterix.fuzzyjoin.tokenizer.StringUtils;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
@@ -42,7 +43,7 @@
}
@Override
- public float getSimilarity(IListIterator firstList, IListIterator secondList) {
+ public float getSimilarity(IListIterator firstList, IListIterator secondList) throws HyracksDataException {
int flLen = firstList.size();
int slLen = secondList.size();
@@ -84,7 +85,8 @@
}
@Override
- public float getSimilarity(IListIterator firstList, IListIterator secondList, float simThresh) {
+ public float getSimilarity(IListIterator firstList, IListIterator secondList, float simThresh)
+ throws HyracksDataException {
int edThresh = (int) simThresh;
@@ -104,7 +106,8 @@
}
}
- public int getSimilarityContains(IListIterator exprList, IListIterator patternList, int simThresh) {
+ public int getSimilarityContains(IListIterator exprList, IListIterator patternList, int simThresh)
+ throws HyracksDataException {
int exprLen = exprList.size();
int patternLen = patternList.size();
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java
index b0c0638..f8cd3ec 100644
--- a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java
@@ -13,7 +13,7 @@
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under
* the License.
- *
+ *
* Author: Rares Vernica <rares (at) ics.uci.edu>
*/
@@ -23,6 +23,7 @@
import java.util.TreeSet;
import edu.uci.ics.asterix.fuzzyjoin.tokenizer.Tokenizer;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
public class SimilarityMetricJaccard extends SimilarityMetric implements IGenericSimilarityMetric {
@@ -60,7 +61,7 @@
// }
@Override
- public float getSimilarity(IListIterator tokensX, IListIterator tokensY) {
+ public float getSimilarity(IListIterator tokensX, IListIterator tokensY) throws HyracksDataException {
int intersectionSize = SimilarityMetric.getIntersectSize(tokensX, tokensY);
int totalSize = tokensX.size() + tokensY.size();
@@ -68,7 +69,8 @@
}
@Override
- public float getSimilarity(IListIterator firstList, IListIterator secondList, float simThresh) {
+ public float getSimilarity(IListIterator firstList, IListIterator secondList, float simThresh)
+ throws HyracksDataException {
// apply length filter
int lengthLowerBound = (int) Math.ceil(simThresh * firstList.size());