edit-distance-contains function implementation
diff --git a/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java b/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java
index 39b9480..6e0c1f7 100644
--- a/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java
+++ b/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java
@@ -105,6 +105,7 @@
     static {
         secondLevelFuncIdents.add(AsterixBuiltinFunctions.SIMILARITY_JACCARD_CHECK);
         secondLevelFuncIdents.add(AsterixBuiltinFunctions.EDIT_DISTANCE_CHECK);
+        secondLevelFuncIdents.add(AsterixBuiltinFunctions.EDIT_DISTANCE_CONTAINS);
     }
 
     public static InvertedIndexAccessMethod INSTANCE = new InvertedIndexAccessMethod();
@@ -115,16 +116,16 @@
     }
 
     @Override
-    public boolean analyzeFuncExprArgs(AbstractFunctionCallExpression funcExpr, List<AbstractLogicalOperator> assignsAndUnnests,
-            AccessMethodAnalysisContext analysisCtx) {
+    public boolean analyzeFuncExprArgs(AbstractFunctionCallExpression funcExpr,
+            List<AbstractLogicalOperator> assignsAndUnnests, AccessMethodAnalysisContext analysisCtx) {
         if (funcExpr.getFunctionIdentifier() == AsterixBuiltinFunctions.CONTAINS) {
             return AccessMethodUtils.analyzeFuncExprArgsForOneConstAndVar(funcExpr, analysisCtx);
         }
         return analyzeGetItemFuncExpr(funcExpr, assignsAndUnnests, analysisCtx);
     }
 
-    public boolean analyzeGetItemFuncExpr(AbstractFunctionCallExpression funcExpr, List<AbstractLogicalOperator> assignsAndUnnests,
-            AccessMethodAnalysisContext analysisCtx) {
+    public boolean analyzeGetItemFuncExpr(AbstractFunctionCallExpression funcExpr,
+            List<AbstractLogicalOperator> assignsAndUnnests, AccessMethodAnalysisContext analysisCtx) {
         if (funcExpr.getFunctionIdentifier() != AsterixBuiltinFunctions.GET_ITEM) {
             return false;
         }
@@ -169,8 +170,7 @@
                         matchedFuncExpr = (AbstractFunctionCallExpression) matchedExpr;
                         break;
                     }
-                }
-                else {
+                } else {
                     UnnestOperator unnest = (UnnestOperator) op;
                     LogicalVariable var = unnest.getVariable();
                     if (var == varRefExpr.getVariableReference()) {
@@ -182,7 +182,8 @@
                         if (unnestFuncExpr.getFunctionIdentifier() != AsterixBuiltinFunctions.SCAN_COLLECTION) {
                             return false;
                         }
-                        matchedFuncExpr = (AbstractFunctionCallExpression) unnestFuncExpr.getArguments().get(0).getValue();
+                        matchedFuncExpr = (AbstractFunctionCallExpression) unnestFuncExpr.getArguments().get(0)
+                                .getValue();
                     }
                 }
                 // We've already found a match.
@@ -198,8 +199,8 @@
         }
         boolean selectMatchFound = analyzeSelectSimilarityCheckFuncExprArgs(matchedFuncExpr, assignsAndUnnests,
                 matchedAssignOrUnnestIndex, analysisCtx);
-        boolean joinMatchFound = analyzeJoinSimilarityCheckFuncExprArgs(matchedFuncExpr, assignsAndUnnests, matchedAssignOrUnnestIndex,
-                analysisCtx);
+        boolean joinMatchFound = analyzeJoinSimilarityCheckFuncExprArgs(matchedFuncExpr, assignsAndUnnests,
+                matchedAssignOrUnnestIndex, analysisCtx);
         if (selectMatchFound || joinMatchFound) {
             return true;
         }
@@ -207,7 +208,8 @@
     }
 
     private boolean analyzeJoinSimilarityCheckFuncExprArgs(AbstractFunctionCallExpression funcExpr,
-            List<AbstractLogicalOperator> assignsAndUnnests, int matchedAssignOrUnnestIndex, AccessMethodAnalysisContext analysisCtx) {
+            List<AbstractLogicalOperator> assignsAndUnnests, int matchedAssignOrUnnestIndex,
+            AccessMethodAnalysisContext analysisCtx) {
         // There should be exactly three arguments.
         // The last function argument is assumed to be the similarity threshold.
         IAlgebricksConstantValue constThreshVal = null;
@@ -223,11 +225,13 @@
                 || arg2.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
             return false;
         }
-        LogicalVariable fieldVar1 = getNonConstArgFieldVar(arg1, funcExpr, assignsAndUnnests, matchedAssignOrUnnestIndex);
+        LogicalVariable fieldVar1 = getNonConstArgFieldVar(arg1, funcExpr, assignsAndUnnests,
+                matchedAssignOrUnnestIndex);
         if (fieldVar1 == null) {
             return false;
         }
-        LogicalVariable fieldVar2 = getNonConstArgFieldVar(arg2, funcExpr, assignsAndUnnests, matchedAssignOrUnnestIndex);
+        LogicalVariable fieldVar2 = getNonConstArgFieldVar(arg2, funcExpr, assignsAndUnnests,
+                matchedAssignOrUnnestIndex);
         if (fieldVar2 == null) {
             return false;
         }
@@ -237,7 +241,8 @@
     }
 
     private boolean analyzeSelectSimilarityCheckFuncExprArgs(AbstractFunctionCallExpression funcExpr,
-            List<AbstractLogicalOperator> assignsAndUnnests, int matchedAssignOrUnnestIndex, AccessMethodAnalysisContext analysisCtx) {
+            List<AbstractLogicalOperator> assignsAndUnnests, int matchedAssignOrUnnestIndex,
+            AccessMethodAnalysisContext analysisCtx) {
         // There should be exactly three arguments.
         // The last function argument is assumed to be the similarity threshold.
         IAlgebricksConstantValue constThreshVal = null;
@@ -264,7 +269,8 @@
         }
         ConstantExpression constExpr = (ConstantExpression) constArg;
         IAlgebricksConstantValue constFilterVal = constExpr.getValue();
-        LogicalVariable fieldVar = getNonConstArgFieldVar(nonConstArg, funcExpr, assignsAndUnnests, matchedAssignOrUnnestIndex);
+        LogicalVariable fieldVar = getNonConstArgFieldVar(nonConstArg, funcExpr, assignsAndUnnests,
+                matchedAssignOrUnnestIndex);
         if (fieldVar == null) {
             return false;
         }
@@ -274,7 +280,8 @@
     }
 
     private LogicalVariable getNonConstArgFieldVar(ILogicalExpression nonConstArg,
-            AbstractFunctionCallExpression funcExpr, List<AbstractLogicalOperator> assignsAndUnnests, int matchedAssignOrUnnestIndex) {
+            AbstractFunctionCallExpression funcExpr, List<AbstractLogicalOperator> assignsAndUnnests,
+            int matchedAssignOrUnnestIndex) {
         LogicalVariable fieldVar = null;
         // Analyze nonConstArg depending on similarity function.
         if (funcExpr.getFunctionIdentifier() == AsterixBuiltinFunctions.SIMILARITY_JACCARD_CHECK) {
@@ -290,7 +297,8 @@
                 nonConstArg = nonConstFuncExpr.getArguments().get(0).getValue();
             }
         }
-        if (funcExpr.getFunctionIdentifier() == AsterixBuiltinFunctions.EDIT_DISTANCE_CHECK) {
+        if (funcExpr.getFunctionIdentifier() == AsterixBuiltinFunctions.EDIT_DISTANCE_CHECK
+                || funcExpr.getFunctionIdentifier() == AsterixBuiltinFunctions.EDIT_DISTANCE_CONTAINS) {
             while (nonConstArg.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
                 AbstractFunctionCallExpression nonConstFuncExpr = (AbstractFunctionCallExpression) nonConstArg;
                 if (nonConstFuncExpr.getFunctionIdentifier() != AsterixBuiltinFunctions.WORD_TOKENS
@@ -431,7 +439,8 @@
         // Create "panic" (non indexed) nested-loop join path if necessary.
         Mutable<ILogicalOperator> panicJoinRef = null;
         Map<LogicalVariable, LogicalVariable> panicVarMap = null;
-        if (optFuncExpr.getFuncExpr().getFunctionIdentifier() == AsterixBuiltinFunctions.EDIT_DISTANCE_CHECK) {
+        if (optFuncExpr.getFuncExpr().getFunctionIdentifier() == AsterixBuiltinFunctions.EDIT_DISTANCE_CHECK
+                || optFuncExpr.getFuncExpr().getFunctionIdentifier() == AsterixBuiltinFunctions.EDIT_DISTANCE_CONTAINS) {
             panicJoinRef = new MutableObject<ILogicalOperator>(joinRef.getValue());
             panicVarMap = new HashMap<LogicalVariable, LogicalVariable>();
             Mutable<ILogicalOperator> newProbeRootRef = createPanicNestedLoopJoinPlan(panicJoinRef, indexSubTree,
@@ -658,9 +667,9 @@
                         .getConstantVal(0))));
                 isFilterableArgs.add(new MutableObject<ILogicalExpression>(AccessMethodUtils
                         .createInt32Constant(chosenIndex.getGramLength())));
-                // TODO: Currently usePrePost is hardcoded to be true.
+                boolean usePrePost = optFuncExpr.containsPartialField() ? false : true;
                 isFilterableArgs.add(new MutableObject<ILogicalExpression>(AccessMethodUtils
-                        .createBooleanConstant(true)));
+                        .createBooleanConstant(usePrePost)));
                 isFilterableExpr = new ScalarFunctionCallExpression(
                         FunctionUtils.getFunctionInfo(AsterixBuiltinFunctions.EDIT_DISTANCE_STRING_IS_FILTERABLE),
                         isFilterableArgs);
@@ -743,7 +752,8 @@
             // Add the similarity threshold which, by convention, is the last constant value.
             jobGenParams.setSimilarityThreshold(optFuncExpr.getConstantVal(optFuncExpr.getNumConstantVals() - 1));
         }
-        if (optFuncExpr.getFuncExpr().getFunctionIdentifier() == AsterixBuiltinFunctions.EDIT_DISTANCE_CHECK) {
+        if (optFuncExpr.getFuncExpr().getFunctionIdentifier() == AsterixBuiltinFunctions.EDIT_DISTANCE_CHECK
+                || optFuncExpr.getFuncExpr().getFunctionIdentifier() == AsterixBuiltinFunctions.EDIT_DISTANCE_CONTAINS) {
             if (optFuncExpr.containsPartialField()) {
                 jobGenParams.setSearchModifierType(SearchModifierType.CONJUNCTIVE_EDIT_DISTANCE);
             } else {
@@ -771,7 +781,8 @@
                 .containsKey(SkipSecondaryIndexSearchExpressionAnnotation.INSTANCE)) {
             return false;
         }
-        if (optFuncExpr.getFuncExpr().getFunctionIdentifier() == AsterixBuiltinFunctions.EDIT_DISTANCE_CHECK) {
+        if (optFuncExpr.getFuncExpr().getFunctionIdentifier() == AsterixBuiltinFunctions.EDIT_DISTANCE_CHECK
+                || optFuncExpr.getFuncExpr().getFunctionIdentifier() == AsterixBuiltinFunctions.EDIT_DISTANCE_CONTAINS) {
             // Must be for a join query.
             if (optFuncExpr.getNumConstantVals() == 1) {
                 return true;
@@ -794,7 +805,7 @@
                             - edThresh.getIntegerValue() * index.getGramLength();
                 } else {
                     mergeThreshold = (astr.getStringValue().length() + index.getGramLength() - 1)
-                        - edThresh.getIntegerValue() * index.getGramLength();
+                            - edThresh.getIntegerValue() * index.getGramLength();
                 }
             }
             // We can only optimize edit distance on lists using a word index.
@@ -866,7 +877,8 @@
             case SINGLE_PARTITION_NGRAM_INVIX:
             case LENGTH_PARTITIONED_NGRAM_INVIX: {
                 // Make sure not to use pre- and postfixing for conjunctive searches.
-                boolean prePost = (searchModifierType == SearchModifierType.CONJUNCTIVE || searchModifierType == SearchModifierType.CONJUNCTIVE_EDIT_DISTANCE) ? false : true;
+                boolean prePost = (searchModifierType == SearchModifierType.CONJUNCTIVE || searchModifierType == SearchModifierType.CONJUNCTIVE_EDIT_DISTANCE) ? false
+                        : true;
                 return AqlBinaryTokenizerFactoryProvider.INSTANCE.getNGramTokenizerFactory(searchKeyType,
                         index.getGramLength(), prePost, false);
             }
diff --git a/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/OptimizableFuncExpr.java b/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/OptimizableFuncExpr.java
index 78acc26..79ccfc0 100644
--- a/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/OptimizableFuncExpr.java
+++ b/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/OptimizableFuncExpr.java
@@ -14,6 +14,7 @@
  */
 package edu.uci.ics.asterix.optimizer.rules.am;
 
+import edu.uci.ics.asterix.om.functions.AsterixBuiltinFunctions;
 import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
 import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
 import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IAlgebricksConstantValue;
@@ -36,8 +37,12 @@
         this.logicalVars = logicalVars;
         this.constantVals = constantVals;
         this.fieldNames = new String[logicalVars.length];
-        this.partialField = false;
         this.subTrees = new OptimizableOperatorSubTree[logicalVars.length];
+        if (funcExpr.getFunctionIdentifier() == AsterixBuiltinFunctions.EDIT_DISTANCE_CONTAINS) {
+            this.partialField = true;
+        } else {
+            this.partialField = false;
+        }
     }
 
     // Special, more convenient c'tor for simple binary functions.
@@ -45,10 +50,14 @@
             IAlgebricksConstantValue constantVal) {
         this.funcExpr = funcExpr;
         this.logicalVars = new LogicalVariable[] { logicalVar };
-        this.partialField = false;
         this.constantVals = new IAlgebricksConstantValue[] { constantVal };
         this.fieldNames = new String[logicalVars.length];
         this.subTrees = new OptimizableOperatorSubTree[logicalVars.length];
+        if (funcExpr.getFunctionIdentifier() == AsterixBuiltinFunctions.EDIT_DISTANCE_CONTAINS) {
+            this.partialField = true;
+        } else {
+            this.partialField = false;
+        }
     }
 
     @Override
diff --git a/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-contains.aql b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-contains.aql
new file mode 100644
index 0000000..5d61ede
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-contains.aql
@@ -0,0 +1,38 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance-contains function of their authors.
+ *                  DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXType as closed {
+  id: int32,
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create dataset DBLP(DBLPType) primary key id;
+
+create dataset CSX(CSXType) primary key id;
+
+create index ngram_index on DBLP(authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-edit-distance-contains.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where edit-distance-contains($a.authors, $b.authors, 3)[0] and $a.id < $b.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/ngram-edit-distance-contains.plan b/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/ngram-edit-distance-contains.plan
new file mode 100644
index 0000000..56b5df4
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/ngram-edit-distance-contains.plan
@@ -0,0 +1,55 @@
+-- DISTRIBUTE_RESULT  |PARTITIONED|
+  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+    -- STREAM_PROJECT  |PARTITIONED|
+      -- ASSIGN  |PARTITIONED|
+        -- STREAM_PROJECT  |PARTITIONED|
+          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+            -- HYBRID_HASH_JOIN [$$21][$$14]  |PARTITIONED|
+              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                -- DATASOURCE_SCAN  |PARTITIONED|
+                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                    -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+              -- HASH_PARTITION_EXCHANGE [$$14]  |PARTITIONED|
+                -- UNION_ALL  |PARTITIONED|
+                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                    -- STREAM_PROJECT  |PARTITIONED|
+                      -- STREAM_SELECT  |PARTITIONED|
+                        -- STREAM_PROJECT  |PARTITIONED|
+                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                            -- BTREE_SEARCH  |PARTITIONED|
+                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                -- STABLE_SORT [$$26(ASC)]  |PARTITIONED|
+                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                    -- LENGTH_PARTITIONED_INVERTED_INDEX_SEARCH  |PARTITIONED|
+                                      -- BROADCAST_EXCHANGE  |PARTITIONED|
+                                        -- STREAM_SELECT  |PARTITIONED|
+                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                            -- SPLIT  |PARTITIONED|
+                                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                -- STREAM_PROJECT  |PARTITIONED|
+                                                  -- ASSIGN  |PARTITIONED|
+                                                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                      -- DATASOURCE_SCAN  |PARTITIONED|
+                                                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                          -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                    -- STREAM_PROJECT  |PARTITIONED|
+                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                        -- NESTED_LOOP  |PARTITIONED|
+                          -- BROADCAST_EXCHANGE  |PARTITIONED|
+                            -- ASSIGN  |PARTITIONED|
+                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                -- DATASOURCE_SCAN  |PARTITIONED|
+                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                    -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                            -- STREAM_SELECT  |PARTITIONED|
+                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                -- SPLIT  |PARTITIONED|
+                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                    -- STREAM_PROJECT  |PARTITIONED|
+                                      -- ASSIGN  |PARTITIONED|
+                                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                          -- DATASOURCE_SCAN  |PARTITIONED|
+                                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                              -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.1.ddl.aql
new file mode 100644
index 0000000..36d37d8
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.1.ddl.aql
@@ -0,0 +1,17 @@
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPType as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create nodegroup group1 if not exists on nc1, nc2;
+
+create dataset DBLP(DBLPType)
+  primary key id on group1;
+
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.2.update.aql
new file mode 100644
index 0000000..f590a97
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.2.update.aql
@@ -0,0 +1,6 @@
+use dataverse test;
+
+load dataset DBLP
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.3.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.3.ddl.aql
new file mode 100644
index 0000000..4209874
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.3.ddl.aql
@@ -0,0 +1,3 @@
+use dataverse test;
+
+create index ngram_index on DBLP(title) type ngram(3);
diff --git a/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.4.query.aql b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.4.query.aql
new file mode 100644
index 0000000..9749da8
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.4.query.aql
@@ -0,0 +1,9 @@
+use dataverse test;
+
+for $paper in dataset('DBLP')
+where edit-distance-contains($paper.title, "Multmedia", 1)[0]
+order by $paper.id
+return {
+  "id" : $paper.id,
+  "title" : $paper.title
+}
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/results/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.1.adm b/asterix-app/src/test/resources/runtimets/results/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.1.adm
new file mode 100644
index 0000000..12593bf
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/index-selection/inverted-index-ngram-edit-distance-contains/inverted-index-ngram-edit-distance-contains.1.adm
@@ -0,0 +1,3 @@
+{ "id": 4, "title": "Multimedia Information Systems  Issues and Approaches." }
+{ "id": 89, "title": "VORTEX  Video Retrieval and Tracking from Compressed Multimedia Databases." }
+{ "id": 90, "title": "VORTEX  Video Retrieval and Tracking from Compressed Multimedia Databases ¾ Visual Search Engine." }
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterix-app/src/test/resources/runtimets/testsuite.xml
index 39f01c8..33227b3 100644
--- a/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -2340,6 +2340,11 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="index-selection">
+      <compilation-unit name="inverted-index-ngram-edit-distance-contains">
+        <output-dir compare="Text">inverted-index-ngram-edit-distance-contains</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="index-selection">
       <compilation-unit name="inverted-index-olist-edit-distance-panic">
         <output-dir compare="Text">inverted-index-olist-edit-distance-panic</output-dir>
       </compilation-unit>
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
index 247bbd0..9b9dfc6 100644
--- a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
@@ -213,4 +213,60 @@
             return ed;
         }
     }
+
+    // checks whether the first string contains a similar string to the second string
+    public int UTF8StringEditDistanceContains(byte[] bytes, int fsStart, int ssStart, int edThresh) {
+
+        int fsLen = StringUtils.getStrLen(bytes, fsStart);
+        int ssLen = StringUtils.getStrLen(bytes, ssStart);
+
+        // reuse existing matrix if possible
+        if (ssLen >= cols) {
+            cols = ssLen + 1;
+            matrix = new int[rows][cols];
+        }
+
+        int fsDataStart = fsStart + utf8SizeIndicatorSize;
+        int ssDataStart = ssStart + utf8SizeIndicatorSize;
+
+        // init matrix
+        for (int i = 0; i <= ssLen; i++) {
+            matrix[0][i] = 0;
+        }
+
+        int currRow = 1;
+        int prevRow = 0;
+        int minEd = Integer.MAX_VALUE;
+        // expand dynamic programming matrix row by row
+        int fsPos = fsDataStart;
+        for (int i = 1; i <= fsLen; i++) {
+            matrix[currRow][0] = i;
+            char fsChar = StringUtils.toLowerCase(StringUtils.charAt(bytes, fsPos));
+
+            int ssPos = ssDataStart;
+            for (int j = 1; j <= ssLen; j++) {
+                char ssChar = StringUtils.toLowerCase(StringUtils.charAt(bytes, ssPos));
+
+                matrix[currRow][j] = Math.min(Math.min(matrix[prevRow][j] + 1, matrix[currRow][j - 1] + 1),
+                        matrix[prevRow][j - 1] + (fsChar == ssChar ? 0 : 1));
+
+                ssPos += StringUtils.charSize(bytes, ssPos);
+
+                if (i == fsLen && matrix[currRow][j] < minEd) {
+                    minEd = matrix[currRow][j];
+                }
+            }
+
+            fsPos += StringUtils.charSize(bytes, fsPos);
+
+            int tmp = currRow;
+            currRow = prevRow;
+            prevRow = tmp;
+        }
+        if (minEd > edThresh) {
+            return -1;
+        } else {
+            return minEd;
+        }
+    }
 }
diff --git a/asterix-om/src/main/java/edu/uci/ics/asterix/om/functions/AsterixBuiltinFunctions.java b/asterix-om/src/main/java/edu/uci/ics/asterix/om/functions/AsterixBuiltinFunctions.java
index 8f0bab3..e8472a0 100644
--- a/asterix-om/src/main/java/edu/uci/ics/asterix/om/functions/AsterixBuiltinFunctions.java
+++ b/asterix-om/src/main/java/edu/uci/ics/asterix/om/functions/AsterixBuiltinFunctions.java
@@ -390,6 +390,8 @@
             FunctionConstants.ASTERIX_NS, "edit-distance-list-is-filterable", 2);
     public final static FunctionIdentifier EDIT_DISTANCE_STRING_IS_FILTERABLE = new FunctionIdentifier(
             FunctionConstants.ASTERIX_NS, "edit-distance-string-is-filterable", 4);
+    public final static FunctionIdentifier EDIT_DISTANCE_CONTAINS = new FunctionIdentifier(FunctionConstants.ASTERIX_NS,
+            "edit-distance-contains", 3);
 
     // tokenizers:
     public final static FunctionIdentifier WORD_TOKENS = new FunctionIdentifier(FunctionConstants.ASTERIX_NS,
@@ -827,6 +829,7 @@
         addPrivateFunction(SERIAL_LOCAL_AVG, NonTaggedLocalAvgTypeComputer.INSTANCE, true);
         addPrivateFunction(SERIAL_SUM, NonTaggedNumericAggTypeComputer.INSTANCE, true);
         addPrivateFunction(SERIAL_LOCAL_SUM, NonTaggedNumericAggTypeComputer.INSTANCE, true);
+        addFunction(EDIT_DISTANCE_CONTAINS, OrderedListOfAnyTypeComputer.INSTANCE, true);
         addFunction(SIMILARITY_JACCARD, AFloatTypeComputer.INSTANCE, true);
         addFunction(SIMILARITY_JACCARD_CHECK, OrderedListOfAnyTypeComputer.INSTANCE, true);
         addPrivateFunction(SIMILARITY_JACCARD_SORTED, AFloatTypeComputer.INSTANCE, true);
@@ -1277,6 +1280,7 @@
         similarityFunctions.add(getAsterixFunctionInfo(SIMILARITY_JACCARD_CHECK));
         similarityFunctions.add(getAsterixFunctionInfo(EDIT_DISTANCE));
         similarityFunctions.add(getAsterixFunctionInfo(EDIT_DISTANCE_CHECK));
+        similarityFunctions.add(getAsterixFunctionInfo(EDIT_DISTANCE_CONTAINS));
     }
 
     public static boolean isSimilarityFunction(FunctionIdentifier fi) {
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/EditDistanceContainsDescriptor.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/EditDistanceContainsDescriptor.java
new file mode 100644
index 0000000..679a9bc
--- /dev/null
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/EditDistanceContainsDescriptor.java
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.asterix.runtime.evaluators.functions;
+
+import java.io.DataOutput;
+import edu.uci.ics.asterix.builders.OrderedListBuilder;
+import edu.uci.ics.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
+import edu.uci.ics.asterix.fuzzyjoin.similarity.SimilarityMetricEditDistance;
+import edu.uci.ics.asterix.om.base.ABoolean;
+import edu.uci.ics.asterix.om.base.AInt32;
+import edu.uci.ics.asterix.om.base.AMutableInt32;
+import edu.uci.ics.asterix.om.functions.AsterixBuiltinFunctions;
+import edu.uci.ics.asterix.om.functions.IFunctionDescriptor;
+import edu.uci.ics.asterix.om.functions.IFunctionDescriptorFactory;
+import edu.uci.ics.asterix.om.types.AOrderedListType;
+import edu.uci.ics.asterix.om.types.ATypeTag;
+import edu.uci.ics.asterix.om.types.BuiltinType;
+import edu.uci.ics.asterix.om.types.EnumDeserializer;
+import edu.uci.ics.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+
+public class EditDistanceContainsDescriptor extends AbstractScalarFunctionDynamicDescriptor {
+
+    private static final long serialVersionUID = 1L;
+    public static final IFunctionDescriptorFactory FACTORY = new IFunctionDescriptorFactory() {
+        public IFunctionDescriptor createFunctionDescriptor() {
+            return new EditDistanceContainsDescriptor();
+        }
+    };
+
+    @Override
+    public ICopyEvaluatorFactory createEvaluatorFactory(final ICopyEvaluatorFactory[] args) throws AlgebricksException {
+        return new ICopyEvaluatorFactory() {
+            private static final long serialVersionUID = 1L;
+
+            @Override
+            public ICopyEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException {
+                return new EditDistanceContainsEvaluator(args, output);
+            }
+        };
+    }
+
+    @Override
+    public FunctionIdentifier getIdentifier() {
+        return AsterixBuiltinFunctions.EDIT_DISTANCE_CONTAINS;
+    }
+
+    private static class EditDistanceContainsEvaluator implements ICopyEvaluator {
+
+        // assuming type indicator in serde format
+        private final int typeIndicatorSize = 1;
+
+        private DataOutput out;
+        private final ArrayBackedValueStorage argOut = new ArrayBackedValueStorage();
+        private final ICopyEvaluator evalString;
+        private final ICopyEvaluator evalPattern;
+        private final ICopyEvaluator evalEdThresh;
+        private final SimilarityMetricEditDistance ed = new SimilarityMetricEditDistance();
+        private int edThresh = -1;
+        private final OrderedListBuilder listBuilder;
+        private ArrayBackedValueStorage inputVal;
+        protected final AMutableInt32 aInt32 = new AMutableInt32(-1);
+        @SuppressWarnings("unchecked")
+        private final ISerializerDeserializer<ABoolean> booleanSerde = AqlSerializerDeserializerProvider.INSTANCE
+                .getSerializerDeserializer(BuiltinType.ABOOLEAN);
+        @SuppressWarnings("unchecked")
+        private final ISerializerDeserializer<AInt32> int32Serde = AqlSerializerDeserializerProvider.INSTANCE
+                .getSerializerDeserializer(BuiltinType.AINT32);
+
+        // allowed input types
+        private final static byte SER_NULL_TYPE_TAG = ATypeTag.NULL.serialize();
+        private final static byte SER_STRING_TYPE_TAG = ATypeTag.STRING.serialize();
+        private final static byte SER_INT32_TYPE_TAG = ATypeTag.INT32.serialize();
+
+        public EditDistanceContainsEvaluator(ICopyEvaluatorFactory[] args, IDataOutputProvider output)
+                throws AlgebricksException {
+            out = output.getDataOutput();
+            evalString = args[0].createEvaluator(argOut);
+            evalPattern = args[1].createEvaluator(argOut);
+            evalEdThresh = args[2].createEvaluator(argOut);
+            listBuilder = new OrderedListBuilder();
+            inputVal = new ArrayBackedValueStorage();
+        }
+
+        @Override
+        public void evaluate(IFrameTupleReference tuple) throws AlgebricksException {
+            argOut.reset();
+            int patternStart = argOut.getLength();
+            evalString.evaluate(tuple);
+            int stringStart = argOut.getLength();
+            evalPattern.evaluate(tuple);
+            int edThreshStart = argOut.getLength();
+            evalEdThresh.evaluate(tuple);
+
+            try {
+                // edit distance between null and anything else is 0
+                if (argOut.getByteArray()[stringStart] == SER_NULL_TYPE_TAG
+                        || argOut.getByteArray()[patternStart] == SER_NULL_TYPE_TAG) {
+                    writeResult(0);
+                    return;
+                }
+
+                if (argOut.getByteArray()[stringStart] != SER_STRING_TYPE_TAG
+                        || argOut.getByteArray()[patternStart] != SER_STRING_TYPE_TAG
+                        || argOut.getByteArray()[edThreshStart] != SER_INT32_TYPE_TAG) {
+                    throw new AlgebricksException(AsterixBuiltinFunctions.EDIT_DISTANCE_CONTAINS
+                            + ": expects input type (STRING/NULL, STRING/NULL, INT), but got ("
+                            + EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argOut.getByteArray()[stringStart])
+                            + ", "
+                            + EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argOut.getByteArray()[patternStart])
+                            + ", "
+                            + EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argOut.getByteArray()[edThreshStart])
+                            + ").");
+                }
+            } catch (HyracksDataException e) {
+                throw new AlgebricksException(e);
+            }
+
+            edThresh = IntegerSerializerDeserializer.getInt(argOut.getByteArray(), edThreshStart + typeIndicatorSize);
+            int minEd = ed.UTF8StringEditDistanceContains(argOut.getByteArray(), stringStart + typeIndicatorSize,
+                    patternStart + typeIndicatorSize, edThresh);
+
+            try {
+                writeResult(minEd);
+            } catch (HyracksDataException e) {
+                throw new AlgebricksException(e);
+            }
+        }
+
+        protected void writeResult(int minEd) throws HyracksDataException {
+            boolean contains = (minEd < 0) ? false : true;
+            listBuilder.reset(new AOrderedListType(BuiltinType.ANY, "list"));
+            inputVal.reset();
+            booleanSerde.serialize(contains ? ABoolean.TRUE : ABoolean.FALSE, inputVal.getDataOutput());
+            listBuilder.addItem(inputVal);
+            inputVal.reset();
+            aInt32.setValue((contains) ? minEd : Integer.MAX_VALUE);
+            int32Serde.serialize(aInt32, inputVal.getDataOutput());
+            listBuilder.addItem(inputVal);
+            listBuilder.write(out, true);
+        }
+    }
+}
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/formats/NonTaggedDataFormat.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/formats/NonTaggedDataFormat.java
index 835e7e1..4501fda 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/formats/NonTaggedDataFormat.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/formats/NonTaggedDataFormat.java
@@ -166,6 +166,7 @@
 import edu.uci.ics.asterix.runtime.evaluators.functions.CreateRectangleDescriptor;
 import edu.uci.ics.asterix.runtime.evaluators.functions.CreateUUIDDescriptor;
 import edu.uci.ics.asterix.runtime.evaluators.functions.EditDistanceCheckDescriptor;
+import edu.uci.ics.asterix.runtime.evaluators.functions.EditDistanceContainsDescriptor;
 import edu.uci.ics.asterix.runtime.evaluators.functions.EditDistanceDescriptor;
 import edu.uci.ics.asterix.runtime.evaluators.functions.EditDistanceListIsFilterable;
 import edu.uci.ics.asterix.runtime.evaluators.functions.EditDistanceStringIsFilterable;
@@ -533,6 +534,7 @@
         temp.add(EditDistanceCheckDescriptor.FACTORY);
         temp.add(EditDistanceStringIsFilterable.FACTORY);
         temp.add(EditDistanceListIsFilterable.FACTORY);
+        temp.add(EditDistanceContainsDescriptor.FACTORY);
 
         temp.add(SimilarityJaccardDescriptor.FACTORY);
         temp.add(SimilarityJaccardCheckDescriptor.FACTORY);