commit | 908ae63bb7d99ae01f999e9c3b5290c697dce033 | [log] [tgz] |
---|---|---|
author | Taewoo Kim <wangsaeu@yahoo.com> | Tue Jan 17 12:58:03 2017 -0800 |
committer | Taewoo Kim <wangsaeu@yahoo.com> | Tue Jan 17 15:42:40 2017 -0800 |
tree | 02191b890436e0ceecbfad39c40115c7bc7050c2 | |
parent | c8ea9d6bb1203ecf8ea54fc378fcaa4779fb8589 [diff] |
Add corner-case handling for NGramUTF8StringBinaryTokenizer - For the corner case where the length of the given string is less than the given gram length, it returns 0 as the total number of grams. Change-Id: I5965856b4da018276b37460bed7fb1fc60d8c2f3 Reviewed-on: https://asterix-gerrit.ics.uci.edu/1448 Reviewed-by: Ian Maxon <imaxon@apache.org> Sonar-Qube: Jenkins <jenkins@fulliautomatix.ics.uci.edu> Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu> BAD: Jenkins <jenkins@fulliautomatix.ics.uci.edu> Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java index 4c486c5..8bd0c50 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
@@ -110,7 +110,11 @@ if (usePrePost) { totalGrams = numChars + gramLength - 1; } else { - totalGrams = numChars - gramLength + 1; + if (numChars >= gramLength) { + totalGrams = numChars - gramLength + 1; + } else { + totalGrams = 0; + } } }