Eliminated StringUtils.toLowerCase(); the inverted-index tokenizers now call Character.toLowerCase() directly
git-svn-id: https://hyracks.googlecode.com/svn/trunk@599 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/util/StringUtils.java b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/util/StringUtils.java
index d11245b..bcaeb22 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/util/StringUtils.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/util/StringUtils.java
@@ -18,195 +18,95 @@
import java.io.IOException;
public class StringUtils {
- public static char charAt(byte[] b, int s) {
- int c = b[s] & 0xff;
- switch (c >> 4) {
- case 0:
- case 1:
- case 2:
- case 3:
- case 4:
- case 5:
- case 6:
- case 7:
- return (char) c;
+ public static char charAt(byte[] b, int s) {
+ int c = b[s] & 0xff;
+ switch (c >> 4) {
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ return (char) c;
- case 12:
- case 13:
- return (char) (((c & 0x1F) << 6) | ((b[s + 1]) & 0x3F));
+ case 12:
+ case 13:
+ return (char) (((c & 0x1F) << 6) | ((b[s + 1]) & 0x3F));
- case 14:
- return (char) (((c & 0x0F) << 12) | (((b[s + 1]) & 0x3F) << 6) | (((b[s + 2]) & 0x3F) << 0));
+ case 14:
+ return (char) (((c & 0x0F) << 12) | (((b[s + 1]) & 0x3F) << 6) | (((b[s + 2]) & 0x3F) << 0));
- default:
- throw new IllegalArgumentException();
- }
- }
+ default:
+ throw new IllegalArgumentException();
+ }
+ }
- public static int charSize(byte[] b, int s) {
- int c = b[s] & 0xff;
- switch (c >> 4) {
- case 0:
- case 1:
- case 2:
- case 3:
- case 4:
- case 5:
- case 6:
- case 7:
- return 1;
+ public static int charSize(byte[] b, int s) {
+ int c = b[s] & 0xff;
+ switch (c >> 4) {
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ return 1;
- case 12:
- case 13:
- return 2;
+ case 12:
+ case 13:
+ return 2;
- case 14:
- return 3;
- }
- throw new IllegalStateException();
- }
+ case 14:
+ return 3;
+ }
+ throw new IllegalStateException();
+ }
- public static int getModifiedUTF8Len(char c) {
- if (c >= 0x0000 && c <= 0x007F) {
- return 1;
- } else if (c <= 0x07FF) {
- return 2;
- } else {
- return 3;
- }
- }
+ public static int getModifiedUTF8Len(char c) {
+ if (c >= 0x0000 && c <= 0x007F) {
+ return 1;
+ } else if (c <= 0x07FF) {
+ return 2;
+ } else {
+ return 3;
+ }
+ }
- public static int getStrLen(byte[] b, int s) {
- int pos = s + 2;
- int end = pos + getUTFLen(b, s);
- int charCount = 0;
- while (pos < end) {
- charCount++;
- pos += charSize(b, pos);
- }
- return charCount;
- }
+ public static int getStrLen(byte[] b, int s) {
+ int pos = s + 2;
+ int end = pos + getUTFLen(b, s);
+ int charCount = 0;
+ while (pos < end) {
+ charCount++;
+ pos += charSize(b, pos);
+ }
+ return charCount;
+ }
- public static int getUTFLen(byte[] b, int s) {
- return ((b[s] & 0xff) << 8) + ((b[s + 1] & 0xff) << 0);
- }
+ public static int getUTFLen(byte[] b, int s) {
+ return ((b[s] & 0xff) << 8) + ((b[s + 1] & 0xff) << 0);
+ }
- public static char toLowerCase(char c) {
- switch (c) {
- case 'A':
- return 'a';
- case 'B':
- return 'b';
- case 'C':
- return 'c';
- case 'D':
- return 'd';
- case 'E':
- return 'e';
- case 'F':
- return 'f';
- case 'G':
- return 'g';
- case 'H':
- return 'h';
- case 'I':
- return 'i';
- case 'J':
- return 'j';
- case 'K':
- return 'k';
- case 'L':
- return 'l';
- case 'M':
- return 'm';
- case 'N':
- return 'n';
- case 'O':
- return 'o';
- case 'P':
- return 'p';
- case 'Q':
- return 'q';
- case 'R':
- return 'r';
- case 'S':
- return 's';
- case 'T':
- return 't';
- case 'U':
- return 'u';
- case 'V':
- return 'v';
- case 'W':
- return 'w';
- case 'X':
- return 'x';
- case 'Y':
- return 'y';
- case 'Z':
- return 'z';
- case 'Ä':
- return 'ä';
- case 'Ǟ':
- return 'ǟ';
- case 'Ë':
- return 'ë';
- case 'Ḧ':
- return 'ḧ';
- case 'Ï':
- return 'ï';
- case 'Ḯ':
- return 'ḯ';
- case 'Ö':
- return 'ö';
- case 'Ȫ':
- return 'ȫ';
- case 'Ṏ':
- return 'ṏ';
- case 'Ü':
- return 'ü';
- case 'Ǖ':
- return 'ǖ';
- case 'Ǘ':
- return 'ǘ';
- case 'Ǚ':
- return 'ǚ';
- case 'Ǜ':
- return 'ǜ';
- case 'Ṳ':
- return 'ṳ';
- case 'Ṻ':
- return 'ṻ';
- case 'Ẅ':
- return 'ẅ';
- case 'Ẍ':
- return 'ẍ';
- case 'Ÿ':
- return 'ÿ';
- default:
- // since I probably missed some chars above
- // use Java to convert to lower case to be safe
- return Character.toLowerCase(c);
- }
- }
+ public static void writeCharAsModifiedUTF8(char c, DataOutput dos) throws IOException {
- public static void writeCharAsModifiedUTF8(char c, DataOutput dos)
- throws IOException {
+ if (c >= 0x0000 && c <= 0x007F) {
+ dos.writeByte(c);
+ } else if (c <= 0x07FF) {
+ dos.writeByte((byte) (0xC0 | ((c >> 6) & 0x3F)));
+ dos.writeByte((byte) (0x80 | (c & 0x3F)));
+ } else {
+ dos.writeByte((byte) (0xE0 | ((c >> 12) & 0x0F)));
+ dos.writeByte((byte) (0x80 | ((c >> 6) & 0x3F)));
+ dos.writeByte((byte) (0x80 | (c & 0x3F)));
+ }
+ }
- if (c >= 0x0000 && c <= 0x007F) {
- dos.writeByte(c);
- } else if (c <= 0x07FF) {
- dos.writeByte((byte) (0xC0 | ((c >> 6) & 0x3F)));
- dos.writeByte((byte) (0x80 | (c & 0x3F)));
- } else {
- dos.writeByte((byte) (0xE0 | ((c >> 12) & 0x0F)));
- dos.writeByte((byte) (0x80 | ((c >> 6) & 0x3F)));
- dos.writeByte((byte) (0x80 | (c & 0x3F)));
- }
- }
-
- public static void writeUTF8Len(int len, DataOutput dos) throws IOException {
- dos.write((len >>> 8) & 0xFF);
- dos.write((len >>> 0) & 0xFF);
- }
+ public static void writeUTF8Len(int len, DataOutput dos) throws IOException {
+ dos.write((len >>> 8) & 0xFF);
+ dos.write((len >>> 0) & 0xFF);
+ }
}
\ No newline at end of file
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8Token.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8Token.java
index a15540e..0e9038a 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8Token.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8Token.java
@@ -59,7 +59,7 @@
int lowerCaseUTF8Len = 0;
int pos = start;
for (int i = 0; i < size; i++) {
- char c = StringUtils.toLowerCase(StringUtils.charAt(data, pos));
+ char c = Character.toLowerCase(StringUtils.charAt(data, pos));
lowerCaseUTF8Len += StringUtils.getModifiedUTF8Len(c);
pos += StringUtils.charSize(data, pos);
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
index f01622b..8f49c74 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
@@ -60,8 +60,7 @@
int currLength = 0;
while (currLength < tokenLength) {
// case insensitive comparison
- if (StringUtils.toLowerCase(StringUtils.charAt(data, currentTokenStart + offset)) != StringUtils
- .toLowerCase(StringUtils.charAt(data, tokenStart + offset))) {
+ if (Character.toLowerCase(StringUtils.charAt(data, currentTokenStart + offset)) != Character.toLowerCase(StringUtils.charAt(data, tokenStart + offset))) {
tokenCount--;
break;
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramToken.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramToken.java
index 8817fba..43f89c7 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramToken.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramToken.java
@@ -45,7 +45,7 @@
int numRegGrams = tokenLength - numPreChars - numPostChars;
int pos = start;
for (int i = 0; i < numRegGrams; i++) {
- hash ^= StringUtils.toLowerCase(StringUtils.charAt(data, pos));
+ hash ^= Character.toLowerCase(StringUtils.charAt(data, pos));
hash *= GOLDEN_RATIO_32;
pos += StringUtils.charSize(data, pos);
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordToken.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordToken.java
index cb5d6f6..747b65d 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordToken.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordToken.java
@@ -67,7 +67,7 @@
int pos = start;
hash = GOLDEN_RATIO_32;
for (int i = 0; i < tokenLength; i++) {
- hash ^= StringUtils.toLowerCase(StringUtils.charAt(data, pos));
+ hash ^= Character.toLowerCase(StringUtils.charAt(data, pos));
hash *= GOLDEN_RATIO_32;
pos += StringUtils.charSize(data, pos);
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
index 82a0275..746ee1d 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
@@ -73,8 +73,7 @@
tokenCount++; // assume found
int offset = 0;
for (int j = 0; j < gramLength; j++) {
- if (StringUtils.toLowerCase(StringUtils.charAt(data, currentTokenStart + offset)) != StringUtils
- .toLowerCase(StringUtils.charAt(data, tmpIndex + offset))) {
+ if (Character.toLowerCase(StringUtils.charAt(data, currentTokenStart + offset)) != Character.toLowerCase(StringUtils.charAt(data, tmpIndex + offset))) {
tokenCount--;
break;
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramToken.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramToken.java
index 589cf6a..1b124dc 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramToken.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramToken.java
@@ -67,7 +67,7 @@
int pos = start;
for (int i = 0; i < numRegChars; i++) {
- char c = StringUtils.toLowerCase(StringUtils.charAt(data, pos));
+ char c = Character.toLowerCase(StringUtils.charAt(data, pos));
StringUtils.writeCharAsModifiedUTF8(c, dos);
pos += StringUtils.charSize(data, pos);
}
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordToken.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordToken.java
index c157909..2a74145 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordToken.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordToken.java
@@ -38,7 +38,7 @@
StringUtils.writeUTF8Len(tokenUTF8Len, dos);
int pos = start;
for (int i = 0; i < tokenLength; i++) {
- char c = StringUtils.toLowerCase(StringUtils.charAt(data, pos));
+ char c = Character.toLowerCase(StringUtils.charAt(data, pos));
StringUtils.writeCharAsModifiedUTF8(c, dos);
pos += StringUtils.charSize(data, pos);
}