Eliminated StringUtils.toLowerCase() in favor of Character.toLowerCase()

git-svn-id: https://hyracks.googlecode.com/svn/trunk@599 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/util/StringUtils.java b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/util/StringUtils.java
index d11245b..bcaeb22 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/util/StringUtils.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/util/StringUtils.java
@@ -18,195 +18,95 @@
 import java.io.IOException;
 
 public class StringUtils {
-	public static char charAt(byte[] b, int s) {
-		int c = b[s] & 0xff;
-		switch (c >> 4) {
-		case 0:
-		case 1:
-		case 2:
-		case 3:
-		case 4:
-		case 5:
-		case 6:
-		case 7:
-			return (char) c;
+    public static char charAt(byte[] b, int s) {
+        int c = b[s] & 0xff;
+        switch (c >> 4) {
+            case 0:
+            case 1:
+            case 2:
+            case 3:
+            case 4:
+            case 5:
+            case 6:
+            case 7:
+                return (char) c;
 
-		case 12:
-		case 13:
-			return (char) (((c & 0x1F) << 6) | ((b[s + 1]) & 0x3F));
+            case 12:
+            case 13:
+                return (char) (((c & 0x1F) << 6) | ((b[s + 1]) & 0x3F));
 
-		case 14:
-			return (char) (((c & 0x0F) << 12) | (((b[s + 1]) & 0x3F) << 6) | (((b[s + 2]) & 0x3F) << 0));
+            case 14:
+                return (char) (((c & 0x0F) << 12) | (((b[s + 1]) & 0x3F) << 6) | (((b[s + 2]) & 0x3F) << 0));
 
-		default:
-			throw new IllegalArgumentException();
-		}
-	}
+            default:
+                throw new IllegalArgumentException();
+        }
+    }
 
-	public static int charSize(byte[] b, int s) {
-		int c = b[s] & 0xff;
-		switch (c >> 4) {
-		case 0:
-		case 1:
-		case 2:
-		case 3:
-		case 4:
-		case 5:
-		case 6:
-		case 7:
-			return 1;
+    public static int charSize(byte[] b, int s) {
+        int c = b[s] & 0xff;
+        switch (c >> 4) {
+            case 0:
+            case 1:
+            case 2:
+            case 3:
+            case 4:
+            case 5:
+            case 6:
+            case 7:
+                return 1;
 
-		case 12:
-		case 13:
-			return 2;
+            case 12:
+            case 13:
+                return 2;
 
-		case 14:
-			return 3;
-		}
-		throw new IllegalStateException();
-	}
+            case 14:
+                return 3;
+        }
+        throw new IllegalStateException();
+    }
 
-	public static int getModifiedUTF8Len(char c) {
-		if (c >= 0x0000 && c <= 0x007F) {
-			return 1;
-		} else if (c <= 0x07FF) {
-			return 2;
-		} else {
-			return 3;
-		}
-	}
+    public static int getModifiedUTF8Len(char c) {
+        if (c >= 0x0000 && c <= 0x007F) {
+            return 1;
+        } else if (c <= 0x07FF) {
+            return 2;
+        } else {
+            return 3;
+        }
+    }
 
-	public static int getStrLen(byte[] b, int s) {
-		int pos = s + 2;
-		int end = pos + getUTFLen(b, s);
-		int charCount = 0;
-		while (pos < end) {
-			charCount++;
-			pos += charSize(b, pos);
-		}
-		return charCount;
-	}
+    public static int getStrLen(byte[] b, int s) {
+        int pos = s + 2;
+        int end = pos + getUTFLen(b, s);
+        int charCount = 0;
+        while (pos < end) {
+            charCount++;
+            pos += charSize(b, pos);
+        }
+        return charCount;
+    }
 
-	public static int getUTFLen(byte[] b, int s) {
-		return ((b[s] & 0xff) << 8) + ((b[s + 1] & 0xff) << 0);
-	}
+    public static int getUTFLen(byte[] b, int s) {
+        return ((b[s] & 0xff) << 8) + ((b[s + 1] & 0xff) << 0);
+    }
 
-	public static char toLowerCase(char c) {
-		switch (c) {
-		case 'A':
-			return 'a';
-		case 'B':
-			return 'b';
-		case 'C':
-			return 'c';
-		case 'D':
-			return 'd';
-		case 'E':
-			return 'e';
-		case 'F':
-			return 'f';
-		case 'G':
-			return 'g';
-		case 'H':
-			return 'h';
-		case 'I':
-			return 'i';
-		case 'J':
-			return 'j';
-		case 'K':
-			return 'k';
-		case 'L':
-			return 'l';
-		case 'M':
-			return 'm';
-		case 'N':
-			return 'n';
-		case 'O':
-			return 'o';
-		case 'P':
-			return 'p';
-		case 'Q':
-			return 'q';
-		case 'R':
-			return 'r';
-		case 'S':
-			return 's';
-		case 'T':
-			return 't';
-		case 'U':
-			return 'u';
-		case 'V':
-			return 'v';
-		case 'W':
-			return 'w';
-		case 'X':
-			return 'x';
-		case 'Y':
-			return 'y';
-		case 'Z':
-			return 'z';
-		case 'Ä':
-			return 'ä';
-		case 'Ǟ':
-			return 'ǟ';
-		case 'Ë':
-			return 'ë';
-		case 'Ḧ':
-			return 'ḧ';
-		case 'Ï':
-			return 'ï';
-		case 'Ḯ':
-			return 'ḯ';
-		case 'Ö':
-			return 'ö';
-		case 'Ȫ':
-			return 'ȫ';
-		case 'Ṏ':
-			return 'ṏ';
-		case 'Ü':
-			return 'ü';
-		case 'Ǖ':
-			return 'ǖ';
-		case 'Ǘ':
-			return 'ǘ';
-		case 'Ǚ':
-			return 'ǚ';
-		case 'Ǜ':
-			return 'ǜ';
-		case 'Ṳ':
-			return 'ṳ';
-		case 'Ṻ':
-			return 'ṻ';
-		case 'Ẅ':
-			return 'ẅ';
-		case 'Ẍ':
-			return 'ẍ';
-		case 'Ÿ':
-			return 'ÿ';
-		default:
-			// since I probably missed some chars above
-			// use Java to convert to lower case to be safe
-			return Character.toLowerCase(c);
-		}
-	}
+    public static void writeCharAsModifiedUTF8(char c, DataOutput dos) throws IOException {
 
-	public static void writeCharAsModifiedUTF8(char c, DataOutput dos)
-			throws IOException {
+        if (c >= 0x0000 && c <= 0x007F) {
+            dos.writeByte(c);
+        } else if (c <= 0x07FF) {
+            dos.writeByte((byte) (0xC0 | ((c >> 6) & 0x3F)));
+            dos.writeByte((byte) (0x80 | (c & 0x3F)));
+        } else {
+            dos.writeByte((byte) (0xE0 | ((c >> 12) & 0x0F)));
+            dos.writeByte((byte) (0x80 | ((c >> 6) & 0x3F)));
+            dos.writeByte((byte) (0x80 | (c & 0x3F)));
+        }
+    }
 
-		if (c >= 0x0000 && c <= 0x007F) {
-			dos.writeByte(c);
-		} else if (c <= 0x07FF) {
-			dos.writeByte((byte) (0xC0 | ((c >> 6) & 0x3F)));
-			dos.writeByte((byte) (0x80 | (c & 0x3F)));
-		} else {
-			dos.writeByte((byte) (0xE0 | ((c >> 12) & 0x0F)));
-			dos.writeByte((byte) (0x80 | ((c >> 6) & 0x3F)));
-			dos.writeByte((byte) (0x80 | (c & 0x3F)));
-		}
-	}
-
-	public static void writeUTF8Len(int len, DataOutput dos) throws IOException {
-		dos.write((len >>> 8) & 0xFF);
-		dos.write((len >>> 0) & 0xFF);
-	}
+    public static void writeUTF8Len(int len, DataOutput dos) throws IOException {
+        dos.write((len >>> 8) & 0xFF);
+        dos.write((len >>> 0) & 0xFF);
+    }
 }
\ No newline at end of file
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8Token.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8Token.java
index a15540e..0e9038a 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8Token.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8Token.java
@@ -59,7 +59,7 @@
         int lowerCaseUTF8Len = 0;
         int pos = start;
         for (int i = 0; i < size; i++) {
-            char c = StringUtils.toLowerCase(StringUtils.charAt(data, pos));
+            char c = Character.toLowerCase(StringUtils.charAt(data, pos));
             lowerCaseUTF8Len += StringUtils.getModifiedUTF8Len(c);
             pos += StringUtils.charSize(data, pos);
         }
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
index f01622b..8f49c74 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
@@ -60,8 +60,7 @@
                     int currLength = 0;
                     while (currLength < tokenLength) {
                         // case insensitive comparison
-                        if (StringUtils.toLowerCase(StringUtils.charAt(data, currentTokenStart + offset)) != StringUtils
-                                .toLowerCase(StringUtils.charAt(data, tokenStart + offset))) {
+                        if (Character.toLowerCase(StringUtils.charAt(data, currentTokenStart + offset)) != Character.toLowerCase(StringUtils.charAt(data, tokenStart + offset))) {
                             tokenCount--;
                             break;
                         }
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramToken.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramToken.java
index 8817fba..43f89c7 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramToken.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramToken.java
@@ -45,7 +45,7 @@
         int numRegGrams = tokenLength - numPreChars - numPostChars;
         int pos = start;
         for (int i = 0; i < numRegGrams; i++) {
-            hash ^= StringUtils.toLowerCase(StringUtils.charAt(data, pos));
+            hash ^= Character.toLowerCase(StringUtils.charAt(data, pos));
             hash *= GOLDEN_RATIO_32;
             pos += StringUtils.charSize(data, pos);
         }
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordToken.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordToken.java
index cb5d6f6..747b65d 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordToken.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordToken.java
@@ -67,7 +67,7 @@
         int pos = start;
         hash = GOLDEN_RATIO_32;
         for (int i = 0; i < tokenLength; i++) {
-            hash ^= StringUtils.toLowerCase(StringUtils.charAt(data, pos));
+            hash ^= Character.toLowerCase(StringUtils.charAt(data, pos));
             hash *= GOLDEN_RATIO_32;
             pos += StringUtils.charSize(data, pos);
         }
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
index 82a0275..746ee1d 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
@@ -73,8 +73,7 @@
                 tokenCount++; // assume found
                 int offset = 0;
                 for (int j = 0; j < gramLength; j++) {
-                    if (StringUtils.toLowerCase(StringUtils.charAt(data, currentTokenStart + offset)) != StringUtils
-                            .toLowerCase(StringUtils.charAt(data, tmpIndex + offset))) {
+                    if (Character.toLowerCase(StringUtils.charAt(data, currentTokenStart + offset)) != Character.toLowerCase(StringUtils.charAt(data, tmpIndex + offset))) {
                         tokenCount--;
                         break;
                     }
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramToken.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramToken.java
index 589cf6a..1b124dc 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramToken.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramToken.java
@@ -67,7 +67,7 @@
 
         int pos = start;
         for (int i = 0; i < numRegChars; i++) {
-            char c = StringUtils.toLowerCase(StringUtils.charAt(data, pos));
+            char c = Character.toLowerCase(StringUtils.charAt(data, pos));
             StringUtils.writeCharAsModifiedUTF8(c, dos);
             pos += StringUtils.charSize(data, pos);
         }
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordToken.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordToken.java
index c157909..2a74145 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordToken.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordToken.java
@@ -38,7 +38,7 @@
         StringUtils.writeUTF8Len(tokenUTF8Len, dos);
         int pos = start;
         for (int i = 0; i < tokenLength; i++) {
-            char c = StringUtils.toLowerCase(StringUtils.charAt(data, pos));
+            char c = Character.toLowerCase(StringUtils.charAt(data, pos));
             StringUtils.writeCharAsModifiedUTF8(c, dos);
             pos += StringUtils.charSize(data, pos);
         }