Merged -r 438:524 from trunk into branch

git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_indexes@525 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-examples/text-example/texthelper/pom.xml b/hyracks-examples/text-example/texthelper/pom.xml
index 5dba565..99a4c4b 100644
--- a/hyracks-examples/text-example/texthelper/pom.xml
+++ b/hyracks-examples/text-example/texthelper/pom.xml
@@ -2,25 +2,25 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>edu.uci.ics.hyracks.examples.text</groupId>
   <artifactId>texthelper</artifactId>
-  <version>0.1.5</version>
+  <version>0.1.7-SNAPSHOT</version>
 
   <parent>
     <groupId>edu.uci.ics.hyracks.examples</groupId>
     <artifactId>text-example</artifactId>
-    <version>0.1.5</version>
+    <version>0.1.7-SNAPSHOT</version>
   </parent>
 
   <dependencies>
   	<dependency>
   		<groupId>edu.uci.ics.hyracks</groupId>
   		<artifactId>hyracks-dataflow-std</artifactId>
-  		<version>0.1.5</version>
+  		<version>0.1.7-SNAPSHOT</version>
   		<scope>compile</scope>
   	</dependency>
   	<dependency>
   		<groupId>edu.uci.ics.hyracks</groupId>
   		<artifactId>hyracks-api</artifactId>
-  		<version>0.1.5</version>
+  		<version>0.1.7-SNAPSHOT</version>
   		<scope>compile</scope>
   	</dependency>
   </dependencies>
diff --git a/hyracks-examples/text-example/texthelper/src/main/java/edu/uci/ics/hyracks/examples/text/WordTupleParserFactory.java b/hyracks-examples/text-example/texthelper/src/main/java/edu/uci/ics/hyracks/examples/text/WordTupleParserFactory.java
index 65b9011..4d7cc1f 100644
--- a/hyracks-examples/text-example/texthelper/src/main/java/edu/uci/ics/hyracks/examples/text/WordTupleParserFactory.java
+++ b/hyracks-examples/text-example/texthelper/src/main/java/edu/uci/ics/hyracks/examples/text/WordTupleParserFactory.java
@@ -85,6 +85,7 @@
                 return false;
             }
 
+            boolean wordStarted = false;
             int p = start;
             while (true) {
                 if (p >= end) {
@@ -96,16 +97,57 @@
                     p -= (s - start);
                 }
                 char ch = buffer[p];
-                if (Character.isWhitespace(ch)) {
+                if (isNonWordChar(ch)) {
                     fStart = start;
                     fEnd = p;
                     start = p + 1;
-                    return true;
+                    if (wordStarted) {
+                        return true;
+                    }
+                } else {
+                    wordStarted = true;
                 }
                 ++p;
             }
         }
 
+        private boolean isNonWordChar(char ch) { // Returns true when ch is a word separator: a listed punctuation character or whitespace.
+            switch (ch) { // Punctuation separators; '-' and '_' are not listed, so they are not separators here.
+                case '.':
+                case ',':
+                case '!':
+                case '@':
+                case '#':
+                case '$':
+                case '%':
+                case '^':
+                case '&':
+                case '*':
+                case '(':
+                case ')':
+                case '+':
+                case '=':
+                case ':':
+                case ';':
+                case '"':
+                case '\'':
+                case '{':
+                case '}':
+                case '[':
+                case ']':
+                case '|':
+                case '\\':
+                case '/':
+                case '<':
+                case '>':
+                case '?':
+                case '~':
+                case '`':
+                    return true; // Every case label above falls through to this single return.
+            }
+            return Character.isWhitespace(ch); // Any unlisted character is a separator only if Character.isWhitespace says so.
+        }
+
         private boolean readMore() throws IOException {
             if (start > 0) {
                 System.arraycopy(buffer, start, buffer, 0, end - start);