Merged -r 438:524 from trunk into branch
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_indexes@525 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-examples/text-example/texthelper/pom.xml b/hyracks-examples/text-example/texthelper/pom.xml
index 5dba565..99a4c4b 100644
--- a/hyracks-examples/text-example/texthelper/pom.xml
+++ b/hyracks-examples/text-example/texthelper/pom.xml
@@ -2,25 +2,25 @@
<modelVersion>4.0.0</modelVersion>
<groupId>edu.uci.ics.hyracks.examples.text</groupId>
<artifactId>texthelper</artifactId>
- <version>0.1.5</version>
+ <version>0.1.7-SNAPSHOT</version>
<parent>
<groupId>edu.uci.ics.hyracks.examples</groupId>
<artifactId>text-example</artifactId>
- <version>0.1.5</version>
+ <version>0.1.7-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.1.5</version>
+ <version>0.1.7-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.1.5</version>
+ <version>0.1.7-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
</dependencies>
diff --git a/hyracks-examples/text-example/texthelper/src/main/java/edu/uci/ics/hyracks/examples/text/WordTupleParserFactory.java b/hyracks-examples/text-example/texthelper/src/main/java/edu/uci/ics/hyracks/examples/text/WordTupleParserFactory.java
index 65b9011..4d7cc1f 100644
--- a/hyracks-examples/text-example/texthelper/src/main/java/edu/uci/ics/hyracks/examples/text/WordTupleParserFactory.java
+++ b/hyracks-examples/text-example/texthelper/src/main/java/edu/uci/ics/hyracks/examples/text/WordTupleParserFactory.java
@@ -85,6 +85,7 @@
return false;
}
+ boolean wordStarted = false;
int p = start;
while (true) {
if (p >= end) {
@@ -96,16 +97,57 @@
p -= (s - start);
}
char ch = buffer[p];
- if (Character.isWhitespace(ch)) {
+ if (isNonWordChar(ch)) {
fStart = start;
fEnd = p;
start = p + 1;
- return true;
+ if (wordStarted) {
+ return true;
+ }
+ } else {
+ wordStarted = true;
}
++p;
}
}
+ private boolean isNonWordChar(char ch) { // Returns true when ch is a word delimiter: one of the punctuation characters listed below, or whitespace.
+ switch (ch) { // every case label falls through to the single shared 'return true'
+ case '.':
+ case ',':
+ case '!':
+ case '@':
+ case '#':
+ case '$':
+ case '%':
+ case '^':
+ case '&':
+ case '*':
+ case '(':
+ case ')':
+ case '+':
+ case '=':
+ case ':':
+ case ';':
+ case '"':
+ case '\'':
+ case '{':
+ case '}':
+ case '[':
+ case ']':
+ case '|':
+ case '\\':
+ case '/':
+ case '<':
+ case '>':
+ case '?':
+ case '~':
+ case '`':
+ return true; // ch matched one of the punctuation delimiters above
+ }
+ return Character.isWhitespace(ch); // not listed punctuation: only whitespace delimits (hyphen and underscore have no case above, so they remain word characters)
+ }
+
private boolean readMore() throws IOException {
if (start > 0) {
System.arraycopy(buffer, start, buffer, 0, end - start);