ASTERIXDB-1478: fix the utf8 reader.
1. Fix ASTERIXDB-1478.
2. Add UTF-8 test cases.
Change-Id: Idb302dc604fcd71811de550d3d4bd727c81a13ee
Reviewed-on: https://asterix-gerrit.ics.uci.edu/1077
Sonar-Qube: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Yingyi Bu <buyingyi@gmail.com>
diff --git a/asterixdb/asterix-app/data/adm-load/utf8.adm b/asterixdb/asterix-app/data/adm-load/utf8.adm
new file mode 100644
index 0000000..2621bc4
--- /dev/null
+++ b/asterixdb/asterix-app/data/adm-load/utf8.adm
@@ -0,0 +1,100 @@
+{"id":"1","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"2","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"3","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"4","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"5","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"6","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"7","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"8","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"9","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"10","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"11","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"12","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"13","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"14","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"15","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"16","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"17","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"18","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"19","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"20","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"21","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"22","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"23","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"24","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"25","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"26","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"27","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"28","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"29","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"30","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"31","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"32","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"33","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"34","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"35","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"36","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"37","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"38","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"39","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"40","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"41","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"42","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"43","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"44","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"45","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"46","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"47","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"48","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"49","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"50","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"51","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"52","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"53","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"54","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"55","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"56","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"57","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"58","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"59","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"60","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"61","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"62","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"63","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"64","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"65","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"66","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"67","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"68","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"69","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"70","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"71","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"72","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"73","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"74","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"75","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"76","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"77","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"78","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"79","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"80","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"81","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"82","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"83","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"84","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"85","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"86","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"87","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"88","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"89","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"90","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"91","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"92","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"93","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"94","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"95","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"96","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"97","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"98","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"99","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"100","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.1.ddl.aql
new file mode 100644
index 0000000..bcd3d46
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.1.ddl.aql
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+drop dataverse test if exists;
+create dataverse test
+use dataverse test;
+
+create type DocType as open {
+ id: string,
+ description: string?
+};
+
+create dataset Doc (DocType)
+primary key id;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.2.update.aql
new file mode 100644
index 0000000..4d4f4e5
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.2.update.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ *
+ * ADM file loading utf8 (multi-byte characters)
+ * Expected result: success
+ *
+ */
+
+use dataverse test;
+
+load dataset Doc
+using localfs
+(("path"="asterix_nc1://data/adm-load/utf8.adm"),("format"="adm"));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.3.query.aql
new file mode 100644
index 0000000..95507bf
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.3.query.aql
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse test;
+
+let $s := count(
+for $i in dataset Doc
+return $i)
+return $s
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/load/utf8/utf8.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/load/utf8/utf8.1.adm
new file mode 100644
index 0000000..29d6383
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/load/utf8/utf8.1.adm
@@ -0,0 +1 @@
+100
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
index cf5bda3..749965e 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -6560,6 +6560,11 @@
<output-dir compare="Text">adm_binary</output-dir>
</compilation-unit>
</test-case>
+ <test-case FilePath="load">
+ <compilation-unit name="utf8">
+ <output-dir compare="Text">utf8</output-dir>
+ </compilation-unit>
+ </test-case>
</test-group>
<test-group name="hints">
<test-case FilePath="hints">
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AsterixInputStreamReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AsterixInputStreamReader.java
index 94333d1..8e166c0 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AsterixInputStreamReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AsterixInputStreamReader.java
@@ -38,6 +38,7 @@
private CharBuffer charBuffer = CharBuffer.allocate(ExternalDataConstants.DEFAULT_BUFFER_SIZE);
private CharsetDecoder decoder;
private boolean done = false;
+ private boolean remaining = false;
public AsterixInputStreamReader(AsterixInputStream in) {
this.in = in;
@@ -75,6 +76,7 @@
charBuffer.clear();
while (charBuffer.position() == 0) {
if (byteBuffer.hasRemaining()) {
+ remaining = true;
decoder.decode(byteBuffer, charBuffer, false);
System.arraycopy(charBuffer.array(), 0, cbuf, offset, charBuffer.position());
if (charBuffer.position() > 0) {
@@ -97,8 +99,13 @@
done = true;
return len;
}
- byteBuffer.position(len);
+ if (remaining) {
+ byteBuffer.position(len + byteBuffer.position());
+ } else {
+ byteBuffer.position(len);
+ }
byteBuffer.flip();
+ remaining = false;
decoder.decode(byteBuffer, charBuffer, false);
System.arraycopy(charBuffer.array(), 0, cbuf, offset, charBuffer.position());
}
diff --git a/asterixdb/asterix-external-data/src/test/resources/results/beer.txt b/asterixdb/asterix-external-data/src/test/resources/results/beer.txt
index bcb3631..5a7983d 100644
--- a/asterixdb/asterix-external-data/src/test/resources/results/beer.txt
+++ b/asterixdb/asterix-external-data/src/test/resources/results/beer.txt
@@ -1450,7 +1450,7 @@
{ "name": "Baron Helles Bock", "abv": 6.4, "ibu": 0.0, "srm": 0.0, "upc": 0, "type": "beer", "brewery_id": "baron_brewing_company", "updated": "2010-07-22 20:00:20", "description": "The Helles-Bock is similar to a traditional Maibock. Bocks are traditionally brewed in the winter / early spring months and are served during the spring / early summer months. The Helles Bock has a copper golden color with a brilliant white head. The body showcases a clean sweet maltiness that is offset by just enough hops to balance it. Very smooth and easy, drinkable yet deceptive at 6.4%.\r\n\r\nAll ingredients for the beer are imported from Germany. Brewed in accordance to the German Beer Purity Law (Reinheitsgebot) of 1516.", "style": "German-Style Heller Bock/Maibock", "category": "German Lager" }
{ "id": "baron_brewing_company-baron_helles_bock", "flags": 0, "expiration": 0, "cas": 244367687683, "rev": 1, "vbid": 27, "dtype": 1 }
"baron_brewing_company-baron_helles_bock"
-{ "name": "Basil T's Brew Pub and Italian Grill", "city": "Toms River", "state": "New Jersey", "code": "8753", "country": "United States", "phone": "1-732-244-7566", "website": "", "type": "rewery", "updated": "2010-07-22 20:00:20", "description": "", "address": [ "1171 Hooper Avenue" ], "geo": { "accuracy": "RANGE_INTERPOLATED", "lat": 39.9767, "lon": -74.1829 } }
+{ "name": "Basil T's Brew Pub and Italian Grill", "city": "Toms River", "state": "New Jersey", "code": "8753", "country": "United States", "phone": "1-732-244-7566", "website": "", "type": "brewery", "updated": "2010-07-22 20:00:20", "description": "", "address": [ "1171 Hooper Avenue" ], "geo": { "accuracy": "RANGE_INTERPOLATED", "lat": 39.9767, "lon": -74.1829 } }
{ "id": "basil_t_s_brew_pub_and_italian_grill", "flags": 0, "expiration": 0, "cas": 244364410882, "rev": 1, "vbid": 20, "dtype": 1 }
"basil_t_s_brew_pub_and_italian_grill"
{ "name": "Nieuw Ligt Grand Cru 2006", "abv": 12.0, "ibu": 0.0, "srm": 0.0, "upc": 0, "type": "beer", "brewery_id": "stadsbrouwerij_de_hemel", "updated": "2010-07-22 20:00:20", "description": "" }