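Fix Decoding of byte[] Records

DCPMessageToRecordConverter decodes message content in buffer-sized
chunks. Bytes left undecoded at the end of a chunk (e.g. an incomplete
multi-byte UTF-8 sequence) were dropped by the unconditional
bytes.clear(); they are now compacted and carried into the next chunk.
Add ByteBufUTF8DecodeTest with multilingual and JSON fixtures.

Change-Id: I71c3d8b8dfa5a98123725f139247d2b5ce10012e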
Reviewed-on: https://asterix-gerrit.ics.uci.edu/951
Reviewed-by: Yingyi Bu <buyingyi@gmail.com>
Reviewed-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java
index f174962..33f9673 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/CharArrayRecord.java
@@ -69,7 +69,7 @@
throw new IOException(
"Record is too large!. Maximum record size is " + ExternalDataConstants.MAX_RECORD_SIZE);
}
- int newSize = Math.min((int)(len * ExternalDataConstants.DEFAULT_BUFFER_INCREMENT_FACTOR),
+ int newSize = Math.min((int) (len * ExternalDataConstants.DEFAULT_BUFFER_INCREMENT_FACTOR),
ExternalDataConstants.MAX_RECORD_SIZE);
value = Arrays.copyOf(value, newSize);
}
@@ -88,7 +88,7 @@
@Override
public String toString() {
- return String.valueOf(value, 0, size);
+ return String.valueOf(value, 0, size == 0 ? 0 : size - 1);
}
public void endRecord() throws IOException {
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/converter/DCPMessageToRecordConverter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/converter/DCPMessageToRecordConverter.java
index 6ce5e98..01466fd 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/converter/DCPMessageToRecordConverter.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/converter/DCPMessageToRecordConverter.java
@@ -39,17 +39,15 @@
import com.couchbase.client.deps.io.netty.buffer.ByteBuf;
import com.couchbase.client.deps.io.netty.util.ReferenceCountUtil;
-public class DCPMessageToRecordConverter
- implements IRecordToRecordWithMetadataAndPKConverter<DCPRequest, char[]> {
+public class DCPMessageToRecordConverter implements IRecordToRecordWithMetadataAndPKConverter<DCPRequest, char[]> {
private final RecordWithMetadataAndPK<char[]> recordWithMetadata;
private final CharArrayRecord value;
private final CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder();
- private final ByteBuffer bytes = ByteBuffer.allocateDirect(ExternalDataConstants.DEFAULT_BUFFER_SIZE);
+ private final ByteBuffer bytes = ByteBuffer.allocate(ExternalDataConstants.DEFAULT_BUFFER_SIZE);
private final CharBuffer chars = CharBuffer.allocate(ExternalDataConstants.DEFAULT_BUFFER_SIZE);
- private static final IAType[] CB_META_TYPES = new IAType[] { /*ID*/BuiltinType.ASTRING,
- /*VBID*/BuiltinType.AINT32, /*SEQ*/BuiltinType.AINT64, /*CAS*/BuiltinType.AINT64,
- /*EXPIRATION*/BuiltinType.AINT32,
+ private static final IAType[] CB_META_TYPES = new IAType[] { /*ID*/BuiltinType.ASTRING, /*VBID*/BuiltinType.AINT32,
+ /*SEQ*/BuiltinType.AINT64, /*CAS*/BuiltinType.AINT64, /*EXPIRATION*/BuiltinType.AINT32,
/*FLAGS*/BuiltinType.AINT32, /*REV*/BuiltinType.AINT64, /*LOCK*/BuiltinType.AINT32 };
private static final int[] PK_INDICATOR = { 1 };
private static final int[] PK_INDEXES = { 0 };
@@ -105,16 +103,22 @@
int position = content.readerIndex();
final int limit = content.writerIndex();
final int contentSize = content.readableBytes();
+ bytes.clear();
while (position < limit) {
- bytes.clear();
chars.clear();
if ((contentSize - position) < bytes.capacity()) {
bytes.limit(contentSize - position);
}
- content.getBytes(position, bytes);
+ content.getBytes(position + bytes.position(), bytes);
position += bytes.position();
bytes.flip();
decoder.decode(bytes, chars, false);
+ if (bytes.hasRemaining()) {
+ bytes.compact();
+ position -= bytes.position();
+ } else {
+ bytes.clear();
+ }
chars.flip();
record.append(chars);
}
diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ByteBufUTF8DecodeTest.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ByteBufUTF8DecodeTest.java
new file mode 100644
index 0000000..c238f1c
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ByteBufUTF8DecodeTest.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.parser.test;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.asterix.external.api.IRawRecord;
+import org.apache.asterix.external.input.record.CharArrayRecord;
+import org.apache.asterix.external.input.record.converter.DCPMessageToRecordConverter;
+import org.apache.asterix.external.input.record.reader.stream.SemiStructuredRecordReader;
+import org.apache.asterix.external.input.stream.LocalFSInputStream;
+import org.apache.asterix.external.util.FileSystemWatcher;
+import org.junit.Assert;
+import org.junit.Test;
+
+import com.couchbase.client.deps.io.netty.buffer.ByteBuf;
+import com.couchbase.client.deps.io.netty.buffer.UnpooledByteBufAllocator;
+
+public class ByteBufUTF8DecodeTest {
+
+    private static final int BUFFER_SIZE = 8; // small, so the decode loop runs multiple times per input
+    private static final int KB32 = 32768;
+    private final CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder();
+    private final ByteBuffer bytes = ByteBuffer.allocate(BUFFER_SIZE);
+    private final CharBuffer chars = CharBuffer.allocate(BUFFER_SIZE);
+    private final CharArrayRecord value = new CharArrayRecord();
+    private final ByteBuf nettyBuffer = UnpooledByteBufAllocator.DEFAULT.heapBuffer(KB32, Integer.MAX_VALUE);
+
+    /** Round-trips every line of the multilingual fixture; failures propagate to JUnit unchanged. */
+    @Test
+    public void eatGlass() throws Exception {
+        Path fixture = Paths.get(getClass().getResource("/ICanEatGlass.txt").toURI());
+        // The fixture is UTF-8; read it as such instead of relying on the platform default charset.
+        try (BufferedReader br = java.nio.file.Files.newBufferedReader(fixture, StandardCharsets.UTF_8)) {
+            for (String line; (line = br.readLine()) != null;) {
+                process(line);
+            }
+        }
+    }
+
+    /** Round-trips each record read from the JSON fixture; failures propagate to JUnit unchanged. */
+    @Test
+    public void testDecodingJsonRecords() throws Exception {
+        String jsonFileName = "/record.json";
+        List<Path> paths = new ArrayList<>();
+        paths.add(Paths.get(getClass().getResource(jsonFileName).toURI()));
+        FileSystemWatcher watcher = new FileSystemWatcher(paths, null, false);
+        LocalFSInputStream in = new LocalFSInputStream(watcher);
+        try (SemiStructuredRecordReader recordReader = new SemiStructuredRecordReader(in, "{", "}")) {
+            while (recordReader.hasNext()) {
+                // No catch-and-fail wrapper: JUnit reports the original exception and stack trace.
+                IRawRecord<char[]> record = recordReader.next();
+                process(record.toString());
+            }
+        }
+    }
+
+    /**
+     * Writes {@code input} as UTF-8 into the netty buffer, decodes it through
+     * {@code DCPMessageToRecordConverter.set} and asserts the decoded record equals the input.
+     * @param input the text to round-trip
+     * @throws IOException propagated from the record/converter calls
+     */
+    private void process(String input) throws IOException {
+        value.reset();
+        nettyBuffer.clear();
+        nettyBuffer.writeBytes(input.getBytes(StandardCharsets.UTF_8));
+        DCPMessageToRecordConverter.set(nettyBuffer, decoder, bytes, chars, value);
+        Assert.assertEquals(input, value.toString());
+    }
+}
diff --git a/asterixdb/asterix-external-data/src/test/resources/ICanEatGlass.txt b/asterixdb/asterix-external-data/src/test/resources/ICanEatGlass.txt
new file mode 100644
index 0000000..a3d9ca6
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/test/resources/ICanEatGlass.txt
@@ -0,0 +1,149 @@
+Sanskrit: काचं शक्नोम्यत्तुम् । नोपहिनस्ति माम् ॥
+Sanskrit (standard transcription): kācaṃ śaknomyattum; nopahinasti mām.
+Classical Greek: ὕαλον ϕαγεῖν δύναμαι· τοῦτο οὔ με βλάπτει.
+Greek (monotonic): Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα.
+Greek (polytonic): Μπορῶ νὰ φάω σπασμένα γυαλιὰ χωρὶς νὰ πάθω τίποτα.
+Latin: Vitrum edere possum; mihi non nocet.
+Old French: Je puis mangier del voirre. Ne me nuit.
+French: Je peux manger du verre, ça ne me fait pas mal.
+Provençal / Occitan: Pòdi manjar de veire, me nafrariá pas.
+Québécois: J'peux manger d'la vitre, ça m'fa pas mal.
+Walloon: Dji pou magnî do vêre, çoula m' freut nén må.
+Picard: Ch'peux mingi du verre, cha m'foé mie n'ma.
+Kreyòl Ayisyen (Haitï): Mwen kap manje vè, li pa blese'm.
+Basque: Kristala jan dezaket, ez dit minik ematen.
+Catalan / Català: Puc menjar vidre, que no em fa mal.
+Spanish: Puedo comer vidrio, no me hace daño.
+Aragonés: Puedo minchar beire, no me'n fa mal.
+Galician: Eu podo xantar cristais e non cortarme.
+European Portuguese: Posso comer vidro, não me faz mal.
+Brazilian Portuguese: Posso comer vidro, não me machuca.
+Caboverdiano/Kabuverdianu (Cape Verde): M' podê cumê vidru, ca ta maguâ-m'.
+Papiamentu: Ami por kome glas anto e no ta hasimi daño.
+Italian: Posso mangiare il vetro e non mi fa male.
+Milanese: Sôn bôn de magnà el véder, el me fa minga mal.
+Roman: Me posso magna' er vetro, e nun me fa male.
+Napoletano: M' pozz magna' o'vetr, e nun m' fa mal.
+Venetian: Mi posso magnare el vetro, no'l me fa mae.
+Zeneise (Genovese): Pòsso mangiâ o veddro e o no me fà mâ.
+Sicilian: Puotsu mangiari u vitru, nun mi fa mali.
+Romansch (Grischun): Jau sai mangiar vaider, senza che quai fa donn a mai.
+Romanian: Pot să mănânc sticlă și ea nu mă rănește.
+Esperanto: Mi povas manĝi vitron, ĝi ne damaĝas min.
+Cornish: Mý a yl dybry gwéder hag éf ny wra ow ankenya.
+Welsh: Dw i'n gallu bwyta gwydr, 'dyw e ddim yn gwneud dolur i mi.
+Manx Gaelic: Foddym gee glonney agh cha jean eh gortaghey mee.
+Old Irish (Ogham): ᚛᚛ᚉᚑᚅᚔᚉᚉᚔᚋ ᚔᚈᚔ ᚍᚂᚐᚅᚑ ᚅᚔᚋᚌᚓᚅᚐ᚜
+Old Irish (Latin): Con·iccim ithi nglano. Ním·géna.
+Irish: Is féidir liom gloinne a ithe. Ní dhéanann sí dochar ar bith dom.
+Ulster Gaelic: Ithim-sa gloine agus ní miste damh é.
+Scottish Gaelic: S urrainn dhomh gloinne ithe; cha ghoirtich i mi.
+Anglo-Saxon (Runes): ᛁᚳ᛫ᛗᚨᚷ᛫ᚷᛚᚨᛋ᛫ᛖᚩᛏᚪᚾ᛫ᚩᚾᛞ᛫ᚻᛁᛏ᛫ᚾᛖ᛫ᚻᛖᚪᚱᛗᛁᚪᚧ᛫ᛗᛖ᛬
+Anglo-Saxon (Latin): Ic mæg glæs eotan ond hit ne hearmiað me.
+Middle English: Ich canne glas eten and hit hirtiþ me nouȝt.
+English: I can eat glass and it doesn't hurt me.
+English (IPA): [aɪ kæn iːt glɑːs ænd ɪt dɐz nɒt hɜːt miː] (Received Pronunciation)
+English (Braille): ⠊⠀⠉⠁⠝⠀⠑⠁⠞⠀⠛⠇⠁⠎⠎⠀⠁⠝⠙⠀⠊⠞⠀⠙⠕⠑⠎⠝⠞⠀⠓⠥⠗⠞⠀⠍⠑
+Jamaican: Mi kian niam glas han i neba hot mi.
+Lalland Scots / Doric: Ah can eat gless, it disnae hurt us.
+Gothic: ЌЌЌ ЌЌЌЍ Ќ̈ЍЌЌ, ЌЌ ЌЌЍ ЍЌ ЌЌЌЌ ЌЍЌЌЌЌЌ.
+Old Norse (Runes): ᛖᚴ ᚷᛖᛏ ᛖᛏᛁ ᚧ ᚷᛚᛖᚱ ᛘᚾ ᚦᛖᛋᛋ ᚨᚧ ᚡᛖ ᚱᚧᚨ ᛋᚨᚱ
+Old Norse (Latin): Ek get etið gler án þess að verða sár.
+Norsk / Norwegian (Nynorsk): Eg kan eta glas utan å skada meg.
+Norsk / Norwegian (Bokmål): Jeg kan spise glass uten å skade meg.
+Føroyskt / Faroese: Eg kann eta glas, skaðaleysur.
+Íslenska / Icelandic: Ég get etið gler án þess að meiða mig.
+Svenska / Swedish: Jag kan äta glas utan att skada mig.
+Dansk / Danish: Jeg kan spise glas, det gør ikke ondt på mig.
+Sønderjysk: Æ ka æe glass uhen at det go mæ naue.
+Frysk / Frisian: Ik kin glês ite, it docht me net sear.
+Nederlands / Dutch: Ik kan glas eten, het doet mij geen kwaad.
+Kirchröadsj/Bôchesserplat: Iech ken glaas èèse, mer 't deet miech jing pieng.
+Afrikaans: Ek kan glas eet, maar dit doen my nie skade nie.
+Lëtzebuergescht / Luxemburgish: Ech kan Glas iessen, daat deet mir nët wei.
+Deutsch / German: Ich kann Glas essen, ohne mir zu schaden.
+Ruhrdeutsch: Ich kann Glas verkasematuckeln, ohne dattet mich wat jucken tut.
+Langenfelder Platt: Isch kann Jlaas kimmeln, uuhne datt mich datt weh dääd.
+Lausitzer Mundart ("Lusatian"): Ich koann Gloos assn und doas dudd merr ni wii.
+Odenwälderisch: Iech konn glaasch voschbachteln ohne dass es mir ebbs daun doun dud.
+Sächsisch / Saxon: 'sch kann Glos essn, ohne dass'sch mer wehtue.
+Pfälzisch: Isch konn Glass fresse ohne dasses mer ebbes ausmache dud.
+Schwäbisch / Swabian: I kå Glas frässa, ond des macht mr nix!
+Deutsch (Voralberg): I ka glas eassa, ohne dass mar weh tuat.
+Bayrisch / Bavarian: I koh Glos esa, und es duard ma ned wei.
+Allemannisch: I kaun Gloos essen, es tuat ma ned weh.
+Schwyzerdütsch (Zürich): Ich chan Glaas ässe, das schadt mir nöd.
+Schwyzerdütsch (Luzern): Ech cha Glâs ässe, das schadt mer ned.
+Hungarian: Meg tudom enni az üveget, nem lesz tőle bajom.
+Suomi / Finnish: Voin syödä lasia, se ei vahingoita minua.
+Sami (Northern): Sáhtán borrat lása, dat ii leat bávččas.
+Erzian: Мон ярсан суликадо, ды зыян эйстэнзэ а ули.
+Northern Karelian: Mie voin syvvä lasie ta minla ei ole kipie.
+Southern Karelian: Minä voin syvvä st'oklua dai minule ei ole kibie.
+Estonian: Ma võin klaasi süüa, see ei tee mulle midagi.
+Latvian: Es varu ēst stiklu, tas man nekaitē.
+Lithuanian: Aš galiu valgyti stiklą ir jis manęs nežeidžia.
+Czech: Mohu jíst sklo, neublíží mi.
+Slovak: Môžem jesť sklo. Nezraní ma.
+Polska / Polish: Mogę jeść szkło i mi nie szkodzi.
+Slovenian: Lahko jem steklo, ne da bi mi škodovalo.
+Bosnian, Croatian, Montenegrin and Serbian (Latin): Ja mogu jesti staklo, i to mi ne šteti.
+Bosnian, Montenegrin and Serbian (Cyrillic): Ја могу јести стакло, и то ми не штети.
+Macedonian: Можам да јадам стакло, а не ме штета.
+Russian: Я могу есть стекло, оно мне не вредит.
+Belarusian (Cyrillic): Я магу есці шкло, яно мне не шкодзіць.
+Belarusian (Lacinka): Ja mahu jeści škło, jano mne ne škodzić.
+Ukrainian: Я можу їсти скло, і воно мені не зашкодить.
+Bulgarian: Мога да ям стъкло, то не ми вреди.
+Georgian: მინას ვჭამ და არა მტკივა.
+Armenian: Կրնամ ապակի ուտել և ինծի անհանգիստ չըներ։.
+Albanian: Unë mund të ha qelq dhe nuk më gjen gjë.
+Turkish: Cam yiyebilirim, bana zararı dokunmaz.
+Turkish (Ottoman): جام ييه بلورم بڭا ضررى طوقونمز
+Bangla / Bengali: আমি কাঁচ খেতে পারি, তাতে আমার কোনো ক্ষতি হয় না।
+Marathi: मी काच खाऊ शकतो, मला ते दुखत नाही.
+Kannada: ನನಗೆ ಹಾನಿ ಆಗದೆ, ನಾನು ಗಜನ್ನು ತಿನಬಹುದು.
+Hindi: मैं काँच खा सकता हूँ और मुझे उससे कोई चोट नहीं पहुंचती.
+Tamil: நான் கண்ணாடி சாப்பிடுவேன், அதனால் எனக்கு ஒரு கேடும் வராது.
+Telugu: నేను గాజు తినగలను మరియు అలా చేసినా నాకు ఏమి ఇబ్బంది లేదు.
+Sinhalese: මට වීදුරු කෑමට හැකියි. එයින් මට කිසි හානියක් සිදු නොවේ.
+Urdu: میں کانچ کھا سکتا ہوں اور مجھے تکلیف نہیں ہوتی ۔
+Pashto: زه شيشه خوړلې شم، هغه ما نه خوږوي
+Farsi: .من می توانم بدونِ احساس درد شيشه بخورم
+Arabic: أنا قادر على أكل الزجاج و هذا لا يؤلمني.
+Maltese: Nista' niekol il-ħġieġ u ma jagħmilli xejn.
+Hebrew: אני יכול לאכול זכוכית וזה לא מזיק לי.
+Yiddish: איך קען עסן גלאָז און עס טוט מיר נישט װײ.
+Twi: Metumi awe tumpan, ɜnyɜ me hwee.
+Hausa (Latin): Inā iya taunar gilāshi kuma in gamā lāfiyā.
+Hausa (Ajami): إِنا إِىَ تَونَر غِلَاشِ كُمَ إِن غَمَا لَافِىَا
+Yoruba: Mo lè je̩ dígí, kò ní pa mí lára.
+Lingala: Nakokí kolíya biténi bya milungi, ekosála ngáí mabé tɛ́.
+(Ki)Swahili: Naweza kula bilauri na sikunyui.
+Malay: Saya boleh makan kaca dan ia tidak mencederakan saya.
+Tagalog: Kaya kong kumain nang bubog at hindi ako masaktan.
+Chamorro: Siña yo' chumocho krestat, ti ha na'lalamen yo'.
+Fijian: Au rawa ni kana iloilo, ia au sega ni vakacacani kina.
+Javanese: Aku isa mangan beling tanpa lara.
+Burmese: က္ယ္ဝန္တော္၊က္ယ္ဝန္မ မ္ယက္စားနုိင္သည္။ ၎က္ရောင့္ ထိခုိက္မ္ဟု မရ္ဟိပာ။.
+Vietnamese (quốc ngữ): Tôi có thể ăn thủy tinh mà không hại gì.
+Vietnamese (nôm): 些 ࣎ 世 咹 水 晶 ও 空 ࣎ 害 咦.
+Khmer: ខ្ញុំអាចញុំកញ្ចក់បាន ដោយគ្មានបញ្ហារ.
+Lao: ຂອ້ຍກິນແກ້ວໄດ້ໂດຍທີ່ມັນບໍ່ໄດ້ເຮັດໃຫ້ຂອ້ຍເຈັບ.
+Thai: ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ.
+Mongolian (Cyrillic): Би шил идэй чадна, надад хортой биш.
+Mongolian (Classic): ᠪᠢ ᠰᠢᠯᠢ ᠢᠳᠡᠶᠦ ᠴᠢᠳᠠᠨᠠ ᠂ ᠨᠠᠳᠤᠷ ᠬᠣᠤᠷᠠᠳᠠᠢ ᠪᠢᠰᠢ.
+Nepali: म काँच खान सक्छू र मलाई केहि नी हुन्न् ।.
+Tibetan: ཤེལ་སྒོ་ཟ་ནས་ང་ན་གི་མ་རེད།.
+Chinese: 我能吞下玻璃而不伤身体。.
+Taiwanese: Góa ē-tàng chia̍h po-lê, mā bē tio̍h-siong.
+Japanese: 私はガラスを食べられます。それは私を傷つけません。.
+Korean: 나는 유리를 먹을 수 있어요. 그래도 아프지 않아요.
+Bislama: Mi save kakae glas, hemi no save katem mi.
+Hawaiian: Hiki iaʻu ke ʻai i ke aniani; ʻaʻole nō lā au e ʻeha.
+Marquesan: E koʻana e kai i te karahi, mea ʻā, ʻaʻe hauhau.
+Inuktitut: ᐊᓕᒍᖅ ᓂᕆᔭᕌᖓᒃᑯ ᓱᕋᙱᑦᑐᓐᓇᖅᑐᖓ.
+Chinook Jargon: Naika məkmək kakshət labutay, pi weyk ukuk munk-sik nay.
+Navajo: Tsésǫʼ yishą́ągo bííníshghah dóó doo shił neezgai da.
+Lojban: mi kakne le nu citka le blaci .iku'i le se go'i na xrani mi.
+Nórdicg: Ljœr ye caudran créneþ ý jor cẃran.
\ No newline at end of file
diff --git a/asterixdb/asterix-external-data/src/test/resources/record.json b/asterixdb/asterix-external-data/src/test/resources/record.json
new file mode 100644
index 0000000..9b32a5d
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/test/resources/record.json
@@ -0,0 +1,375 @@
+{
+ "quoted_status": {
+ "in_reply_to_status_id_str": null,
+ "in_reply_to_status_id": null,
+ "possibly_sensitive": false,
+ "coordinates": null,
+ "created_at": "Wed Sep 02 07:24:48 +0000 2015",
+ "truncated": false,
+ "in_reply_to_user_id_str": null,
+ "source": "<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>",
+ "retweet_count": 0,
+ "retweeted": false,
+ "geo": null,
+ "filter_level": "low",
+ "in_reply_to_screen_name": null,
+ "entities": {
+ "urls": [
+ {
+ "expanded_url": "http://www.bigdata-insider.de/infrastruktur/articles/498946/?cmp=sm-tw-swyn&utm_source=twitter&utm_medium=sm&utm_campaign=twitter-swyn",
+ "display_url": "bigdata-insider.de/infrastruktur/…",
+ "indices": [
+ 54,
+ 76
+ ],
+ "url": "http://t.co/8inseWDWIE"
+ }
+ ],
+ "hashtags": [
+ {
+ "indices": [
+ 16,
+ 22
+ ],
+ "text": "NoSQL"
+ },
+ {
+ "indices": [
+ 24,
+ 36
+ ],
+ "text": "Datenbanken"
+ }
+ ],
+ "user_mentions": [
+ {
+ "name": "EnterpriseDB_DE",
+ "indices": [
+ 77,
+ 93
+ ],
+ "id": 1219531897,
+ "screen_name": "EnterpriseDB_DE",
+ "id_str": "1219531897"
+ }
+ ],
+ "trends": [],
+ "symbols": []
+ },
+ "id_str": "638975848138285056",
+ "in_reply_to_user_id": null,
+ "favorite_count": 0,
+ "id": 638975848138285000,
+ "text": "Relationale und #NoSQL- #Datenbanken wachsen zusammen http://t.co/8inseWDWIE @EnterpriseDB_DE",
+ "place": null,
+ "contributors": null,
+ "lang": "de",
+ "user": {
+ "utc_offset": null,
+ "friends_count": 1440,
+ "profile_image_url_https": "https://pbs.twimg.com/profile_images/494807363572875265/EUm9CELG_normal.jpeg",
+ "listed_count": 54,
+ "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png",
+ "default_profile_image": false,
+ "favourites_count": 11,
+ "description": "BigData-Insider.de – Entscheiderwissen für Big Data Professionals",
+ "created_at": "Mon Jun 30 10:40:17 +0000 2014",
+ "is_translator": false,
+ "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png",
+ "protected": false,
+ "screen_name": "bigdata_insider",
+ "id_str": "2596163432",
+ "profile_link_color": "177535",
+ "id": 2596163432,
+ "geo_enabled": false,
+ "profile_background_color": "55965F",
+ "lang": "de",
+ "profile_sidebar_border_color": "FFFFFF",
+ "profile_text_color": "333333",
+ "verified": false,
+ "profile_image_url": "http://pbs.twimg.com/profile_images/494807363572875265/EUm9CELG_normal.jpeg",
+ "time_zone": null,
+ "url": "http://www.bigdata-insider.de",
+ "contributors_enabled": false,
+ "profile_background_tile": false,
+ "profile_banner_url": "https://pbs.twimg.com/profile_banners/2596163432/1405605723",
+ "statuses_count": 325,
+ "follow_request_sent": null,
+ "followers_count": 817,
+ "profile_use_background_image": false,
+ "default_profile": false,
+ "following": null,
+ "name": "BigData-Insider",
+ "location": "Augsburg, Germany",
+ "profile_sidebar_fill_color": "DDEEF6",
+ "notifications": null
+ },
+ "favorited": false
+ },
+ "in_reply_to_status_id_str": null,
+ "in_reply_to_status_id": null,
+ "created_at": "Wed Sep 02 08:17:29 +0000 2015",
+ "in_reply_to_user_id_str": null,
+ "source": "<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>",
+ "quoted_status_id": 638975848138285000,
+ "retweet_count": 0,
+ "retweeted": false,
+ "geo": null,
+ "filter_level": "low",
+ "in_reply_to_screen_name": null,
+ "id_str": "638989106882736128",
+ "in_reply_to_user_id": null,
+ "favorite_count": 0,
+ "id": 638989106882736100,
+ "text": "RT: Datenbanken im IoT-Zeitalter - mehr lesen auf @bigdata_insider https://t.co/Yt0Pzij3tK",
+ "place": null,
+ "lang": "de",
+ "favorited": false,
+ "possibly_sensitive": false,
+ "coordinates": null,
+ "truncated": false,
+ "timestamp_ms": "1441181849581",
+ "entities": {
+ "urls": [
+ {
+ "expanded_url": "https://twitter.com/bigdata_insider/status/638975848138285056",
+ "display_url": "twitter.com/bigdata_inside…",
+ "indices": [
+ 68,
+ 91
+ ],
+ "url": "https://t.co/Yt0Pzij3tK"
+ }
+ ],
+ "hashtags": [],
+ "user_mentions": [
+ {
+ "name": "BigData-Insider",
+ "indices": [
+ 50,
+ 66
+ ],
+ "id": 2596163432,
+ "screen_name": "bigdata_insider",
+ "id_str": "2596163432"
+ }
+ ],
+ "trends": [],
+ "symbols": []
+ },
+ "quoted_status_id_str": "638975848138285056",
+ "contributors": null,
+ "user": {
+ "utc_offset": 7200,
+ "friends_count": 382,
+ "profile_image_url_https": "https://pbs.twimg.com/profile_images/600331462982946816/IzBC43SR_normal.png",
+ "listed_count": 22,
+ "profile_background_image_url": "http://abs.twimg.com/images/themes/theme14/bg.gif",
+ "default_profile_image": false,
+ "favourites_count": 56,
+ "description": "EnterpriseDB ist weltgrößter und führender Anbieter von Enterprise Lösungen und Services basierend auf PostgreSQL, die fortschrittlichste Open Source Datenbank.",
+ "created_at": "Mon Feb 25 18:37:11 +0000 2013",
+ "is_translator": false,
+ "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme14/bg.gif",
+ "protected": false,
+ "screen_name": "EnterpriseDB_DE",
+ "id_str": "1219531897",
+ "profile_link_color": "EC7224",
+ "id": 1219531897,
+ "geo_enabled": false,
+ "profile_background_color": "EC7224",
+ "lang": "de",
+ "profile_sidebar_border_color": "FFFFFF",
+ "profile_text_color": "333333",
+ "verified": false,
+ "profile_image_url": "http://pbs.twimg.com/profile_images/600331462982946816/IzBC43SR_normal.png",
+ "time_zone": "Berlin",
+ "url": "http://www.enterprisedb.com",
+ "contributors_enabled": false,
+ "profile_background_tile": false,
+ "statuses_count": 941,
+ "follow_request_sent": null,
+ "followers_count": 336,
+ "profile_use_background_image": true,
+ "default_profile": false,
+ "following": null,
+ "name": "EnterpriseDB_DE",
+ "location": "Berlin, Germany",
+ "profile_sidebar_fill_color": "DDEEF6",
+ "notifications": null
+ }
+}
+{
+ "in_reply_to_status_id_str": null,
+ "in_reply_to_status_id": null,
+ "created_at": "Fri May 06 12:36:44 +0000 2016",
+ "in_reply_to_user_id_str": null,
+ "source": "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>",
+ "retweeted_status": {
+ "in_reply_to_status_id_str": null,
+ "in_reply_to_status_id": null,
+ "created_at": "Fri May 06 11:09:20 +0000 2016",
+ "in_reply_to_user_id_str": null,
+ "source": "<a href=\"http://jp.techcrunch.com/\" rel=\"nofollow\">TC Japan RTbot</a>",
+ "retweet_count": 4,
+ "retweeted": false,
+ "geo": null,
+ "filter_level": "low",
+ "in_reply_to_screen_name": null,
+ "is_quote_status": false,
+ "id_str": "728542158676852736",
+ "in_reply_to_user_id": null,
+ "favorite_count": 3,
+ "id": 728542158676852700,
+ "text": "16shares: Bashoが時系列データ専用NoSQLデータベースRiak TSをオープンソース化してIoTへの浸透をねらう https://t.co/vYi3iI3XkZ",
+ "place": null,
+ "lang": "ja",
+ "favorited": false,
+ "possibly_sensitive": false,
+ "coordinates": null,
+ "truncated": false,
+ "entities": {
+ "urls": [
+ {
+ "expanded_url": "http://jp.techcrunch.com/2016/05/06/20160505basho-open-sources-its-riak-ts-database-for-the-internet-of-things/",
+ "display_url": "jp.techcrunch.com/2016/05/06/201…",
+ "indices": [
+ 65,
+ 88
+ ],
+ "url": "https://t.co/vYi3iI3XkZ"
+ }
+ ],
+ "hashtags": [],
+ "user_mentions": [],
+ "symbols": []
+ },
+ "contributors": null,
+ "user": {
+ "utc_offset": 32400,
+ "friends_count": 456,
+ "profile_image_url_https": "https://pbs.twimg.com/profile_images/542903207098212352/S02CeC4c_normal.png",
+ "listed_count": 4277,
+ "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png",
+ "default_profile_image": false,
+ "favourites_count": 420,
+ "description": "TechCrunch Japanの公式アカウントです",
+ "created_at": "Fri Apr 22 10:46:18 +0000 2011",
+ "is_translator": false,
+ "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png",
+ "protected": false,
+ "screen_name": "jptechcrunch",
+ "id_str": "286106104",
+ "profile_link_color": "0A9E01",
+ "id": 286106104,
+ "geo_enabled": false,
+ "profile_background_color": "FFFFFF",
+ "lang": "ja",
+ "profile_sidebar_border_color": "C0DEED",
+ "profile_text_color": "333333",
+ "verified": false,
+ "profile_image_url": "http://pbs.twimg.com/profile_images/542903207098212352/S02CeC4c_normal.png",
+ "time_zone": "Tokyo",
+ "url": "http://jp.techcrunch.com",
+ "contributors_enabled": false,
+ "profile_background_tile": false,
+ "profile_banner_url": "https://pbs.twimg.com/profile_banners/286106104/1427898894",
+ "statuses_count": 24997,
+ "follow_request_sent": null,
+ "followers_count": 58290,
+ "profile_use_background_image": true,
+ "default_profile": false,
+ "following": null,
+ "name": "TechCrunch Japan",
+ "location": "Tokyo",
+ "profile_sidebar_fill_color": "DDEEF6",
+ "notifications": null
+ }
+ },
+ "retweet_count": 0,
+ "retweeted": false,
+ "geo": null,
+ "filter_level": "low",
+ "in_reply_to_screen_name": null,
+ "is_quote_status": false,
+ "id_str": "728564152130658304",
+ "in_reply_to_user_id": null,
+ "favorite_count": 0,
+ "id": 728564152130658300,
+ "text": "RT @jptechcrunch: 16shares: Bashoが時系列データ専用NoSQLデータベースRiak TSをオープンソース化してIoTへの浸透をねらう https://t.co/vYi3iI3XkZ",
+ "place": null,
+ "lang": "ja",
+ "favorited": false,
+ "possibly_sensitive": false,
+ "coordinates": null,
+ "truncated": false,
+ "timestamp_ms": "1462538204592",
+ "entities": {
+ "urls": [
+ {
+ "expanded_url": "http://jp.techcrunch.com/2016/05/06/20160505basho-open-sources-its-riak-ts-database-for-the-internet-of-things/",
+ "display_url": "jp.techcrunch.com/2016/05/06/201…",
+ "indices": [
+ 83,
+ 106
+ ],
+ "url": "https://t.co/vYi3iI3XkZ"
+ }
+ ],
+ "hashtags": [],
+ "user_mentions": [
+ {
+ "name": "TechCrunch Japan",
+ "indices": [
+ 3,
+ 16
+ ],
+ "id": 286106104,
+ "screen_name": "jptechcrunch",
+ "id_str": "286106104"
+ }
+ ],
+ "symbols": []
+ },
+ "contributors": null,
+ "user": {
+ "utc_offset": -25200,
+ "friends_count": 184,
+ "profile_image_url_https": "https://pbs.twimg.com/profile_images/615865274592432128/fYOAh2iR_normal.jpg",
+ "listed_count": 10,
+ "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png",
+ "default_profile_image": false,
+ "favourites_count": 1523,
+ "description": "自作系、ランニング、筋トレを主とする生態系 。宗教上の理由でASRock、nVIDIAを崇拝。水冷化に向けて倹約中の身。炭酸飲料は血液。今後ともよろしく……",
+ "created_at": "Fri Jun 26 19:56:49 +0000 2015",
+ "is_translator": false,
+ "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png",
+ "protected": false,
+ "screen_name": "774Inside_X79",
+ "id_str": "3257040751",
+ "profile_link_color": "0084B4",
+ "id": 3257040751,
+ "geo_enabled": true,
+ "profile_background_color": "C0DEED",
+ "lang": "ja",
+ "profile_sidebar_border_color": "C0DEED",
+ "profile_text_color": "333333",
+ "verified": false,
+ "profile_image_url": "http://pbs.twimg.com/profile_images/615865274592432128/fYOAh2iR_normal.jpg",
+ "time_zone": "Pacific Time (US & Canada)",
+ "url": "http://twpf.jp/774Inside_X79",
+ "contributors_enabled": false,
+ "profile_background_tile": false,
+ "profile_banner_url": "https://pbs.twimg.com/profile_banners/3257040751/1458988346",
+ "statuses_count": 3694,
+ "follow_request_sent": null,
+ "followers_count": 144,
+ "profile_use_background_image": true,
+ "default_profile": true,
+ "following": null,
+ "name": "ラォ",
+ "location": "背後",
+ "profile_sidebar_fill_color": "DDEEF6",
+ "notifications": null
+ }
+}