Add string functions repeat and split.
Change-Id: Ib9de5a59807d5ff51fa5d72444053f87cf8dd289
Reviewed-on: https://asterix-gerrit.ics.uci.edu/1141
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Till Westmann <tillw@apache.org>
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java
index a1746cc..27454e3 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java
@@ -222,6 +222,8 @@
import org.apache.asterix.runtime.evaluators.functions.StringRegExpPositionWithFlagDescriptor;
import org.apache.asterix.runtime.evaluators.functions.StringRegExpReplaceDescriptor;
import org.apache.asterix.runtime.evaluators.functions.StringRegExpReplaceWithFlagsDescriptor;
+import org.apache.asterix.runtime.evaluators.functions.StringRepeatDescriptor;
+import org.apache.asterix.runtime.evaluators.functions.StringSplitDescriptor;
import org.apache.asterix.runtime.evaluators.functions.StringStartsWithDescriptor;
import org.apache.asterix.runtime.evaluators.functions.StringToCodePointDescriptor;
import org.apache.asterix.runtime.evaluators.functions.StringTrim2Descriptor;
@@ -513,6 +515,8 @@
functionsToInjectUnkownHandling.add(StringLTrim2Descriptor.FACTORY);
functionsToInjectUnkownHandling.add(StringRTrim2Descriptor.FACTORY);
functionsToInjectUnkownHandling.add(StringPositionDescriptor.FACTORY);
+ functionsToInjectUnkownHandling.add(StringRepeatDescriptor.FACTORY);
+ functionsToInjectUnkownHandling.add(StringSplitDescriptor.FACTORY);
// Constructors
functionsToInjectUnkownHandling.add(ABooleanConstructorDescriptor.FACTORY);
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat/repeat.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat/repeat.1.query.sqlpp
new file mode 100644
index 0000000..17904c7
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat/repeat.1.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+{
+ "a": repeat(" new ", 2),
+ "b": repeat(" abcx ", 0),
+ "c": repeat("", 2),
+ "d": repeat(null, 2),
+ "e": repeat("asc", null),
+ "f": repeat(missing, 2),
+ "g": repeat("asc", missing),
+ "h": repeat(null, null),
+ "i": repeat(missing, missing)
+};
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat_error/repeat_error.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat_error/repeat_error.1.query.sqlpp
new file mode 100644
index 0000000..0701ce3
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/repeat_error/repeat_error.1.query.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+SELECT VALUE repeat(" new ", -1);
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/split/split.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/split/split.3.query.sqlpp
new file mode 100644
index 0000000..3fb2307
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/split/split.3.query.sqlpp
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+{
+ 'a': split("abc", "b"),
+ 'b': split("abc", "abc"),
+ 'c': split("abc", "x"),
+ 'd': split("abc", "a"),
+ 'e': split("abc", "bc"),
+ 'f': split("abc", ""),
+ 'g': split("", ""),
+ 'h': split("", "abc"),
+ 'i': split("", null),
+ 'j': split(null, "a"),
+ 'k': split("a", missing),
+ 'l': split(missing, 'a'),
+ 'm': split(null, missing),
+ 'n': split(null, null),
+ 'o': split(missing, missing)
+};
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/repeat/repeat.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/repeat/repeat.1.adm
new file mode 100644
index 0000000..7b8b7fc
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/repeat/repeat.1.adm
@@ -0,0 +1 @@
+{ "a": " new new ", "b": "", "c": "", "d": null, "e": null, "h": null }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/split/split.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/split/split.1.adm
new file mode 100644
index 0000000..3de2947
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/split/split.1.adm
@@ -0,0 +1 @@
+{ "a": [ "a", "c" ], "b": [ "", "" ], "c": [ "abc" ], "d": [ "", "bc" ], "e": [ "a", "" ], "f": [ "a", "b", "c" ], "g": [ ], "h": [ "" ], "i": null, "j": null, "n": null }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index 01a036c..fb9a8e8 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@ -5351,6 +5351,17 @@
</compilation-unit>
</test-case>
<test-case FilePath="string">
+ <compilation-unit name="repeat">
+ <output-dir compare="Text">repeat</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="string">
+ <compilation-unit name="repeat_error">
+ <output-dir compare="Text">repeat</output-dir>
+ <expected-error>repeat: expects a non-negative repeating number but got -1</expected-error>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="string">
<compilation-unit name="regexp_replace">
<output-dir compare="Text">replace22</output-dir>
</compilation-unit>
@@ -5386,6 +5397,11 @@
</compilation-unit>
</test-case>
<test-case FilePath="string">
+ <compilation-unit name="split">
+ <output-dir compare="Text">split</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="string">
<compilation-unit name="start-with1">
<output-dir compare="Text">start-with1</output-dir>
</compilation-unit>
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java
index dc2412c..da6ee02 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java
@@ -78,7 +78,6 @@
import org.apache.asterix.om.typecomputer.impl.OpenRecordConstructorResultType;
import org.apache.asterix.om.typecomputer.impl.OrderedListConstructorTypeComputer;
import org.apache.asterix.om.typecomputer.impl.OrderedListOfAInt32TypeComputer;
-import org.apache.asterix.om.typecomputer.impl.OrderedListOfAInt64TypeComputer;
import org.apache.asterix.om.typecomputer.impl.OrderedListOfAIntervalTypeComputer;
import org.apache.asterix.om.typecomputer.impl.OrderedListOfAPointTypeComputer;
import org.apache.asterix.om.typecomputer.impl.OrderedListOfAStringTypeComputer;
@@ -90,9 +89,11 @@
import org.apache.asterix.om.typecomputer.impl.ScalarVersionOfAggregateResultType;
import org.apache.asterix.om.typecomputer.impl.StringBooleanTypeComputer;
import org.apache.asterix.om.typecomputer.impl.StringInt32TypeComputer;
+import org.apache.asterix.om.typecomputer.impl.StringIntToStringTypeComputer;
import org.apache.asterix.om.typecomputer.impl.StringStringTypeComputer;
+import org.apache.asterix.om.typecomputer.impl.StringToInt64ListTypeComputer;
+import org.apache.asterix.om.typecomputer.impl.StringToStringListTypeComputer;
import org.apache.asterix.om.typecomputer.impl.SubsetCollectionTypeComputer;
-import org.apache.asterix.om.typecomputer.impl.Substring2TypeComputer;
import org.apache.asterix.om.typecomputer.impl.SubstringTypeComputer;
import org.apache.asterix.om.typecomputer.impl.SwitchCaseComputer;
import org.apache.asterix.om.typecomputer.impl.UnaryBinaryInt64TypeComputer;
@@ -310,6 +311,10 @@
"string-concat", 1);
public static final FunctionIdentifier STRING_JOIN = new FunctionIdentifier(FunctionConstants.ASTERIX_NS,
"string-join", 2);
+ public static final FunctionIdentifier STRING_REPEAT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS,
+ "repeat", 2);
+ public static final FunctionIdentifier STRING_SPLIT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "split",
+ 2);
public static final FunctionIdentifier DATASET = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "dataset", 1);
public static final FunctionIdentifier FEED_COLLECT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS,
@@ -884,10 +889,10 @@
addFunction(STRING_LIKE, BooleanFunctionTypeComputer.INSTANCE, true);
addFunction(STRING_CONTAINS, ABooleanTypeComputer.INSTANCE, true);
- addFunction(STRING_TO_CODEPOINT, OrderedListOfAInt64TypeComputer.INSTANCE, true);
+ addFunction(STRING_TO_CODEPOINT, StringToInt64ListTypeComputer.INSTANCE, true);
addFunction(CODEPOINT_TO_STRING, AStringTypeComputer.INSTANCE, true);
addFunction(STRING_CONCAT, AStringTypeComputer.INSTANCE, true);
- addFunction(SUBSTRING2, Substring2TypeComputer.INSTANCE, true);
+ addFunction(SUBSTRING2, StringIntToStringTypeComputer.INSTANCE, true);
addFunction(STRING_LENGTH, UnaryStringInt64TypeComputer.INSTANCE, true);
addFunction(STRING_LOWERCASE, StringStringTypeComputer.INSTANCE, true);
addFunction(STRING_UPPERCASE, StringStringTypeComputer.INSTANCE, true);
@@ -913,6 +918,8 @@
addFunction(SUBSTRING_AFTER, StringStringTypeComputer.INSTANCE, true);
addPrivateFunction(STRING_EQUAL, StringBooleanTypeComputer.INSTANCE, true);
addFunction(STRING_JOIN, AStringTypeComputer.INSTANCE, true);
+ addFunction(STRING_REPEAT, StringIntToStringTypeComputer.INSTANCE, true);
+ addFunction(STRING_SPLIT, StringToStringListTypeComputer.INSTANCE, true);
addPrivateFunction(ORDERED_LIST_CONSTRUCTOR, OrderedListConstructorTypeComputer.INSTANCE, true);
addFunction(POINT_CONSTRUCTOR, APointTypeComputer.INSTANCE, true);
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/Substring2TypeComputer.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java
similarity index 91%
rename from asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/Substring2TypeComputer.java
rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java
index e2e812e..7bb83d0 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/Substring2TypeComputer.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java
@@ -25,8 +25,8 @@
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-public class Substring2TypeComputer extends AbstractResultTypeComputer {
- public static final Substring2TypeComputer INSTANCE = new Substring2TypeComputer();
+public class StringIntToStringTypeComputer extends AbstractResultTypeComputer {
+ public static final StringIntToStringTypeComputer INSTANCE = new StringIntToStringTypeComputer();
@Override
public void checkArgType(int argIndex, IAType type) throws AlgebricksException {
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToInt64ListTypeComputer.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToInt64ListTypeComputer.java
new file mode 100644
index 0000000..b01ac71
--- /dev/null
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToInt64ListTypeComputer.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.om.typecomputer.impl;
+
+import org.apache.asterix.om.types.AOrderedListType;
+import org.apache.asterix.om.types.BuiltinType;
+import org.apache.asterix.om.types.IAType;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+
+public class StringToInt64ListTypeComputer extends AbstractStringTypeComputer {
+
+ public static final StringToInt64ListTypeComputer INSTANCE = new StringToInt64ListTypeComputer();
+
+ private StringToInt64ListTypeComputer() {
+ }
+
+ @Override
+ protected IAType getResultType(ILogicalExpression expr, IAType... strippedInputTypes) throws AlgebricksException {
+ return new AOrderedListType(BuiltinType.AINT64, null);
+ }
+}
\ No newline at end of file
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToStringListTypeComputer.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToStringListTypeComputer.java
new file mode 100644
index 0000000..4891330
--- /dev/null
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringToStringListTypeComputer.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.om.typecomputer.impl;
+
+import org.apache.asterix.om.types.AOrderedListType;
+import org.apache.asterix.om.types.BuiltinType;
+import org.apache.asterix.om.types.IAType;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+
+public class StringToStringListTypeComputer extends AbstractStringTypeComputer {
+
+ public static final StringToStringListTypeComputer INSTANCE = new StringToStringListTypeComputer();
+
+ private StringToStringListTypeComputer() {
+ }
+
+ @Override
+ protected IAType getResultType(ILogicalExpression expr, IAType... strippedInputTypes) throws AlgebricksException {
+ return new AOrderedListType(BuiltinType.ASTRING, null);
+ }
+}
\ No newline at end of file
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRepeatDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRepeatDescriptor.java
new file mode 100644
index 0000000..0f4c0de
--- /dev/null
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRepeatDescriptor.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.runtime.evaluators.functions;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.asterix.om.functions.AsterixBuiltinFunctions;
+import org.apache.asterix.om.functions.IFunctionDescriptor;
+import org.apache.asterix.om.functions.IFunctionDescriptorFactory;
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.types.EnumDeserializer;
+import org.apache.asterix.om.types.hierachy.ATypeHierarchy;
+import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.VoidPointable;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+
+public class StringRepeatDescriptor extends AbstractScalarFunctionDynamicDescriptor {
+ private static final long serialVersionUID = 1L;
+ public static final IFunctionDescriptorFactory FACTORY = new IFunctionDescriptorFactory() {
+ @Override
+ public IFunctionDescriptor createFunctionDescriptor() {
+ return new StringRepeatDescriptor();
+ }
+ };
+
+ @Override
+ public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args)
+ throws AlgebricksException {
+ return new IScalarEvaluatorFactory() {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public IScalarEvaluator createScalarEvaluator(final IHyracksTaskContext ctx) throws AlgebricksException {
+ return new IScalarEvaluator() {
+ // Argument evaluators.
+ private IScalarEvaluator evalString = args[0].createScalarEvaluator(ctx);
+ private IScalarEvaluator evalStart = args[1].createScalarEvaluator(ctx);
+
+ // Argument pointers.
+ private IPointable argString = new VoidPointable();
+ private IPointable argNumber = new VoidPointable();
+
+ // For outputting the result.
+ private ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
+ private DataOutput out = resultStorage.getDataOutput();
+ private byte[] tempLengthArray = new byte[5];
+
+ @Override
+ public void evaluate(IFrameTupleReference tuple, IPointable result) throws AlgebricksException {
+ resultStorage.reset();
+
+ // Calls argument evaluators.
+ evalStart.evaluate(tuple, argNumber);
+ evalString.evaluate(tuple, argString);
+
+ // Gets the repeating times.
+ int repeatingTimes = 0;
+ byte[] bytes = argNumber.getByteArray();
+ int offset = argNumber.getStartOffset();
+ try {
+ repeatingTimes = ATypeHierarchy.getIntegerValue(bytes, offset);
+ } catch (HyracksDataException e1) {
+ throw new AlgebricksException(e1);
+ }
+ // Checks repeatingTimes. It should be a non-negative value.
+ if (repeatingTimes < 0) {
+ throw new AlgebricksException(StringRepeatDescriptor.this.getIdentifier().getName()
+ + ": expects a non-negative repeating number but got " + repeatingTimes + ".");
+ }
+
+ // Gets the input string.
+ bytes = argString.getByteArray();
+ offset = argString.getStartOffset();
+ // Checks the type of the string argument.
+ if (bytes[offset] != ATypeTag.SERIALIZED_STRING_TYPE_TAG) {
+ throw new AlgebricksException(StringRepeatDescriptor.this.getIdentifier().getName()
+ + ": expects type STRING for the first argument but got "
+ + EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(bytes[offset]));
+ }
+
+ // Calculates the result string length.
+ int inputLen = UTF8StringUtil.getUTFLength(bytes, offset + 1);
+ int resultLen = Math.multiplyExact(inputLen, repeatingTimes); // Can throw overflow exception.
+ int cbytes = UTF8StringUtil.encodeUTF8Length(resultLen, tempLengthArray, 0);
+
+ // Writes the output string.
+ int inputStringStart = offset + 1 + UTF8StringUtil.getNumBytesToStoreLength(inputLen);
+ try {
+ out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
+ out.write(tempLengthArray, 0, cbytes);
+ for (int numRepeats = 0; numRepeats < repeatingTimes; ++numRepeats) {
+ out.write(bytes, inputStringStart, inputLen);
+ }
+ } catch (IOException e) {
+ throw new AlgebricksException(e);
+ }
+ result.set(resultStorage);
+ }
+ };
+ }
+ };
+ }
+
+ @Override
+ public FunctionIdentifier getIdentifier() {
+ return AsterixBuiltinFunctions.STRING_REPEAT;
+ }
+
+}
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringSplitDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringSplitDescriptor.java
new file mode 100644
index 0000000..8cd3a5b
--- /dev/null
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringSplitDescriptor.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.runtime.evaluators.functions;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.asterix.builders.OrderedListBuilder;
+import org.apache.asterix.om.functions.AsterixBuiltinFunctions;
+import org.apache.asterix.om.functions.IFunctionDescriptor;
+import org.apache.asterix.om.functions.IFunctionDescriptorFactory;
+import org.apache.asterix.om.types.AOrderedListType;
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.types.BuiltinType;
+import org.apache.asterix.om.types.EnumDeserializer;
+import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.data.std.primitive.VoidPointable;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+
+public class StringSplitDescriptor extends AbstractScalarFunctionDynamicDescriptor {
+
+ private static final long serialVersionUID = 1L;
+
+ public static final IFunctionDescriptorFactory FACTORY = new IFunctionDescriptorFactory() {
+ @Override
+ public IFunctionDescriptor createFunctionDescriptor() {
+ return new StringSplitDescriptor();
+ }
+ };
+
+ @Override
+ public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
+ return new IScalarEvaluatorFactory() {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public IScalarEvaluator createScalarEvaluator(final IHyracksTaskContext ctx) throws AlgebricksException {
+ return new IScalarEvaluator() {
+ // Argument evaluators.
+ private final IScalarEvaluator stringEval = args[0].createScalarEvaluator(ctx);
+ private final IScalarEvaluator patternEval = args[1].createScalarEvaluator(ctx);
+
+ // Argument pointers.
+ private final IPointable argString = new VoidPointable();
+ private final IPointable argPattern = new VoidPointable();
+ private final UTF8StringPointable argStrPtr = new UTF8StringPointable();
+ private final UTF8StringPointable argPatternPtr = new UTF8StringPointable();
+
+ // For an output string item.
+ private final ArrayBackedValueStorage itemStorge = new ArrayBackedValueStorage();
+ private final DataOutput itemOut = itemStorge.getDataOutput();
+ private final byte[] tempLengthArray = new byte[5];
+
+ // For the output list of strings.
+ private final AOrderedListType intListType = new AOrderedListType(BuiltinType.ASTRING, null);
+ private final OrderedListBuilder listBuilder = new OrderedListBuilder();
+ private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
+ private final DataOutput out = resultStorage.getDataOutput();
+
+ @Override
+ public void evaluate(IFrameTupleReference tuple, IPointable result) throws AlgebricksException {
+ try {
+ resultStorage.reset();
+ // Calls argument evaluators.
+ stringEval.evaluate(tuple, argString);
+ patternEval.evaluate(tuple, argPattern);
+
+ // Gets the bytes of the source string.
+ byte[] srcString = argString.getByteArray();
+ int srcOffset = argString.getStartOffset();
+ int srcLen = argString.getLength();
+ // Type check for the first argument.
+ if (srcString[srcOffset] != ATypeTag.SERIALIZED_STRING_TYPE_TAG) {
+ throw new AlgebricksException(StringSplitDescriptor.this.getIdentifier().getName()
+ + ": expects input type STRING for the first argument but got "
+ + EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(srcString[srcOffset]));
+ }
+
+ // Gets the bytes of the pattern string.
+ byte[] patternString = argPattern.getByteArray();
+ int patternOffset = argPattern.getStartOffset();
+ int patternLen = argPattern.getLength();
+ // Type check for the second argument.
+ if (patternString[patternOffset] != ATypeTag.SERIALIZED_STRING_TYPE_TAG) {
+ throw new AlgebricksException(StringSplitDescriptor.this.getIdentifier().getName()
+ + ": expects input type STRING for the second argument but got "
+ + EnumDeserializer.ATYPETAGDESERIALIZER
+ .deserialize(patternString[patternOffset]));
+ }
+
+ // Sets the UTF8 String pointables.
+ argStrPtr.set(srcString, srcOffset + 1, srcLen - 1);
+ argPatternPtr.set(patternString, patternOffset + 1, patternLen - 1);
+
+ // Gets the string length of the source string.
+ int inputStringLen = UTF8StringUtil.getUTFLength(srcString, srcOffset + 1);
+ int inputStringStart = srcOffset + 1
+ + UTF8StringUtil.getNumBytesToStoreLength(inputStringLen);
+ // Gets the string length of the pattern string.
+ int inputPatternLen = UTF8StringUtil.getUTFLength(patternString, patternOffset + 1);
+ // Handles the case that the pattern is "".
+ boolean emptyStringPattern = inputPatternLen == 0;
+
+ // Builds a list of strings.
+ listBuilder.reset(intListType);
+ int itemStrStart = 0;
+ int nextMatchStart;
+ while (itemStrStart < inputStringLen && (nextMatchStart = UTF8StringPointable
+ .find(argStrPtr, argPatternPtr, false, itemStrStart)) >= 0) {
+ // Adds an item string.
+ addItemString(srcString, inputStringStart, itemStrStart,
+ emptyStringPattern ? nextMatchStart + 1 : nextMatchStart);
+ itemStrStart = nextMatchStart + (emptyStringPattern ? 1 : inputPatternLen);
+ }
+ if (!emptyStringPattern) {
+ addItemString(srcString, inputStringStart, itemStrStart, inputStringLen);
+ }
+ listBuilder.write(out, true);
+ result.set(resultStorage);
+ } catch (IOException e1) {
+ throw new AlgebricksException(e1);
+ }
+ }
+
+ private void addItemString(byte[] srcString, int inputStringStart, int itemStartOffset,
+ int nextMatchStart) throws IOException {
+ int itemLen = nextMatchStart - itemStartOffset;
+ int cbytes = UTF8StringUtil.encodeUTF8Length(itemLen, tempLengthArray, 0);
+ itemStorge.reset();
+ itemOut.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
+ itemOut.write(tempLengthArray, 0, cbytes);
+ if (itemLen > 0) {
+ itemOut.write(srcString, inputStringStart + itemStartOffset, itemLen);
+ }
+ listBuilder.addItem(itemStorge);
+ }
+ };
+ }
+ };
+ }
+
+ @Override
+ public FunctionIdentifier getIdentifier() {
+ return AsterixBuiltinFunctions.STRING_SPLIT;
+ }
+
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
index 8592bd2..0850b04 100644
--- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
+++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
@@ -178,23 +178,40 @@
}
/**
- * return the byte offset of the first character of the matching string. Not including the MetaLength
- *
- * @param src
- * @param pattern
- * @param ignoreCase
- * @return
+ * @param src
+ * the source string.
+ * @param pattern
+ * the pattern string.
+ * @param ignoreCase
+ * to ignore case or not.
+ * @return the byte offset of the first character of the matching string. Not including the MetaLength.
*/
public static int find(UTF8StringPointable src, UTF8StringPointable pattern, boolean ignoreCase) {
+ return find(src, pattern, ignoreCase, 0);
+ }
+
+ /**
+ * @param src
+ * the source string.
+ * @param pattern
+ * the pattern string.
+ * @param ignoreCase
+ * to ignore case or not.
+ * @param startMatch
+ * the start offset.
+ * @return the byte offset of the first character of the matching string at or after <code>startMatch</code>.
+ * Not including the MetaLength.
+ */
+ public static int find(UTF8StringPointable src, UTF8StringPointable pattern, boolean ignoreCase, int startMatch) {
+ int startMatchPos = startMatch;
final int srcUtfLen = src.getUTF8Length();
final int pttnUtfLen = pattern.getUTF8Length();
final int srcStart = src.getMetaDataLength();
final int pttnStart = pattern.getMetaDataLength();
- int startMatch = 0;
int maxStart = srcUtfLen - pttnUtfLen;
- while (startMatch <= maxStart) {
- int c1 = startMatch;
+ while (startMatchPos <= maxStart) {
+ int c1 = startMatchPos;
int c2 = 0;
while (c1 < srcUtfLen && c2 < pttnUtfLen) {
char ch1 = src.charAt(srcStart + c1);
@@ -209,9 +226,9 @@
c2 += pattern.charSize(pttnStart + c2);
}
if (c2 == pttnUtfLen) {
- return startMatch;
+ return startMatchPos;
}
- startMatch += src.charSize(srcStart + startMatch);
+ startMatchPos += src.charSize(srcStart + startMatchPos);
}
return -1;
}