Clean up hivesterix POMs and the binary assembly

- hivesterix-dist/pom.xml: drop the junit, hadoop-core, hive-*, and hbase
  dependencies, add servlet-api 2.5, and bind a maven-assembly-plugin
  execution to the package phase using src/main/assembly/binary-assembly.xml.
- binary-assembly.xml: package src/main/resources/conf as conf/ and
  src/main/resources/scripts as bin/, and include only *.jar from
  target/appassembler/lib.
- conf: replace hive-default.xml with hive-site.xml and quiet the
  hive-log4j root category from INFO to FATAL.
- Driver.java: suppress rawtypes warnings and fix the "hive-sterix" comment.
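
A minimal way to sanity-check the new packaging (the module path, output
format, and the need to build sibling modules first are assumptions; the
assembly <formats> are defined in binary-assembly.xml):

    # from the repository root, after the sibling hyracks/algebricks
    # SNAPSHOT modules have been built or installed
    cd hivesterix/hivesterix-dist
    mvn package
    # expect target/hivesterix-dist-0.2.7-SNAPSHOT-binary-assembly.<format>
    # containing bin/ (scripts), conf/, and lib/ (jars)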
diff --git a/hivesterix/hivesterix-dist/pom.xml b/hivesterix/hivesterix-dist/pom.xml
index 83de061..1ea7e43 100644
--- a/hivesterix/hivesterix-dist/pom.xml
+++ b/hivesterix/hivesterix-dist/pom.xml
@@ -1,102 +1,29 @@
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>hivesterix-dist</artifactId>
<name>hivesterix-dist</name>
<parent>
- <groupId>edu.uci.ics.hyracks</groupId>
- <artifactId>hivesterix</artifactId>
- <version>0.2.7-SNAPSHOT</version>
- </parent>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix</artifactId>
+ <version>0.2.7-SNAPSHOT</version>
+ </parent>
<dependencies>
<dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <version>4.8.1</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <version>0.20.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hive</groupId>
- <artifactId>hive-cli</artifactId>
- <version>0.11.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hive</groupId>
- <artifactId>hive-common</artifactId>
- <version>0.11.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hive</groupId>
- <artifactId>hive-exec</artifactId>
- <version>0.11.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hive</groupId>
- <artifactId>hive-hwi</artifactId>
- <version>0.11.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hive</groupId>
- <artifactId>hive-jdbc</artifactId>
- <version>0.11.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hive</groupId>
- <artifactId>hive-metastore</artifactId>
- <version>0.11.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hive</groupId>
- <artifactId>hive-service</artifactId>
- <version>0.11.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hive</groupId>
- <artifactId>hive-shims</artifactId>
- <version>0.11.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hive</groupId>
- <artifactId>hive-serde</artifactId>
- <version>0.11.0</version>
+ <groupId>javax.servlet</groupId>
+ <artifactId>servlet-api</artifactId>
+ <version>2.5</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
@@ -115,13 +42,6 @@
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
- <version>0.90.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-compiler</artifactId>
<version>0.2.7-SNAPSHOT</version>
@@ -289,6 +209,23 @@
</executions>
</plugin>
<plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <version>2.2-beta-5</version>
+ <executions>
+ <execution>
+ <configuration>
+ <descriptors>
+ <descriptor>src/main/assembly/binary-assembly.xml</descriptor>
+ </descriptors>
+ </configuration>
+ <phase>package</phase>
+ <goals>
+ <goal>attached</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>2.5</version>
<configuration>
diff --git a/hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml b/hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml
index aeb3fb4..f7d8092 100755
--- a/hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml
+++ b/hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml
@@ -1,17 +1,12 @@
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
<assembly>
<id>binary-assembly</id>
<formats>
@@ -21,20 +16,22 @@
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
- <directory>target/appassembler/bin</directory>
+ <directory>src/main/resources/conf</directory>
+ <outputDirectory>conf</outputDirectory>
+ <fileMode>0755</fileMode>
+ </fileSet>
+ <fileSet>
+ <directory>src/main/resources/scripts</directory>
<outputDirectory>bin</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>target/appassembler/lib</directory>
<outputDirectory>lib</outputDirectory>
- </fileSet>
- <fileSet>
- <directory>target</directory>
- <outputDirectory>lib</outputDirectory>
<includes>
<include>*.jar</include>
</includes>
+ <fileMode>0755</fileMode>
</fileSet>
</fileSets>
-</assembly>
+</assembly>
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
index a7ccb5a..64a3f12 100644
--- a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
@@ -116,10 +116,10 @@
import edu.uci.ics.hivesterix.runtime.exec.HyracksExecutionEngine;
import edu.uci.ics.hivesterix.runtime.exec.IExecutionEngine;
-@SuppressWarnings({ "deprecation", "unchecked" })
+@SuppressWarnings({ "deprecation", "unchecked", "rawtypes" })
public class Driver implements CommandProcessor {
- // hive-sterix
+ // hivesterix
private IExecutionEngine engine;
private boolean hivesterix = false;
private Set<Task> executedConditionalTsks = new HashSet<Task>();
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/hive-default.xml b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-default.xml
deleted file mode 100644
index 23a842a..0000000
--- a/hivesterix/hivesterix-dist/src/main/resources/conf/hive-default.xml
+++ /dev/null
@@ -1,773 +0,0 @@
-<?xml version="1.0"?>
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-
-<configuration>
-
- <!-- Hive Configuration can either be stored in this file or in the hadoop
- configuration files -->
- <!-- that are implied by Hadoop setup variables. -->
- <!-- Aside from Hadoop setup variables - this file is provided as a convenience
- so that Hive -->
- <!-- users do not have to edit hadoop configuration files (that may be managed
- as a centralized -->
- <!-- resource). -->
-
- <!-- Hive Execution Parameters -->
- <property>
- <name>mapred.reduce.tasks</name>
- <value>-1</value>
- <description>The default number of reduce tasks per job. Typically set
- to a prime close to the number of available hosts. Ignored when
- mapred.job.tracker is "local". Hadoop set this to 1 by default,
- whereas hive uses -1 as its default value.
- By setting this property to -1, Hive will automatically figure out what
- should be the number of reducers.
- </description>
- </property>
-
- <property>
- <name>hive.hyracks.connectorpolicy</name>
- <value>PIPELINING</value>
- </property>
-
- <property>
- <name>hive.hyracks.parrallelism</name>
- <value>4</value>
- </property>
-
- <property>
- <name>hive.algebricks.groupby.external</name>
- <value>true</value>
- </property>
-
- <property>
- <name>hive.algebricks.groupby.external.memory</name>
- <value>33554432</value>
- </property>
-
- <property>
- <name>hive.algebricks.sort.memory</name>
- <value>33554432</value>
- </property>
-
- <property>
- <name>hive.exec.reducers.bytes.per.reducer</name>
- <value>1000000000</value>
- <description>size per reducer.The default is 1G, i.e if the input size
- is 10G, it will use 10 reducers.</description>
- </property>
-
- <property>
- <name>hive.exec.reducers.max</name>
- <value>999</value>
- <description>max number of reducers will be used. If the one
- specified in the configuration parameter mapred.reduce.tasks is
- negative, hive will use this one as the max number of reducers when
- automatically determine number of reducers.</description>
- </property>
-
- <property>
- <name>hive.exec.scratchdir</name>
- <value>/hive-${user.name}</value>
- <description>Scratch space for Hive jobs</description>
- </property>
-
- <property>
- <name>hive.test.mode</name>
- <value>false</value>
- <description>whether hive is running in test mode. If yes, it turns on
- sampling and prefixes the output tablename</description>
- </property>
-
- <property>
- <name>hive.test.mode.prefix</name>
- <value>test_</value>
- <description>if hive is running in test mode, prefixes the output
- table by this string</description>
- </property>
-
- <!-- If the input table is not bucketed, the denominator of the tablesample
- is determinied by the parameter below -->
- <!-- For example, the following query: -->
- <!-- INSERT OVERWRITE TABLE dest -->
- <!-- SELECT col1 from src -->
- <!-- would be converted to -->
- <!-- INSERT OVERWRITE TABLE test_dest -->
- <!-- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1)) -->
- <property>
- <name>hive.test.mode.samplefreq</name>
- <value>32</value>
- <description>if hive is running in test mode and table is not
- bucketed, sampling frequency</description>
- </property>
-
- <property>
- <name>hive.test.mode.nosamplelist</name>
- <value></value>
- <description>if hive is running in test mode, dont sample the above
- comma seperated list of tables</description>
- </property>
-
- <property>
- <name>hive.metastore.local</name>
- <value>true</value>
- <description>controls whether to connect to remove metastore server or
- open a new metastore server in Hive Client JVM</description>
- </property>
-
- <property>
- <name>javax.jdo.option.ConnectionURL</name>
- <value>jdbc:derby:;databaseName=metastore_db;create=true</value>
- <description>JDBC connect string for a JDBC metastore</description>
- </property>
-
- <property>
- <name>javax.jdo.option.ConnectionDriverName</name>
- <value>org.apache.derby.jdbc.EmbeddedDriver</value>
- <description>Driver class name for a JDBC metastore</description>
- </property>
-
- <property>
- <name>javax.jdo.PersistenceManagerFactoryClass</name>
- <value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
- <description>class implementing the jdo persistence</description>
- </property>
-
- <property>
- <name>datanucleus.connectionPoolingType</name>
- <value>DBCP</value>
- <description>Uses a DBCP connection pool for JDBC metastore
- </description>
- </property>
-
- <property>
- <name>javax.jdo.option.DetachAllOnCommit</name>
- <value>true</value>
- <description>detaches all objects from session so that they can be
- used after transaction is committed</description>
- </property>
-
- <property>
- <name>javax.jdo.option.NonTransactionalRead</name>
- <value>true</value>
- <description>reads outside of transactions</description>
- </property>
-
- <property>
- <name>javax.jdo.option.ConnectionUserName</name>
- <value>APP</value>
- <description>username to use against metastore database</description>
- </property>
-
- <property>
- <name>javax.jdo.option.ConnectionPassword</name>
- <value>mine</value>
- <description>password to use against metastore database</description>
- </property>
-
- <property>
- <name>datanucleus.validateTables</name>
- <value>false</value>
- <description>validates existing schema against code. turn this on if
- you want to verify existing schema </description>
- </property>
-
- <property>
- <name>datanucleus.validateColumns</name>
- <value>false</value>
- <description>validates existing schema against code. turn this on if
- you want to verify existing schema </description>
- </property>
-
- <property>
- <name>datanucleus.validateConstraints</name>
- <value>false</value>
- <description>validates existing schema against code. turn this on if
- you want to verify existing schema </description>
- </property>
-
- <property>
- <name>datanucleus.storeManagerType</name>
- <value>rdbms</value>
- <description>metadata store type</description>
- </property>
-
- <property>
- <name>datanucleus.autoCreateSchema</name>
- <value>true</value>
- <description>creates necessary schema on a startup if one doesn't
- exist. set this to false, after creating it once</description>
- </property>
-
- <property>
- <name>datanucleus.autoStartMechanismMode</name>
- <value>checked</value>
- <description>throw exception if metadata tables are incorrect
- </description>
- </property>
-
- <property>
- <name>datanucleus.transactionIsolation</name>
- <value>read-committed</value>
- <description>Default transaction isolation level for identity
- generation. </description>
- </property>
-
- <property>
- <name>datanucleus.cache.level2</name>
- <value>false</value>
- <description>Use a level 2 cache. Turn this off if metadata is changed
- independently of hive metastore server</description>
- </property>
-
- <property>
- <name>datanucleus.cache.level2.type</name>
- <value>SOFT</value>
- <description>SOFT=soft reference based cache, WEAK=weak reference
- based cache.</description>
- </property>
-
- <property>
- <name>datanucleus.identifierFactory</name>
- <value>datanucleus</value>
- <description>Name of the identifier factory to use when generating
- table/column names etc. 'datanucleus' is used for backward
- compatibility</description>
- </property>
-
- <property>
- <name>hive.metastore.warehouse.dir</name>
- <value>/user/hivesterix</value>
- <description>location of default database for the warehouse
- </description>
- </property>
-
- <property>
- <name>hive.metastore.connect.retries</name>
- <value>5</value>
- <description>Number of retries while opening a connection to metastore
- </description>
- </property>
-
- <property>
- <name>hive.metastore.rawstore.impl</name>
- <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
- <description>Name of the class that implements
- org.apache.hadoop.hive.metastore.rawstore interface. This class is
- used to store and retrieval of raw metadata objects such as table,
- database</description>
- </property>
-
- <property>
- <name>hive.default.fileformat</name>
- <value>TextFile</value>
- <description>Default file format for CREATE TABLE statement. Options
- are TextFile and SequenceFile. Users can explicitly say CREATE TABLE
- ... STORED AS <TEXTFILE|SEQUENCEFILE> to override</description>
- </property>
-
- <property>
- <name>hive.fileformat.check</name>
- <value>true</value>
- <description>Whether to check file format or not when loading data
- files</description>
- </property>
-
- <property>
- <name>hive.map.aggr</name>
- <value>true</value>
- <description>Whether to use map-side aggregation in Hive Group By
- queries</description>
- </property>
-
- <property>
- <name>hive.groupby.skewindata</name>
- <value>false</value>
- <description>Whether there is skew in data to optimize group by
- queries</description>
- </property>
-
- <property>
- <name>hive.groupby.mapaggr.checkinterval</name>
- <value>100000</value>
- <description>Number of rows after which size of the grouping
- keys/aggregation classes is performed</description>
- </property>
-
- <property>
- <name>hive.mapred.local.mem</name>
- <value>0</value>
- <description>For local mode, memory of the mappers/reducers
- </description>
- </property>
-
- <property>
- <name>hive.map.aggr.hash.percentmemory</name>
- <value>0.5</value>
- <description>Portion of total memory to be used by map-side grup
- aggregation hash table</description>
- </property>
-
- <property>
- <name>hive.map.aggr.hash.min.reduction</name>
- <value>0.5</value>
- <description>Hash aggregation will be turned off if the ratio between
- hash
- table size and input rows is bigger than this number. Set to 1 to make
- sure
- hash aggregation is never turned off.</description>
- </property>
-
- <property>
- <name>hive.optimize.cp</name>
- <value>true</value>
- <description>Whether to enable column pruner</description>
- </property>
-
- <property>
- <name>hive.optimize.ppd</name>
- <value>true</value>
- <description>Whether to enable predicate pushdown</description>
- </property>
-
- <property>
- <name>hive.optimize.pruner</name>
- <value>true</value>
- <description>Whether to enable the new partition pruner which depends
- on predicate pushdown. If this is disabled,
- the old partition pruner which is based on AST will be enabled.
- </description>
- </property>
-
- <property>
- <name>hive.optimize.groupby</name>
- <value>true</value>
- <description>Whether to enable the bucketed group by from bucketed
- partitions/tables.</description>
- </property>
-
- <property>
- <name>hive.join.emit.interval</name>
- <value>1000</value>
- <description>How many rows in the right-most join operand Hive should
- buffer before emitting the join result. </description>
- </property>
-
- <property>
- <name>hive.join.cache.size</name>
- <value>25000</value>
- <description>How many rows in the joining tables (except the streaming
- table) should be cached in memory. </description>
- </property>
-
- <property>
- <name>hive.mapjoin.bucket.cache.size</name>
- <value>100</value>
- <description>How many values in each keys in the map-joined table
- should be cached in memory. </description>
- </property>
-
- <property>
- <name>hive.mapjoin.maxsize</name>
- <value>100000</value>
- <description>Maximum # of rows of the small table that can be handled
- by map-side join. If the size is reached and hive.task.progress is
- set, a fatal error counter is set and the job will be killed.
- </description>
- </property>
-
- <property>
- <name>hive.mapjoin.cache.numrows</name>
- <value>25000</value>
- <description>How many rows should be cached by jdbm for map join.
- </description>
- </property>
-
- <property>
- <name>hive.optimize.skewjoin</name>
- <value>false</value>
- <description>Whether to enable skew join optimization. </description>
- </property>
-
- <property>
- <name>hive.skewjoin.key</name>
- <value>100000</value>
- <description>Determine if we get a skew key in join. If we see more
- than the specified number of rows with the same key in join operator,
- we think the key as a skew join key. </description>
- </property>
-
- <property>
- <name>hive.skewjoin.mapjoin.map.tasks</name>
- <value>10000</value>
- <description> Determine the number of map task used in the follow up
- map join job
- for a skew join. It should be used together with
- hive.skewjoin.mapjoin.min.split
- to perform a fine grained control.</description>
- </property>
-
- <property>
- <name>hive.skewjoin.mapjoin.min.split</name>
- <value>33554432</value>
- <description> Determine the number of map task at most used in the
- follow up map join job
- for a skew join by specifying the minimum split size. It should be used
- together with
- hive.skewjoin.mapjoin.map.tasks to perform a fine grained control.</description>
- </property>
-
- <property>
- <name>hive.mapred.mode</name>
- <value>nonstrict</value>
- <description>The mode in which the hive operations are being
- performed. In strict mode, some risky queries are not allowed to run
- </description>
- </property>
-
- <property>
- <name>hive.exec.script.maxerrsize</name>
- <value>100000</value>
- <description>Maximum number of bytes a script is allowed to emit to
- standard error (per map-reduce task). This prevents runaway scripts
- from filling logs partitions to capacity </description>
- </property>
-
- <property>
- <name>hive.exec.script.allow.partial.consumption</name>
- <value>false</value>
- <description> When enabled, this option allows a user script to exit
- successfully without consuming all the data from the standard input.
- </description>
- </property>
-
- <property>
- <name>hive.script.operator.id.env.var</name>
- <value>HIVE_SCRIPT_OPERATOR_ID</value>
- <description> Name of the environment variable that holds the unique
- script operator ID in the user's transform function (the custom
- mapper/reducer that the user has specified in the query)
- </description>
- </property>
-
- <property>
- <name>hive.exec.compress.output</name>
- <value>false</value>
- <description> This controls whether the final outputs of a query (to a
- local/hdfs file or a hive table) is compressed. The compression codec
- and other options are determined from hadoop config variables
- mapred.output.compress* </description>
- </property>
-
- <property>
- <name>hive.exec.compress.intermediate</name>
- <value>false</value>
- <description> This controls whether intermediate files produced by
- hive between multiple map-reduce jobs are compressed. The compression
- codec and other options are determined from hadoop config variables
- mapred.output.compress* </description>
- </property>
-
- <property>
- <name>hive.exec.parallel</name>
- <value>false</value>
- <description>Whether to execute jobs in parallel</description>
- </property>
-
- <property>
- <name>hive.exec.parallel.thread.number</name>
- <value>8</value>
- <description>How many jobs at most can be executed in parallel
- </description>
- </property>
-
- <property>
- <name>hive.hwi.war.file</name>
- <value>lib\hive-hwi-0.7.0.war</value>
- <description>This sets the path to the HWI war file, relative to
- ${HIVE_HOME}. </description>
- </property>
-
- <property>
- <name>hive.hwi.listen.host</name>
- <value>0.0.0.0</value>
- <description>This is the host address the Hive Web Interface will
- listen on</description>
- </property>
-
- <property>
- <name>hive.hwi.listen.port</name>
- <value>9999</value>
- <description>This is the port the Hive Web Interface will listen on
- </description>
- </property>
-
- <property>
- <name>hive.exec.pre.hooks</name>
- <value></value>
- <description>Pre Execute Hook for Tests</description>
- </property>
-
- <property>
- <name>hive.merge.mapfiles</name>
- <value>true</value>
- <description>Merge small files at the end of a map-only job
- </description>
- </property>
-
- <property>
- <name>hive.merge.mapredfiles</name>
- <value>false</value>
- <description>Merge small files at the end of a map-reduce job
- </description>
- </property>
-
- <property>
- <name>hive.heartbeat.interval</name>
- <value>1000</value>
- <description>Send a heartbeat after this interval - used by mapjoin
- and filter operators</description>
- </property>
-
- <property>
- <name>hive.merge.size.per.task</name>
- <value>256000000</value>
- <description>Size of merged files at the end of the job</description>
- </property>
-
- <property>
- <name>hive.merge.size.smallfiles.avgsize</name>
- <value>16000000</value>
- <description>When the average output file size of a job is less than
- this number, Hive will start an additional map-reduce job to merge
- the output files into bigger files. This is only done for map-only
- jobs if hive.merge.mapfiles is true, and for map-reduce jobs if
- hive.merge.mapredfiles is true.</description>
- </property>
-
- <property>
- <name>hive.script.auto.progress</name>
- <value>false</value>
- <description>Whether Hive Tranform/Map/Reduce Clause should
- automatically send progress information to TaskTracker to avoid the
- task getting killed because of inactivity. Hive sends progress
- information when the script is outputting to stderr. This option
- removes the need of periodically producing stderr messages, but users
- should be cautious because this may prevent infinite loops in the
- scripts to be killed by TaskTracker. </description>
- </property>
-
- <property>
- <name>hive.script.serde</name>
- <value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value>
- <description>The default serde for trasmitting input data to and
- reading output data from the user scripts. </description>
- </property>
-
- <property>
- <name>hive.script.recordreader</name>
- <value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value>
- <description>The default record reader for reading data from the user
- scripts. </description>
- </property>
-
- <property>
- <name>hive.script.recordwriter</name>
- <value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value>
- <description>The default record writer for writing data to the user
- scripts. </description>
- </property>
-
- <property>
- <name>hive.input.format</name>
- <value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
- <description>The default input format, if it is not specified, the
- system assigns it. It is set to HiveInputFormat for hadoop versions
- 17, 18 and 19, whereas it is set to CombinedHiveInputFormat for
- hadoop 20. The user can always overwrite it - if there is a bug in
- CombinedHiveInputFormat, it can always be manually set to
- HiveInputFormat. </description>
- </property>
-
- <property>
- <name>hive.udtf.auto.progress</name>
- <value>false</value>
- <description>Whether Hive should automatically send progress
- information to TaskTracker when using UDTF's to prevent the task
- getting killed because of inactivity. Users should be cautious
- because this may prevent TaskTracker from killing tasks with infinte
- loops. </description>
- </property>
-
- <property>
- <name>hive.mapred.reduce.tasks.speculative.execution</name>
- <value>true</value>
- <description>Whether speculative execution for reducers should be
- turned on. </description>
- </property>
-
- <property>
- <name>hive.exec.counters.pull.interval</name>
- <value>1000</value>
- <description>The interval with which to poll the JobTracker for the
- counters the running job. The smaller it is the more load there will
- be on the jobtracker, the higher it is the less granular the caught
- will be.</description>
- </property>
-
- <property>
- <name>hive.enforce.bucketing</name>
- <value>false</value>
- <description>Whether bucketing is enforced. If true, while inserting
- into the table, bucketing is enforced. </description>
- </property>
-
- <property>
- <name>hive.enforce.sorting</name>
- <value>false</value>
- <description>Whether sorting is enforced. If true, while inserting
- into the table, sorting is enforced. </description>
- </property>
-
- <property>
- <name>hive.metastore.ds.connection.url.hook</name>
- <value></value>
- <description>Name of the hook to use for retriving the JDO connection
- URL. If empty, the value in javax.jdo.option.ConnectionURL is used
- </description>
- </property>
-
- <property>
- <name>hive.metastore.ds.retry.attempts</name>
- <value>1</value>
- <description>The number of times to retry a metastore call if there
- were a connection error</description>
- </property>
-
- <property>
- <name>hive.metastore.ds.retry.interval</name>
- <value>1000</value>
- <description>The number of miliseconds between metastore retry
- attempts</description>
- </property>
-
- <property>
- <name>hive.metastore.server.min.threads</name>
- <value>200</value>
- <description>Minimum number of worker threads in the Thrift server's
- pool.</description>
- </property>
-
- <property>
- <name>hive.metastore.server.max.threads</name>
- <value>100000</value>
- <description>Maximum number of worker threads in the Thrift server's
- pool.</description>
- </property>
-
- <property>
- <name>hive.metastore.server.tcp.keepalive</name>
- <value>true</value>
- <description>Whether to enable TCP keepalive for the metastore server.
- Keepalive will prevent accumulation of half-open connections.
- </description>
- </property>
-
- <property>
- <name>hive.optimize.reducededuplication</name>
- <value>true</value>
- <description>Remove extra map-reduce jobs if the data is already
- clustered by the same key which needs to be used again. This should
- always be set to true. Since it is a new feature, it has been made
- configurable.</description>
- </property>
-
- <property>
- <name>hive.exec.dynamic.partition</name>
- <value>false</value>
- <description>Whether or not to allow dynamic partitions in DML/DDL.
- </description>
- </property>
-
- <property>
- <name>hive.exec.dynamic.partition.mode</name>
- <value>strict</value>
- <description>In strict mode, the user must specify at least one static
- partition in case the user accidentally overwrites all partitions.
- </description>
- </property>
-
- <property>
- <name>hive.exec.max.dynamic.partitions</name>
- <value>1000</value>
- <description>Maximum number of dynamic partitions allowed to be
- created in total.</description>
- </property>
-
- <property>
- <name>hive.exec.max.dynamic.partitions.pernode</name>
- <value>100</value>
- <description>Maximum number of dynamic partitions allowed to be
- created in each mapper/reducer node.</description>
- </property>
-
- <property>
- <name>hive.default.partition.name</name>
- <value>__HIVE_DEFAULT_PARTITION__</value>
- <description>The default partition name in case the dynamic partition
- column value is null/empty string or anyother values that cannot be
- escaped. This value must not contain any special character used in
- HDFS URI (e.g., ':', '%', '/' etc). The user has to be aware that the
- dynamic partition value should not contain this value to avoid
- confusions.</description>
- </property>
-
- <property>
- <name>fs.har.impl</name>
- <value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
- <description>The implementation for accessing Hadoop Archives. Note
- that this won't be applicable to Hadoop vers less than 0.20
- </description>
- </property>
-
- <property>
- <name>hive.archive.enabled</name>
- <value>false</value>
- <description>Whether archiving operations are permitted</description>
- </property>
-
- <property>
- <name>hive.archive.har.parentdir.settable</name>
- <value>false</value>
- <description>In new Hadoop versions, the parent directory must be set
- while
- creating a HAR. Because this functionality is hard to detect with just
- version
- numbers, this conf var needs to be set manually.</description>
- </property>
-
- <!-- HBase Storage Handler Parameters -->
-
- <property>
- <name>hive.hbase.wal.enabled</name>
- <value>true</value>
- <description>Whether writes to HBase should be forced to the
- write-ahead log. Disabling this improves HBase write performance at
- the risk of lost writes in case of a crash.</description>
- </property>
-
-</configuration>
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/hive-log4j.properties b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-log4j.properties
index eab38a6..6f195f5 100644
--- a/hivesterix/hivesterix-dist/src/main/resources/conf/hive-log4j.properties
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-log4j.properties
@@ -31,7 +31,7 @@
# FATAL, ERROR, WARN, INFO, DEBUG
#
#------------------------------------------------------------------------------
-log4j.rootCategory=INFO, S
+log4j.rootCategory=FATAL, S
log4j.logger.com.dappit.Dapper.parser=ERROR
log4j.logger.org.w3c.tidy=FATAL
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/hive-site.xml b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-site.xml
new file mode 100644
index 0000000..ccfcd74
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-site.xml
@@ -0,0 +1,5189 @@
+<?xml version="1.0"?>
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+ <configuration>
+ <!-- Hivesterix Execution Parameters -->
+ <property>
+ <name>hive.hyracks.connectorpolicy</name>
+ <value>PIPELINING</value>
+ </property>
+
+ <property>
+ <name>hive.hyracks.parrallelism</name>
+ <value>4</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.groupby.external</name>
+ <value>true</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.groupby.external.memory</name>
+ <value>33554432</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.sort.memory</name>
+ <value>33554432</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.framesize</name>
+ <value>32768</value>
+ </property>
+
+ <!-- Hive Execution Parameters -->
+ <property>
+ <name>mapred.reduce.tasks</name>
+ <value>-1</value>
+ <description>The default number of reduce tasks per job. Typically
+ set
+ to a prime close to the number of available hosts. Ignored when
+ mapred.job.tracker is "local". Hadoop set this to 1 by default,
+ whereas hive uses -1 as its default value.
+ By setting this property
+ to -1, Hive will automatically figure out
+ what should be the number
+ of reducers.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.reducers.bytes.per.reducer</name>
+ <value>1000000000</value>
+ <description>size per reducer.The default is 1G, i.e if the input
+ size is 10G, it will use 10 reducers.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.reducers.max</name>
+ <value>999</value>
+ <description>max number of reducers will be used. If the one
+ specified in the configuration parameter mapred.reduce.tasks is
+ negative, hive will use this one as the max number of reducers when
+ automatically determine number of reducers.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cli.print.header</name>
+ <value>false</value>
+ <description>Whether to print the names of the columns in query
+ output.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cli.print.current.db</name>
+ <value>false</value>
+ <description>Whether to include the current database in the hive
+ prompt.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cli.prompt</name>
+ <value>hive</value>
+ <description>Command line prompt configuration value. Other hiveconf
+ can be used in
+ this configuration value. Variable substitution will
+ only be invoked at
+ the hive
+ cli startup.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cli.pretty.output.num.cols</name>
+ <value>-1</value>
+ <description>The number of columns to use when formatting output
+ generated
+ by the DESCRIBE PRETTY table_name command. If the value of
+ this
+ property
+ is -1, then hive will use the auto-detected terminal
+ width.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.scratchdir</name>
+ <value>/tmp/hive-${user.name}</value>
+ <description>Scratch space for Hive jobs</description>
+ </property>
+
+ <property>
+ <name>hive.exec.local.scratchdir</name>
+ <value>/tmp/${user.name}</value>
+ <description>Local scratch space for Hive jobs</description>
+ </property>
+
+ <property>
+ <name>hive.test.mode</name>
+ <value>false</value>
+ <description>whether hive is running in test mode. If yes, it turns
+ on sampling and prefixes the output tablename
+ </description>
+ </property>
+
+ <property>
+ <name>hive.test.mode.prefix</name>
+ <value>test_</value>
+ <description>if hive is running in test mode, prefixes the output
+ table by this string
+ </description>
+ </property>
+
+ <!-- If the input table is not bucketed, the denominator of the tablesample
+ is determinied by the parameter below -->
+ <!-- For example, the following query: -->
+ <!-- INSERT OVERWRITE TABLE dest -->
+ <!-- SELECT col1 from src -->
+ <!-- would be converted to -->
+ <!-- INSERT OVERWRITE TABLE test_dest -->
+ <!-- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1)) -->
+ <property>
+ <name>hive.test.mode.samplefreq</name>
+ <value>32</value>
+ <description>if hive is running in test mode and table is not
+ bucketed, sampling frequency
+ </description>
+ </property>
+
+ <property>
+ <name>hive.test.mode.nosamplelist</name>
+ <value></value>
+ <description>if hive is running in test mode, dont sample the above
+ comma seperated list of tables
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.uris</name>
+ <value></value>
+ <description>Thrift uri for the remote metastore. Used by metastore
+ client to connect to remote metastore.
+ </description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionURL</name>
+ <value>jdbc:derby:;databaseName=metastore_db;create=true</value>
+ <description>JDBC connect string for a JDBC metastore</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionDriverName</name>
+ <value>org.apache.derby.jdbc.EmbeddedDriver</value>
+ <description>Driver class name for a JDBC metastore</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.PersistenceManagerFactoryClass</name>
+ <value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
+ <description>class implementing the jdo persistence</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.DetachAllOnCommit</name>
+ <value>true</value>
+ <description>detaches all objects from session so that they can be
+ used after transaction is committed
+ </description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.NonTransactionalRead</name>
+ <value>true</value>
+ <description>reads outside of transactions</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionUserName</name>
+ <value>APP</value>
+ <description>username to use against metastore database</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionPassword</name>
+ <value>mine</value>
+ <description>password to use against metastore database</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.Multithreaded</name>
+ <value>true</value>
+ <description>Set this to true if multiple threads access metastore
+ through JDO concurrently.
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.connectionPoolingType</name>
+ <value>DBCP</value>
+ <description>Uses a DBCP connection pool for JDBC metastore
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.validateTables</name>
+ <value>false</value>
+ <description>validates existing schema against code. turn this on if
+ you want to verify existing schema
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.validateColumns</name>
+ <value>false</value>
+ <description>validates existing schema against code. turn this on if
+ you want to verify existing schema
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.validateConstraints</name>
+ <value>false</value>
+ <description>validates existing schema against code. turn this on if
+ you want to verify existing schema
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.storeManagerType</name>
+ <value>rdbms</value>
+ <description>metadata store type</description>
+ </property>
+
+ <property>
+ <name>datanucleus.autoCreateSchema</name>
+ <value>true</value>
+ <description>creates necessary schema on a startup if one doesn't
+ exist. set this to false, after creating it once
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.autoStartMechanismMode</name>
+ <value>checked</value>
+ <description>throw exception if metadata tables are incorrect
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.transactionIsolation</name>
+ <value>read-committed</value>
+ <description>Default transaction isolation level for identity
+ generation.
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.cache.level2</name>
+ <value>false</value>
+ <description>Use a level 2 cache. Turn this off if metadata is
+ changed independently of hive metastore server
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.cache.level2.type</name>
+ <value>SOFT</value>
+ <description>SOFT=soft reference based cache, WEAK=weak reference
+ based cache.
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.identifierFactory</name>
+ <value>datanucleus</value>
+ <description>Name of the identifier factory to use when generating
+ table/column names etc. 'datanucleus' is used for backward
+ compatibility
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.plugin.pluginRegistryBundleCheck</name>
+ <value>LOG</value>
+ <description>Defines what happens when plugin bundles are found and
+ are duplicated [EXCEPTION|LOG|NONE]
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.warehouse.dir</name>
+ <value>/user/hive/warehouse</value>
+ <description>location of default database for the warehouse
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.execute.setugi</name>
+ <value>false</value>
+ <description>In unsecure mode, setting this property to true will
+ cause the metastore to execute DFS operations using the client's
+ reported user and group permissions. Note that this property must be
+ set on both the client and server sides. Further note that its best
+ effort. If client sets its to true and server sets it to false,
+ client setting will be ignored.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.event.listeners</name>
+ <value></value>
+ <description>list of comma seperated listeners for metastore events.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.partition.inherit.table.properties</name>
+ <value></value>
+ <description>list of comma seperated keys occurring in table
+ properties which will get inherited to newly created partitions. *
+ implies all the keys will get inherited.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metadata.export.location</name>
+ <value></value>
+ <description>When used in conjunction with the
+ org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event
+ listener, it is the location to which the metadata will be exported.
+ The default is an empty string, which results in the metadata being
+ exported to the current user's home directory on HDFS.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metadata.move.exported.metadata.to.trash</name>
+ <value></value>
+ <description>When used in conjunction with the
+ org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event
+ listener, this setting determines if the metadata that is exported
+ will subsequently be moved to the user's trash directory alongside
+ the dropped table data. This ensures that the metadata will be
+ cleaned up along with the dropped table data.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.partition.name.whitelist.pattern</name>
+ <value></value>
+ <description>Partition names will be checked against this regex
+ pattern and rejected if not matched.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.end.function.listeners</name>
+ <value></value>
+ <description>list of comma separated listeners for the end of
+ metastore functions.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.event.expiry.duration</name>
+ <value>0</value>
+ <description>Duration after which events expire from events table (in
+ seconds)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.event.clean.freq</name>
+ <value>0</value>
+ <description>Frequency at which timer task runs to purge expired
+ events in metastore(in seconds).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.connect.retries</name>
+ <value>5</value>
+ <description>Number of retries while opening a connection to
+ metastore
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.failure.retries</name>
+ <value>3</value>
+ <description>Number of retries upon failure of Thrift metastore calls
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.client.connect.retry.delay</name>
+ <value>1</value>
+ <description>Number of seconds for the client to wait between
+ consecutive connection attempts
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.client.socket.timeout</name>
+ <value>20</value>
+ <description>MetaStore Client socket timeout in seconds</description>
+ </property>
+
+ <property>
+ <name>hive.metastore.rawstore.impl</name>
+ <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
+ <description>Name of the class that implements
+ org.apache.hadoop.hive.metastore.rawstore interface. This class is
+ used to store and retrieval of raw metadata objects such as table,
+ database
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.batch.retrieve.max</name>
+ <value>300</value>
+ <description>Maximum number of objects (tables/partitions) can be
+ retrieved from metastore in one batch. The higher the number, the
+ less the number of round trips is needed to the Hive metastore
+ server, but it may also cause higher memory requirement at the
+ client side.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.batch.retrieve.table.partition.max</name>
+ <value>1000</value>
+ <description>Maximum number of table partitions that metastore
+ internally retrieves in one batch.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.default.fileformat</name>
+ <value>TextFile</value>
+ <description>Default file format for CREATE TABLE statement. Options
+ are TextFile and SequenceFile. Users can explicitly say CREATE TABLE
+ ... STORED AS <TEXTFILE|SEQUENCEFILE> to override
+ </description>
+ </property>
+
+ <property>
+ <name>hive.fileformat.check</name>
+ <value>true</value>
+ <description>Whether to check file format or not when loading data
+ files
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr</name>
+ <value>true</value>
+ <description>Whether to use map-side aggregation in Hive Group By
+ queries
+ </description>
+ </property>
+
+ <property>
+ <name>hive.groupby.skewindata</name>
+ <value>false</value>
+ <description>Whether there is skew in data to optimize group by
+ queries
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.multigroupby.common.distincts</name>
+ <value>true</value>
+ <description>Whether to optimize a multi-groupby query with the same
+ distinct.
+ Consider a query like:
+
+ from src
+ insert overwrite table dest1
+ select col1, count(distinct colx) group by
+ col1
+ insert overwrite table
+ dest2 select col2, count(distinct colx) group by
+ col2;
+
+ With this
+ parameter set to true, first we spray by the distinct value
+ (colx),
+ and then
+ perform the 2 groups bys. This makes sense if map-side
+ aggregation is
+ turned off. However,
+ with maps-side aggregation, it
+ might be useful in some cases to treat
+ the 2 inserts independently,
+ thereby performing the query above in 2MR jobs instead of 3 (due to
+ spraying by distinct key first).
+ If this parameter is turned off, we
+ dont consider the fact that the
+ distinct key is the same across
+ different MR jobs.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.groupby.mapaggr.checkinterval</name>
+ <value>100000</value>
+ <description>Number of rows after which size of the grouping
+ keys/aggregation classes is performed
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.local.mem</name>
+ <value>0</value>
+ <description>For local mode, memory of the mappers/reducers
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.followby.map.aggr.hash.percentmemory</name>
+ <value>0.3</value>
+ <description>Portion of total memory to be used by map-side grup
+ aggregation hash table, when this group by is followed by map join
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr.hash.force.flush.memory.threshold</name>
+ <value>0.9</value>
+ <description>The max memory to be used by map-side grup aggregation
+ hash table, if the memory usage is higher than this number, force to
+ flush data
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr.hash.percentmemory</name>
+ <value>0.5</value>
+ <description>Portion of total memory to be used by map-side grup
+ aggregation hash table
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr.hash.min.reduction</name>
+ <value>0.5</value>
+ <description>Hash aggregation will be turned off if the ratio between
+ hash
+ table size and input rows is bigger than this number. Set to 1
+ to make
+ sure
+ hash aggregation is never turned off.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.cp</name>
+ <value>true</value>
+ <description>Whether to enable column pruner</description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.filter</name>
+ <value>false</value>
+ <description>Whether to enable automatic use of indexes</description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.groupby</name>
+ <value>false</value>
+ <description>Whether to enable optimization of group-by queries using
+ Aggregate indexes.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.ppd</name>
+ <value>true</value>
+ <description>Whether to enable predicate pushdown</description>
+ </property>
+
+ <property>
+ <name>hive.optimize.ppd.storage</name>
+ <value>true</value>
+ <description>Whether to push predicates down into storage handlers.
+ Ignored when hive.optimize.ppd is false.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ppd.recognizetransivity</name>
+ <value>true</value>
+ <description>Whether to transitively replicate predicate filters over
+ equijoin conditions.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.groupby</name>
+ <value>true</value>
+ <description>Whether to enable the bucketed group by from bucketed
+ partitions/tables.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.skewjoin.compiletime</name>
+ <value>false</value>
+ <description>Whether to create a separate plan for skewed keys for
+ the tables in the join.
+ This is based on the skewed keys stored in
+ the metadata. At compile
+ time, the plan is broken
+ into different
+ joins: one for the skewed keys, and the other for the
+ remaining keys.
+ And then,
+ a union is performed for the 2 joins generated above. So
+ unless the
+ same skewed key is present
+ in both the joined tables, the
+ join for the skewed key will be
+ performed as a map-side join.
+
+ The main
+ difference between this paramater and hive.optimize.skewjoin
+ is that
+ this parameter
+ uses the skew information stored in the metastore to
+ optimize the plan
+ at compile time itself.
+ If there is no skew
+ information in the metadata, this parameter will
+ not have any affect.
+ Both hive.optimize.skewjoin.compiletime and hive.optimize.skewjoin
+ should be set to true.
+ Ideally, hive.optimize.skewjoin should be
+ renamed as
+ hive.optimize.skewjoin.runtime, but not doing
+ so for
+ backward compatibility.
+
+ If the skew information is correctly stored
+ in the metadata,
+ hive.optimize.skewjoin.compiletime
+ would change the
+ query plan to take care of it, and
+ hive.optimize.skewjoin will be a
+ no-op.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.union.remove</name>
+ <value>false</value>
+ <description>
+ Whether to remove the union and push the operators
+ between union and the
+ filesink above
+ union. This avoids an extra scan
+ of the output by union. This is
+ independently useful for union
+ queries, and specially useful when
+ hive.optimize.skewjoin.compiletime is set
+ to true, since an
+ extra
+ union is inserted.
+
+ The merge is triggered if either of
+ hive.merge.mapfiles or
+ hive.merge.mapredfiles is set to true.
+ If the
+ user has set hive.merge.mapfiles to true and
+ hive.merge.mapredfiles
+ to false, the idea was the
+ number of reducers are few, so the number
+ of files anyway are small.
+ However, with this optimization,
+ we are
+ increasing the number of files possibly by a big margin. So, we
+ merge aggresively.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.supports.subdirectories</name>
+ <value>false</value>
+ <description>Whether the version of hadoop which is running supports
+ sub-directories for tables/partitions.
+ Many hive optimizations can be
+ applied if the hadoop version supports
+ sub-directories for
+ tables/partitions. It was added by MAPREDUCE-1501
+ </description>
+ </property>
+
+ <property>
+ <name>hive.multigroupby.singlemr</name>
+ <value>true</value>
+ <description>Whether to optimize multi group by query to generate
+ single M/R
+ job plan. If the multi group by query has common group by
+ keys, it will
+ be
+ optimized to generate single M/R job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.groupby.sorted</name>
+ <value>false</value>
+ <description>If the bucketing/sorting properties of the table exactly
+ match the grouping key, whether to
+ perform the group by in the mapper
+ by using BucketizedHiveInputFormat. The
+ only downside to this
+ is that
+ it limits the number of mappers to the number of files.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.groupby.sorted.testmode</name>
+ <value>false</value>
+ <description>If the bucketing/sorting properties of the table exactly
+ match the grouping key, whether to
+ perform the group by in the mapper
+ by using BucketizedHiveInputFormat. If
+ the test mode is set, the plan
+ is not converted, but a query property is set to denote the same.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.new.job.grouping.set.cardinality</name>
+ <value>30</value>
+ <description>
+ Whether a new map-reduce job should be launched for
+ grouping
+ sets/rollups/cubes.
+ For a query like: select a, b, c,
+ count(1) from T group by a, b, c with
+ rollup;
+ 4 rows are created per
+ row: (a, b, c), (a, b, null), (a, null, null),
+ (null, null, null).
+ This can lead to explosion across the map-reduce boundary if the
+ cardinality of T is very high, and map-side aggregation does not do a
+ very good job.
+
+ This parameter decides if hive should add an additional map-reduce
+ job. If the grouping set cardinality (4 in the example above) is more
+ than this value, a new MR job is added under the assumption that the
+ original group by will reduce the data size.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.join.emit.interval</name>
+ <value>1000</value>
+ <description>How many rows in the right-most join operand Hive should
+ buffer before emitting the join result.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.join.cache.size</name>
+ <value>25000</value>
+ <description>How many rows in the joining tables (except the
+ streaming table) should be cached in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.bucket.cache.size</name>
+ <value>100</value>
+ <description>How many values in each key in the map-joined table
+ should be cached in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.cache.numrows</name>
+ <value>25000</value>
+ <description>How many rows should be cached by jdbm for map join.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.skewjoin</name>
+ <value>false</value>
+ <description>Whether to enable skew join optimization.
+ The algorithm is as follows: at runtime, detect the keys with a large
+ skew. Instead of processing those keys, store them temporarily in an
+ HDFS directory. In a follow-up map-reduce job, process those skewed
+ keys. The same key need not be skewed for all the tables, and so the
+ follow-up map-reduce job (for the skewed keys) would be much faster,
+ since it would be a map-join.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.skewjoin.key</name>
+ <value>100000</value>
+ <description>Determine if we get a skew key in a join. If we see more
+ than the specified number of rows with the same key in the join
+ operator, we treat the key as a skew join key.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.skewjoin.mapjoin.map.tasks</name>
+ <value>10000</value>
+ <description>Determine the number of map tasks used in the follow-up
+ map join job for a skew join. It should be used together with
+ hive.skewjoin.mapjoin.min.split to perform fine-grained control.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.skewjoin.mapjoin.min.split</name>
+ <value>33554432</value>
+ <description>Determine the maximum number of map tasks used in the
+ follow-up map join job for a skew join by specifying the minimum
+ split size. It should be used together with
+ hive.skewjoin.mapjoin.map.tasks to perform fine-grained control.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.mode</name>
+ <value>nonstrict</value>
+ <description>The mode in which hive operations are being performed.
+ In strict mode, some risky queries are not allowed to run.
+ They include:
+ Cartesian product.
+ No partition being picked up for a query.
+ Comparing bigints and strings.
+ Comparing bigints and doubles.
+ ORDER BY without LIMIT.
+ </description>
+ </property>
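+
+ <!-- For illustration only (the table name is hypothetical): with
+ SET hive.mapred.mode=strict;
+ a query such as
+ SELECT * FROM sales ORDER BY amount;
+ is rejected because it has no LIMIT, while
+ SELECT * FROM sales ORDER BY amount LIMIT 100;
+ is allowed. -->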
+
+ <property>
+ <name>hive.enforce.bucketmapjoin</name>
+ <value>false</value>
+ <description>If the user asked for a bucketed map-side join, and it
+ cannot be performed, should the query fail or not? For example, if
+ the buckets in the tables being joined are not a multiple of each
+ other, the bucketed map-side join cannot be performed, and the query
+ will fail if hive.enforce.bucketmapjoin is set to true.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.script.maxerrsize</name>
+ <value>100000</value>
+ <description>Maximum number of bytes a script is allowed to emit to
+ standard error (per map-reduce task). This prevents runaway scripts
+ from filling log partitions to capacity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.script.allow.partial.consumption</name>
+ <value>false</value>
+ <description> When enabled, this option allows a user script to exit
+ successfully without consuming all the data from the standard input.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.operator.id.env.var</name>
+ <value>HIVE_SCRIPT_OPERATOR_ID</value>
+ <description> Name of the environment variable that holds the unique
+ script operator ID in the user's transform function (the custom
+ mapper/reducer that the user has specified in the query)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.operator.truncate.env</name>
+ <value>false</value>
+ <description>Truncate each environment variable for external script
+ in scripts operator to 20KB (to fit system limits)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.compress.output</name>
+ <value>false</value>
+ <description> This controls whether the final outputs of a query (to
+ a local/hdfs file or a hive table) are compressed. The compression
+ codec and other options are determined from hadoop config variables
+ mapred.output.compress*
+ </description>
+ </property>
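+
+ <!-- For illustration only: output compression is typically enabled
+ per session together with the Hadoop codec settings, e.g.
+ SET hive.exec.compress.output=true;
+ SET mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
+ The codec class shown here is the standard Hadoop gzip codec; any
+ installed compression codec may be used instead. -->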
+
+ <property>
+ <name>hive.exec.compress.intermediate</name>
+ <value>false</value>
+ <description> This controls whether intermediate files produced by
+ hive between multiple map-reduce jobs are compressed. The
+ compression codec and other options are determined from hadoop
+ config variables mapred.output.compress*
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.parallel</name>
+ <value>false</value>
+ <description>Whether to execute jobs in parallel</description>
+ </property>
+
+ <property>
+ <name>hive.exec.parallel.thread.number</name>
+ <value>8</value>
+ <description>How many jobs at most can be executed in parallel
+ </description>
+ </property>
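+
+ <!-- For illustration only: independent stages of a query (for
+ example the two sides of a UNION ALL) can run concurrently with
+ SET hive.exec.parallel=true;
+ SET hive.exec.parallel.thread.number=8;
+ -->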
+
+ <property>
+ <name>hive.exec.rowoffset</name>
+ <value>false</value>
+ <description>Whether to provide the row offset virtual column
+ </description>
+ </property>
+
+ <property>
+ <name>hive.task.progress</name>
+ <value>false</value>
+ <description>Whether Hive should periodically update task progress
+ counters during execution. Enabling this allows task progress to be
+ monitored more closely in the job tracker, but may impose a
+ performance penalty. This flag is automatically set to true for jobs
+ with hive.exec.dynamic.partition set to true.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hwi.war.file</name>
+ <value>lib/hive-hwi-@VERSION@.war</value>
+ <description>This sets the path to the HWI war file, relative to
+ ${HIVE_HOME}.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hwi.listen.host</name>
+ <value>0.0.0.0</value>
+ <description>This is the host address the Hive Web Interface will
+ listen on
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hwi.listen.port</name>
+ <value>9999</value>
+ <description>This is the port the Hive Web Interface will listen on
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.pre.hooks</name>
+ <value></value>
+ <description>Comma-separated list of pre-execution hooks to be
+ invoked for each statement. A pre-execution hook is specified as the
+ name of a Java class which implements the
+ org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.post.hooks</name>
+ <value></value>
+ <description>Comma-separated list of post-execution hooks to be
+ invoked for each statement. A post-execution hook is specified as
+ the name of a Java class which implements the
+ org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.failure.hooks</name>
+ <value></value>
+ <description>Comma-separated list of on-failure hooks to be invoked
+ for each statement. An on-failure hook is specified as the name of a
+ Java class which implements the
+ org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.init.hooks</name>
+ <value></value>
+ <description>A comma separated list of hooks to be invoked at the
+ beginning of HMSHandler initialization. An init hook is specified as
+ the name of a Java class which extends
+ org.apache.hadoop.hive.metastore.MetaStoreInitListener.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.client.stats.publishers</name>
+ <value></value>
+ <description>Comma-separated list of statistics publishers to be
+ invoked on counters on each job. A client stats publisher is
+ specified as the name of a Java class which implements the
+ org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.client.stats.counters</name>
+ <value></value>
+ <description>Subset of counters that should be of interest for
+ hive.client.stats.publishers (when one wants to limit their
+ publishing). Non-display names should be used
+ </description>
+ </property>
+
+ <property>
+ <name>hive.merge.mapfiles</name>
+ <value>true</value>
+ <description>Merge small files at the end of a map-only job
+ </description>
+ </property>
+
+ <property>
+ <name>hive.merge.mapredfiles</name>
+ <value>false</value>
+ <description>Merge small files at the end of a map-reduce job
+ </description>
+ </property>
+
+ <property>
+ <name>hive.heartbeat.interval</name>
+ <value>1000</value>
+ <description>Send a heartbeat after this interval - used by mapjoin
+ and filter operators
+ </description>
+ </property>
+
+ <property>
+ <name>hive.merge.size.per.task</name>
+ <value>256000000</value>
+ <description>Size of merged files at the end of the job</description>
+ </property>
+
+ <property>
+ <name>hive.merge.smallfiles.avgsize</name>
+ <value>16000000</value>
+ <description>When the average output file size of a job is less than
+ this number, Hive will start an additional map-reduce job to merge
+ the output files into bigger files. This is only done for map-only
+ jobs if hive.merge.mapfiles is true, and for map-reduce jobs if
+ hive.merge.mapredfiles is true.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.smalltable.filesize</name>
+ <value>25000000</value>
+ <description>The threshold for the input file size of the small
+ tables; if the file size is smaller than this threshold, it will try
+ to convert the common join into map join
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ignore.mapjoin.hint</name>
+ <value>true</value>
+ <description>Ignore the mapjoin hint</description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.localtask.max.memory.usage</name>
+ <value>0.90</value>
+ <description>This number means how much memory the local task can
+ take to hold the key/value in the in-memory hash table. If the local
+ task's memory usage is more than this number, the local task will
+ abort itself. It means the data of the small table is too large to
+ be held in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.followby.gby.localtask.max.memory.usage</name>
+ <value>0.55</value>
+ <description>This number means how much memory the local task can
+ take to hold the key/value in the in-memory hash table when this map
+ join is followed by a group by. If the local task's memory usage is
+ more than this number, the local task will abort itself. It means
+ the data of the small table is too large to be held in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.check.memory.rows</name>
+ <value>100000</value>
+ <description>The number of rows processed after which the memory
+ usage is checked.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join</name>
+ <value>false</value>
+ <description>Whether Hive enables the optimization of converting a
+ common join into a mapjoin based on the input file size.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join.noconditionaltask</name>
+ <value>true</value>
+ <description>Whether Hive enables the optimization of converting a
+ common join into a mapjoin based on the input file size. If this
+ parameter is on, and the sum of sizes for n-1 of the
+ tables/partitions of an n-way join is smaller than the specified
+ size, the join is directly converted to a mapjoin (there is no
+ conditional task).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join.noconditionaltask.size</name>
+ <value>10000000</value>
+ <description>If hive.auto.convert.join.noconditionaltask is off, this
+ parameter does not take effect. However, if it is on, and the sum of
+ sizes for n-1 of the tables/partitions of an n-way join is smaller
+ than this size, the join is directly converted to a mapjoin (there is
+ no conditional task). The default is 10MB.
+ </description>
+ </property>
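+
+ <!-- For illustration only (table and column names are hypothetical):
+ with
+ SET hive.auto.convert.join=true;
+ SET hive.auto.convert.join.noconditionaltask=true;
+ SET hive.auto.convert.join.noconditionaltask.size=10000000;
+ a query such as
+ SELECT f.id, d1.name, d2.name
+ FROM fact f JOIN dim1 d1 ON (f.k1 = d1.k1) JOIN dim2 d2 ON (f.k2 = d2.k2);
+ becomes a single mapjoin when dim1 and dim2 together are under 10MB,
+ with no conditional task and no mapjoin hint. -->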
+
+ <property>
+ <name>hive.optimize.mapjoin.mapreduce</name>
+ <value>false</value>
+ <description>If hive.auto.convert.join is off, this parameter does
+ not take effect. If it is on, and if there are map-join jobs followed
+ by a map-reduce job (e.g. a group by), each map-only job is merged
+ with the following map-reduce job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.auto.progress</name>
+ <value>false</value>
+ <description>Whether the Hive Transform/Map/Reduce clause should
+ automatically send progress information to the TaskTracker to avoid
+ the task getting killed because of inactivity. Hive sends progress
+ information when the script is outputting to stderr. This option
+ removes the need to periodically produce stderr messages, but users
+ should be cautious because this may prevent the TaskTracker from
+ killing scripts stuck in infinite loops.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.serde</name>
+ <value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value>
+ <description>The default serde for transmitting input data to and
+ reading output data from the user scripts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.binary.record.max.length</name>
+ <value>1000</value>
+ <description>Read from a binary stream and treat each
+ hive.binary.record.max.length bytes as a record.
+ The last record
+ before the end of stream can have less than
+ hive.binary.record.max.length bytes
+ </description>
+ </property>
+
+
+ <property>
+ <name>hive.script.recordreader</name>
+ <value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value>
+ <description>The default record reader for reading data from the user
+ scripts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.recordwriter</name>
+ <value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value>
+ <description>The default record writer for writing data to the user
+ scripts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.input.format</name>
+ <value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
+ <description>The default input format. Set this to HiveInputFormat if
+ you encounter problems with CombineHiveInputFormat.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.udtf.auto.progress</name>
+ <value>false</value>
+ <description>Whether Hive should automatically send progress
+ information to TaskTracker when using UDTF's to prevent the task
+ getting killed because of inactivity. Users should be cautious
+ because this may prevent TaskTracker from killing tasks with infinite
+ loops.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.reduce.tasks.speculative.execution</name>
+ <value>true</value>
+ <description>Whether speculative execution for reducers should be
+ turned on.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.counters.pull.interval</name>
+ <value>1000</value>
+ <description>The interval with which to poll the JobTracker for the
+ counters of the running job. The smaller it is, the more load there
+ will be on the jobtracker; the higher it is, the less granular the
+ caught counter updates will be.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.querylog.location</name>
+ <value>/tmp/${user.name}</value>
+ <description>
+ Location of Hive run time structured log file
+ </description>
+ </property>
+
+ <property>
+ <name>hive.querylog.enable.plan.progress</name>
+ <value>true</value>
+ <description>
+ Whether to log the plan's progress every time a job's
+ progress is checked.
+ These logs are written to the location specified
+ by
+ hive.querylog.location
+ </description>
+ </property>
+
+ <property>
+ <name>hive.querylog.plan.progress.interval</name>
+ <value>60000</value>
+ <description>
+ The interval to wait between logging the plan's progress, in
+ milliseconds.
+ If there is a whole number percentage change in the progress of the
+ mappers or the reducers, the progress is logged regardless of this
+ value.
+ The actual interval will be the ceiling of (this value divided by the
+ value of hive.exec.counters.pull.interval) multiplied by the value of
+ hive.exec.counters.pull.interval, i.e. if it does not divide evenly
+ by the value of hive.exec.counters.pull.interval it will be logged
+ less frequently than specified.
+ This only has an effect if hive.querylog.enable.plan.progress is set
+ to true.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.enforce.bucketing</name>
+ <value>false</value>
+ <description>Whether bucketing is enforced. If true, while inserting
+ into the table, bucketing is enforced.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.enforce.sorting</name>
+ <value>false</value>
+ <description>Whether sorting is enforced. If true, while inserting
+ into the table, sorting is enforced.
+ </description>
+ </property>
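+
+ <!-- For illustration only (table and column names are hypothetical):
+ SET hive.enforce.bucketing=true;
+ SET hive.enforce.sorting=true;
+ CREATE TABLE users_bucketed (id INT, name STRING)
+ CLUSTERED BY (id) SORTED BY (id) INTO 32 BUCKETS;
+ INSERT OVERWRITE TABLE users_bucketed SELECT id, name FROM users_staging;
+ With both flags set, the insert adds the reducers needed to produce
+ 32 correctly bucketed and sorted files. -->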
+
+ <property>
+ <name>hive.optimize.bucketingsorting</name>
+ <value>true</value>
+ <description>If hive.enforce.bucketing or hive.enforce.sorting is
+ true, don't create a reducer for enforcing bucketing/sorting for
+ queries of the form:
+ insert overwrite table T2 select * from T1;
+ where T1 and T2 are bucketed/sorted by the same keys into the same
+ number of buckets.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.enforce.sortmergebucketmapjoin</name>
+ <value>false</value>
+ <description>If the user asked for a sort-merge bucketed map-side
+ join, and it cannot be performed, should the query fail or not?
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.sortmerge.join</name>
+ <value>false</value>
+ <description>Whether the join will be automatically converted to a
+ sort-merge join, if the joined tables pass the criteria for
+ sort-merge join.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.sortmerge.join.bigtable.selection.policy</name>
+ <value>org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ</value>
+ <description>The policy to choose the big table for automatic
+ conversion to sort-merge join.
+ By default, the table with the largest
+ partitions is assigned the big
+ table. All policies are:
+ . based on
+ position of the table - the leftmost table is selected
+ org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSMJ.
+ . based on
+ total size (all the partitions selected in the query) of
+ the table
+ org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ.
+ . based on average size (all the partitions selected in the query)
+ of the table
+ org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.
+ New policies can be added in future.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.ds.connection.url.hook</name>
+ <value></value>
+ <description>Name of the hook to use for retrieving the JDO connection
+ URL. If empty, the value in javax.jdo.option.ConnectionURL is used
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.ds.retry.attempts</name>
+ <value>1</value>
+ <description>The number of times to retry a metastore call if there
+ was a connection error.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.ds.retry.interval</name>
+ <value>1000</value>
+ <description>The number of milliseconds between metastore retry
+ attempts
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.server.min.threads</name>
+ <value>200</value>
+ <description>Minimum number of worker threads in the Thrift server's
+ pool.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.server.max.threads</name>
+ <value>100000</value>
+ <description>Maximum number of worker threads in the Thrift server's
+ pool.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.server.tcp.keepalive</name>
+ <value>true</value>
+ <description>Whether to enable TCP keepalive for the metastore
+ server. Keepalive will prevent accumulation of half-open
+ connections.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.sasl.enabled</name>
+ <value>false</value>
+ <description>If true, the metastore thrift interface will be secured
+ with SASL. Clients must authenticate with Kerberos.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.thrift.framed.transport.enabled</name>
+ <value>false</value>
+ <description>If true, the metastore thrift interface will use
+ TFramedTransport. When false (default) a standard TTransport is
+ used.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.kerberos.keytab.file</name>
+ <value></value>
+ <description>The path to the Kerberos Keytab file containing the
+ metastore thrift server's service principal.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.kerberos.principal</name>
+ <value>hive-metastore/_HOST@EXAMPLE.COM</value>
+ <description>The service principal for the metastore thrift server.
+ The special string _HOST will be replaced automatically with the
+ correct host name.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.class</name>
+ <value>org.apache.hadoop.hive.thrift.MemoryTokenStore</value>
+ <description>The delegation token store implementation. Set to
+ org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced
+ cluster.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.zookeeper.connectString</name>
+ <value>localhost:2181</value>
+ <description>The ZooKeeper token store connect string.</description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.zookeeper.znode</name>
+ <value>/hive/cluster/delegation</value>
+ <description>The root path for token store data.</description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.zookeeper.acl</name>
+ <value>sasl:hive/host1@EXAMPLE.COM:cdrwa,sasl:hive/host2@EXAMPLE.COM:cdrwa</value>
+ <description>ACL for token store entries. List comma separated all
+ server principals for the cluster.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.cache.pinobjtypes</name>
+ <value>Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order</value>
+ <description>List of comma separated metastore object types that
+ should be pinned in the cache
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.reducededuplication</name>
+ <value>true</value>
+ <description>Remove extra map-reduce jobs if the data is already
+ clustered by the same key which needs to be used again. This should
+ always be set to true. Since it is a new feature, it has been made
+ configurable.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.reducededuplication.min.reducer</name>
+ <value>4</value>
+ <description>Reduce deduplication merges two RSs by moving
+ key/parts/reducer-num of the child RS to the parent RS.
+ That means if the reducer-num of the child RS is fixed (order by or
+ forced bucketing) and small, it can result in a very slow, single-MR
+ job.
+ The optimization will be disabled if the number of reducers is less
+ than the specified value.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.dynamic.partition</name>
+ <value>true</value>
+ <description>Whether or not to allow dynamic partitions in DML/DDL.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.dynamic.partition.mode</name>
+ <value>strict</value>
+ <description>In strict mode, the user must specify at least one
+ static partition in case the user accidentally overwrites all
+ partitions.
+ </description>
+ </property>
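+
+ <!-- For illustration only (table and column names are hypothetical):
+ in strict mode a dynamic partition insert must name at least one
+ static partition column, e.g.
+ SET hive.exec.dynamic.partition=true;
+ INSERT OVERWRITE TABLE sales PARTITION (country='US', state)
+ SELECT order_id, amount, state FROM sales_staging WHERE country='US';
+ Here country is static and state is filled in dynamically. -->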
+
+ <property>
+ <name>hive.exec.max.dynamic.partitions</name>
+ <value>1000</value>
+ <description>Maximum number of dynamic partitions allowed to be
+ created in total.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.max.dynamic.partitions.pernode</name>
+ <value>100</value>
+ <description>Maximum number of dynamic partitions allowed to be
+ created in each mapper/reducer node.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.max.created.files</name>
+ <value>100000</value>
+ <description>Maximum number of HDFS files created by all
+ mappers/reducers in a MapReduce job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.default.partition.name</name>
+ <value>__HIVE_DEFAULT_PARTITION__</value>
+ <description>The default partition name in case the dynamic partition
+ column value is null/empty string or any other value that cannot be
+ escaped. This value must not contain any special character used in
+ HDFS URIs (e.g., ':', '%', '/' etc). The user has to be aware that
+ the dynamic partition value should not contain this value, to avoid
+ confusion.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.dbclass</name>
+ <value>jdbc:derby</value>
+ <description>The default database that stores temporary hive
+ statistics.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.autogather</name>
+ <value>true</value>
+ <description>A flag to gather statistics automatically during the
+ INSERT OVERWRITE command.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.jdbcdriver</name>
+ <value>org.apache.derby.jdbc.EmbeddedDriver</value>
+ <description>The JDBC driver for the database that stores temporary
+ hive statistics.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.dbconnectionstring</name>
+ <value>jdbc:derby:;databaseName=TempStatsStore;create=true</value>
+ <description>The default connection string for the database that
+ stores temporary hive statistics.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.default.publisher</name>
+ <value></value>
+ <description>The Java class (implementing the StatsPublisher
+ interface) that is used by default if hive.stats.dbclass is not JDBC
+ or HBase.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.default.aggregator</name>
+ <value></value>
+ <description>The Java class (implementing the StatsAggregator
+ interface) that is used by default if hive.stats.dbclass is not JDBC
+ or HBase.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.jdbc.timeout</name>
+ <value>30</value>
+ <description>Timeout value (number of seconds) used by JDBC
+ connection and statements.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.retries.max</name>
+ <value>0</value>
+ <description>Maximum number of retries when the stats
+ publisher/aggregator gets an exception updating the intermediate
+ database. The default is no retries on failure.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.retries.wait</name>
+ <value>3000</value>
+ <description>The base waiting window (in milliseconds) before the
+ next retry. The actual wait time is calculated by baseWindow *
+ failures + baseWindow * (failures + 1) * (random number between
+ [0.0, 1.0]).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.reliable</name>
+ <value>false</value>
+ <description>Whether queries will fail because stats cannot be
+ collected completely accurately.
+ If this is set to true,
+ reading/writing from/into a partition may fail
+ because the stats
+ could not be computed accurately.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.collect.tablekeys</name>
+ <value>false</value>
+ <description>Whether join and group by keys on tables are derived and
+ maintained in the QueryPlan.
+ This is useful to identify how tables
+ are accessed and to determine if
+ they should be bucketed.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.collect.scancols</name>
+ <value>false</value>
+ <description>Whether column accesses are tracked in the QueryPlan.
+ This is useful to identify how tables are accessed and to determine
+ if there are wasted columns that can be trimmed.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.ndv.error</name>
+ <value>20.0</value>
+ <description>Standard error expressed in percentage. Provides a
+ tradeoff between accuracy and compute cost. A lower value for error
+ indicates higher accuracy and a higher compute cost.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.key.prefix.max.length</name>
+ <value>200</value>
+ <description>
+ Determines if, when the prefix of the key used for intermediate stats
+ collection exceeds a certain length, a hash of the key is used
+ instead. If the value &lt; 0 then hashing is never used; if the value
+ &gt;= 0 then hashing is used only when the key prefix's length
+ exceeds that value. The key prefix is defined as everything preceding
+ the task ID in the key.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.support.concurrency</name>
+ <value>false</value>
+ <description>Whether hive supports concurrency or not. A zookeeper
+ instance must be up and running for the default hive lock manager to
+ support read-write locks.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.lock.numretries</name>
+ <value>100</value>
+ <description>The number of times you want to try to get all the locks
+ </description>
+ </property>
+
+ <property>
+ <name>hive.unlock.numretries</name>
+ <value>10</value>
+ <description>The number of times you want to retry to do one unlock
+ </description>
+ </property>
+
+ <property>
+ <name>hive.lock.sleep.between.retries</name>
+ <value>60</value>
+ <description>The sleep time (in seconds) between various retries
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.quorum</name>
+ <value></value>
+ <description>The list of zookeeper servers to talk to. This is only
+ needed for read/write locks.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.client.port</name>
+ <value>2181</value>
+ <description>The port of zookeeper servers to talk to. This is only
+ needed for read/write locks.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.session.timeout</name>
+ <value>600000</value>
+ <description>Zookeeper client's session timeout. The client is
+ disconnected, and as a result, all locks released, if a heartbeat is
+ not sent in the timeout.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.namespace</name>
+ <value>hive_zookeeper_namespace</value>
+ <description>The parent node under which all zookeeper nodes are
+ created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.clean.extra.nodes</name>
+ <value>false</value>
+ <description>Clean extra nodes at the end of the session.
+ </description>
+ </property>
+
+ <property>
+ <name>fs.har.impl</name>
+ <value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
+ <description>The implementation for accessing Hadoop Archives. Note
+ that this won't be applicable to Hadoop versions less than 0.20.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.archive.enabled</name>
+ <value>false</value>
+ <description>Whether archiving operations are permitted</description>
+ </property>
+
+ <property>
+ <name>hive.fetch.output.serde</name>
+ <value>org.apache.hadoop.hive.serde2.DelimitedJSONSerDe</value>
+ <description>The serde used by FetchTask to serialize the fetch
+ output.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.mode.local.auto</name>
+ <value>false</value>
+ <description> Let hive determine whether to run in local mode
+ automatically
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.drop.ignorenonexistent</name>
+ <value>true</value>
+ <description>
+ Do not report an error if DROP TABLE/VIEW specifies a
+ non-existent
+ table/view
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.show.job.failure.debug.info</name>
+ <value>true</value>
+ <description>
+ If a job fails, whether to provide a link in the CLI to
+ the task with
+ the
+ most failures, along with debugging hints if
+ applicable.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.progress.timeout</name>
+ <value>0</value>
+ <description>
+ How long to run autoprogressor for the script/UDTF
+ operators (in
+ seconds).
+ Set to 0 for forever.
+ </description>
+ </property>
+
+ <!-- HBase Storage Handler Parameters -->
+
+ <property>
+ <name>hive.hbase.wal.enabled</name>
+ <value>true</value>
+ <description>Whether writes to HBase should be forced to the
+ write-ahead log. Disabling this improves HBase write performance at
+ the risk of lost writes in case of a crash.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.table.parameters.default</name>
+ <value></value>
+ <description>Default property values for newly created tables
+ </description>
+ </property>
+
+ <property>
+ <name>hive.entity.separator</name>
+ <value>@</value>
+ <description>Separator used to construct names of tables and
+ partitions. For example, dbname@tablename@partitionname
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ddl.createtablelike.properties.whitelist</name>
+ <value></value>
+ <description>Table Properties to copy over when executing a Create
+ Table Like.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.variable.substitute</name>
+ <value>true</value>
+ <description>This enables substitution using syntax like ${var}
+ ${system:var} and ${env:var}.
+ </description>
+ </property>
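+
+ <!-- For illustration only (the variable and table names are
+ hypothetical):
+ SET target_dt=2013-01-01;
+ SELECT * FROM logs WHERE dt = '${hiveconf:target_dt}';
+ System and environment values can be referenced similarly, e.g.
+ '${system:user.name}' or '${env:HOME}'. -->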
+
+ <property>
+ <name>hive.variable.substitute.depth</name>
+ <value>40</value>
+ <description>The maximum replacements the substitution engine will
+ do.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.conf.validation</name>
+ <value>true</value>
+ <description>Enables type checking for registered hive configurations
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.enabled</name>
+ <value>false</value>
+ <description>enable or disable the hive client authorization
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.user.grants</name>
+ <value></value>
+ <description>The privileges automatically granted to some users
+ whenever a table gets created.
+ An example like "userX,userY:select;userZ:create" will grant select
+ privilege to userX and userY, and grant create privilege to userZ
+ whenever a new table is created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.group.grants</name>
+ <value></value>
+ <description>The privileges automatically granted to some groups
+ whenever a table gets created.
+ An example like "groupX,groupY:select;groupZ:create" will grant
+ select privilege to groupX and groupY, and grant create privilege to
+ groupZ whenever a new table is created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.role.grants</name>
+ <value></value>
+ <description>The privileges automatically granted to some roles
+ whenever a table gets created.
+ An example like "roleX,roleY:select;roleZ:create" will grant select
+ privilege to roleX and roleY, and grant create privilege to roleZ
+ whenever a new table is created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.owner.grants</name>
+ <value></value>
+ <description>The privileges automatically granted to the owner
+ whenever a table gets created.
+ An example like "select,drop" will grant select and drop privilege to
+ the owner of the table.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.authorization.storage.checks</name>
+ <value>false</value>
+ <description>Should the metastore do authorization checks against the
+ underlying storage
+ for operations like drop-partition (disallow the
+ drop-partition if the
+ user in
+ question doesn't have permissions to
+ delete the corresponding directory
+ on the storage).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.error.on.empty.partition</name>
+ <value>false</value>
+ <description>Whether to throw an exception if dynamic partition
+ insert generates empty results.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.file.ignore.hdfs</name>
+ <value>false</value>
+ <description>If true, the hdfs location stored in the index file will
+ be ignored at runtime.
+ If the data got moved or the name of the cluster got changed, the
+ index data should still be usable.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.filter.compact.minsize</name>
+ <value>5368709120</value>
+ <description>Minimum size (in bytes) of the inputs on which a compact
+ index is automatically used.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.filter.compact.maxsize</name>
+ <value>-1</value>
+ <description>Maximum size (in bytes) of the inputs on which a compact
+ index is automatically used.
+ A negative number is equivalent to
+ infinity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.query.max.size</name>
+ <value>10737418240</value>
+ <description>The maximum number of bytes that a query using the
+ compact index can read. Negative value is equivalent to infinity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.query.max.entries</name>
+ <value>10000000</value>
+ <description>The maximum number of index entries to read during a
+ query that uses the compact index. Negative value is equivalent to
+ infinity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.binary.search</name>
+ <value>true</value>
+ <description>Whether or not to use a binary search to find the
+ entries in an index table that match the filter, where possible
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exim.uri.scheme.whitelist</name>
+ <value>hdfs,pfile</value>
+ <description>A comma separated list of acceptable URI schemes for
+ import and export.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.lock.mapred.only.operation</name>
+ <value>false</value>
+ <description>This parameter controls whether locks are acquired only
+ for queries that need to execute at least one mapred job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.row.max.size</name>
+ <value>100000</value>
+ <description>When trying a smaller subset of data for simple LIMIT,
+ the minimum size we need to guarantee each row to have.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.optimize.limit.file</name>
+ <value>10</value>
+ <description>When trying a smaller subset of data for simple LIMIT,
+ maximum number of files we can
+ sample.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.optimize.enable</name>
+ <value>false</value>
+ <description>Whether to enable the optimization of trying a smaller
+ subset of data for simple LIMIT first.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.optimize.fetch.max</name>
+ <value>50000</value>
+ <description>Maximum number of rows allowed for a smaller subset of
+ data for simple LIMIT, if it is a fetch query.
+ Insert queries are not
+ restricted by this limit.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.rework.mapredwork</name>
+ <value>false</value>
+ <description>Whether to rework the mapred work or not.
+ This was first introduced by SymlinkTextInputFormat to replace
+ symlink files with real paths at compile time.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.concatenate.check.index</name>
+ <value>true</value>
+ <description>If this is set to true, hive will throw an error when
+ doing 'alter table tbl_name [partSpec] concatenate' on a
+ table/partition that has indexes on it. The reason the user wants to
+ set this to true is that it can help the user avoid handling all the
+ index drop, recreation, and rebuild work. This is very helpful for
+ tables with thousands of partitions.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.sample.seednumber</name>
+ <value>0</value>
+ <description>A number used for percentage sampling. By changing this
+ number, the user will change the subsets of data sampled.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.io.exception.handlers</name>
+ <value></value>
+ <description>A list of io exception handler class names. This is used
+ to construct a list of exception handlers to handle exceptions thrown
+ by record readers.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.autogen.columnalias.prefix.label</name>
+ <value>_c</value>
+ <description>String used as a prefix when auto generating column
+ alias.
+ By default the prefix label will be appended with a column
+ position
+ number to form the column alias. Auto generation would
+ happen if an
+ aggregate function is used in a select clause without an
+ explicit
+ alias.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.autogen.columnalias.prefix.includefuncname</name>
+ <value>false</value>
+ <description>Whether to include function name in the column alias
+ auto generated by hive.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.perf.logger</name>
+ <value>org.apache.hadoop.hive.ql.log.PerfLogger</value>
+ <description>The class responsible for logging client side
+ performance metrics. Must be a subclass of
+ org.apache.hadoop.hive.ql.log.PerfLogger.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.start.cleanup.scratchdir</name>
+ <value>false</value>
+ <description>To clean up the hive scratchdir while starting the hive
+ server
+ </description>
+ </property>
+
+ <property>
+ <name>hive.output.file.extension</name>
+ <value></value>
+ <description>String used as a file extension for output files. If not
+ set, defaults to the codec extension for text files (e.g. ".gz"), or
+ no extension otherwise.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.insert.into.multilevel.dirs</name>
+ <value>false</value>
+ <description>Whether to insert into multilevel directories like
+ "insert
+ directory '/HIVEFT25686/chinna/' from table"
+ </description>
+ </property>
+
+ <property>
+ <name>hive.warehouse.subdir.inherit.perms</name>
+ <value>false</value>
+ <description>Set this to true if the table directories should
+ inherit the permission of the warehouse or database directory instead
+ of being created with the permissions derived from the dfs umask.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.job.debug.capture.stacktraces</name>
+ <value>true</value>
+ <description>Whether or not stack traces parsed from the task logs of
+ a sampled failed task for
+ each failed job should be stored in the
+ SessionState
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.driver.run.hooks</name>
+ <value></value>
+ <description>A comma separated list of hooks which implement
+ HiveDriverRunHook and will be run at the beginning and end of
+ Driver.run; these will be run in the order specified.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ddl.output.format</name>
+ <value>text</value>
+ <description>
+ The data format to use for DDL output. One of "text"
+ (for human
+ readable text) or "json" (for a json object).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.transform.escape.input</name>
+ <value>false</value>
+ <description>
+ This adds an option to escape special chars (newlines,
+ carriage returns
+ and
+ tabs) when they are passed to the user script.
+ This is useful if the hive
+ tables
+ can contain data that contains
+ special characters.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.rcfile.use.explicit.header</name>
+ <value>true</value>
+ <description>
+ If this is set the header for RC Files will simply be
+ RCF. If this is
+ not
+ set the header will be that borrowed from sequence
+ files, e.g. SEQ-
+ followed
+ by the input and output RC File formats.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.multi.insert.move.tasks.share.dependencies</name>
+ <value>false</value>
+ <description>
+ If this is set all move tasks for tables/partitions (not
+ directories)
+ at the end of a
+ multi-insert query will only begin once
+ the dependencies for all these move
+ tasks have been
+ met.
+ Advantages: If
+ concurrency is enabled, the locks will only be released once the
+ query has
+ finished, so with this config enabled, the time when the
+ table/partition is
+ generated will be much closer to when the lock on
+ it is released.
+ Disadvantages: If concurrency is not enabled, with
+ this disabled,
+ the tables/partitions which
+ are produced by this query
+ and finish earlier will be available for
+ querying
+ much earlier. Since
+ the locks are only released once the query finishes,
+ this
+ does not
+ apply if concurrency is enabled.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.fetch.task.conversion</name>
+ <value>minimal</value>
+ <description>
+ Some select queries can be converted to a single FETCH task,
+ minimizing latency.
+ Currently the query should be single sourced, not have any subquery,
+ and should not have any aggregations or distincts (which incur RS),
+ lateral views or joins.
+ 1. minimal : SELECT STAR, FILTER on partition columns, LIMIT only
+ 2. more : SELECT, FILTER, LIMIT only (TABLESAMPLE, virtual columns)
+ </description>
+ </property>
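+
+ <!-- For illustration only (table and column names are hypothetical):
+ with hive.fetch.task.conversion=minimal, a query such as
+ SELECT * FROM logs WHERE dt='2013-01-01' LIMIT 10;
+ (star select, partition-column filter, LIMIT) is served by a fetch
+ task with no map-reduce job; with the value "more", a projection and
+ filter on ordinary columns, e.g.
+ SELECT host, status FROM logs WHERE status >= 500 LIMIT 10;
+ would also qualify. -->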
+
+ <property>
+ <name>hive.hmshandler.retry.attempts</name>
+ <value>1</value>
+ <description>The number of times to retry a HMSHandler call if there
+ was a connection error.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hmshandler.retry.interval</name>
+ <value>1000</value>
+ <description>The number of milliseconds between HMSHandler retry
+ attempts
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server.read.socket.timeout</name>
+ <value>10</value>
+ <description>Timeout for the HiveServer to close the connection if no
+ response from the client in N seconds, defaults to 10 seconds.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server.tcp.keepalive</name>
+ <value>true</value>
+ <description>Whether to enable TCP keepalive for the Hive server.
+ Keepalive will prevent accumulation of half-open connections.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.decode.partition.name</name>
+ <value>false</value>
+ <description>Whether to show the unquoted partition names in query
+ results.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.log4j.file</name>
+ <value></value>
+ <description>Hive log4j configuration file.
+ If the property is not
+ set, then logging will be initialized using
+ hive-log4j.properties
+ found on the classpath.
+ If the property is set, the value must be a
+ valid URI (java.net.URI,
+ e.g. "file:///tmp/my-logging.properties"),
+ which you can then
+ extract a URL from and pass to
+ PropertyConfigurator.configure(URL).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.log4j.file</name>
+ <value></value>
+ <description>Hive log4j configuration file for execution mode (sub
+ command).
+ If the property is not set, then logging will be
+ initialized using
+ hive-exec-log4j.properties found on the classpath.
+ If the property is set, the value must be a valid URI (java.net.URI,
+ e.g. "file:///tmp/my-logging.properties"), which you can then
+ extract a URL from and pass to PropertyConfigurator.configure(URL).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.infer.bucket.sort</name>
+ <value>false</value>
+ <description>
+ If this is set, when writing partitions, the metadata
+ will include the
+ bucketing/sorting
+ properties with which the data was
+ written if any (this will not overwrite the
+ metadata
+ inherited from
+ the table if the table is bucketed/sorted)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.infer.bucket.sort.num.buckets.power.two</name>
+ <value>false</value>
+ <description>
+ If this is set, when setting the number of reducers for
+ the map reduce
+ task which writes the
+ final output files, it will
+ choose a number which is a power of two,
+ unless the user specifies
+ the number of reducers to use using mapred.reduce.tasks. The number
+ of reducers may be set to a power of two, only to be followed by a
+ merge task, meaning that nothing can be inferred.
+ With
+ hive.exec.infer.bucket.sort set to true:
+ Advantages: If this is not
+ set, the number of buckets for partitions will seem
+ arbitrary,
+ which
+ means that the number of mappers used for optimized joins, for
+ example, will
+ be very low. With this set, since the number of buckets
+ used for any
+ partition is
+ a power of two, the number of mappers used
+ for optimized joins will
+ be the least
+ number of buckets used by any
+ partition being joined.
+ Disadvantages: This may mean a much larger or
+ much smaller number of reducers
+ being used in the
+ final map reduce
+ job, e.g. if a job was originally going to take 257
+ reducers,
+ it will
+ now take 512 reducers, similarly if the max number of reducers
+ is
+ 511,
+ and a job was going to use this many, it will now use 256
+ reducers.
+
+ </description>
+ </property>
+
+ <property>
+ <name>hive.groupby.orderby.position.alias</name>
+ <value>false</value>
+ <description>Whether to enable using Column Position Alias in Group
+ By or Order By
+ </description>
+ </property>
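+
+ <!-- For illustration only (table and column names are hypothetical):
+ SET hive.groupby.orderby.position.alias=true;
+ SELECT dept, COUNT(*) FROM employees GROUP BY 1 ORDER BY 2 DESC;
+ Here 1 refers to dept and 2 to the count column. -->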
+
+ <property>
+ <name>hive.server2.thrift.min.worker.threads</name>
+ <value>5</value>
+ <description>Minimum number of Thrift worker threads</description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.max.worker.threads</name>
+ <value>100</value>
+ <description>Maximum number of Thrift worker threads</description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.port</name>
+ <value>10000</value>
+ <description>Port number of HiveServer2 Thrift interface.
+ Can be
+ overridden by setting $HIVE_SERVER2_THRIFT_PORT
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.bind.host</name>
+ <value>localhost</value>
+ <description>Bind host on which to run the HiveServer2 Thrift
+ interface.
+ Can be overridden by setting
+ $HIVE_SERVER2_THRIFT_BIND_HOST
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication</name>
+ <value>NONE</value>
+ <description>
+ Client authentication types.
+ NONE: no authentication
+ check
+ LDAP: LDAP/AD based authentication
+ KERBEROS: Kerberos/GSSAPI
+ authentication
+ CUSTOM: Custom authentication provider
+ (Use with
+ property hive.server2.custom.authentication.class)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.custom.authentication.class</name>
+ <value></value>
+ <description>
+ Custom authentication class. Used when property
+ 'hive.server2.authentication' is set to 'CUSTOM'. Provided class
+ must be a proper implementation of the interface
+ org.apache.hive.service.auth.PasswdAuthenticationProvider.
+ HiveServer2
+ will call its Authenticate(user, password) method to
+ authenticate
+ requests.
+ The implementation may optionally extend the
+ Hadoop's
+ org.apache.hadoop.conf.Configured class to grab Hive's
+ Configuration
+ object.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.kerberos.principal</name>
+ <value></value>
+ <description>
+ Kerberos server principal
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.kerberos.keytab</name>
+ <value></value>
+ <description>
+ Kerberos keytab file for server principal
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.ldap.url</name>
+ <value></value>
+ <description>
+ LDAP connection URL
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.ldap.baseDN</name>
+ <value></value>
+ <description>
+ LDAP base DN
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.enable.doAs</name>
+ <value>true</value>
+ <description>
+ Setting this property to true will have hive server2
+ execute
+ hive operations as the user making the calls to it.
+ </description>
+ </property>
+
+
+
+ <!-- Hive Execution Parameters -->
+ <property>
+ <name>mapred.reduce.tasks</name>
+ <value>-1</value>
+ <description>The default number of reduce tasks per job. Typically
+ set to a prime close to the number of available hosts. Ignored when
+ mapred.job.tracker is "local". Hadoop sets this to 1 by default,
+ whereas hive uses -1 as its default value.
+ By setting this property to -1, Hive will automatically figure out
+ what the number of reducers should be.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.reducers.bytes.per.reducer</name>
+ <value>1000000000</value>
+ <description>Size per reducer. The default is 1G, i.e. if the input
+ size is 10G, it will use 10 reducers.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.reducers.max</name>
+ <value>999</value>
+ <description>The maximum number of reducers that will be used. If the
+ one specified in the configuration parameter mapred.reduce.tasks is
+ negative, hive will use this as the maximum number of reducers when
+ automatically determining the number of reducers.
+ </description>
+ </property>
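+
+ <!-- For illustration only: with
+ hive.exec.reducers.bytes.per.reducer=1000000000 and
+ hive.exec.reducers.max=999, a job reading roughly 10 GB of input gets
+ about 10 reducers; the estimate is capped at 999 unless the reducer
+ count is fixed explicitly, e.g.
+ SET mapred.reduce.tasks=50;
+ -->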
+
+ <property>
+ <name>hive.cli.print.header</name>
+ <value>false</value>
+ <description>Whether to print the names of the columns in query
+ output.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cli.print.current.db</name>
+ <value>false</value>
+ <description>Whether to include the current database in the hive
+ prompt.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cli.prompt</name>
+ <value>hive</value>
+ <description>Command line prompt configuration value. Other hiveconf
+ can be used in
+ this configuration value. Variable substitution will
+ only be invoked at
+ the hive
+ cli startup.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cli.pretty.output.num.cols</name>
+ <value>-1</value>
+ <description>The number of columns to use when formatting output
+ generated
+ by the DESCRIBE PRETTY table_name command. If the value of
+ this
+ property
+ is -1, then hive will use the auto-detected terminal
+ width.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.scratchdir</name>
+ <value>/tmp/hive-${user.name}</value>
+ <description>Scratch space for Hive jobs</description>
+ </property>
+
+ <property>
+ <name>hive.exec.local.scratchdir</name>
+ <value>/tmp/${user.name}</value>
+ <description>Local scratch space for Hive jobs</description>
+ </property>
+
+ <property>
+ <name>hive.test.mode</name>
+ <value>false</value>
+ <description>whether hive is running in test mode. If yes, it turns on
+ sampling and prefixes the output tablename
+ </description>
+ </property>
+
+ <property>
+ <name>hive.test.mode.prefix</name>
+ <value>test_</value>
+ <description>if hive is running in test mode, prefixes the output
+ table by this string
+ </description>
+ </property>
+
+ <!-- If the input table is not bucketed, the denominator of the tablesample
+ is determined by the parameter below -->
+ <!-- For example, the following query: -->
+ <!-- INSERT OVERWRITE TABLE dest -->
+ <!-- SELECT col1 from src -->
+ <!-- would be converted to -->
+ <!-- INSERT OVERWRITE TABLE test_dest -->
+ <!-- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1)) -->
+ <property>
+ <name>hive.test.mode.samplefreq</name>
+ <value>32</value>
+ <description>if hive is running in test mode and table is not
+ bucketed, sampling frequency
+ </description>
+ </property>
+
+ <property>
+ <name>hive.test.mode.nosamplelist</name>
+ <value></value>
+ <description>if hive is running in test mode, don't sample the above
+ comma separated list of tables
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.uris</name>
+ <value></value>
+ <description>Thrift uri for the remote metastore. Used by metastore
+ client to connect to remote metastore.
+ </description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionURL</name>
+ <value>jdbc:derby:;databaseName=metastore_db;create=true</value>
+ <description>JDBC connect string for a JDBC metastore</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionDriverName</name>
+ <value>org.apache.derby.jdbc.EmbeddedDriver</value>
+ <description>Driver class name for a JDBC metastore</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.PersistenceManagerFactoryClass</name>
+ <value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
+ <description>class implementing the jdo persistence</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.DetachAllOnCommit</name>
+ <value>true</value>
+ <description>detaches all objects from session so that they can be
+ used after transaction is committed
+ </description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.NonTransactionalRead</name>
+ <value>true</value>
+ <description>reads outside of transactions</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionUserName</name>
+ <value>APP</value>
+ <description>username to use against metastore database</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionPassword</name>
+ <value>mine</value>
+ <description>password to use against metastore database</description>
+ </property>
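+
+ <!-- Illustrative example (assumed values, not required by hivesterix): to -->
+ <!-- point the metastore at a shared MySQL database instead of the embedded -->
+ <!-- Derby instance above, the ConnectionURL, ConnectionDriverName, -->
+ <!-- ConnectionUserName and ConnectionPassword properties would typically be -->
+ <!-- overridden, e.g.: -->
+ <!-- <name>javax.jdo.option.ConnectionURL</name> -->
+ <!-- <value>jdbc:mysql://dbhost:3306/metastore?createDatabaseIfNotExist=true</value> -->
+ <!-- <name>javax.jdo.option.ConnectionDriverName</name> -->
+ <!-- <value>com.mysql.jdbc.Driver</value> -->
+ <!-- where dbhost is a placeholder and real credentials replace APP/mine. -->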
+
+ <property>
+ <name>javax.jdo.option.Multithreaded</name>
+ <value>true</value>
+ <description>Set this to true if multiple threads access metastore
+ through JDO concurrently.
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.connectionPoolingType</name>
+ <value>DBCP</value>
+ <description>Uses a DBCP connection pool for JDBC metastore
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.validateTables</name>
+ <value>false</value>
+ <description>validates existing schema against code. turn this on if
+ you want to verify existing schema
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.validateColumns</name>
+ <value>false</value>
+ <description>validates existing schema against code. turn this on if
+ you want to verify existing schema
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.validateConstraints</name>
+ <value>false</value>
+ <description>validates existing schema against code. turn this on if
+ you want to verify existing schema
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.storeManagerType</name>
+ <value>rdbms</value>
+ <description>metadata store type</description>
+ </property>
+
+ <property>
+ <name>datanucleus.autoCreateSchema</name>
+ <value>true</value>
+ <description>creates necessary schema on a startup if one doesn't
+ exist. set this to false, after creating it once
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.autoStartMechanismMode</name>
+ <value>checked</value>
+ <description>throw exception if metadata tables are incorrect
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.transactionIsolation</name>
+ <value>read-committed</value>
+ <description>Default transaction isolation level for identity
+ generation.
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.cache.level2</name>
+ <value>false</value>
+ <description>Use a level 2 cache. Turn this off if metadata is changed
+ independently of hive metastore server
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.cache.level2.type</name>
+ <value>SOFT</value>
+ <description>SOFT=soft reference based cache, WEAK=weak reference
+ based cache.
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.identifierFactory</name>
+ <value>datanucleus</value>
+ <description>Name of the identifier factory to use when generating
+ table/column names etc. 'datanucleus' is used for backward
+ compatibility
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.plugin.pluginRegistryBundleCheck</name>
+ <value>LOG</value>
+ <description>Defines what happens when plugin bundles are found and
+ are duplicated [EXCEPTION|LOG|NONE]
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.warehouse.dir</name>
+ <value>/user/hive/warehouse</value>
+ <description>location of default database for the warehouse
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.execute.setugi</name>
+ <value>false</value>
+ <description>In unsecure mode, setting this property to true will
+ cause the metastore to execute DFS operations using the client's
+ reported user and group permissions. Note that this property must be
+ set on both the client and server sides. Further note that it is best
+ effort. If the client sets it to true and the server sets it to false,
+ the client setting will be ignored.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.event.listeners</name>
+ <value></value>
+ <description>list of comma separated listeners for metastore events.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.partition.inherit.table.properties</name>
+ <value></value>
+ <description>list of comma separated keys occurring in table
+ properties which will get inherited to newly created partitions. *
+ implies all the keys will get inherited.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metadata.export.location</name>
+ <value></value>
+ <description>When used in conjunction with the
+ org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event
+ listener, it is the location to which the metadata will be exported.
+ The default is an empty string, which results in the metadata being
+ exported to the current user's home directory on HDFS.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metadata.move.exported.metadata.to.trash</name>
+ <value></value>
+ <description>When used in conjunction with the
+ org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event
+ listener, this setting determines if the metadata that is exported
+ will subsequently be moved to the user's trash directory alongside
+ the dropped table data. This ensures that the metadata will be
+ cleaned up along with the dropped table data.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.partition.name.whitelist.pattern</name>
+ <value></value>
+ <description>Partition names will be checked against this regex
+ pattern and rejected if not matched.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.end.function.listeners</name>
+ <value></value>
+ <description>list of comma separated listeners for the end of
+ metastore functions.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.event.expiry.duration</name>
+ <value>0</value>
+ <description>Duration after which events expire from events table (in
+ seconds)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.event.clean.freq</name>
+ <value>0</value>
+ <description>Frequency at which timer task runs to purge expired
+ events in metastore (in seconds).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.connect.retries</name>
+ <value>5</value>
+ <description>Number of retries while opening a connection to metastore
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.failure.retries</name>
+ <value>3</value>
+ <description>Number of retries upon failure of Thrift metastore calls
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.client.connect.retry.delay</name>
+ <value>1</value>
+ <description>Number of seconds for the client to wait between
+ consecutive connection attempts
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.client.socket.timeout</name>
+ <value>20</value>
+ <description>MetaStore Client socket timeout in seconds</description>
+ </property>
+
+ <property>
+ <name>hive.metastore.rawstore.impl</name>
+ <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
+ <description>Name of the class that implements
+ org.apache.hadoop.hive.metastore.rawstore interface. This class is
+ used to store and retrieve raw metadata objects such as tables and
+ databases
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.batch.retrieve.max</name>
+ <value>300</value>
+ <description>Maximum number of objects (tables/partitions) that can be
+ retrieved from the metastore in one batch. The higher the number, the
+ fewer round trips are needed to the Hive metastore server, but it may
+ also cause a higher memory requirement on the client side.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.batch.retrieve.table.partition.max</name>
+ <value>1000</value>
+ <description>Maximum number of table partitions that metastore
+ internally retrieves in one batch.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.default.fileformat</name>
+ <value>TextFile</value>
+ <description>Default file format for CREATE TABLE statement. Options
+ are TextFile and SequenceFile. Users can explicitly say CREATE TABLE
+ ... STORED AS &lt;TEXTFILE|SEQUENCEFILE&gt; to override</description>
+ </property>
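+
+ <!-- Illustrative example (hypothetical table and column names): the -->
+ <!-- per-table override mentioned above would look like: -->
+ <!-- CREATE TABLE page_views (url STRING, cnt INT) STORED AS SEQUENCEFILE; -->
+ <!-- which wins over hive.default.fileformat for that table only. -->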
+
+ <property>
+ <name>hive.fileformat.check</name>
+ <value>true</value>
+ <description>Whether to check file format or not when loading data
+ files
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr</name>
+ <value>true</value>
+ <description>Whether to use map-side aggregation in Hive Group By
+ queries
+ </description>
+ </property>
+
+ <property>
+ <name>hive.groupby.skewindata</name>
+ <value>false</value>
+ <description>Whether there is skew in data to optimize group by
+ queries
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.multigroupby.common.distincts</name>
+ <value>true</value>
+ <description>Whether to optimize a multi-groupby query with the same
+ distinct.
+ Consider a query like:
+
+ from src
+ insert overwrite table dest1 select col1, count(distinct colx) group by col1
+ insert overwrite table dest2 select col2, count(distinct colx) group by col2;
+
+ With this parameter set to true, first we spray by the distinct value
+ (colx), and then perform the 2 group bys. This makes sense if map-side
+ aggregation is turned off. However, with map-side aggregation, it
+ might be useful in some cases to treat the 2 inserts independently,
+ thereby performing the query above in 2 MR jobs instead of 3 (due to
+ spraying by distinct key first).
+ If this parameter is turned off, we don't consider the fact that the
+ distinct key is the same across different MR jobs.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.groupby.mapaggr.checkinterval</name>
+ <value>100000</value>
+ <description>Number of rows after which the size of the grouping
+ keys/aggregation classes is checked
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.local.mem</name>
+ <value>0</value>
+ <description>For local mode, memory of the mappers/reducers
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.followby.map.aggr.hash.percentmemory</name>
+ <value>0.3</value>
+ <description>Portion of total memory to be used by map-side group
+ aggregation hash table, when this group by is followed by map join
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr.hash.force.flush.memory.threshold</name>
+ <value>0.9</value>
+ <description>The max memory to be used by map-side group aggregation
+ hash table, if the memory usage is higher than this number, force to
+ flush data
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr.hash.percentmemory</name>
+ <value>0.5</value>
+ <description>Portion of total memory to be used by map-side group
+ aggregation hash table
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr.hash.min.reduction</name>
+ <value>0.5</value>
+ <description>Hash aggregation will be turned off if the ratio between
+ hash table size and input rows is bigger than this number. Set to 1 to
+ make sure hash aggregation is never turned off.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.cp</name>
+ <value>true</value>
+ <description>Whether to enable column pruner</description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.filter</name>
+ <value>false</value>
+ <description>Whether to enable automatic use of indexes</description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.groupby</name>
+ <value>false</value>
+ <description>Whether to enable optimization of group-by queries using
+ Aggregate indexes.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.ppd</name>
+ <value>true</value>
+ <description>Whether to enable predicate pushdown</description>
+ </property>
+
+ <property>
+ <name>hive.optimize.ppd.storage</name>
+ <value>true</value>
+ <description>Whether to push predicates down into storage handlers.
+ Ignored when hive.optimize.ppd is false.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ppd.recognizetransivity</name>
+ <value>true</value>
+ <description>Whether to transitively replicate predicate filters over
+ equijoin conditions.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.groupby</name>
+ <value>true</value>
+ <description>Whether to enable the bucketed group by from bucketed
+ partitions/tables.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.skewjoin.compiletime</name>
+ <value>false</value>
+ <description>Whether to create a separate plan for skewed keys for the
+ tables in the join.
+ This is based on the skewed keys stored in the metadata. At compile
+ time, the plan is broken into different joins: one for the skewed
+ keys, and the other for the remaining keys. And then, a union is
+ performed for the 2 joins generated above. So unless the same skewed
+ key is present in both the joined tables, the join for the skewed key
+ will be performed as a map-side join.
+
+ The main difference between this parameter and hive.optimize.skewjoin
+ is that this parameter uses the skew information stored in the
+ metastore to optimize the plan at compile time itself.
+ If there is no skew information in the metadata, this parameter will
+ not have any effect.
+ Both hive.optimize.skewjoin.compiletime and hive.optimize.skewjoin
+ should be set to true.
+ Ideally, hive.optimize.skewjoin should be renamed as
+ hive.optimize.skewjoin.runtime, but not doing so for backward
+ compatibility.
+
+ If the skew information is correctly stored in the metadata,
+ hive.optimize.skewjoin.compiletime would change the query plan to take
+ care of it, and hive.optimize.skewjoin will be a no-op.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.union.remove</name>
+ <value>false</value>
+ <description>
+ Whether to remove the union and push the operators between the union
+ and the filesink above the union. This avoids an extra scan of the
+ output by the union. This is independently useful for union queries,
+ and especially useful when hive.optimize.skewjoin.compiletime is set
+ to true, since an extra union is inserted.
+
+ The merge is triggered if either of hive.merge.mapfiles or
+ hive.merge.mapredfiles is set to true.
+ If the user has set hive.merge.mapfiles to true and
+ hive.merge.mapredfiles to false, the idea was that the number of
+ reducers is few, so the number of files anyway is small. However,
+ with this optimization, we are increasing the number of files possibly
+ by a big margin. So, we merge aggressively.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.supports.subdirectories</name>
+ <value>false</value>
+ <description>Whether the version of hadoop which is running supports
+ sub-directories for tables/partitions.
+ Many hive optimizations can be applied if the hadoop version supports
+ sub-directories for tables/partitions. It was added by MAPREDUCE-1501
+ </description>
+ </property>
+
+ <property>
+ <name>hive.multigroupby.singlemr</name>
+ <value>false</value>
+ <description>Whether to optimize multi group by query to generate a
+ single M/R job plan. If the multi group by query has common group by
+ keys, it will be optimized to generate a single M/R job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.groupby.sorted</name>
+ <value>false</value>
+ <description>If the bucketing/sorting properties of the table exactly
+ match the grouping key, whether to perform the group by in the mapper
+ by using BucketizedHiveInputFormat. The only downside to this is that
+ it limits the number of mappers to the number of files.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.groupby.sorted.testmode</name>
+ <value>false</value>
+ <description>If the bucketing/sorting properties of the table exactly
+ match the grouping key, whether to perform the group by in the mapper
+ by using BucketizedHiveInputFormat. If the test mode is set, the plan
+ is not converted, but a query property is set to denote the same.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.new.job.grouping.set.cardinality</name>
+ <value>30</value>
+ <description>
+ Whether a new map-reduce job should be launched for grouping
+ sets/rollups/cubes.
+ For a query like: select a, b, c, count(1) from T group by a, b, c
+ with rollup;
+ 4 rows are created per row: (a, b, c), (a, b, null), (a, null, null),
+ (null, null, null).
+ This can lead to explosion across the map-reduce boundary if the
+ cardinality of T is very high, and map-side aggregation does not do a
+ very good job.
+
+ This parameter decides if hive should add an additional map-reduce
+ job. If the grouping set cardinality (4 in the example above) is more
+ than this value, a new MR job is added under the assumption that the
+ original group by will reduce the data size.
+ </description>
+ </property>
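+
+ <!-- Illustrative arithmetic (assumed, not part of the original file): the -->
+ <!-- rollup in the description has grouping-set cardinality 4, below the 30 -->
+ <!-- above, so no extra MR job is added; a cube over five columns has -->
+ <!-- cardinality 2^5 = 32 > 30, which would trigger the extra job. -->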
+
+ <property>
+ <name>hive.join.emit.interval</name>
+ <value>1000</value>
+ <description>How many rows in the right-most join operand Hive should
+ buffer before emitting the join result.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.join.cache.size</name>
+ <value>25000</value>
+ <description>How many rows in the joining tables (except the streaming
+ table) should be cached in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.bucket.cache.size</name>
+ <value>100</value>
+ <description>How many values in each key in the map-joined table
+ should be cached in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.cache.numrows</name>
+ <value>25000</value>
+ <description>How many rows should be cached by jdbm for map join.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.skewjoin</name>
+ <value>false</value>
+ <description>Whether to enable skew join optimization.
+ The algorithm is as follows: At runtime, detect the keys with a large
+ skew. Instead of processing those keys, store them temporarily in an
+ HDFS directory. In a follow-up map-reduce job, process those skewed
+ keys. The same key need not be skewed for all the tables, and so, the
+ follow-up map-reduce job (for the skewed keys) would be much faster,
+ since it would be a map-join.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.skewjoin.key</name>
+ <value>100000</value>
+ <description>Determine if we get a skew key in join. If we see more
+ than the specified number of rows with the same key in the join
+ operator, we treat the key as a skew join key.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.skewjoin.mapjoin.map.tasks</name>
+ <value>10000</value>
+ <description>Determine the number of map tasks used in the follow up
+ map join job for a skew join. It should be used together with
+ hive.skewjoin.mapjoin.min.split to perform a fine grained control.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.skewjoin.mapjoin.min.split</name>
+ <value>33554432</value>
+ <description>Determine the number of map tasks at most used in the
+ follow up map join job for a skew join by specifying the minimum split
+ size. It should be used together with hive.skewjoin.mapjoin.map.tasks
+ to perform a fine grained control.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.mode</name>
+ <value>nonstrict</value>
+ <description>The mode in which the hive operations are being
+ performed.
+ In strict mode, some risky queries are not allowed to run. They
+ include:
+ Cartesian Product.
+ No partition being picked up for a query.
+ Comparing bigints and strings.
+ Comparing bigints and doubles.
+ Orderby without limit.
+ </description>
+ </property>
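+
+ <!-- Illustrative example (hypothetical table and column): in strict mode a -->
+ <!-- query such as -->
+ <!-- SELECT * FROM sales ORDER BY amount; -->
+ <!-- is rejected until a LIMIT is added, e.g. ORDER BY amount LIMIT 100; -->
+ <!-- the nonstrict default above allows it. -->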
+
+ <property>
+ <name>hive.enforce.bucketmapjoin</name>
+ <value>false</value>
+ <description>If the user asked for bucketed map-side join, and it
+ cannot be performed, should the query fail or not? For example, if the
+ buckets in the tables being joined are not a multiple of each other,
+ bucketed map-side join cannot be performed, and the query will fail if
+ hive.enforce.bucketmapjoin is set to true.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.script.maxerrsize</name>
+ <value>100000</value>
+ <description>Maximum number of bytes a script is allowed to emit to
+ standard error (per map-reduce task). This prevents runaway scripts
+ from filling logs partitions to capacity
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.script.allow.partial.consumption</name>
+ <value>false</value>
+ <description> When enabled, this option allows a user script to exit
+ successfully without consuming all the data from the standard input.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.operator.id.env.var</name>
+ <value>HIVE_SCRIPT_OPERATOR_ID</value>
+ <description> Name of the environment variable that holds the unique
+ script operator ID in the user's transform function (the custom
+ mapper/reducer that the user has specified in the query)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.operator.truncate.env</name>
+ <value>false</value>
+ <description>Truncate each environment variable for external script in
+ scripts operator to 20KB (to fit system limits)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.compress.output</name>
+ <value>false</value>
+ <description> This controls whether the final outputs of a query (to a
+ local/hdfs file or a hive table) is compressed. The compression codec
+ and other options are determined from hadoop config variables
+ mapred.output.compress*
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.compress.intermediate</name>
+ <value>false</value>
+ <description> This controls whether intermediate files produced by
+ hive between multiple map-reduce jobs are compressed. The compression
+ codec and other options are determined from hadoop config variables
+ mapred.output.compress*
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.parallel</name>
+ <value>false</value>
+ <description>Whether to execute jobs in parallel</description>
+ </property>
+
+ <property>
+ <name>hive.exec.parallel.thread.number</name>
+ <value>8</value>
+ <description>How many jobs at most can be executed in parallel
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.rowoffset</name>
+ <value>false</value>
+ <description>Whether to provide the row offset virtual column
+ </description>
+ </property>
+
+ <property>
+ <name>hive.task.progress</name>
+ <value>false</value>
+ <description>Whether Hive should periodically update task progress
+ counters during execution. Enabling this allows task progress to be
+ monitored more closely in the job tracker, but may impose a
+ performance penalty. This flag is automatically set to true for jobs
+ with hive.exec.dynamic.partition set to true.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hwi.war.file</name>
+ <value>lib/hive-hwi-@VERSION@.war</value>
+ <description>This sets the path to the HWI war file, relative to
+ ${HIVE_HOME}.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hwi.listen.host</name>
+ <value>0.0.0.0</value>
+ <description>This is the host address the Hive Web Interface will
+ listen on
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hwi.listen.port</name>
+ <value>9999</value>
+ <description>This is the port the Hive Web Interface will listen on
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.pre.hooks</name>
+ <value></value>
+ <description>Comma-separated list of pre-execution hooks to be invoked
+ for each statement. A pre-execution hook is specified as the name of
+ a Java class which implements the
+ org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.post.hooks</name>
+ <value></value>
+ <description>Comma-separated list of post-execution hooks to be
+ invoked for each statement. A post-execution hook is specified as the
+ name of a Java class which implements the
+ org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.failure.hooks</name>
+ <value></value>
+ <description>Comma-separated list of on-failure hooks to be invoked
+ for each statement. An on-failure hook is specified as the name of
+ Java class which implements the
+ org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.init.hooks</name>
+ <value></value>
+ <description>A comma separated list of hooks to be invoked at the
+ beginning of HMSHandler initialization. An init hook is specified as
+ the name of Java class which extends
+ org.apache.hadoop.hive.metastore.MetaStoreInitListener.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.client.stats.publishers</name>
+ <value></value>
+ <description>Comma-separated list of statistics publishers to be
+ invoked on counters on each job. A client stats publisher is
+ specified as the name of a Java class which implements the
+ org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.client.stats.counters</name>
+ <value></value>
+ <description>Subset of counters that should be of interest for
+ hive.client.stats.publishers (when one wants to limit their
+ publishing). Non-display names should be used
+ </description>
+ </property>
+
+ <property>
+ <name>hive.merge.mapfiles</name>
+ <value>true</value>
+ <description>Merge small files at the end of a map-only job
+ </description>
+ </property>
+
+ <property>
+ <name>hive.merge.mapredfiles</name>
+ <value>false</value>
+ <description>Merge small files at the end of a map-reduce job
+ </description>
+ </property>
+
+ <property>
+ <name>hive.heartbeat.interval</name>
+ <value>1000</value>
+ <description>Send a heartbeat after this interval - used by mapjoin
+ and filter operators
+ </description>
+ </property>
+
+ <property>
+ <name>hive.merge.size.per.task</name>
+ <value>256000000</value>
+ <description>Size of merged files at the end of the job</description>
+ </property>
+
+ <property>
+ <name>hive.merge.smallfiles.avgsize</name>
+ <value>16000000</value>
+ <description>When the average output file size of a job is less than
+ this number, Hive will start an additional map-reduce job to merge
+ the output files into bigger files. This is only done for map-only
+ jobs if hive.merge.mapfiles is true, and for map-reduce jobs if
+ hive.merge.mapredfiles is true.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.smalltable.filesize</name>
+ <value>25000000</value>
+ <description>The threshold for the input file size of the small
+ tables; if the file size is smaller than this threshold, it will try
+ to convert the common join into map join
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ignore.mapjoin.hint</name>
+ <value>true</value>
+ <description>Ignore the mapjoin hint</description>
+ </property>
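+
+ <!-- Illustrative example (hypothetical table names): the hint that this -->
+ <!-- flag ignores is written as -->
+ <!-- SELECT /*+ MAPJOIN(dim) */ f.id, d.name FROM fact f JOIN dim d ON (f.id = d.id); -->
+ <!-- with this property left at true, such hints have no effect and the -->
+ <!-- size-based settings below decide whether a map join is used. -->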
+
+ <property>
+ <name>hive.mapjoin.localtask.max.memory.usage</name>
+ <value>0.90</value>
+ <description>This number means how much memory the local task can take
+ to hold the key/value into the in-memory hash table; If the local
+ task's memory usage is more than this number, the local task will
+ abort by itself. It means the data of the small table is too large to
+ be held in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.followby.gby.localtask.max.memory.usage</name>
+ <value>0.55</value>
+ <description>This number means how much memory the local task can take
+ to hold the key/value into the in-memory hash table when this map join
+ is followed by a group by; If the local task's memory usage is more
+ than this number, the local task will abort by itself. It means the
+ data of the small table is too large to be held in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.check.memory.rows</name>
+ <value>100000</value>
+ <description>The number of rows processed after which the memory
+ usage needs to be checked
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join</name>
+ <value>false</value>
+ <description>Whether Hive enables the optimization of converting a
+ common join into a mapjoin based on the input file size
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join.noconditionaltask</name>
+ <value>true</value>
+ <description>Whether Hive enables the optimization of converting a
+ common join into a mapjoin based on the input file size. If this
+ parameter is on, and the sum of sizes for n-1 of the tables/partitions
+ for an n-way join is smaller than the specified size, the join is
+ directly converted to a mapjoin (there is no conditional task).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join.noconditionaltask.size</name>
+ <value>10000000</value>
+ <description>If hive.auto.convert.join.noconditionaltask is off, this
+ parameter does not take effect. However, if it is on, and the sum of
+ sizes for n-1 of the tables/partitions for an n-way join is smaller
+ than this size, the join is directly converted to a mapjoin (there is
+ no conditional task). The default is 10MB
+ </description>
+ </property>
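+
+ <!-- Illustrative note (assumed sizes): for a 3-way join where the two -->
+ <!-- smaller tables total 8MB (under the 10MB above), the whole join is -->
+ <!-- planned as a single map join with no conditional task; at 12MB it falls -->
+ <!-- back to the conditional-task behaviour. -->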
+
+ <property>
+ <name>hive.optimize.mapjoin.mapreduce</name>
+ <value>false</value>
+ <description>If hive.auto.convert.join is off, this parameter does not
+ take effect. If it is on, and if there are map-join jobs followed by a
+ map-reduce job (for e.g. a group by), each map-only job is merged with
+ the following map-reduce job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.auto.progress</name>
+ <value>false</value>
+ <description>Whether Hive Transform/Map/Reduce Clause should
+ automatically send progress information to TaskTracker to avoid the
+ task getting killed because of inactivity. Hive sends progress
+ information when the script is outputting to stderr. This option
+ removes the need of periodically producing stderr messages, but users
+ should be cautious because this may prevent infinite loops in the
+ scripts from being killed by TaskTracker.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.serde</name>
+ <value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value>
+ <description>The default serde for transmitting input data to and
+ reading output data from the user scripts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.binary.record.max.length</name>
+ <value>1000</value>
+ <description>Read from a binary stream and treat each
+ hive.binary.record.max.length bytes as a record.
+ The last record before the end of stream can have less than
+ hive.binary.record.max.length bytes
+ </description>
+ </property>
+
+
+ <property>
+ <name>hive.script.recordreader</name>
+ <value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value>
+ <description>The default record reader for reading data from the user
+ scripts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.recordwriter</name>
+ <value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value>
+ <description>The default record writer for writing data to the user
+ scripts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.input.format</name>
+ <value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
+ <description>The default input format. Set this to HiveInputFormat if
+ you encounter problems with CombineHiveInputFormat.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.udtf.auto.progress</name>
+ <value>false</value>
+ <description>Whether Hive should automatically send progress
+ information to TaskTracker when using UDTF's to prevent the task
+ getting killed because of inactivity. Users should be cautious
+ because this may prevent TaskTracker from killing tasks with infinite
+ loops.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.reduce.tasks.speculative.execution</name>
+ <value>true</value>
+ <description>Whether speculative execution for reducers should be
+ turned on.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.counters.pull.interval</name>
+ <value>1000</value>
+ <description>The interval with which to poll the JobTracker for the
+ counters of the running job. The smaller it is the more load there
+ will be on the jobtracker, the higher it is the less granular the
+ caught counters will be.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.querylog.location</name>
+ <value>/tmp/${user.name}</value>
+ <description>
+ Location of Hive run time structured log file
+ </description>
+ </property>
+
+ <property>
+ <name>hive.querylog.enable.plan.progress</name>
+ <value>true</value>
+ <description>
+ Whether to log the plan's progress every time a job's progress is
+ checked.
+ These logs are written to the location specified by
+ hive.querylog.location
+ </description>
+ </property>
+
+ <property>
+ <name>hive.querylog.plan.progress.interval</name>
+ <value>60000</value>
+ <description>
+ The interval to wait between logging the plan's progress in
+ milliseconds.
+ If there is a whole number percentage change in the progress of the
+ mappers or the reducers, the progress is logged regardless of this
+ value.
+ The actual interval will be the ceiling of (this value divided by the
+ value of hive.exec.counters.pull.interval) multiplied by the value of
+ hive.exec.counters.pull.interval, i.e. if it does not divide evenly by
+ the value of hive.exec.counters.pull.interval it will be logged less
+ frequently than specified.
+ This only has an effect if hive.querylog.enable.plan.progress is set
+ to true.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.enforce.bucketing</name>
+ <value>false</value>
+ <description>Whether bucketing is enforced. If true, while inserting
+ into the table, bucketing is enforced.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.enforce.sorting</name>
+ <value>false</value>
+ <description>Whether sorting is enforced. If true, while inserting
+ into the table, sorting is enforced.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.bucketingsorting</name>
+ <value>true</value>
+ <description>If hive.enforce.bucketing or hive.enforce.sorting is
+ true, don't create a reducer for enforcing bucketing/sorting for
+ queries of the form:
+ insert overwrite table T2 select * from T1;
+ where T1 and T2 are bucketed/sorted by the same keys into the same
+ number of buckets.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.enforce.sortmergebucketmapjoin</name>
+ <value>false</value>
+ <description>If the user asked for sort-merge bucketed map-side join,
+ and it cannot be performed, should the query fail or not?
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.sortmerge.join</name>
+ <value>false</value>
+ <description>Will the join be automatically converted to a sort-merge
+ join, if the joined tables pass the criteria for sort-merge join.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.sortmerge.join.bigtable.selection.policy
+ </name>
+ <value>org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ
+ </value>
+ <description>The policy to choose the big table for automatic
+ conversion to sort-merge join.
+ By default, the table with the largest partitions is assigned as the
+ big table. All policies are:
+ . based on position of the table - the leftmost table is selected
+ org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSMJ.
+ . based on total size (all the partitions selected in the query) of
+ the table
+ org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ.
+ . based on average size (all the partitions selected in the query) of
+ the table
+ org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.
+ New policies can be added in future.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.ds.connection.url.hook</name>
+ <value></value>
+ <description>Name of the hook to use for retrieving the JDO connection
+ URL. If empty, the value in javax.jdo.option.ConnectionURL is used
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.ds.retry.attempts</name>
+ <value>1</value>
+ <description>The number of times to retry a metastore call if there
+ is a connection error
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.ds.retry.interval</name>
+ <value>1000</value>
+ <description>The number of milliseconds between metastore retry
+ attempts
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.server.min.threads</name>
+ <value>200</value>
+ <description>Minimum number of worker threads in the Thrift server's
+ pool.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.server.max.threads</name>
+ <value>100000</value>
+ <description>Maximum number of worker threads in the Thrift server's
+ pool.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.server.tcp.keepalive</name>
+ <value>true</value>
+ <description>Whether to enable TCP keepalive for the metastore server.
+ Keepalive will prevent accumulation of half-open connections.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.sasl.enabled</name>
+ <value>false</value>
+ <description>If true, the metastore thrift interface will be secured
+ with SASL. Clients must authenticate with Kerberos.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.thrift.framed.transport.enabled</name>
+ <value>false</value>
+ <description>If true, the metastore thrift interface will use
+ TFramedTransport. When false (default) a standard TTransport is used.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.kerberos.keytab.file</name>
+ <value></value>
+ <description>The path to the Kerberos Keytab file containing the
+ metastore thrift server's service principal.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.kerberos.principal</name>
+ <value>hive-metastore/_HOST@EXAMPLE.COM</value>
+ <description>The service principal for the metastore thrift server.
+ The special string _HOST will be replaced automatically with the
+ correct host name.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.class</name>
+ <value>org.apache.hadoop.hive.thrift.MemoryTokenStore</value>
+ <description>The delegation token store implementation. Set to
+ org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced
+ cluster.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.zookeeper.connectString
+ </name>
+ <value>localhost:2181</value>
+ <description>The ZooKeeper token store connect string.</description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.zookeeper.znode</name>
+ <value>/hive/cluster/delegation</value>
+ <description>The root path for token store data.</description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.zookeeper.acl</name>
+ <value>sasl:hive/host1@EXAMPLE.COM:cdrwa,sasl:hive/host2@EXAMPLE.COM:cdrwa
+ </value>
+ <description>ACL for token store entries. List comma separated all
+ server principals for the cluster.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.cache.pinobjtypes</name>
+ <value>Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order
+ </value>
+ <description>List of comma separated metastore object types that
+ should be pinned in the cache
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.reducededuplication</name>
+ <value>true</value>
+ <description>Remove extra map-reduce jobs if the data is already
+ clustered by the same key which needs to be used again. This should
+ always be set to true. Since it is a new feature, it has been made
+ configurable.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.reducededuplication.min.reducer</name>
+ <value>4</value>
+ <description>Reduce deduplication merges two RSs by moving
+ key/parts/reducer-num of the child RS to the parent RS.
+ That means if the reducer-num of the child RS is fixed (order by or
+ forced bucketing) and small, it can make a very slow, single MR.
+ The optimization will be disabled if the number of reducers is less
+ than the specified value.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.dynamic.partition</name>
+ <value>true</value>
+ <description>Whether or not to allow dynamic partitions in DML/DDL.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.dynamic.partition.mode</name>
+ <value>strict</value>
+ <description>In strict mode, the user must specify at least one static
+ partition in case the user accidentally overwrites all partitions.
+ </description>
+ </property>
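+
+ <!-- Illustrative example (hypothetical table and columns): in strict mode -->
+ <!-- an insert must pin at least one static partition, e.g. -->
+ <!-- INSERT OVERWRITE TABLE logs PARTITION (dt='2013-01-01', country) -->
+ <!-- SELECT msg, country FROM staging; -->
+ <!-- here dt is static and country is filled in dynamically. -->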
+
+ <property>
+ <name>hive.exec.max.dynamic.partitions</name>
+ <value>1000</value>
+ <description>Maximum number of dynamic partitions allowed to be
+ created in total.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.max.dynamic.partitions.pernode</name>
+ <value>100</value>
+ <description>Maximum number of dynamic partitions allowed to be
+ created in each mapper/reducer node.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.max.created.files</name>
+ <value>100000</value>
+ <description>Maximum number of HDFS files created by all
+ mappers/reducers in a MapReduce job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.default.partition.name</name>
+ <value>__HIVE_DEFAULT_PARTITION__</value>
+ <description>The default partition name in case the dynamic partition
+ column value is null/empty string or any other value that cannot be
+ escaped. This value must not contain any special character used in
+ HDFS URI (e.g., ':', '%', '/' etc). The user has to be aware that the
+ dynamic partition value should not contain this value to avoid
+ confusions.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.dbclass</name>
+ <value>jdbc:derby</value>
+ <description>The default database that stores temporary hive
+ statistics.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.autogather</name>
+ <value>true</value>
+ <description>A flag to gather statistics automatically during the
+ INSERT OVERWRITE command.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.jdbcdriver</name>
+ <value>org.apache.derby.jdbc.EmbeddedDriver</value>
+ <description>The JDBC driver for the database that stores temporary
+ hive statistics.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.dbconnectionstring</name>
+ <value>jdbc:derby:;databaseName=TempStatsStore;create=true</value>
+ <description>The default connection string for the database that
+ stores temporary hive statistics.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.default.publisher</name>
+ <value></value>
+ <description>The Java class (implementing the StatsPublisher
+ interface) that is used by default if hive.stats.dbclass is not JDBC
+ or HBase.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.default.aggregator</name>
+ <value></value>
+ <description>The Java class (implementing the StatsAggregator
+ interface) that is used by default if hive.stats.dbclass is not JDBC
+ or HBase.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.jdbc.timeout</name>
+ <value>30</value>
+ <description>Timeout value (number of seconds) used by JDBC connection
+ and statements.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.retries.max</name>
+ <value>0</value>
+ <description>Maximum number of retries when stats publisher/aggregator
+ got an exception updating intermediate database. Default is no tries
+ on failures.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.retries.wait</name>
+ <value>3000</value>
+ <description>The base waiting window (in milliseconds) before the next
+ retry. The actual wait time is calculated by baseWindow * failures +
+ baseWindow * (failures + 1) * (random number between [0.0,1.0]).
+ </description>
+ </property>
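+
+ <!-- Illustrative arithmetic (assumed, using the formula above): with the -->
+ <!-- 3000 ms base window and 2 previous failures, the wait is -->
+ <!-- 3000 * 2 + 3000 * 3 * r for a random r in [0.0,1.0], i.e. somewhere -->
+ <!-- between 6 and 15 seconds. -->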
+
+ <property>
+ <name>hive.stats.reliable</name>
+ <value>false</value>
+ <description>Whether queries will fail because stats cannot be
+ collected completely accurately.
+ If this is set to true, reading/writing from/into a partition may fail
+ because the stats could not be computed accurately.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.collect.tablekeys</name>
+ <value>false</value>
+ <description>Whether join and group by keys on tables are derived and
+ maintained in the QueryPlan.
+ This is useful to identify how tables are accessed and to determine
+ if they should be bucketed.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.collect.scancols</name>
+ <value>false</value>
+ <description>Whether column accesses are tracked in the QueryPlan.
+ This is useful to identify how tables are accessed and to determine
+ if there are wasted columns that can be trimmed.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.ndv.error</name>
+ <value>20.0</value>
+ <description>Standard error expressed in percentage. Provides a
+ tradeoff between accuracy and compute cost. A lower value for error
+ indicates higher accuracy and a higher compute cost.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.key.prefix.max.length</name>
+ <value>200</value>
+ <description>
+ Determines if, when the prefix of the key used for intermediate stats
+ collection exceeds a certain length, a hash of the key is used
+ instead. If the value &lt; 0 then hashing is never used, if the value
+ &gt;= 0 then hashing is used only when the key prefix's length exceeds
+ that value. The key prefix is defined as everything preceding the
+ task ID in the key.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.support.concurrency</name>
+ <value>false</value>
+ <description>Whether hive supports concurrency or not. A zookeeper
+ instance must be up and running for the default hive lock manager to
+ support read-write locks.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.lock.numretries</name>
+ <value>100</value>
+ <description>The number of times you want to try to get all the locks
+ </description>
+ </property>
+
+ <property>
+ <name>hive.unlock.numretries</name>
+ <value>10</value>
+ <description>The number of times you want to retry to do one unlock
+ </description>
+ </property>
+
+ <property>
+ <name>hive.lock.sleep.between.retries</name>
+ <value>60</value>
+ <description>The sleep time (in seconds) between various retries
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.quorum</name>
+ <value></value>
+ <description>The list of zookeeper servers to talk to. This is only
+ needed for read/write locks.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.client.port</name>
+ <value>2181</value>
+ <description>The port of zookeeper servers to talk to. This is only
+ needed for read/write locks.
+ </description>
+ </property>
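+
+ <!-- Illustrative example (assumed hostnames): using the ZooKeeper-backed -->
+ <!-- lock manager requires hive.support.concurrency set to true together -->
+ <!-- with a quorum, e.g. -->
+ <!-- <name>hive.zookeeper.quorum</name> -->
+ <!-- <value>zk1.example.com,zk2.example.com,zk3.example.com</value> -->
+ <!-- the empty default above leaves read/write locking unavailable. -->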
+
+ <property>
+ <name>hive.zookeeper.session.timeout</name>
+ <value>600000</value>
+ <description>Zookeeper client's session timeout. The client is
+ disconnected, and as a result, all locks released, if a heartbeat is
+ not sent in the timeout.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.namespace</name>
+ <value>hive_zookeeper_namespace</value>
+ <description>The parent node under which all zookeeper nodes are
+ created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.clean.extra.nodes</name>
+ <value>false</value>
+ <description>Clean extra nodes at the end of the session.
+ </description>
+ </property>
+
+ <property>
+ <name>fs.har.impl</name>
+ <value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
+ <description>The implementation for accessing Hadoop Archives. Note
+ that this won't be applicable to Hadoop versions less than 0.20
+ </description>
+ </property>
+
+ <property>
+ <name>hive.archive.enabled</name>
+ <value>false</value>
+ <description>Whether archiving operations are permitted</description>
+ </property>
+
+ <property>
+ <name>hive.fetch.output.serde</name>
+ <value>org.apache.hadoop.hive.serde2.DelimitedJSONSerDe</value>
+ <description>The serde used by FetchTask to serialize the fetch
+ output.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.mode.local.auto</name>
+ <value>false</value>
+ <description> Let hive determine whether to run in local mode
+ automatically
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.drop.ignorenonexistent</name>
+ <value>true</value>
+ <description>
+ Do not report an error if DROP TABLE/VIEW specifies a non-existent
+ table/view
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.show.job.failure.debug.info</name>
+ <value>true</value>
+ <description>
+ If a job fails, whether to provide a link in the CLI to the task with
+ the most failures, along with debugging hints if applicable.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.progress.timeout</name>
+ <value>0</value>
+ <description>
+ How long to run autoprogressor for the script/UDTF operators (in
+ seconds).
+ Set to 0 for forever.
+ </description>
+ </property>
+
+ <!-- HBase Storage Handler Parameters -->
+
+ <property>
+ <name>hive.hbase.wal.enabled</name>
+ <value>true</value>
+ <description>Whether writes to HBase should be forced to the
+ write-ahead log. Disabling this improves HBase write performance at
+ the risk of lost writes in case of a crash.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.table.parameters.default</name>
+ <value></value>
+ <description>Default property values for newly created tables
+ </description>
+ </property>
+
+ <property>
+ <name>hive.entity.separator</name>
+ <value>@</value>
+ <description>Separator used to construct names of tables and
+ partitions. For example, dbname@tablename@partitionname
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ddl.createtablelike.properties.whitelist</name>
+ <value></value>
+ <description>Table Properties to copy over when executing a Create
+ Table Like.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.variable.substitute</name>
+ <value>true</value>
+ <description>This enables substitution using syntax like ${var}
+ ${system:var} and ${env:var}.
+ </description>
+ </property>
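+
+ <!-- Illustrative example (hypothetical variable and table): -->
+ <!-- set hiveconf:mydate=2013-01-01; -->
+ <!-- SELECT * FROM logs WHERE dt = '${hiveconf:mydate}'; -->
+ <!-- the reference is expanded before the query runs; turning -->
+ <!-- hive.variable.substitute off passes the literal text through instead. -->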
+
+ <property>
+ <name>hive.variable.substitute.depth</name>
+ <value>40</value>
+ <description>The maximum replacements the substitution engine will do.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.conf.validation</name>
+ <value>true</value>
+ <description>Enables type checking for registered hive configurations
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.enabled</name>
+ <value>false</value>
+ <description>enable or disable the hive client authorization
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.user.grants</name>
+ <value></value>
+ <description>the privileges automatically granted to some users
+ whenever a table gets created.
+ An example like "userX,userY:select;userZ:create" will grant select
+ privilege to userX and userY, and grant create privilege to userZ
+ whenever a new table is created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.group.grants</name>
+ <value></value>
+ <description>the privileges automatically granted to some groups
+ whenever a table gets created.
+ An example like "groupX,groupY:select;groupZ:create" will grant select
+ privilege to groupX and groupY, and grant create privilege to groupZ
+ whenever a new table is created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.role.grants</name>
+ <value></value>
+ <description>the privileges automatically granted to some roles
+ whenever a table gets created.
+ An example like "roleX,roleY:select;roleZ:create" will grant select
+ privilege to roleX and roleY, and grant create privilege to roleZ
+ whenever a new table is created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.owner.grants</name>
+ <value></value>
+ <description>the privileges automatically granted to the owner
+ whenever a table gets created.
+ An example like "select,drop" will grant select and drop privilege to
+ the owner of the table
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.authorization.storage.checks</name>
+ <value>false</value>
+ <description>Should the metastore do authorization checks against the
+ underlying storage for operations like drop-partition (disallow the
+ drop-partition if the user in question doesn't have permissions to
+ delete the corresponding directory on the storage).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.error.on.empty.partition</name>
+ <value>false</value>
+ <description>Whether to throw an exception if dynamic partition insert
+ generates empty results.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.file.ignore.hdfs</name>
+ <value>false</value>
+ <description>If true, the hdfs location stored in the index file will
+ be ignored at runtime.
+ If the data got moved or the name of the cluster got changed, the
+ index data should still be usable.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.filter.compact.minsize</name>
+ <value>5368709120</value>
+ <description>Minimum size (in bytes) of the inputs on which a compact
+ index is automatically used.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.filter.compact.maxsize</name>
+ <value>-1</value>
+ <description>Maximum size (in bytes) of the inputs on which a compact
+ index is automatically used.
+ A negative number is equivalent to
+ infinity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.query.max.size</name>
+ <value>10737418240</value>
+ <description>The maximum number of bytes that a query using the
+ compact index can read. Negative value is equivalent to infinity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.query.max.entries</name>
+ <value>10000000</value>
+ <description>The maximum number of index entries to read during a
+ query that uses the compact index. Negative value is equivalent to
+ infinity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.binary.search</name>
+ <value>true</value>
+ <description>Whether or not to use a binary search to find the entries
+ in an index table that match the filter, where possible
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exim.uri.scheme.whitelist</name>
+ <value>hdfs,pfile</value>
+ <description>A comma separated list of acceptable URI schemes for
+ import and export.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.lock.mapred.only.operation</name>
+ <value>false</value>
+ <description>This param is to control whether or not to only lock
+ queries that need to execute at least one mapred job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.row.max.size</name>
+ <value>100000</value>
+ <description>When trying a smaller subset of data for simple LIMIT,
+ how much size we need to guarantee each row to have at least.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.optimize.limit.file</name>
+ <value>10</value>
+ <description>When trying a smaller subset of data for simple LIMIT,
+ maximum number of files we can sample.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.optimize.enable</name>
+ <value>false</value>
+ <description>Whether to enable the optimization of trying a smaller
+ subset of data for simple LIMIT first.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.optimize.fetch.max</name>
+ <value>50000</value>
+ <description>Maximum number of rows allowed for a smaller subset of
+ data for simple LIMIT, if it is a fetch query.
+ Insert queries are not
+ restricted by this limit.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.rework.mapredwork</name>
+ <value>false</value>
+ <description>Whether to rework the mapred work or not.
+ This was first
+ introduced by SymlinkTextInputFormat to replace symlink
+ files with
+ real paths at compile time.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.concatenate.check.index</name>
+ <value>true</value>
+ <description>If this is set to true, hive will throw an error when doing
+ 'alter table tbl_name [partSpec] concatenate' on a table/partition
+ that has indexes on it. The reason to set this to true
+ is that it helps the user avoid handling all the index drop,
+ recreation,
+ and rebuild work. This is very helpful for tables with
+ thousands of partitions.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.sample.seednumber</name>
+ <value>0</value>
+ <description>A number used for percentage sampling. By changing this
+ number, the user will change the subsets
+ of data sampled.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.io.exception.handlers</name>
+ <value></value>
+ <description>A list of io exception handler class names. This is used
+ to construct a list of exception handlers to handle exceptions thrown
+ by
+ record readers
+ </description>
+ </property>
+
+ <property>
+ <name>hive.autogen.columnalias.prefix.label</name>
+ <value>_c</value>
+ <description>String used as a prefix when auto generating column
+ alias.
+ By default the prefix label will be appended with a column
+ position
+ number to form the column alias. Auto generation would happen
+ if an
+ aggregate function is used in a select clause without an
+ explicit
+ alias.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.autogen.columnalias.prefix.includefuncname</name>
+ <value>false</value>
+ <description>Whether to include function name in the column alias auto
+ generated by hive.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.perf.logger</name>
+ <value>org.apache.hadoop.hive.ql.log.PerfLogger</value>
+ <description>The class responsible for logging client side performance
+ metrics. Must be a subclass of
+ org.apache.hadoop.hive.ql.log.PerfLogger
+ </description>
+ </property>
+
+ <property>
+ <name>hive.start.cleanup.scratchdir</name>
+ <value>false</value>
+ <description>To clean up the hive scratchdir while starting the hive
+ server
+ </description>
+ </property>
+
+ <property>
+ <name>hive.output.file.extension</name>
+ <value></value>
+ <description>String used as a file extension for output files. If not
+ set, defaults to the codec extension for text files (e.g. ".gz"), or
+ no extension otherwise.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.insert.into.multilevel.dirs</name>
+ <value>false</value>
+ <description>Whether to insert into multilevel directories like
+ "insert
+ directory '/HIVEFT25686/chinna/' from table"
+ </description>
+ </property>
+
+ <property>
+ <name>hive.warehouse.subdir.inherit.perms</name>
+ <value>false</value>
+ <description>Set this to true if the table directories should
+ inherit the
+ permission of the warehouse or database directory instead
+ of being created
+ with the permissions derived from dfs umask
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.job.debug.capture.stacktraces</name>
+ <value>true</value>
+ <description>Whether or not stack traces parsed from the task logs of
+ a sampled failed task for
+ each failed job should be stored in the
+ SessionState
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.driver.run.hooks</name>
+ <value></value>
+ <description>A comma separated list of hooks which implement
+ HiveDriverRunHook and will be run at the
+ beginning and end of
+ Driver.run; these will be run in the order specified
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ddl.output.format</name>
+ <value>text</value>
+ <description>
+ The data format to use for DDL output. One of "text" (for
+ human
+ readable text) or "json" (for a json object).
+ </description>
+ </property>
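+
+ <!-- Illustrative note (a sketch, not an additional default): the format
+ above can also be switched per session from the Hive CLI, e.g.
+ set hive.ddl.output.format=json;
+ -->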
+
+ <property>
+ <name>hive.transform.escape.input</name>
+ <value>false</value>
+ <description>
+ This adds an option to escape special chars (newlines,
+ carriage returns
+ and
+ tabs) when they are passed to the user script.
+ This is useful if the hive
+ tables
+ can contain data that contains
+ special characters.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.rcfile.use.explicit.header</name>
+ <value>true</value>
+ <description>
+ If this is set the header for RC Files will simply be
+ RCF. If this is
+ not
+ set the header will be that borrowed from sequence
+ files, e.g. SEQ-
+ followed
+ by the input and output RC File formats.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.multi.insert.move.tasks.share.dependencies</name>
+ <value>false</value>
+ <description>
+ If this is set all move tasks for tables/partitions (not
+ directories)
+ at the end of a
+ multi-insert query will only begin once
+ the dependencies for all these move tasks
+ have been
+ met.
+ Advantages: If
+ concurrency is enabled, the locks will only be released once the
+ query has
+ finished, so with this config enabled, the time when the
+ table/partition is
+ generated will be much closer to when the lock on
+ it is released.
+ Disadvantages: If concurrency is not enabled, with
+ this disabled, the
+ tables/partitions which
+ are produced by this query
+ and finish earlier will be available for
+ querying
+ much earlier. Since
+ the locks are only released once the query finishes,
+ this
+ does not
+ apply if concurrency is enabled.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.fetch.task.conversion</name>
+ <value>minimal</value>
+ <description>
+ Some select queries can be converted to single FETCH task
+ minimizing
+ latency.
+ Currently the query should be single sourced not
+ having any subquery and
+ should not have
+ any aggregations or distincts
+ (which incurs RS), lateral views and
+ joins.
+ 1. minimal : SELECT STAR,
+ FILTER on partition columns, LIMIT only
+ 2. more : SELECT, FILTER,
+ LIMIT only (TABLESAMPLE, virtual columns)
+ </description>
+ </property>
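+
+ <!-- Illustrative example (table and column names are hypothetical): with
+ the "minimal" setting above, a query such as
+ SELECT * FROM t WHERE ds = '2013-01-01' LIMIT 10;
+ where ds is a partition column of t, can be served by a single FETCH
+ task, since it is a SELECT STAR with a filter on a partition column and
+ a LIMIT only.
+ -->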
+
+ <property>
+ <name>hive.hmshandler.retry.attempts</name>
+ <value>1</value>
+ <description>The number of times to retry an HMSHandler call if there
+ was a connection error
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hmshandler.retry.interval</name>
+ <value>1000</value>
+ <description>The number of milliseconds between HMSHandler retry
+ attempts
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server.read.socket.timeout</name>
+ <value>10</value>
+ <description>Timeout for the HiveServer to close the connection if no
+ response from the client in N seconds, defaults to 10 seconds.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server.tcp.keepalive</name>
+ <value>true</value>
+ <description>Whether to enable TCP keepalive for the Hive server.
+ Keepalive will prevent accumulation of half-open connections.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.decode.partition.name</name>
+ <value>false</value>
+ <description>Whether to show the unquoted partition names in query
+ results.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.log4j.file</name>
+ <value></value>
+ <description>Hive log4j configuration file.
+ If the property is not set,
+ then logging will be initialized using
+ hive-log4j.properties found on
+ the classpath.
+ If the property is set, the value must be a valid URI
+ (java.net.URI,
+ e.g. "file:///tmp/my-logging.properties"), which you
+ can then extract
+ a URL from and pass to
+ PropertyConfigurator.configure(URL).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.log4j.file</name>
+ <value></value>
+ <description>Hive log4j configuration file for execution mode (sub
+ command).
+ If the property is not set, then logging will be initialized
+ using
+ hive-exec-log4j.properties found on the classpath.
+ If the
+ property is set, the value must be a valid URI (java.net.URI,
+ e.g.
+ "file:///tmp/my-logging.properties"), which you can then extract
+ a URL
+ from and pass to PropertyConfigurator.configure(URL).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.infer.bucket.sort</name>
+ <value>false</value>
+ <description>
+ If this is set, when writing partitions, the metadata
+ will include the
+ bucketing/sorting
+ properties with which the data was
+ written if any (this will not overwrite the
+ metadata
+ inherited from the
+ table if the table is bucketed/sorted)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.infer.bucket.sort.num.buckets.power.two</name>
+ <value>false</value>
+ <description>
+ If this is set, when setting the number of reducers for
+ the map reduce
+ task which writes the
+ final output files, it will choose
+ a number which is a power of two,
+ unless the user specifies
+ the number
+ of reducers to use using mapred.reduce.tasks. The number of
+ reducers
+ may be set to a power of two, only to be followed by a merge task,
+ which prevents
+ anything from being inferred.
+ With
+ hive.exec.infer.bucket.sort set to true:
+ Advantages: If this is not
+ set, the number of buckets for partitions will seem
+ arbitrary,
+ which
+ means that the number of mappers used for optimized joins, for
+ example, will
+ be very low. With this set, since the number of buckets
+ used for any
+ partition is
+ a power of two, the number of mappers used
+ for optimized joins will be
+ the least
+ number of buckets used by any
+ partition being joined.
+ Disadvantages: This may mean a much larger or
+ much smaller number of reducers being
+ used in the
+ final map reduce job,
+ e.g. if a job was originally going to take 257
+ reducers,
+ it will now
+ take 512 reducers, similarly if the max number of reducers
+ is 511,
+ and
+ a job was going to use this many, it will now use 256 reducers.
+
+ </description>
+ </property>
+
+ <property>
+ <name>hive.groupby.orderby.position.alias</name>
+ <value>false</value>
+ <description>Whether to enable using Column Position Alias in Group By
+ or Order By
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.min.worker.threads</name>
+ <value>5</value>
+ <description>Minimum number of Thrift worker threads</description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.max.worker.threads</name>
+ <value>100</value>
+ <description>Maximum number of Thrift worker threads</description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.port</name>
+ <value>10000</value>
+ <description>Port number of HiveServer2 Thrift interface.
+ Can be
+ overridden by setting $HIVE_SERVER2_THRIFT_PORT
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.bind.host</name>
+ <value>localhost</value>
+ <description>Bind host on which to run the HiveServer2 Thrift
+ interface.
+ Can be overridden by setting $HIVE_SERVER2_THRIFT_BIND_HOST
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication</name>
+ <value>NONE</value>
+ <description>
+ Client authentication types.
+ NONE: no authentication check
+ LDAP: LDAP/AD based authentication
+ KERBEROS: Kerberos/GSSAPI
+ authentication
+ CUSTOM: Custom authentication provider
+ (Use with
+ property hive.server2.custom.authentication.class)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.custom.authentication.class</name>
+ <value></value>
+ <description>
+ Custom authentication class. Used when property
+ 'hive.server2.authentication' is set to 'CUSTOM'. Provided class
+ must
+ be a proper implementation of the interface
+ org.apache.hive.service.auth.PasswdAuthenticationProvider.
+ HiveServer2
+ will call its Authenticate(user, password) method to
+ authenticate requests.
+ The implementation may optionally extend the
+ Hadoop's
+ org.apache.hadoop.conf.Configured class to grab Hive's
+ Configuration
+ object.
+ </description>
+ </property>
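+
+ <!-- Illustrative sketch (the class name is hypothetical): a deployment
+ that wants custom authentication would typically set
+ <property>
+ <name>hive.server2.authentication</name>
+ <value>CUSTOM</value>
+ </property>
+ <property>
+ <name>hive.server2.custom.authentication.class</name>
+ <value>com.example.MyPasswdAuthenticationProvider</value>
+ </property>
+ where the class implements
+ org.apache.hive.service.auth.PasswdAuthenticationProvider.
+ -->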
+
+ <property>
+ <name>hive.server2.authentication.kerberos.principal</name>
+ <value></value>
+ <description>
+ Kerberos server principal
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.kerberos.keytab</name>
+ <value></value>
+ <description>
+ Kerberos keytab file for server principal
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.ldap.url</name>
+ <value></value>
+ <description>
+ LDAP connection URL
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.ldap.baseDN</name>
+ <value></value>
+ <description>
+ LDAP base DN
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.enable.doAs</name>
+ <value>true</value>
+ <description>
+ Setting this property to true will have hive server2
+ execute
+ hive operations as the user making the calls to it.
+ </description>
+ </property>
+
+
+</configuration>
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml
index c726bfa..49cdedf 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml
@@ -1,22 +1,66 @@
<?xml version="1.0"?>
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
+ <!-- Hivesterix Execution Parameters -->
+ <property>
+ <name>hive.hyracks.connectorpolicy</name>
+ <value>PIPELINING</value>
+ </property>
+
+ <property>
+ <name>hive.hyracks.parrallelism</name>
+ <value>4</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.groupby.external</name>
+ <value>true</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.groupby.external.memory</name>
+ <value>3072</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.sort.memory</name>
+ <value>3072</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.framesize</name>
+ <value>768</value>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join</name>
+ <value>false</value>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join.noconditionaltask</name>
+ <value>false</value>
+ <description>Whether Hive enables the optimization of converting a
+ common join into a mapjoin based on the input file
+ size. If this parameter is on, and the sum of sizes for n-1 of the
+ tables/partitions of an n-way join is smaller than the
+ specified size, the join is directly converted to a mapjoin (there is no
+ conditional task).
+ </description>
+ </property>
+
+
<!-- Hive Configuration can either be stored in this file or in the hadoop
configuration files -->
<!-- that are implied by Hadoop setup variables. -->
@@ -42,66 +86,6 @@
</property>
<property>
- <name>hive.auto.convert.join.noconditionaltask</name>
- <value>false</value>
- <description>Whether Hive enable the optimization about converting common join into mapjoin based on the input file
- size. If this paramater is on, and the sum of size for n-1 of the tables/partitions for a n-way join is smaller than the
- specified size, the join is directly converted to a mapjoin (there is no conditional task).
- </description>
- </property>
-
- <property>
- <name>hive.auto.convert.join</name>
- <value>false</value>
- </property>
-
- <property>
- <name>hive.hyracks.connectorpolicy</name>
- <value>SEND_SIDE_MAT_PIPELINING</value>
- </property>
-
- <property>
- <name>hive.hyracks.host</name>
- <value>127.0.0.1</value>
- </property>
-
- <property>
- <name>hive.hyracks.port</name>
- <value>13099</value>
- </property>
-
- <property>
- <name>hive.hyracks.app</name>
- <value>hivesterix</value>
- </property>
-
-
- <property>
- <name>hive.hyracks.parrallelism</name>
- <value>2</value>
- </property>
-
- <property>
- <name>hive.algebricks.groupby.external</name>
- <value>true</value>
- </property>
-
- <property>
- <name>hive.algebricks.groupby.external.memory</name>
- <value>3072</value>
- </property>
-
- <property>
- <name>hive.algebricks.sort.memory</name>
- <value>3072</value>
- </property>
-
- <property>
- <name>hive.algebricks.framesize</name>
- <value>768</value>
- </property>
-
- <property>
<name>hive.exec.reducers.bytes.per.reducer</name>
<value>1000000000</value>
<description>size per reducer.The default is 1G, i.e if the input size
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u8_order_by.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u8_order_by.hive
new file mode 100644
index 0000000..6efd2ae
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u8_order_by.hive
@@ -0,0 +1,8 @@
+drop table IF EXISTS nation;
+drop table IF EXISTS u8_non_mapred;
+
+create external table nation (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/nation';
+create table u8_order_by (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING);
+
+insert overwrite table u8_order_by
+select * FROM nation order by N_NATIONKEY;
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u8_order_by.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u8_order_by.result
new file mode 100644
index 0000000..719b246
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u8_order_by.result
@@ -0,0 +1,25 @@
+0ALGERIA0 haggle. carefully final deposits detect slyly agai
+1ARGENTINA1al foxes promise slyly according to the regular accounts. bold requests alon
+2BRAZIL1y alongside of the pending deposits. carefully special packages are about the ironic forges. slyly special
+3CANADA1eas hang ironic, silent packages. slyly regular packages are furiously over the tithes. fluffily bold
+4EGYPT4y above the carefully unusual theodolites. final dugouts are quickly across the furiously regular d
+5ETHIOPIA0ven packages wake quickly. regu
+6FRANCE3refully final requests. regular, ironi
+7GERMANY3l platelets. regular accounts x-ray: unusual, regular acco
+8INDIA2ss excuses cajole slyly across the packages. deposits print aroun
+9INDONESIA2 slyly express asymptotes. regular deposits haggle slyly. carefully ironic hockey players sleep blithely. carefull
+10IRAN4efully alongside of the slyly final dependencies.
+11IRAQ4nic deposits boost atop the quickly final requests? quickly regula
+12JAPAN2ously. final, express gifts cajole a
+13JORDAN4ic deposits are blithely about the carefully regular pa
+14KENYA0 pending excuses haggle furiously deposits. pending, express pinto beans wake fluffily past t
+15MOROCCO0rns. blithely bold courts among the closely regular packages use furiously bold platelets?
+16MOZAMBIQUE0s. ironic, unusual asymptotes wake blithely r
+17PERU1platelets. blithely pending dependencies use fluffily across the even pinto beans. carefully silent accoun
+18CHINA2c dependencies. furiously express notornis sleep slyly regular accounts. ideas sleep. depos
+19ROMANIA3ular asymptotes are about the furious multipliers. express dependencies nag above the ironically ironic account
+20SAUDI ARABIA4ts. silent requests haggle. closely express packages sleep across the blithely
+21VIETNAM2hely enticingly express accounts. even, final
+22RUSSIA3 requests against the platelets use never according to the quickly regular pint
+23UNITED KINGDOM3eans boost carefully special requests. accounts are. carefull
+24UNITED STATES1y final packages. slow foxes cajole quickly. quickly silent platelets breach ironic accounts. unusual pinto be
diff --git a/hivesterix/hivesterix-runtime/pom.xml b/hivesterix/hivesterix-runtime/pom.xml
index 6d075ba..f1fcaf8 100644
--- a/hivesterix/hivesterix-runtime/pom.xml
+++ b/hivesterix/hivesterix-runtime/pom.xml
@@ -99,6 +99,11 @@
<scope>compile</scope>
</dependency>
<dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-hbase-handler</artifactId>
+ <version>0.11.0</version>
+ </dependency>
+ <dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-compiler</artifactId>
<version>0.2.7-SNAPSHOT</version>
diff --git a/hivesterix/hivesterix-serde/pom.xml b/hivesterix/hivesterix-serde/pom.xml
index 5b02ab3..cacf063 100644
--- a/hivesterix/hivesterix-serde/pom.xml
+++ b/hivesterix/hivesterix-serde/pom.xml
@@ -72,7 +72,7 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
- <version>3.8.1</version>
+ <version>4.8.1</version>
<scope>test</scope>
</dependency>
</dependencies>
diff --git a/hivesterix/hivesterix-translator/pom.xml b/hivesterix/hivesterix-translator/pom.xml
index b4c4454..00528aa 100644
--- a/hivesterix/hivesterix-translator/pom.xml
+++ b/hivesterix/hivesterix-translator/pom.xml
@@ -42,13 +42,6 @@
<dependencies>
<dependency>
- <groupId>org.apache.hive</groupId>
- <artifactId>hive-exec</artifactId>
- <version>0.11.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-compiler</artifactId>
<version>0.2.7-SNAPSHOT</version>
diff --git a/pregelix/pregelix-dist/pom.xml b/pregelix/pregelix-dist/pom.xml
index a868ff2..7a8554b 100644
--- a/pregelix/pregelix-dist/pom.xml
+++ b/pregelix/pregelix-dist/pom.xml
@@ -1,19 +1,15 @@
<?xml version="1.0"?>
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
@@ -38,22 +34,22 @@
</configuration>
</plugin>
<plugin>
- <artifactId>maven-assembly-plugin</artifactId>
- <version>2.2-beta-5</version>
- <executions>
- <execution>
- <configuration>
- <descriptors>
- <descriptor>src/main/assembly/binary-assembly.xml</descriptor>
- </descriptors>
- </configuration>
- <phase>package</phase>
- <goals>
- <goal>attached</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <version>2.2-beta-5</version>
+ <executions>
+ <execution>
+ <configuration>
+ <descriptors>
+ <descriptor>src/main/assembly/binary-assembly.xml</descriptor>
+ </descriptors>
+ </configuration>
+ <phase>package</phase>
+ <goals>
+ <goal>attached</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
</plugins>
</build>
<dependencies>
diff --git a/pregelix/pregelix-dist/src/main/assembly/binary-assembly.xml b/pregelix/pregelix-dist/src/main/assembly/binary-assembly.xml
index ab46338..a0fc2ab 100644
--- a/pregelix/pregelix-dist/src/main/assembly/binary-assembly.xml
+++ b/pregelix/pregelix-dist/src/main/assembly/binary-assembly.xml
@@ -1,17 +1,12 @@
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
<assembly>
<id>binary-assembly</id>
<formats>
@@ -31,25 +26,25 @@
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
- <directory>../pregelix-core/target/appassembler/lib</directory>
- <outputDirectory>lib</outputDirectory>
- <includes>
- <include>*.jar</include>
- </includes>
- <fileMode>0755</fileMode>
- </fileSet>
- <fileSet>
- <directory>../pregelix-example/target</directory>
- <outputDirectory>examples</outputDirectory>
- <includes>
- <include>*with-dependencies.jar</include>
- </includes>
+ <directory>../pregelix-core/target/appassembler/lib</directory>
+ <outputDirectory>lib</outputDirectory>
+ <includes>
+ <include>*.jar</include>
+ </includes>
<fileMode>0755</fileMode>
- </fileSet>
+ </fileSet>
<fileSet>
- <directory>../pregelix-example/data</directory>
- <outputDirectory>data</outputDirectory>
- <fileMode>0755</fileMode>
- </fileSet>
+ <directory>../pregelix-example/target</directory>
+ <outputDirectory>examples</outputDirectory>
+ <includes>
+ <include>*with-dependencies.jar</include>
+ </includes>
+ <fileMode>0755</fileMode>
+ </fileSet>
+ <fileSet>
+ <directory>../pregelix-example/data</directory>
+ <outputDirectory>data</outputDirectory>
+ <fileMode>0755</fileMode>
+ </fileSet>
</fileSets>
</assembly>