| <?xml version="1.0"?> |
| <!-- |
| ! Copyright 2009-2013 by The Regents of the University of California |
| ! Licensed under the Apache License, Version 2.0 (the "License"); |
| ! you may not use this file except in compliance with the License. |
| ! You may obtain a copy of the License from |
| ! |
| !     http://www.apache.org/licenses/LICENSE-2.0 |
| ! |
| ! Unless required by applicable law or agreed to in writing, software |
| ! distributed under the License is distributed on an "AS IS" BASIS, |
| ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| ! See the License for the specific language governing permissions and |
| ! limitations under the License. |
| ! --> |
| <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> |
| |
| <configuration> |
| <!-- Hivesterix Execution Parameters --> |
| <property> |
| <name>hive.hyracks.connectorpolicy</name> |
| <value>PIPELINING</value> |
| </property> |
| |
| <property> |
| <name>hive.hyracks.parrallelism</name> |
| <value>4</value> |
| </property> |
| |
| <property> |
| <name>hive.algebricks.groupby.external</name> |
| <value>true</value> |
| </property> |
| |
| <property> |
| <name>hive.algebricks.groupby.external.memory</name> |
| <value>33554432</value> |
| </property> |
| |
| <property> |
| <name>hive.algebricks.sort.memory</name> |
| <value>33554432</value> |
| </property> |
| |
| <property> |
| <name>hive.algebricks.framesize</name> |
| <value>32768</value> |
| </property> |
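| |
| <!-- Note (not part of the configuration): the external group-by and sort |
| memory budgets above (33554432 bytes) correspond to 1024 frames at the |
| configured frame size of 32768 bytes, since 1024 * 32768 = 33554432. --> |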
| |
| <!-- Hive Execution Parameters --> |
| <property> |
| <name>mapred.reduce.tasks</name> |
| <value>-1</value> |
| <description>The default number of reduce tasks per job. Typically |
| set to a prime close to the number of available hosts. Ignored when |
| mapred.job.tracker is "local". Hadoop sets this to 1 by default, |
| whereas Hive uses -1 as its default value. By setting this property |
| to -1, Hive will automatically figure out what the number of |
| reducers should be. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.reducers.bytes.per.reducer</name> |
| <value>1000000000</value> |
| <description>Size per reducer. The default is 1G, i.e. if the input |
| size is 10G, it will use 10 reducers. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.reducers.max</name> |
| <value>999</value> |
| <description>Maximum number of reducers that will be used. If the one |
| specified in the configuration parameter mapred.reduce.tasks is |
| negative, Hive will use this as the maximum number of reducers when |
| automatically determining the number of reducers. |
| </description> |
| </property> |
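| |
| <!-- Illustration (not part of the configuration): with mapred.reduce.tasks |
| set to -1, Hive roughly estimates the number of reducers as |
| min(ceil(total input size / hive.exec.reducers.bytes.per.reducer), |
| hive.exec.reducers.max); e.g. a 10 GB input with the 1 GB default |
| yields about 10 reducers. --> |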
| |
| <property> |
| <name>hive.cli.print.header</name> |
| <value>false</value> |
| <description>Whether to print the names of the columns in query |
| output. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.cli.print.current.db</name> |
| <value>false</value> |
| <description>Whether to include the current database in the hive |
| prompt. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.cli.prompt</name> |
| <value>hive</value> |
| <description>Command line prompt configuration value. Other hiveconf |
| variables can be used in this configuration value. Variable |
| substitution will only be invoked at Hive CLI startup. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.cli.pretty.output.num.cols</name> |
| <value>-1</value> |
| <description>The number of columns to use when formatting output |
| generated by the DESCRIBE PRETTY table_name command. If the value of |
| this property is -1, then Hive will use the auto-detected terminal |
| width. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.scratchdir</name> |
| <value>/tmp/hive-${user.name}</value> |
| <description>Scratch space for Hive jobs</description> |
| </property> |
| |
| <property> |
| <name>hive.exec.local.scratchdir</name> |
| <value>/tmp/${user.name}</value> |
| <description>Local scratch space for Hive jobs</description> |
| </property> |
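| |
| <!-- Illustration (not part of the configuration): ${user.name} is expanded |
| from the corresponding Java system property, so for a user named "alice" |
| the two scratch directories above resolve to /tmp/hive-alice and |
| /tmp/alice respectively. --> |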
| |
| <property> |
| <name>hive.test.mode</name> |
| <value>false</value> |
| <description>Whether Hive is running in test mode. If yes, it turns |
| on sampling and prefixes the output table name. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.test.mode.prefix</name> |
| <value>test_</value> |
| <description>If Hive is running in test mode, prefixes the output |
| table name with this string. |
| </description> |
| </property> |
| |
| <!-- If the input table is not bucketed, the denominator of the tablesample |
| is determined by the parameter below --> |
| <!-- For example, the following query: --> |
| <!-- INSERT OVERWRITE TABLE dest --> |
| <!-- SELECT col1 from src --> |
| <!-- would be converted to --> |
| <!-- INSERT OVERWRITE TABLE test_dest --> |
| <!-- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1)) --> |
| <property> |
| <name>hive.test.mode.samplefreq</name> |
| <value>32</value> |
| <description>If Hive is running in test mode and the table is not |
| bucketed, the sampling frequency. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.test.mode.nosamplelist</name> |
| <value></value> |
| <description>If Hive is running in test mode, don't sample the above |
| comma-separated list of tables. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.uris</name> |
| <value></value> |
| <description>Thrift URI for the remote metastore. Used by the |
| metastore client to connect to the remote metastore. |
| </description> |
| </property> |
| |
| <property> |
| <name>javax.jdo.option.ConnectionURL</name> |
| <value>jdbc:derby:;databaseName=metastore_db;create=true</value> |
| <description>JDBC connect string for a JDBC metastore</description> |
| </property> |
| |
| <property> |
| <name>javax.jdo.option.ConnectionDriverName</name> |
| <value>org.apache.derby.jdbc.EmbeddedDriver</value> |
| <description>Driver class name for a JDBC metastore</description> |
| </property> |
| |
| <property> |
| <name>javax.jdo.PersistenceManagerFactoryClass</name> |
| <value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value> |
| <description>class implementing the jdo persistence</description> |
| </property> |
| |
| <property> |
| <name>javax.jdo.option.DetachAllOnCommit</name> |
| <value>true</value> |
| <description>detaches all objects from session so that they can be |
| used after transaction is committed |
| </description> |
| </property> |
| |
| <property> |
| <name>javax.jdo.option.NonTransactionalRead</name> |
| <value>true</value> |
| <description>reads outside of transactions</description> |
| </property> |
| |
| <property> |
| <name>javax.jdo.option.ConnectionUserName</name> |
| <value>APP</value> |
| <description>username to use against metastore database</description> |
| </property> |
| |
| <property> |
| <name>javax.jdo.option.ConnectionPassword</name> |
| <value>mine</value> |
| <description>password to use against metastore database</description> |
| </property> |
| |
| <property> |
| <name>javax.jdo.option.Multithreaded</name> |
| <value>true</value> |
| <description>Set this to true if multiple threads access metastore |
| through JDO concurrently. |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.connectionPoolingType</name> |
| <value>DBCP</value> |
| <description>Uses a DBCP connection pool for JDBC metastore |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.validateTables</name> |
| <value>false</value> |
| <description>validates existing schema against code. turn this on if |
| you want to verify existing schema |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.validateColumns</name> |
| <value>false</value> |
| <description>validates existing schema against code. turn this on if |
| you want to verify existing schema |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.validateConstraints</name> |
| <value>false</value> |
| <description>validates existing schema against code. turn this on if |
| you want to verify existing schema |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.storeManagerType</name> |
| <value>rdbms</value> |
| <description>metadata store type</description> |
| </property> |
| |
| <property> |
| <name>datanucleus.autoCreateSchema</name> |
| <value>true</value> |
| <description>creates necessary schema on a startup if one doesn't |
| exist. set this to false, after creating it once |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.autoStartMechanismMode</name> |
| <value>checked</value> |
| <description>throw exception if metadata tables are incorrect |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.transactionIsolation</name> |
| <value>read-committed</value> |
| <description>Default transaction isolation level for identity |
| generation. |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.cache.level2</name> |
| <value>false</value> |
| <description>Use a level 2 cache. Turn this off if metadata is |
| changed independently of hive metastore server |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.cache.level2.type</name> |
| <value>SOFT</value> |
| <description>SOFT=soft reference based cache, WEAK=weak reference |
| based cache. |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.identifierFactory</name> |
| <value>datanucleus</value> |
| <description>Name of the identifier factory to use when generating |
| table/column names etc. 'datanucleus' is used for backward |
| compatibility |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.plugin.pluginRegistryBundleCheck</name> |
| <value>LOG</value> |
| <description>Defines what happens when plugin bundles are found and |
| are duplicated [EXCEPTION|LOG|NONE] |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.warehouse.dir</name> |
| <value>/user/hive/warehouse</value> |
| <description>location of default database for the warehouse |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.execute.setugi</name> |
| <value>false</value> |
| <description>In unsecure mode, setting this property to true will |
| cause the metastore to execute DFS operations using the client's |
| reported user and group permissions. Note that this property must be |
| set on both the client and server sides. Further note that it is best |
| effort. If the client sets it to true and the server sets it to |
| false, the client setting will be ignored. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.event.listeners</name> |
| <value></value> |
| <description>List of comma-separated listeners for metastore events. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.partition.inherit.table.properties</name> |
| <value></value> |
| <description>List of comma-separated keys occurring in table |
| properties which will get inherited to newly created partitions. * |
| implies all the keys will get inherited. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metadata.export.location</name> |
| <value></value> |
| <description>When used in conjunction with the |
| org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event |
| listener, it is the location to which the metadata will be exported. |
| The default is an empty string, which results in the metadata being |
| exported to the current user's home directory on HDFS. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metadata.move.exported.metadata.to.trash</name> |
| <value></value> |
| <description>When used in conjunction with the |
| org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event |
| listener, this setting determines if the metadata that is exported |
| will subsequently be moved to the user's trash directory alongside |
| the dropped table data. This ensures that the metadata will be |
| cleaned up along with the dropped table data. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.partition.name.whitelist.pattern</name> |
| <value></value> |
| <description>Partition names will be checked against this regex |
| pattern and rejected if not matched. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.end.function.listeners</name> |
| <value></value> |
| <description>list of comma separated listeners for the end of |
| metastore functions. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.event.expiry.duration</name> |
| <value>0</value> |
| <description>Duration after which events expire from events table (in |
| seconds) |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.event.clean.freq</name> |
| <value>0</value> |
| <description>Frequency at which the timer task runs to purge expired |
| events in the metastore (in seconds). |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.connect.retries</name> |
| <value>5</value> |
| <description>Number of retries while opening a connection to |
| metastore |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.failure.retries</name> |
| <value>3</value> |
| <description>Number of retries upon failure of Thrift metastore calls |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.client.connect.retry.delay</name> |
| <value>1</value> |
| <description>Number of seconds for the client to wait between |
| consecutive connection attempts |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.client.socket.timeout</name> |
| <value>20</value> |
| <description>MetaStore Client socket timeout in seconds</description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.rawstore.impl</name> |
| <value>org.apache.hadoop.hive.metastore.ObjectStore</value> |
| <description>Name of the class that implements the |
| org.apache.hadoop.hive.metastore.rawstore interface. This class is |
| used to store and retrieve raw metadata objects such as tables and |
| databases. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.batch.retrieve.max</name> |
| <value>300</value> |
| <description>Maximum number of objects (tables/partitions) that can be |
| retrieved from the metastore in one batch. The higher the number, the |
| fewer round trips are needed to the Hive metastore server, but it may |
| also cause a higher memory requirement on the client side. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.batch.retrieve.table.partition.max</name> |
| <value>1000</value> |
| <description>Maximum number of table partitions that metastore |
| internally retrieves in one batch. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.default.fileformat</name> |
| <value>TextFile</value> |
| <description>Default file format for the CREATE TABLE statement. Options |
| are TextFile and SequenceFile. Users can explicitly say CREATE TABLE |
| ... STORED AS &lt;TEXTFILE|SEQUENCEFILE&gt; to override. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.fileformat.check</name> |
| <value>true</value> |
| <description>Whether to check file format or not when loading data |
| files |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.map.aggr</name> |
| <value>true</value> |
| <description>Whether to use map-side aggregation in Hive Group By |
| queries |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.groupby.skewindata</name> |
| <value>false</value> |
| <description>Whether there is skew in data to optimize group by |
| queries |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.multigroupby.common.distincts</name> |
| <value>true</value> |
| <description>Whether to optimize a multi-group-by query with the same |
| distinct. |
| Consider a query like: |
| |
| from src |
| insert overwrite table dest1 select col1, count(distinct colx) group by col1 |
| insert overwrite table dest2 select col2, count(distinct colx) group by col2; |
| |
| With this parameter set to true, first we spray by the distinct value |
| (colx), and then perform the 2 group bys. This makes sense if map-side |
| aggregation is turned off. However, with map-side aggregation, it |
| might be useful in some cases to treat the 2 inserts independently, |
| thereby performing the query above in 2 MR jobs instead of 3 (due to |
| spraying by the distinct key first). |
| If this parameter is turned off, we don't consider the fact that the |
| distinct key is the same across different MR jobs. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.groupby.mapaggr.checkinterval</name> |
| <value>100000</value> |
| <description>Number of rows after which the size of the grouping |
| keys/aggregation classes is checked. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapred.local.mem</name> |
| <value>0</value> |
| <description>For local mode, memory of the mappers/reducers |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapjoin.followby.map.aggr.hash.percentmemory</name> |
| <value>0.3</value> |
| <description>Portion of total memory to be used by the map-side group |
| aggregation hash table, when this group by is followed by a map join. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.map.aggr.hash.force.flush.memory.threshold</name> |
| <value>0.9</value> |
| <description>The max memory to be used by the map-side group |
| aggregation hash table; if the memory usage is higher than this |
| number, force flushing data. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.map.aggr.hash.percentmemory</name> |
| <value>0.5</value> |
| <description>Portion of total memory to be used by the map-side group |
| aggregation hash table. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.map.aggr.hash.min.reduction</name> |
| <value>0.5</value> |
| <description>Hash aggregation will be turned off if the ratio between |
| hash table size and input rows is bigger than this number. Set to 1 |
| to make sure hash aggregation is never turned off. |
| </description> |
| </property> |
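| |
| <!-- Illustration (not part of the configuration): with the defaults above, |
| after every hive.groupby.mapaggr.checkinterval (100000) input rows the |
| mapper roughly compares the number of hash-table entries to the rows |
| consumed; if more than hive.map.aggr.hash.min.reduction (0.5) of the rows |
| produced distinct keys, map-side hash aggregation is abandoned for that |
| task. --> |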
| |
| <property> |
| <name>hive.optimize.cp</name> |
| <value>true</value> |
| <description>Whether to enable column pruner</description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.index.filter</name> |
| <value>false</value> |
| <description>Whether to enable automatic use of indexes</description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.index.groupby</name> |
| <value>false</value> |
| <description>Whether to enable optimization of group-by queries using |
| Aggregate indexes. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.ppd</name> |
| <value>true</value> |
| <description>Whether to enable predicate pushdown</description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.ppd.storage</name> |
| <value>true</value> |
| <description>Whether to push predicates down into storage handlers. |
| Ignored when hive.optimize.ppd is false. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.ppd.recognizetransivity</name> |
| <value>true</value> |
| <description>Whether to transitively replicate predicate filters over |
| equijoin conditions. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.groupby</name> |
| <value>true</value> |
| <description>Whether to enable the bucketed group by from bucketed |
| partitions/tables. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.skewjoin.compiletime</name> |
| <value>false</value> |
| <description>Whether to create a separate plan for skewed keys for |
| the tables in the join. This is based on the skewed keys stored in |
| the metadata. At compile time, the plan is broken into different |
| joins: one for the skewed keys, and the other for the remaining keys. |
| Then, a union is performed over the 2 joins generated above. So |
| unless the same skewed key is present in both the joined tables, the |
| join for the skewed key will be performed as a map-side join. |
| |
| The main difference between this parameter and hive.optimize.skewjoin |
| is that this parameter uses the skew information stored in the |
| metastore to optimize the plan at compile time itself. If there is no |
| skew information in the metadata, this parameter will not have any |
| effect. |
| Both hive.optimize.skewjoin.compiletime and hive.optimize.skewjoin |
| should be set to true. Ideally, hive.optimize.skewjoin should be |
| renamed as hive.optimize.skewjoin.runtime, but that is not done for |
| backward compatibility. |
| |
| If the skew information is correctly stored in the metadata, |
| hive.optimize.skewjoin.compiletime would change the query plan to |
| take care of it, and hive.optimize.skewjoin will be a no-op. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.union.remove</name> |
| <value>false</value> |
| <description> |
| Whether to remove the union and push the operators between the union |
| and the filesink above the union. This avoids an extra scan of the |
| output by the union. This is independently useful for union queries, |
| and especially useful when hive.optimize.skewjoin.compiletime is set |
| to true, since an extra union is inserted. |
| |
| The merge is triggered if either of hive.merge.mapfiles or |
| hive.merge.mapredfiles is set to true. If the user has set |
| hive.merge.mapfiles to true and hive.merge.mapredfiles to false, the |
| idea was that the number of reducers is small, so the number of files |
| is small anyway. However, with this optimization, we may be |
| increasing the number of files by a big margin. So, we merge |
| aggressively. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapred.supports.subdirectories</name> |
| <value>false</value> |
| <description>Whether the version of Hadoop which is running supports |
| sub-directories for tables/partitions. Many Hive optimizations can be |
| applied if the Hadoop version supports sub-directories for |
| tables/partitions. It was added by MAPREDUCE-1501. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.multigroupby.singlemr</name> |
| <value>true</value> |
| <description>Whether to optimize a multi group by query to generate a |
| single M/R job plan. If the multi group by query has common group by |
| keys, it will be optimized to generate a single M/R job. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.map.groupby.sorted</name> |
| <value>false</value> |
| <description>If the bucketing/sorting properties of the table exactly |
| match the grouping key, whether to perform the group by in the mapper |
| by using BucketizedHiveInputFormat. The only downside to this is that |
| it limits the number of mappers to the number of files. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.map.groupby.sorted.testmode</name> |
| <value>false</value> |
| <description>If the bucketing/sorting properties of the table exactly |
| match the grouping key, whether to perform the group by in the mapper |
| by using BucketizedHiveInputFormat. If the test mode is set, the plan |
| is not converted, but a query property is set to denote the same. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.new.job.grouping.set.cardinality</name> |
| <value>30</value> |
| <description> |
| Whether a new map-reduce job should be launched for grouping |
| sets/rollups/cubes. |
| For a query like: select a, b, c, count(1) from T group by a, b, c |
| with rollup; |
| 4 rows are created per row: (a, b, c), (a, b, null), (a, null, null), |
| (null, null, null). |
| This can lead to explosion across the map-reduce boundary if the |
| cardinality of T is very high, and map-side aggregation does not do a |
| very good job. |
| |
| This parameter decides if Hive should add an additional map-reduce |
| job. If the grouping set cardinality (4 in the example above) is more |
| than this value, a new MR job is added under the assumption that the |
| original group by will reduce the data size. |
| </description> |
| </property> |
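| |
| <!-- Illustration (not part of the configuration): a CUBE over 5 grouping |
| columns produces a grouping-set cardinality of 2^5 = 32, which exceeds |
| the default of 30, so Hive would add the extra map-reduce job; the |
| 3-column ROLLUP above has cardinality 4 and would not. --> |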
| |
| <property> |
| <name>hive.join.emit.interval</name> |
| <value>1000</value> |
| <description>How many rows in the right-most join operand Hive should |
| buffer before emitting the join result. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.join.cache.size</name> |
| <value>25000</value> |
| <description>How many rows in the joining tables (except the |
| streaming table) should be cached in memory. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapjoin.bucket.cache.size</name> |
| <value>100</value> |
| <description>How many values for each key in the map-joined table |
| should be cached in memory. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapjoin.cache.numrows</name> |
| <value>25000</value> |
| <description>How many rows should be cached by jdbm for map join. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.skewjoin</name> |
| <value>false</value> |
| <description>Whether to enable skew join optimization. |
| The algorithm is as follows: at runtime, detect the keys with a large |
| skew. Instead of processing those keys, store them temporarily in an |
| HDFS directory. In a follow-up map-reduce job, process those skewed |
| keys. The same key need not be skewed for all the tables, and so the |
| follow-up map-reduce job (for the skewed keys) would be much faster, |
| since it would be a map-join. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.skewjoin.key</name> |
| <value>100000</value> |
| <description>Determines if we get a skew key in a join. If we see more |
| than the specified number of rows with the same key in the join |
| operator, we consider the key a skew join key. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.skewjoin.mapjoin.map.tasks</name> |
| <value>10000</value> |
| <description>Determines the number of map tasks used in the follow-up |
| map join job for a skew join. It should be used together with |
| hive.skewjoin.mapjoin.min.split to perform fine-grained control. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.skewjoin.mapjoin.min.split</name> |
| <value>33554432</value> |
| <description>Determines the maximum number of map tasks used in the |
| follow-up map join job for a skew join by specifying the minimum |
| split size. It should be used together with |
| hive.skewjoin.mapjoin.map.tasks to perform fine-grained control. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapred.mode</name> |
| <value>nonstrict</value> |
| <description>The mode in which the Hive operations are being |
| performed. In strict mode, some risky queries are not allowed to run. |
| They include: |
| Cartesian product. |
| No partition being picked up for a query. |
| Comparing bigints and strings. |
| Comparing bigints and doubles. |
| Order by without limit. |
| </description> |
| </property> |
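| |
| <!-- Illustration (not part of the configuration): with hive.mapred.mode set |
| to strict, a query such as |
| SELECT * FROM sales ORDER BY amount; |
| (hypothetical table) is rejected because it has no LIMIT clause, while |
| SELECT * FROM sales ORDER BY amount LIMIT 100; |
| is allowed. --> |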
| |
| <property> |
| <name>hive.enforce.bucketmapjoin</name> |
| <value>false</value> |
| <description>If the user asked for bucketed map-side join, and it |
| cannot be performed, should the query fail or not? For example, if |
| the buckets in the tables being joined are not a multiple of each |
| other, bucketed map-side join cannot be performed, and the query will |
| fail if hive.enforce.bucketmapjoin is set to true. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.script.maxerrsize</name> |
| <value>100000</value> |
| <description>Maximum number of bytes a script is allowed to emit to |
| standard error (per map-reduce task). This prevents runaway scripts |
| from filling log partitions to capacity. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.script.allow.partial.consumption</name> |
| <value>false</value> |
| <description> When enabled, this option allows a user script to exit |
| successfully without consuming all the data from the standard input. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.script.operator.id.env.var</name> |
| <value>HIVE_SCRIPT_OPERATOR_ID</value> |
| <description> Name of the environment variable that holds the unique |
| script operator ID in the user's transform function (the custom |
| mapper/reducer that the user has specified in the query) |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.script.operator.truncate.env</name> |
| <value>false</value> |
| <description>Truncate each environment variable for external script |
| in scripts operator to 20KB (to fit system limits) |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.compress.output</name> |
| <value>false</value> |
| <description>This controls whether the final outputs of a query (to |
| a local/HDFS file or a Hive table) are compressed. The compression |
| codec and other options are determined from the Hadoop config |
| variables mapred.output.compress*. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.compress.intermediate</name> |
| <value>false</value> |
| <description>This controls whether intermediate files produced by |
| Hive between multiple map-reduce jobs are compressed. The |
| compression codec and other options are determined from the Hadoop |
| config variables mapred.output.compress*. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.parallel</name> |
| <value>false</value> |
| <description>Whether to execute jobs in parallel</description> |
| </property> |
| |
| <property> |
| <name>hive.exec.parallel.thread.number</name> |
| <value>8</value> |
| <description>How many jobs at most can be executed in parallel |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.rowoffset</name> |
| <value>false</value> |
| <description>Whether to provide the row offset virtual column |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.task.progress</name> |
| <value>false</value> |
| <description>Whether Hive should periodically update task progress |
| counters during execution. Enabling this allows task progress to be |
| monitored more closely in the job tracker, but may impose a |
| performance penalty. This flag is automatically set to true for jobs |
| with hive.exec.dynamic.partition set to true. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.hwi.war.file</name> |
| <value>lib/hive-hwi-@VERSION@.war</value> |
| <description>This sets the path to the HWI war file, relative to |
| ${HIVE_HOME}. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.hwi.listen.host</name> |
| <value>0.0.0.0</value> |
| <description>This is the host address the Hive Web Interface will |
| listen on |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.hwi.listen.port</name> |
| <value>9999</value> |
| <description>This is the port the Hive Web Interface will listen on |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.pre.hooks</name> |
| <value></value> |
| <description>Comma-separated list of pre-execution hooks to be |
| invoked for each statement. A pre-execution hook is specified as the |
| name of a Java class which implements the |
| org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.post.hooks</name> |
| <value></value> |
| <description>Comma-separated list of post-execution hooks to be |
| invoked for each statement. A post-execution hook is specified as |
| the name of a Java class which implements the |
| org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.failure.hooks</name> |
| <value></value> |
| <description>Comma-separated list of on-failure hooks to be invoked |
| for each statement. An on-failure hook is specified as the name of |
| Java class which implements the |
| org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.init.hooks</name> |
| <value></value> |
| <description>A comma-separated list of hooks to be invoked at the |
| beginning of HMSHandler initialization. An init hook is specified as |
| the name of a Java class which extends |
| org.apache.hadoop.hive.metastore.MetaStoreInitListener. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.client.stats.publishers</name> |
| <value></value> |
| <description>Comma-separated list of statistics publishers to be |
| invoked on counters on each job. A client stats publisher is |
| specified as the name of a Java class which implements the |
| org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.client.stats.counters</name> |
| <value></value> |
| <description>Subset of counters that should be of interest for |
| hive.client.stats.publishers (when one wants to limit their |
| publishing). Non-display names should be used |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.merge.mapfiles</name> |
| <value>true</value> |
| <description>Merge small files at the end of a map-only job |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.merge.mapredfiles</name> |
| <value>false</value> |
| <description>Merge small files at the end of a map-reduce job |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.heartbeat.interval</name> |
| <value>1000</value> |
| <description>Send a heartbeat after this interval - used by mapjoin |
| and filter operators |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.merge.size.per.task</name> |
| <value>256000000</value> |
| <description>Size of merged files at the end of the job</description> |
| </property> |
| |
| <property> |
| <name>hive.merge.smallfiles.avgsize</name> |
| <value>16000000</value> |
| <description>When the average output file size of a job is less than |
| this number, Hive will start an additional map-reduce job to merge |
| the output files into bigger files. This is only done for map-only |
| jobs if hive.merge.mapfiles is true, and for map-reduce jobs if |
| hive.merge.mapredfiles is true. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapjoin.smalltable.filesize</name> |
| <value>25000000</value> |
| <description>The threshold for the input file size of the small |
| tables; if the file size is smaller than this threshold, it will try |
| to convert the common join into map join |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.ignore.mapjoin.hint</name> |
| <value>true</value> |
| <description>Ignore the mapjoin hint</description> |
| </property> |
| |
| <property> |
| <name>hive.mapjoin.localtask.max.memory.usage</name> |
| <value>0.90</value> |
| <description>This number means how much memory the local task can |
| take to hold the key/value pairs in an in-memory hash table; if the |
| local task's memory usage is more than this number, the local task |
| will abort itself. It means the data of the small table is too large |
| to be held in memory. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapjoin.followby.gby.localtask.max.memory.usage</name> |
| <value>0.55</value> |
| <description>This number means how much memory the local task can |
| take to hold the key/value pairs in an in-memory hash table when this |
| map join is followed by a group by; if the local task's memory usage |
| is more than this number, the local task will abort itself. It means |
| the data of the small table is too large to be held in memory. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapjoin.check.memory.rows</name> |
| <value>100000</value> |
| <description>The number of processed rows after which the memory |
| usage is checked. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.auto.convert.join</name> |
| <value>false</value> |
| <description>Whether Hive enables the optimization of converting a |
| common join into a mapjoin based on the input file size. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.auto.convert.join.noconditionaltask</name> |
| <value>true</value> |
| <description>Whether Hive enables the optimization of converting a |
| common join into a mapjoin based on the input file size. If this |
| parameter is on, and the sum of the sizes of n-1 of the |
| tables/partitions for an n-way join is smaller than the specified |
| size, the join is directly converted to a mapjoin (there is no |
| conditional task). |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.auto.convert.join.noconditionaltask.size</name> |
| <value>10000000</value> |
| <description>If hive.auto.convert.join.noconditionaltask is off, this |
| parameter does not take effect. However, if it is on, and the sum of |
| the sizes of n-1 of the tables/partitions for an n-way join is |
| smaller than this size, the join is directly converted to a mapjoin |
| (there is no conditional task). The default is 10MB. |
| </description> |
| </property> |
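| |
| <!-- Illustration (not part of the configuration): in a hypothetical 3-way |
| join of fact (50 GB), dim1 (4 MB) and dim2 (3 MB), the n-1 smallest |
| inputs sum to 7 MB, which is below the 10 MB threshold above, so with |
| hive.auto.convert.join.noconditionaltask=true the join is compiled |
| directly as a mapjoin with no conditional task. --> |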
| |
| <property> |
| <name>hive.optimize.mapjoin.mapreduce</name> |
| <value>false</value> |
| <description>If hive.auto.convert.join is off, this parameter does |
| not take effect. If it is on, and if there are map-join jobs followed |
| by a map-reduce job (e.g. a group by), each map-only job is merged |
| with the following map-reduce job. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.script.auto.progress</name> |
| <value>false</value> |
| <description>Whether the Hive Transform/Map/Reduce clause should |
| automatically send progress information to the TaskTracker to avoid |
| the task getting killed because of inactivity. Hive sends progress |
| information when the script is outputting to stderr. This option |
| removes the need for periodically producing stderr messages, but |
| users should be cautious because this may prevent infinite loops in |
| the scripts from being killed by the TaskTracker. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.script.serde</name> |
| <value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value> |
| <description>The default SerDe for transmitting input data to and |
| reading output data from the user scripts. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.binary.record.max.length</name> |
| <value>1000</value> |
| <description>Read from a binary stream and treat each |
| hive.binary.record.max.length bytes as a record. The last record |
| before the end of the stream can have fewer than |
| hive.binary.record.max.length bytes. |
| </description> |
| </property> |
| |
| |
| <property> |
| <name>hive.script.recordreader</name> |
| <value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value> |
| <description>The default record reader for reading data from the user |
| scripts. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.script.recordwriter</name> |
| <value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value> |
| <description>The default record writer for writing data to the user |
| scripts. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.input.format</name> |
| <value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value> |
| <description>The default input format. Set this to HiveInputFormat if |
| you encounter problems with CombineHiveInputFormat. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.udtf.auto.progress</name> |
| <value>false</value> |
| <description>Whether Hive should automatically send progress |
| information to the TaskTracker when using UDTFs to prevent the task |
| getting killed because of inactivity. Users should be cautious |
| because this may prevent the TaskTracker from killing tasks with |
| infinite loops. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapred.reduce.tasks.speculative.execution</name> |
| <value>true</value> |
| <description>Whether speculative execution for reducers should be |
| turned on. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.counters.pull.interval</name> |
| <value>1000</value> |
| <description>The interval with which to poll the JobTracker for the |
| counters of the running job. The smaller it is, the more load there |
| will be on the JobTracker; the higher it is, the less granular the |
| caught data will be. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.querylog.location</name> |
| <value>/tmp/${user.name}</value> |
| <description> |
| Location of Hive run time structured log file |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.querylog.enable.plan.progress</name> |
| <value>true</value> |
| <description> |
| Whether to log the plan's progress every time a job's progress is |
| checked. These logs are written to the location specified by |
| hive.querylog.location. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.querylog.plan.progress.interval</name> |
| <value>60000</value> |
| <description> |
| The interval to wait between logging the plan's progress, in |
| milliseconds. |
| If there is a whole-number percentage change in the progress of the |
| mappers or the reducers, the progress is logged regardless of this |
| value. |
| The actual interval will be the ceiling of (this value divided by the |
| value of hive.exec.counters.pull.interval) multiplied by the value of |
| hive.exec.counters.pull.interval; i.e. if it does not divide evenly |
| by the value of hive.exec.counters.pull.interval it will be logged |
| less frequently than specified. |
| This only has an effect if hive.querylog.enable.plan.progress is set |
| to true. |
| </description> |
| </property> |
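| |
| <!-- Illustration (not part of the configuration): with |
| hive.exec.counters.pull.interval=1000, the default of 60000 divides |
| evenly (ceil(60000/1000) * 1000 = 60000 ms); a value of 60500 would |
| round up to ceil(60500/1000) * 1000 = 61000 ms between plan-progress |
| log entries. --> |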
| |
| <property> |
| <name>hive.enforce.bucketing</name> |
| <value>false</value> |
| <description>Whether bucketing is enforced. If true, while inserting |
| into the table, bucketing is enforced. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.enforce.sorting</name> |
| <value>false</value> |
| <description>Whether sorting is enforced. If true, while inserting |
| into the table, sorting is enforced. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.bucketingsorting</name> |
| <value>true</value> |
| <description>If hive.enforce.bucketing or hive.enforce.sorting is |
| true, don't create a reducer for enforcing bucketing/sorting for |
| queries of the form: |
| insert overwrite table T2 select * from T1; |
| where T1 and T2 are bucketed/sorted by the same keys into the same |
| number of buckets. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.enforce.sortmergebucketmapjoin</name> |
| <value>false</value> |
| <description>If the user asked for sort-merge bucketed map-side join, |
| and it cannot be performed, should the query fail or not? |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.auto.convert.sortmerge.join</name> |
| <value>false</value> |
| <description>Will the join be automatically converted to a sort-merge |
| join, if the joined tables pass |
| the criteria for sort-merge join. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.auto.convert.sortmerge.join.bigtable.selection.policy</name> |
| <value>org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ</value> |
| <description>The policy to choose the big table for automatic |
| conversion to sort-merge join. |
| By default, the table with the largest partitions is assigned as the |
| big table. All policies are: |
| . based on position of the table - the leftmost table is selected |
| org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSMJ. |
| . based on total size (all the partitions selected in the query) of |
| the table |
| org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ. |
| . based on average size (all the partitions selected in the query) |
| of the table |
| org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ. |
| New policies can be added in the future. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.ds.connection.url.hook</name> |
| <value></value> |
| <description>Name of the hook to use for retrieving the JDO connection |
| URL. If empty, the value in javax.jdo.option.ConnectionURL is used. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.ds.retry.attempts</name> |
| <value>1</value> |
| <description>The number of times to retry a metastore call if there |
| is a connection error. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.ds.retry.interval</name> |
| <value>1000</value> |
| <description>The number of milliseconds between metastore retry |
| attempts. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.server.min.threads</name> |
| <value>200</value> |
| <description>Minimum number of worker threads in the Thrift server's |
| pool. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.server.max.threads</name> |
| <value>100000</value> |
| <description>Maximum number of worker threads in the Thrift server's |
| pool. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.server.tcp.keepalive</name> |
| <value>true</value> |
| <description>Whether to enable TCP keepalive for the metastore |
| server. Keepalive will prevent accumulation of half-open |
| connections. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.sasl.enabled</name> |
| <value>false</value> |
| <description>If true, the metastore thrift interface will be secured |
| with SASL. Clients must authenticate with Kerberos. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.thrift.framed.transport.enabled</name> |
| <value>false</value> |
| <description>If true, the metastore thrift interface will use |
| TFramedTransport. When false (default) a standard TTransport is |
| used. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.kerberos.keytab.file</name> |
| <value></value> |
| <description>The path to the Kerberos Keytab file containing the |
| metastore thrift server's service principal. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.kerberos.principal</name> |
| <value>hive-metastore/_HOST@EXAMPLE.COM</value> |
| <description>The service principal for the metastore thrift server. |
| The special string _HOST will be replaced automatically with the |
| correct host name. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.cluster.delegation.token.store.class</name> |
| <value>org.apache.hadoop.hive.thrift.MemoryTokenStore</value> |
| <description>The delegation token store implementation. Set to |
| org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced |
| cluster. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.cluster.delegation.token.store.zookeeper.connectString</name> |
| <value>localhost:2181</value> |
| <description>The ZooKeeper token store connect string.</description> |
| </property> |
| |
| <property> |
| <name>hive.cluster.delegation.token.store.zookeeper.znode</name> |
| <value>/hive/cluster/delegation</value> |
| <description>The root path for token store data.</description> |
| </property> |
| |
| <property> |
| <name>hive.cluster.delegation.token.store.zookeeper.acl</name> |
| <value>sasl:hive/host1@EXAMPLE.COM:cdrwa,sasl:hive/host2@EXAMPLE.COM:cdrwa</value> |
| <description>ACL for token store entries. List comma separated all |
| server principals for the cluster. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.cache.pinobjtypes</name> |
| <value>Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order</value> |
| <description>List of comma separated metastore object types that |
| should be pinned in the cache |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.reducededuplication</name> |
| <value>true</value> |
| <description>Remove extra map-reduce jobs if the data is already |
| clustered by the same key which needs to be used again. This should |
| always be set to true. Since it is a new feature, it has been made |
| configurable. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.reducededuplication.min.reducer</name> |
| <value>4</value> |
| <description>Reduce deduplication merges two RSs (reduce sink |
| operators) by moving the key/parts/reducer-num of the child RS to the |
| parent RS. That means if the reducer-num of the child RS is fixed |
| (order by or forced bucketing) and small, it can produce a very slow, |
| single-reducer MR job. The optimization will be disabled if the |
| number of reducers is less than the specified value. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.dynamic.partition</name> |
| <value>true</value> |
| <description>Whether or not to allow dynamic partitions in DML/DDL. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.dynamic.partition.mode</name> |
| <value>strict</value> |
| <description>In strict mode, the user must specify at least one |
| static partition in case the user accidentally overwrites all |
| partitions. |
| </description> |
| </property> |
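| |
| <!-- Illustration (not part of the configuration): in strict mode an insert |
| must pin at least one static partition column, e.g. (hypothetical table) |
| INSERT OVERWRITE TABLE page_view PARTITION (dt='2013-01-01', country) |
| SELECT ..., country FROM staging; |
| whereas PARTITION (dt, country) with both columns dynamic would be |
| rejected unless hive.exec.dynamic.partition.mode is set to nonstrict. --> |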
| |
| <property> |
| <name>hive.exec.max.dynamic.partitions</name> |
| <value>1000</value> |
| <description>Maximum number of dynamic partitions allowed to be |
| created in total. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.max.dynamic.partitions.pernode</name> |
| <value>100</value> |
| <description>Maximum number of dynamic partitions allowed to be |
| created in each mapper/reducer node. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.max.created.files</name> |
| <value>100000</value> |
| <description>Maximum number of HDFS files created by all |
| mappers/reducers in a MapReduce job. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.default.partition.name</name> |
| <value>__HIVE_DEFAULT_PARTITION__</value> |
| <description>The default partition name in case the dynamic partition |
| column value is null/empty string or any other value that cannot be |
| escaped. This value must not contain any special character used in |
| HDFS URIs (e.g., ':', '%', '/' etc.). The user has to be aware that |
| the dynamic partition value should not contain this value to avoid |
| confusion. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.dbclass</name> |
| <value>jdbc:derby</value> |
| <description>The default database that stores temporary hive |
| statistics. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.autogather</name> |
| <value>true</value> |
| <description>A flag to gather statistics automatically during the |
| INSERT OVERWRITE command. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.jdbcdriver</name> |
| <value>org.apache.derby.jdbc.EmbeddedDriver</value> |
| <description>The JDBC driver for the database that stores temporary |
| hive statistics. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.dbconnectionstring</name> |
| <value>jdbc:derby:;databaseName=TempStatsStore;create=true</value> |
| <description>The default connection string for the database that |
| stores temporary hive statistics. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.default.publisher</name> |
| <value></value> |
| <description>The Java class (implementing the StatsPublisher |
| interface) that is used by default if hive.stats.dbclass is not JDBC |
| or HBase. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.default.aggregator</name> |
| <value></value> |
| <description>The Java class (implementing the StatsAggregator |
| interface) that is used by default if hive.stats.dbclass is not JDBC |
| or HBase. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.jdbc.timeout</name> |
| <value>30</value> |
| <description>Timeout value (number of seconds) used by JDBC |
| connection and statements. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.retries.max</name> |
| <value>0</value> |
| <description>Maximum number of retries when the stats |
| publisher/aggregator gets an exception updating the intermediate |
| database. Default is no retries on failures. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.retries.wait</name> |
| <value>3000</value> |
| <description>The base waiting window (in milliseconds) before the |
| next retry. The actual wait time is calculated by baseWindow * |
| failures + baseWindow * (failures + 1) * (random number between |
| [0.0,1.0]). |
| </description> |
| </property> |
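| |
| <!-- Illustration (not part of the configuration), assuming the formula |
| above: with the default base window of 3000 ms, the second retry |
| (failures = 1) waits 3000 * 1 + 3000 * 2 * r milliseconds, i.e. between |
| 3 and 9 seconds depending on the random factor r in [0.0, 1.0]. --> |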
| |
| <property> |
| <name>hive.stats.reliable</name> |
| <value>false</value> |
| <description>Whether queries will fail because stats cannot be |
| collected completely accurately. If this is set to true, |
| reading/writing from/into a partition may fail because the stats |
| could not be computed accurately. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.collect.tablekeys</name> |
| <value>false</value> |
| <description>Whether join and group by keys on tables are derived and |
| maintained in the QueryPlan. |
| This is useful to identify how tables |
| are accessed and to determine if |
| they should be bucketed. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.collect.scancols</name> |
| <value>false</value> |
| <description>Whether column accesses are tracked in the QueryPlan. |
| This is useful to identify how tables are accessed and to determine |
| if there are wasted columns that can be trimmed. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.ndv.error</name> |
| <value>20.0</value> |
| <description>Standard error expressed as a percentage. Provides a |
| tradeoff between accuracy and compute cost. A lower value for error |
| indicates higher accuracy and a higher compute cost. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.key.prefix.max.length</name> |
| <value>200</value> |
| <description> |
| Determines if, when the prefix of the key used for intermediate stats |
| collection exceeds a certain length, a hash of the key is used |
| instead. If the value &lt; 0 then hashing is never used; if the value |
| &gt;= 0 then hashing is used only when the key prefix's length |
| exceeds that value. The key prefix is defined as everything preceding |
| the task ID in the key. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.support.concurrency</name> |
| <value>false</value> |
| <description>Whether hive supports concurrency or not. A zookeeper |
| instance must be up and running for the default hive lock manager to |
| support read-write locks. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.lock.numretries</name> |
| <value>100</value> |
| <description>The number of times you want to try to get all the locks |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.unlock.numretries</name> |
| <value>10</value> |
| <description>The number of times you want to retry to do one unlock |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.lock.sleep.between.retries</name> |
| <value>60</value> |
| <description>The sleep time (in seconds) between various retries |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.zookeeper.quorum</name> |
| <value></value> |
| <description>The list of zookeeper servers to talk to. This is only |
| needed for read/write locks. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.zookeeper.client.port</name> |
| <value>2181</value> |
| <description>The port of zookeeper servers to talk to. This is only |
| needed for read/write locks. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.zookeeper.session.timeout</name> |
| <value>600000</value> |
| <description>Zookeeper client's session timeout. The client is |
| disconnected, and as a result, all locks released, if a heartbeat is |
| not sent in the timeout. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.zookeeper.namespace</name> |
| <value>hive_zookeeper_namespace</value> |
| <description>The parent node under which all zookeeper nodes are |
| created. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.zookeeper.clean.extra.nodes</name> |
| <value>false</value> |
| <description>Clean extra nodes at the end of the session. |
| </description> |
| </property> |
| |
| <property> |
| <name>fs.har.impl</name> |
| <value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value> |
| <description>The implementation for accessing Hadoop Archives. Note |
| that this won't be applicable to Hadoop versions less than 0.20 |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.archive.enabled</name> |
| <value>false</value> |
| <description>Whether archiving operations are permitted</description> |
| </property> |
| |
| <property> |
| <name>hive.fetch.output.serde</name> |
| <value>org.apache.hadoop.hive.serde2.DelimitedJSONSerDe</value> |
| <description>The serde used by FetchTask to serialize the fetch |
| output. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.mode.local.auto</name> |
| <value>false</value> |
| <description> Let hive determine whether to run in local mode |
| automatically |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.drop.ignorenonexistent</name> |
| <value>true</value> |
| <description> |
| Do not report an error if DROP TABLE/VIEW specifies a |
| non-existent |
| table/view |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.show.job.failure.debug.info</name> |
| <value>true</value> |
| <description> |
| If a job fails, whether to provide a link in the CLI to |
| the task with |
| the |
| most failures, along with debugging hints if |
| applicable. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.auto.progress.timeout</name> |
| <value>0</value> |
| <description> |
| How long to run autoprogressor for the script/UDTF |
| operators (in |
| seconds). |
| Set to 0 for forever. |
| </description> |
| </property> |
| |
| <!-- HBase Storage Handler Parameters --> |
| |
| <property> |
| <name>hive.hbase.wal.enabled</name> |
| <value>true</value> |
| <description>Whether writes to HBase should be forced to the |
| write-ahead log. Disabling this improves HBase write performance at |
| the risk of lost writes in case of a crash. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.table.parameters.default</name> |
| <value></value> |
| <description>Default property values for newly created tables |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.entity.separator</name> |
| <value>@</value> |
| <description>Separator used to construct names of tables and |
| partitions. For example, dbname@tablename@partitionname |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.ddl.createtablelike.properties.whitelist</name> |
| <value></value> |
| <description>Table Properties to copy over when executing a Create |
| Table Like. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.variable.substitute</name> |
| <value>true</value> |
| <description>This enables substitution using syntax like ${var}, |
| ${system:var} and ${env:var}. |
| </description> |
| </property> |
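| |
| <!-- A small illustration (table and column names are hypothetical): --> |
| <!-- SET region=us_east; --> |
| <!-- SELECT * FROM sales WHERE region_key = '${hiveconf:region}'; --> |
| <!-- Here ${hiveconf:region} is substituted with us_east before execution. --> |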
| |
| <property> |
| <name>hive.variable.substitute.depth</name> |
| <value>40</value> |
| <description>The maximum replacements the substitution engine will |
| do. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.conf.validation</name> |
| <value>true</value> |
| <description>Enables type checking for registered hive configurations |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.security.authorization.enabled</name> |
| <value>false</value> |
| <description>enable or disable the hive client authorization |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.security.authorization.createtable.user.grants</name> |
| <value></value> |
| <description>the privileges automatically granted to some users |
| whenever a table gets created. |
| An example like |
| "userX,userY:select;userZ:create" will grant select |
| privilege to |
| userX and userY, |
| and grant create privilege to userZ whenever a new |
| table is created. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.security.authorization.createtable.group.grants</name> |
| <value></value> |
| <description>the privileges automatically granted to some groups |
| whenever a table gets created. |
| An example like |
| "groupX,groupY:select;groupZ:create" will grant select |
| privilege to |
| groupX and groupY, |
| and grant create privilege to groupZ whenever a |
| new table is created. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.security.authorization.createtable.role.grants</name> |
| <value></value> |
| <description>the privileges automatically granted to some roles |
| whenever a table gets created. |
| An example like |
| "roleX,roleY:select;roleZ:create" will grant select |
| privilege to |
| roleX and roleY, |
| and grant create privilege to roleZ whenever a new |
| table is created. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.security.authorization.createtable.owner.grants</name> |
| <value></value> |
| <description>the privileges automatically granted to the owner |
| whenever a table gets created. |
| An example like "select,drop" will grant select and drop privilege to |
| the owner of the table. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.authorization.storage.checks</name> |
| <value>false</value> |
| <description>Should the metastore do authorization checks against the |
| underlying storage |
| for operations like drop-partition (disallow the |
| drop-partition if the |
| user in |
| question doesn't have permissions to |
| delete the corresponding directory |
| on the storage). |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.error.on.empty.partition</name> |
| <value>false</value> |
| <description>Whether to throw an exception if dynamic partition |
| insert generates empty results. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.index.compact.file.ignore.hdfs</name> |
| <value>false</value> |
| <description>If true, the hdfs location stored in the index file will |
| be ignored at runtime. |
| If the data got moved or the name of the cluster |
| got changed, the index data should still be usable. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.index.filter.compact.minsize</name> |
| <value>5368709120</value> |
| <description>Minimum size (in bytes) of the inputs on which a compact |
| index is automatically used. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.index.filter.compact.maxsize</name> |
| <value>-1</value> |
| <description>Maximum size (in bytes) of the inputs on which a compact |
| index is automatically used. |
| A negative number is equivalent to |
| infinity. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.index.compact.query.max.size</name> |
| <value>10737418240</value> |
| <description>The maximum number of bytes that a query using the |
| compact index can read. Negative value is equivalent to infinity. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.index.compact.query.max.entries</name> |
| <value>10000000</value> |
| <description>The maximum number of index entries to read during a |
| query that uses the compact index. Negative value is equivalent to |
| infinity. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.index.compact.binary.search</name> |
| <value>true</value> |
| <description>Whether or not to use a binary search to find the |
| entries in an index table that match the filter, where possible |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exim.uri.scheme.whitelist</name> |
| <value>hdfs,pfile</value> |
| <description>A comma separated list of acceptable URI schemes for |
| import and export. |
| </description> |
| </property> |
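| |
| <!-- For illustration (table and path are hypothetical), an export to a |
| whitelisted hdfs URI: |
| EXPORT TABLE sales TO 'hdfs://namenode:8020/user/hive/export/sales'; |
| A scheme not listed here would be rejected for import/export. --> |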
| |
| <property> |
| <name>hive.lock.mapred.only.operation</name> |
| <value>false</value> |
| <description>This parameter controls whether to only acquire locks on |
| queries that need to execute at least one mapred job. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.limit.row.max.size</name> |
| <value>100000</value> |
| <description>When trying a smaller subset of data for simple LIMIT, |
| how much size we need to guarantee each row to have at least. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.limit.optimize.limit.file</name> |
| <value>10</value> |
| <description>When trying a smaller subset of data for simple LIMIT, |
| maximum number of files we can |
| sample. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.limit.optimize.enable</name> |
| <value>false</value> |
| <description>Whether to enable to optimization to trying a smaller |
| subset of data for simple LIMIT first. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.limit.optimize.fetch.max</name> |
| <value>50000</value> |
| <description>Maximum number of rows allowed for a smaller subset of |
| data for simple LIMIT, if it is a fetch query. |
| Insert queries are not |
| restricted by this limit. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.rework.mapredwork</name> |
| <value>false</value> |
| <description>should rework the mapred work or not. |
| This is first |
| introduced by SymlinkTextInputFormat to replace symlink |
| files with |
| real paths at compile time. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.concatenate.check.index</name> |
| <value>true</value> |
| <description>If this is set to true, hive will throw an error when |
| doing 'alter table tbl_name [partSpec] concatenate' on a |
| table/partition that has indexes on it. The reason a user would want |
| to set this to true is that it can help the user avoid handling all |
| the index drop, recreation and rebuild work. This is very helpful for |
| tables with thousands of partitions. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.sample.seednumber</name> |
| <value>0</value> |
| <description>A number used for percentage sampling. By changing this |
| number, the user will change the subsets of data sampled. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.io.exception.handlers</name> |
| <value></value> |
| <description>A list of io exception handler class names. This is used |
| to construct a list of exception handlers to handle exceptions thrown |
| by record readers |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.autogen.columnalias.prefix.label</name> |
| <value>_c</value> |
| <description>String used as a prefix when auto generating column |
| alias. |
| By default the prefix label will be appended with a column |
| position |
| number to form the column alias. Auto generation would |
| happen if an |
| aggregate function is used in a select clause without an |
| explicit |
| alias. |
| </description> |
| </property> |
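| |
| <!-- For example (hypothetical table): SELECT count(*), max(amount) FROM sales; |
| returns columns with the auto-generated aliases _c0 and _c1, i.e. this |
| prefix followed by the column position. --> |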
| |
| <property> |
| <name>hive.autogen.columnalias.prefix.includefuncname</name> |
| <value>false</value> |
| <description>Whether to include function name in the column alias |
| auto generated by hive. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.perf.logger</name> |
| <value>org.apache.hadoop.hive.ql.log.PerfLogger</value> |
| <description>The class responsible for logging client side performance |
| metrics. Must be a subclass of |
| org.apache.hadoop.hive.ql.log.PerfLogger |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.start.cleanup.scratchdir</name> |
| <value>false</value> |
| <description>To clean up the hive scratchdir while starting the hive |
| server |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.output.file.extension</name> |
| <value></value> |
| <description>String used as a file extension for output files. If not |
| set, defaults to the codec extension for text files (e.g. ".gz"), or |
| no extension otherwise. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.insert.into.multilevel.dirs</name> |
| <value>false</value> |
| <description>Whether to insert into multilevel directories like |
| "insert |
| directory '/HIVEFT25686/chinna/' from table" |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.warehouse.subdir.inherit.perms</name> |
| <value>false</value> |
| <description>Set this to true if the table directories should |
| inherit the |
| permission of the warehouse or database directory instead |
| of being created |
| with the permissions derived from dfs umask |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.job.debug.capture.stacktraces</name> |
| <value>true</value> |
| <description>Whether or not stack traces parsed from the task logs of |
| a sampled failed task for |
| each failed job should be stored in the |
| SessionState |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.driver.run.hooks</name> |
| <value></value> |
| <description>A comma separated list of hooks which implement |
| HiveDriverRunHook and will be run at the |
| beginning and end of |
| Driver.run; these will be run in the order specified. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.ddl.output.format</name> |
| <value>text</value> |
| <description> |
| The data format to use for DDL output. One of "text" |
| (for human |
| readable text) or "json" (for a json object). |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.transform.escape.input</name> |
| <value>false</value> |
| <description> |
| This adds an option to escape special chars (newlines, |
| carriage returns |
| and |
| tabs) when they are passed to the user script. |
| This is useful if the hive |
| tables |
| can contain data that contains |
| special characters. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.rcfile.use.explicit.header</name> |
| <value>true</value> |
| <description> |
| If this is set the header for RC Files will simply be |
| RCF. If this is |
| not |
| set the header will be that borrowed from sequence |
| files, e.g. SEQ- |
| followed |
| by the input and output RC File formats. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.multi.insert.move.tasks.share.dependencies</name> |
| <value>false</value> |
| <description> |
| If this is set all move tasks for tables/partitions (not |
| directories) |
| at the end of a |
| multi-insert query will only begin once |
| the dependencies for all these move |
| tasks have been |
| met. |
| Advantages: If |
| concurrency is enabled, the locks will only be released once the |
| query has |
| finished, so with this config enabled, the time when the |
| table/partition is |
| generated will be much closer to when the lock on |
| it is released. |
| Disadvantages: If concurrency is not enabled, with |
| this disabled, |
| the tables/partitions which |
| are produced by this query |
| and finish earlier will be available for |
| querying |
| much earlier. Since |
| the locks are only released once the query finishes, |
| this |
| does not |
| apply if concurrency is enabled. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.fetch.task.conversion</name> |
| <value>minimal</value> |
| <description> |
| Some select queries can be converted to single FETCH |
| task minimizing |
| latency. |
| Currently the query should be single sourced, not having any subquery, |
| and should not have any aggregations or distincts (which incur RS), |
| lateral views or joins. |
| 1. minimal : SELECT STAR, FILTER on partition columns, LIMIT only |
| 2. more : SELECT, FILTER, LIMIT only (TABLESAMPLE, virtual columns) |
| </description> |
| </property> |
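| |
| <!-- For illustration (hypothetical table partitioned by ds): with the |
| default value "minimal", a query such as |
| SELECT * FROM sales WHERE ds = '2013-01-01' LIMIT 10; |
| (star projection, filter on a partition column, LIMIT only) can be |
| served by a single FETCH task without launching a map-reduce job. --> |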
| |
| <property> |
| <name>hive.hmshandler.retry.attempts</name> |
| <value>1</value> |
| <description>The number of times to retry an HMSHandler call if there |
| was a connection error |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.hmshandler.retry.interval</name> |
| <value>1000</value> |
| <description>The number of milliseconds between HMSHandler retry |
| attempts |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.server.read.socket.timeout</name> |
| <value>10</value> |
| <description>Timeout for the HiveServer to close the connection if no |
| response from the client in N seconds, defaults to 10 seconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.server.tcp.keepalive</name> |
| <value>true</value> |
| <description>Whether to enable TCP keepalive for the Hive server. |
| Keepalive will prevent accumulation of half-open connections. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.decode.partition.name</name> |
| <value>false</value> |
| <description>Whether to show the unquoted partition names in query |
| results. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.log4j.file</name> |
| <value></value> |
| <description>Hive log4j configuration file. |
| If the property is not |
| set, then logging will be initialized using |
| hive-log4j.properties |
| found on the classpath. |
| If the property is set, the value must be a |
| valid URI (java.net.URI, |
| e.g. "file:///tmp/my-logging.properties"), |
| which you can then |
| extract a URL from and pass to |
| PropertyConfigurator.configure(URL). |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.log4j.file</name> |
| <value></value> |
| <description>Hive log4j configuration file for execution mode (sub |
| command). |
| If the property is not set, then logging will be |
| initialized using |
| hive-exec-log4j.properties found on the classpath. |
| If the property is set, the value must be a valid URI (java.net.URI, |
| e.g. "file:///tmp/my-logging.properties"), which you can then |
| extract a URL from and pass to PropertyConfigurator.configure(URL). |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.infer.bucket.sort</name> |
| <value>false</value> |
| <description> |
| If this is set, when writing partitions, the metadata |
| will include the |
| bucketing/sorting |
| properties with which the data was |
| written if any (this will not overwrite the |
| metadata |
| inherited from |
| the table if the table is bucketed/sorted) |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.infer.bucket.sort.num.buckets.power.two</name> |
| <value>false</value> |
| <description> |
| If this is set, when setting the number of reducers for |
| the map reduce |
| task which writes the |
| final output files, it will |
| choose a number which is a power of two, |
| unless the user specifies |
| the number of reducers to use using mapred.reduce.tasks. The number |
| of |
| reducers |
| may be set to a power of two, only to be followed by a |
| merge task |
| meaning preventing |
| anything from being inferred. |
| With |
| hive.exec.infer.bucket.sort set to true: |
| Advantages: If this is not |
| set, the number of buckets for partitions will seem |
| arbitrary, |
| which |
| means that the number of mappers used for optimized joins, for |
| example, will |
| be very low. With this set, since the number of buckets |
| used for any |
| partition is |
| a power of two, the number of mappers used |
| for optimized joins will |
| be the least |
| number of buckets used by any |
| partition being joined. |
| Disadvantages: This may mean a much larger or |
| much smaller number of reducers |
| being used in the |
| final map reduce |
| job, e.g. if a job was originally going to take 257 |
| reducers, |
| it will |
| now take 512 reducers, similarly if the max number of reducers |
| is |
| 511, |
| and a job was going to use this many, it will now use 256 |
| reducers. |
| |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.groupby.orderby.position.alias</name> |
| <value>false</value> |
| <description>Whether to enable using Column Position Alias in Group |
| By or Order By |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.server2.thrift.min.worker.threads</name> |
| <value>5</value> |
| <description>Minimum number of Thrift worker threads</description> |
| </property> |
| |
| <property> |
| <name>hive.server2.thrift.max.worker.threads</name> |
| <value>100</value> |
| <description>Maximum number of Thrift worker threads</description> |
| </property> |
| |
| <property> |
| <name>hive.server2.thrift.port</name> |
| <value>10000</value> |
| <description>Port number of HiveServer2 Thrift interface. |
| Can be |
| overridden by setting $HIVE_SERVER2_THRIFT_PORT |
| </description> |
| </property> |
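| |
| <!-- For example, the port can be overridden from the environment before |
| starting HiveServer2: export HIVE_SERVER2_THRIFT_PORT=10001 --> |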
| |
| <property> |
| <name>hive.server2.thrift.bind.host</name> |
| <value>localhost</value> |
| <description>Bind host on which to run the HiveServer2 Thrift |
| interface. |
| Can be overridden by setting |
| $HIVE_SERVER2_THRIFT_BIND_HOST |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.server2.authentication</name> |
| <value>NONE</value> |
| <description> |
| Client authentication types. |
| NONE: no authentication check |
| LDAP: LDAP/AD based authentication |
| KERBEROS: Kerberos/GSSAPI authentication |
| CUSTOM: Custom authentication provider |
| (Use with property hive.server2.custom.authentication.class) |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.server2.custom.authentication.class</name> |
| <value></value> |
| <description> |
| Custom authentication class. Used when property |
| 'hive.server2.authentication' is set to 'CUSTOM'. Provided class |
| must be a proper implementation of the interface |
| org.apache.hive.service.auth.PasswdAuthenticationProvider. |
| HiveServer2 |
| will call its Authenticate(user, password) method to |
| authenticate |
| requests. |
| The implementation may optionally extend |
| Hadoop's |
| org.apache.hadoop.conf.Configured class to grab Hive's |
| Configuration |
| object. |
| </description> |
| </property> |
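| |
| <!-- A minimal sketch of such a provider (package and class names are |
| hypothetical; error handling is omitted): |
| |
| package com.example.auth; |
| import javax.security.sasl.AuthenticationException; |
| import org.apache.hive.service.auth.PasswdAuthenticationProvider; |
| |
| public class StaticPasswordAuthenticator implements PasswdAuthenticationProvider { |
| // Reject every login except a fixed demo password. |
| public void Authenticate(String user, String password) |
| throws AuthenticationException { |
| if (!"demo.secret".equals(password)) { |
| throw new AuthenticationException("Authentication failed for " + user); |
| } |
| } |
| } |
| |
| The fully qualified class name would then be used as the value above. --> |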
| |
| <property> |
| <name>hive.server2.authentication.kerberos.principal</name> |
| <value></value> |
| <description> |
| Kerberos server principal |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.server2.authentication.kerberos.keytab</name> |
| <value></value> |
| <description> |
| Kerberos keytab file for server principal |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.server2.authentication.ldap.url</name> |
| <value></value> |
| <description> |
| LDAP connection URL |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.server2.authentication.ldap.baseDN</name> |
| <value></value> |
| <description> |
| LDAP base DN |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.server2.enable.doAs</name> |
| <value>true</value> |
| <description> |
| Setting this property to true will have hive server2 |
| execute |
| hive operations as the user making the calls to it. |
| </description> |
| </property> |
| |
| |
| </configuration> |
| |
| <property> |
| <name>hive.exec.scratchdir</name> |
| <value>/tmp/hive-${user.name}</value> |
| <description>Scratch space for Hive jobs</description> |
| </property> |
| |
| <property> |
| <name>hive.exec.local.scratchdir</name> |
| <value>/tmp/${user.name}</value> |
| <description>Local scratch space for Hive jobs</description> |
| </property> |
| |
| <property> |
| <name>hive.test.mode</name> |
| <value>false</value> |
| <description>whether hive is running in test mode. If yes, it turns on |
| sampling and prefixes the output tablename |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.test.mode.prefix</name> |
| <value>test_</value> |
| <description>if hive is running in test mode, prefixes the output |
| table by this string |
| </description> |
| </property> |
| |
| <!-- If the input table is not bucketed, the denominator of the tablesample |
| is determined by the parameter below --> |
| <!-- For example, the following query: --> |
| <!-- INSERT OVERWRITE TABLE dest --> |
| <!-- SELECT col1 from src --> |
| <!-- would be converted to --> |
| <!-- INSERT OVERWRITE TABLE test_dest --> |
| <!-- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1)) --> |
| <property> |
| <name>hive.test.mode.samplefreq</name> |
| <value>32</value> |
| <description>if hive is running in test mode and table is not |
| bucketed, sampling frequency |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.test.mode.nosamplelist</name> |
| <value></value> |
| <description>if hive is running in test mode, don't sample the above |
| comma separated list of tables |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.uris</name> |
| <value></value> |
| <description>Thrift uri for the remote metastore. Used by metastore |
| client to connect to remote metastore. |
| </description> |
| </property> |
| |
| <property> |
| <name>javax.jdo.option.ConnectionURL</name> |
| <value>jdbc:derby:;databaseName=metastore_db;create=true</value> |
| <description>JDBC connect string for a JDBC metastore</description> |
| </property> |
| |
| <property> |
| <name>javax.jdo.option.ConnectionDriverName</name> |
| <value>org.apache.derby.jdbc.EmbeddedDriver</value> |
| <description>Driver class name for a JDBC metastore</description> |
| </property> |
| |
| <property> |
| <name>javax.jdo.PersistenceManagerFactoryClass</name> |
| <value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value> |
| <description>class implementing the jdo persistence</description> |
| </property> |
| |
| <property> |
| <name>javax.jdo.option.DetachAllOnCommit</name> |
| <value>true</value> |
| <description>detaches all objects from session so that they can be |
| used after transaction is committed |
| </description> |
| </property> |
| |
| <property> |
| <name>javax.jdo.option.NonTransactionalRead</name> |
| <value>true</value> |
| <description>reads outside of transactions</description> |
| </property> |
| |
| <property> |
| <name>javax.jdo.option.ConnectionUserName</name> |
| <value>APP</value> |
| <description>username to use against metastore database</description> |
| </property> |
| |
| <property> |
| <name>javax.jdo.option.ConnectionPassword</name> |
| <value>mine</value> |
| <description>password to use against metastore database</description> |
| </property> |
| |
| <property> |
| <name>javax.jdo.option.Multithreaded</name> |
| <value>true</value> |
| <description>Set this to true if multiple threads access metastore |
| through JDO concurrently. |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.connectionPoolingType</name> |
| <value>DBCP</value> |
| <description>Uses a DBCP connection pool for JDBC metastore |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.validateTables</name> |
| <value>false</value> |
| <description>validates existing schema against code. turn this on if |
| you want to verify existing schema |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.validateColumns</name> |
| <value>false</value> |
| <description>validates existing schema against code. turn this on if |
| you want to verify existing schema |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.validateConstraints</name> |
| <value>false</value> |
| <description>validates existing schema against code. turn this on if |
| you want to verify existing schema |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.storeManagerType</name> |
| <value>rdbms</value> |
| <description>metadata store type</description> |
| </property> |
| |
| <property> |
| <name>datanucleus.autoCreateSchema</name> |
| <value>true</value> |
| <description>creates necessary schema on a startup if one doesn't |
| exist. set this to false, after creating it once |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.autoStartMechanismMode</name> |
| <value>checked</value> |
| <description>throw exception if metadata tables are incorrect |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.transactionIsolation</name> |
| <value>read-committed</value> |
| <description>Default transaction isolation level for identity |
| generation. |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.cache.level2</name> |
| <value>false</value> |
| <description>Use a level 2 cache. Turn this off if metadata is changed |
| independently of hive metastore server |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.cache.level2.type</name> |
| <value>SOFT</value> |
| <description>SOFT=soft reference based cache, WEAK=weak reference |
| based cache. |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.identifierFactory</name> |
| <value>datanucleus</value> |
| <description>Name of the identifier factory to use when generating |
| table/column names etc. 'datanucleus' is used for backward |
| compatibility |
| </description> |
| </property> |
| |
| <property> |
| <name>datanucleus.plugin.pluginRegistryBundleCheck</name> |
| <value>LOG</value> |
| <description>Defines what happens when plugin bundles are found and |
| are duplicated [EXCEPTION|LOG|NONE] |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.warehouse.dir</name> |
| <value>/user/hive/warehouse</value> |
| <description>location of default database for the warehouse |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.execute.setugi</name> |
| <value>false</value> |
| <description>In unsecure mode, setting this property to true will |
| cause the metastore to execute DFS operations using the client's |
| reported user and group permissions. Note that this property must be |
| set on both the client and server sides. Further note that it's best |
| effort. If the client sets it to true and the server sets it to false, |
| the client setting will be ignored. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.event.listeners</name> |
| <value></value> |
| <description>list of comma separated listeners for metastore events. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.partition.inherit.table.properties</name> |
| <value></value> |
| <description>list of comma separated keys occurring in table |
| properties which will get inherited to newly created partitions. * |
| implies all the keys will get inherited. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metadata.export.location</name> |
| <value></value> |
| <description>When used in conjunction with the |
| org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event |
| listener, it is the location to which the metadata will be exported. |
| The default is an empty string, which results in the metadata being |
| exported to the current user's home directory on HDFS. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metadata.move.exported.metadata.to.trash</name> |
| <value></value> |
| <description>When used in conjunction with the |
| org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event |
| listener, this setting determines if the metadata that is exported |
| will subsequently be moved to the user's trash directory alongside |
| the dropped table data. This ensures that the metadata will be |
| cleaned up along with the dropped table data. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.partition.name.whitelist.pattern</name> |
| <value></value> |
| <description>Partition names will be checked against this regex |
| pattern and rejected if not matched. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.end.function.listeners</name> |
| <value></value> |
| <description>list of comma separated listeners for the end of |
| metastore functions. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.event.expiry.duration</name> |
| <value>0</value> |
| <description>Duration after which events expire from events table (in |
| seconds) |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.event.clean.freq</name> |
| <value>0</value> |
| <description>Frequency at which timer task runs to purge expired |
| events in metastore (in seconds). |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.connect.retries</name> |
| <value>5</value> |
| <description>Number of retries while opening a connection to metastore |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.failure.retries</name> |
| <value>3</value> |
| <description>Number of retries upon failure of Thrift metastore calls |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.client.connect.retry.delay</name> |
| <value>1</value> |
| <description>Number of seconds for the client to wait between |
| consecutive connection attempts |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.client.socket.timeout</name> |
| <value>20</value> |
| <description>MetaStore Client socket timeout in seconds</description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.rawstore.impl</name> |
| <value>org.apache.hadoop.hive.metastore.ObjectStore</value> |
| <description>Name of the class that implements |
| org.apache.hadoop.hive.metastore.rawstore interface. This class is |
| used to store and retrieve raw metadata objects such as tables and |
| databases |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.batch.retrieve.max</name> |
| <value>300</value> |
| <description>Maximum number of objects (tables/partitions) that can be |
| retrieved from the metastore in one batch. The higher the number, the |
| fewer round trips are needed to the Hive metastore server, but it may |
| also cause higher memory requirements at the client |
| side. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.batch.retrieve.table.partition.max</name> |
| <value>1000</value> |
| <description>Maximum number of table partitions that metastore |
| internally retrieves in one batch. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.default.fileformat</name> |
| <value>TextFile</value> |
| <description>Default file format for CREATE TABLE statement. Options |
| are TextFile and SequenceFile. Users can explicitly say CREATE TABLE |
| ... STORED AS &lt;TEXTFILE|SEQUENCEFILE&gt; to override</description> |
| </property> |
| |
| <property> |
| <name>hive.fileformat.check</name> |
| <value>true</value> |
| <description>Whether to check file format or not when loading data |
| files |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.map.aggr</name> |
| <value>true</value> |
| <description>Whether to use map-side aggregation in Hive Group By |
| queries |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.groupby.skewindata</name> |
| <value>false</value> |
| <description>Whether there is skew in data to optimize group by |
| queries |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.multigroupby.common.distincts</name> |
| <value>true</value> |
| <description>Whether to optimize a multi-groupby query with the same |
| distinct. |
| Consider a query like: |
| |
| from src |
| insert overwrite table dest1 select col1, count(distinct colx) group by col1 |
| insert overwrite table dest2 select col2, count(distinct colx) group by col2; |
| |
| With this parameter set to true, first we spray by the distinct value |
| (colx), and then perform the 2 group bys. This makes sense if |
| map-side aggregation is turned off. However, with map-side |
| aggregation, it might be useful in some cases to treat the 2 inserts |
| independently, thereby performing the query above in 2 MR jobs |
| instead of 3 (due to spraying by distinct key first). |
| If this parameter is turned off, we don't consider the fact that the |
| distinct key is the same across different MR jobs. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.groupby.mapaggr.checkinterval</name> |
| <value>100000</value> |
| <description>Number of rows after which a check of the size of the |
| grouping keys/aggregation classes is performed |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapred.local.mem</name> |
| <value>0</value> |
| <description>For local mode, memory of the mappers/reducers |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapjoin.followby.map.aggr.hash.percentmemory</name> |
| <value>0.3</value> |
| <description>Portion of total memory to be used by map-side group |
| aggregation hash table, when this group by is followed by map join |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.map.aggr.hash.force.flush.memory.threshold</name> |
| <value>0.9</value> |
| <description>The max memory to be used by map-side group aggregation |
| hash table, if the memory usage is higher than this number, force to |
| flush data |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.map.aggr.hash.percentmemory</name> |
| <value>0.5</value> |
| <description>Portion of total memory to be used by map-side group |
| aggregation hash table |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.map.aggr.hash.min.reduction</name> |
| <value>0.5</value> |
| <description>Hash aggregation will be turned off if the ratio between |
| hash |
| table size and input rows is bigger than this number. Set to 1 to |
| make |
| sure |
| hash aggregation is never turned off. |
| </description> |
| </property> |
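| |
| <!-- Worked example (illustrative): if, at the check interval, the hash |
| table holds 60,000 entries for 100,000 input rows, the ratio 0.6 is |
| bigger than 0.5 and hash aggregation is turned off; at 20,000 entries |
| the ratio 0.2 keeps it on. --> |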
| |
| <property> |
| <name>hive.optimize.cp</name> |
| <value>true</value> |
| <description>Whether to enable column pruner</description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.index.filter</name> |
| <value>false</value> |
| <description>Whether to enable automatic use of indexes</description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.index.groupby</name> |
| <value>false</value> |
| <description>Whether to enable optimization of group-by queries using |
| Aggregate indexes. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.ppd</name> |
| <value>true</value> |
| <description>Whether to enable predicate pushdown</description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.ppd.storage</name> |
| <value>true</value> |
| <description>Whether to push predicates down into storage handlers. |
| Ignored when hive.optimize.ppd is false. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.ppd.recognizetransivity</name> |
| <value>true</value> |
| <description>Whether to transitively replicate predicate filters over |
| equijoin conditions. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.groupby</name> |
| <value>true</value> |
| <description>Whether to enable the bucketed group by from bucketed |
| partitions/tables. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.skewjoin.compiletime</name> |
| <value>false</value> |
| <description>Whether to create a separate plan for skewed keys for the |
| tables in the join. |
| This is based on the skewed keys stored in the |
| metadata. At compile time, |
| the plan is broken |
| into different joins: one |
| for the skewed keys, and the other for the |
| remaining keys. And then, |
| a |
| union is performed for the 2 joins generated above. So unless the |
| same skewed key is present |
| in both the joined tables, the join for the |
| skewed key will be |
| performed as a map-side join. |
| |
| The main difference |
| between this parameter and hive.optimize.skewjoin is |
| that this |
| parameter |
| uses the skew information stored in the metastore to |
| optimize the plan at |
| compile time itself. |
| If there is no skew |
| information in the metadata, this parameter will |
| not have any effect. |
| Both hive.optimize.skewjoin.compiletime and hive.optimize.skewjoin |
| should |
| be set to true. |
| Ideally, hive.optimize.skewjoin should be |
| renamed as |
| hive.optimize.skewjoin.runtime, but not doing |
| so for |
| backward compatibility. |
| |
| If the skew information is correctly stored in |
| the metadata, |
| hive.optimize.skewjoin.compiletime |
| would change the query |
| plan to take care of it, and hive.optimize.skewjoin |
| will be a no-op. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.union.remove</name> |
| <value>false</value> |
| <description> |
| Whether to remove the union and push the operators |
| between union and the |
| filesink above |
| union. This avoids an extra scan |
| of the output by union. This is |
| independently useful for union |
| queries, and especially useful when hive.optimize.skewjoin.compiletime |
| is set |
| to true, since an |
| extra union is inserted. |
| |
| The merge is triggered |
| if either of hive.merge.mapfiles or |
| hive.merge.mapredfiles is set to |
| true. |
| If the user has set hive.merge.mapfiles to true and |
| hive.merge.mapredfiles to false, the idea was the |
| number of reducers |
| are few, so the number of files anyway are small. |
| However, with this |
| optimization, |
| we are increasing the number of files possibly by a big |
| margin. So, we |
| merge aggressively. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapred.supports.subdirectories</name> |
| <value>false</value> |
| <description>Whether the version of hadoop which is running supports |
| sub-directories for tables/partitions. |
| Many hive optimizations can be |
| applied if the hadoop version supports |
| sub-directories for |
| tables/partitions. It was added by MAPREDUCE-1501 |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.multigroupby.singlemr</name> |
| <value>false</value> |
| <description>Whether to optimize multi group by query to generate |
| single M/R |
| job plan. If the multi group by query has common group by |
| keys, it will |
| be |
| optimized to generate single M/R job. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.map.groupby.sorted</name> |
| <value>false</value> |
| <description>If the bucketing/sorting properties of the table exactly |
| match the grouping key, whether to |
| perform the group by in the mapper |
| by using BucketizedHiveInputFormat. The |
| only downside to this |
| is that |
| it limits the number of mappers to the number of files. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.map.groupby.sorted.testmode</name> |
| <value>false</value> |
| <description>If the bucketing/sorting properties of the table exactly |
| match the grouping key, whether to |
| perform the group by in the mapper |
| by using BucketizedHiveInputFormat. If |
| the test mode is set, the plan |
| is not converted, but a query property is set to denote the same. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.new.job.grouping.set.cardinality</name> |
| <value>30</value> |
| <description> |
| Whether a new map-reduce job should be launched for |
| grouping |
| sets/rollups/cubes. |
| For a query like: select a, b, c, count(1) |
| from T group by a, b, c with |
| rollup; |
| 4 rows are created per row: (a, b, |
| c), (a, b, null), (a, null, null), |
| (null, null, null). |
| This can lead to |
| explosion across map-reduce boundary if the cardinality |
| of T is very |
| high, |
| and map-side aggregation does not do a very good job. |
| |
| This |
| parameter decides if hive should add an additional map-reduce job. |
| If |
| the grouping set |
| cardinality (4 in the example above), is more than |
| this value, a new MR job is |
| added under the |
| assumption that the original |
| group by will reduce the data size. |
| </description> |
| </property> |
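| |
| <!-- Worked example (illustrative): the rollup above yields a grouping set |
| cardinality of 4, below the default of 30, so no extra map-reduce job |
| is added; a cube over 5 columns yields 2^5 = 32 grouping sets, which |
| exceeds 30, so the additional job would be added. --> |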
| |
| <property> |
| <name>hive.join.emit.interval</name> |
| <value>1000</value> |
| <description>How many rows in the right-most join operand Hive should |
| buffer before emitting the join result. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.join.cache.size</name> |
| <value>25000</value> |
| <description>How many rows in the joining tables (except the streaming |
| table) should be cached in memory. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapjoin.bucket.cache.size</name> |
| <value>100</value> |
| <description>How many values in each key in the map-joined table |
| should be cached in memory. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapjoin.cache.numrows</name> |
| <value>25000</value> |
| <description>How many rows should be cached by jdbm for map join. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.skewjoin</name> |
| <value>false</value> |
| <description>Whether to enable skew join optimization. |
| The algorithm is |
| as follows: At runtime, detect the keys with a large |
| skew. Instead of |
| processing those keys, store them temporarily in a hdfs directory. In |
| a |
| follow-up map-reduce |
| job, process those skewed keys. The same key |
| need not be skewed for all |
| the tables, and so, |
| the follow-up map-reduce |
| job (for the skewed keys) would be much faster, |
| since it would be a |
| map-join. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.skewjoin.key</name> |
| <value>100000</value> |
| <description>Determine if we get a skew key in join. If we see more |
| than the specified number of rows with the same key in join operator, |
| we treat the key as a skew join key. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.skewjoin.mapjoin.map.tasks</name> |
| <value>10000</value> |
| <description> Determine the number of map tasks used in the follow up |
| map join job |
| for a skew join. It should be used together with |
| hive.skewjoin.mapjoin.min.split |
| to perform a fine grained control. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.skewjoin.mapjoin.min.split</name> |
| <value>33554432</value> |
| <description> Determine the maximum number of map tasks used in the |
| follow up map join job |
| for a skew join by specifying the minimum split |
| size. It should be used |
| together with |
| hive.skewjoin.mapjoin.map.tasks |
| to perform a fine grained control. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapred.mode</name> |
| <value>nonstrict</value> |
| <description>The mode in which the hive operations are being |
| performed. |
| In strict mode, some risky queries are not allowed to run. |
| They include: |
| Cartesian Product. |
| No partition being picked up for a query. |
| Comparing bigints and strings. |
| Comparing bigints and doubles. |
| Orderby without limit. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.enforce.bucketmapjoin</name> |
| <value>false</value> |
| <description>If the user asked for bucketed map-side join, and it |
| cannot be performed, |
| should the query fail or not ? For eg, if the |
| buckets in the tables being |
| joined are |
| not a multiple of each other, |
| bucketed map-side join cannot be |
| performed, and the |
| query will fail if |
| hive.enforce.bucketmapjoin is set to true. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.script.maxerrsize</name> |
| <value>100000</value> |
| <description>Maximum number of bytes a script is allowed to emit to |
| standard error (per map-reduce task). This prevents runaway scripts |
| from filling logs partitions to capacity |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.script.allow.partial.consumption</name> |
| <value>false</value> |
| <description> When enabled, this option allows a user script to exit |
| successfully without consuming all the data from the standard input. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.script.operator.id.env.var</name> |
| <value>HIVE_SCRIPT_OPERATOR_ID</value> |
| <description> Name of the environment variable that holds the unique |
| script operator ID in the user's transform function (the custom |
| mapper/reducer that the user has specified in the query) |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.script.operator.truncate.env</name> |
| <value>false</value> |
| <description>Truncate each environment variable for external script in |
| scripts operator to 20KB (to fit system limits) |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.compress.output</name> |
| <value>false</value> |
| <description> This controls whether the final outputs of a query (to a |
| local/hdfs file or a hive table) are compressed. The compression codec |
| and other options are determined from hadoop config variables |
| mapred.output.compress* |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.compress.intermediate</name> |
| <value>false</value> |
| <description> This controls whether intermediate files produced by |
| hive between multiple map-reduce jobs are compressed. The compression |
| codec and other options are determined from hadoop config variables |
| mapred.output.compress* |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.parallel</name> |
| <value>false</value> |
| <description>Whether to execute jobs in parallel</description> |
| </property> |
| |
| <property> |
| <name>hive.exec.parallel.thread.number</name> |
| <value>8</value> |
| <description>How many jobs at most can be executed in parallel |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.rowoffset</name> |
| <value>false</value> |
| <description>Whether to provide the row offset virtual column |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.task.progress</name> |
| <value>false</value> |
| <description>Whether Hive should periodically update task progress |
| counters during execution. Enabling this allows task progress to be |
| monitored more closely in the job tracker, but may impose a |
| performance penalty. This flag is automatically set to true for jobs |
| with hive.exec.dynamic.partition set to true. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.hwi.war.file</name> |
| <value>lib/hive-hwi-@VERSION@.war</value> |
| <description>This sets the path to the HWI war file, relative to |
| ${HIVE_HOME}. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.hwi.listen.host</name> |
| <value>0.0.0.0</value> |
| <description>This is the host address the Hive Web Interface will |
| listen on |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.hwi.listen.port</name> |
| <value>9999</value> |
| <description>This is the port the Hive Web Interface will listen on |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.pre.hooks</name> |
| <value></value> |
| <description>Comma-separated list of pre-execution hooks to be invoked |
| for each statement. A pre-execution hook is specified as the name of |
| a Java class which implements the |
| org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface. |
| </description> |
| </property> |
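| |
| <!-- Example value (hook class names are hypothetical): |
| <value>com.example.hooks.AuditHook,com.example.hooks.QueryLoggerHook</value> |
| Each listed class must implement the ExecuteWithHookContext interface |
| named above. --> |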
| |
| <property> |
| <name>hive.exec.post.hooks</name> |
| <value></value> |
| <description>Comma-separated list of post-execution hooks to be |
| invoked for each statement. A post-execution hook is specified as the |
| name of a Java class which implements the |
| org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.failure.hooks</name> |
| <value></value> |
| <description>Comma-separated list of on-failure hooks to be invoked |
| for each statement. An on-failure hook is specified as the name of a |
| Java class which implements the |
| org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.init.hooks</name> |
| <value></value> |
| <description>A comma separated list of hooks to be invoked at the |
| beginning of HMSHandler initialization. An init hook is specified as |
| the name of a Java class which extends |
| org.apache.hadoop.hive.metastore.MetaStoreInitListener. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.client.stats.publishers</name> |
| <value></value> |
| <description>Comma-separated list of statistics publishers to be |
| invoked on counters on each job. A client stats publisher is |
| specified as the name of a Java class which implements the |
| org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.client.stats.counters</name> |
| <value></value> |
| <description>Subset of counters that should be of interest for |
| hive.client.stats.publishers (when one wants to limit their |
| publishing). Non-display names should be used |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.merge.mapfiles</name> |
| <value>true</value> |
| <description>Merge small files at the end of a map-only job |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.merge.mapredfiles</name> |
| <value>false</value> |
| <description>Merge small files at the end of a map-reduce job |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.heartbeat.interval</name> |
| <value>1000</value> |
| <description>Send a heartbeat after this interval - used by mapjoin |
| and filter operators |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.merge.size.per.task</name> |
| <value>256000000</value> |
| <description>Size of merged files at the end of the job</description> |
| </property> |
| |
| <property> |
| <name>hive.merge.smallfiles.avgsize</name> |
| <value>16000000</value> |
| <description>When the average output file size of a job is less than |
| this number, Hive will start an additional map-reduce job to merge |
| the output files into bigger files. This is only done for map-only |
| jobs if hive.merge.mapfiles is true, and for map-reduce jobs if |
| hive.merge.mapredfiles is true. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapjoin.smalltable.filesize</name> |
| <value>25000000</value> |
| <description>The threshold for the input file size of the small |
| tables; if the file size is smaller than this threshold, Hive will |
| try to convert the common join into a map join. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.ignore.mapjoin.hint</name> |
| <value>true</value> |
| <description>Ignore the mapjoin hint</description> |
| </property> |
| |
| <property> |
| <name>hive.mapjoin.localtask.max.memory.usage</name> |
| <value>0.90</value> |
| <description>The fraction of memory the local task can use to hold |
| the key/value pairs in its in-memory hash table; if the local task's |
| memory usage exceeds this number, the local task aborts itself, |
| which means the data of the small table is too large to be held in |
| memory. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapjoin.followby.gby.localtask.max.memory.usage</name> |
| <value>0.55</value> |
| <description>The fraction of memory the local task can use to hold |
| the key/value pairs in its in-memory hash table when the map join is |
| followed by a group by; if the local task's memory usage exceeds |
| this number, the local task aborts itself, which means the data of |
| the small table is too large to be held in memory. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapjoin.check.memory.rows</name> |
| <value>100000</value> |
| <description>The number of rows to process between checks of the |
| local task's memory usage. |
| </description> |
| </property> |
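| <!-- |
| Illustrative sketch (not Hive code): how the two properties above |
| interact. Roughly every hive.mapjoin.check.memory.rows rows, the |
| local task compares its memory usage against the configured fraction |
| and aborts if the limit is exceeded. All names here are hypothetical. |
| |
| class MemoryCheck { |
|   long rowsProcessed = 0; |
|   final long checkInterval = 100000L; // hive.mapjoin.check.memory.rows |
|   final double maxUsage = 0.90; // hive.mapjoin.localtask.max.memory.usage |
| |
|   void onRow() { |
|     rowsProcessed++; |
|     if (rowsProcessed % checkInterval == 0) { |
|       Runtime rt = Runtime.getRuntime(); |
|       double used = (double) (rt.totalMemory() - rt.freeMemory()); |
|       if (used / rt.maxMemory() > maxUsage) { |
|         // Small table does not fit in memory: abort the local task. |
|         throw new RuntimeException("Hash table exceeds memory limit"); |
|       } |
|     } |
|   } |
| } |
| --> |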
| |
| <property> |
| <name>hive.auto.convert.join</name> |
| <value>false</value> |
| <description>Whether Hive enables the optimization that converts a |
| common join into a mapjoin based on the input file size. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.auto.convert.join.noconditionaltask</name> |
| <value>true</value> |
| <description>Whether Hive enables the optimization that converts a |
| common join into a mapjoin based on the input file size. If this |
| parameter is on, and the sum of sizes for n-1 of the |
| tables/partitions of an n-way join is smaller than the specified |
| size, the join is directly converted to a mapjoin (there is no |
| conditional task). |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.auto.convert.join.noconditionaltask.size</name> |
| <value>10000000</value> |
| <description>If hive.auto.convert.join.noconditionaltask is off, this |
| parameter does not take effect. However, if it is on, and the sum of |
| sizes for n-1 of the tables/partitions of an n-way join is smaller |
| than this size, the join is directly converted to a mapjoin (there |
| is no conditional task). The default is 10MB. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.mapjoin.mapreduce</name> |
| <value>false</value> |
| <description>If hive.auto.convert.join is off, this parameter does |
| not take effect. If it is on, and if there are map-join jobs |
| followed by a map-reduce job (e.g. a group by), each map-only job is |
| merged with the following map-reduce job. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.script.auto.progress</name> |
| <value>false</value> |
| <description>Whether Hive Transform/Map/Reduce clauses should |
| automatically send progress information to TaskTracker to avoid the |
| task getting killed because of inactivity. Hive sends progress |
| information when the script is outputting to stderr. This option |
| removes the need to periodically produce stderr messages, but users |
| should be cautious because this may prevent TaskTracker from killing |
| scripts that are stuck in infinite loops. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.script.serde</name> |
| <value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value> |
| <description>The default serde for transmitting input data to and |
| reading output data from the user scripts. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.binary.record.max.length</name> |
| <value>1000</value> |
| <description>Read from a binary stream and treat each |
| hive.binary.record.max.length bytes as a record. The last record |
| before the end of the stream can have fewer than |
| hive.binary.record.max.length bytes. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.script.recordreader</name> |
| <value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value> |
| <description>The default record reader for reading data from the user |
| scripts. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.script.recordwriter</name> |
| <value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value> |
| <description>The default record writer for writing data to the user |
| scripts. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.input.format</name> |
| <value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value> |
| <description>The default input format. Set this to HiveInputFormat if |
| you encounter problems with CombineHiveInputFormat. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.udtf.auto.progress</name> |
| <value>false</value> |
| <description>Whether Hive should automatically send progress |
| information to TaskTracker when using UDTF's to prevent the task |
| getting killed because of inactivity. Users should be cautious |
| because this may prevent TaskTracker from killing tasks with infinite |
| loops. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.mapred.reduce.tasks.speculative.execution</name> |
| <value>true</value> |
| <description>Whether speculative execution for reducers should be |
| turned on. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.counters.pull.interval</name> |
| <value>1000</value> |
| <description>The interval at which to poll the JobTracker for the |
| counters of the running job. The smaller it is, the more load there |
| will be on the JobTracker; the higher it is, the less granular the |
| data caught will be. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.querylog.location</name> |
| <value>/tmp/${user.name}</value> |
| <description> |
| Location of Hive run time structured log file |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.querylog.enable.plan.progress</name> |
| <value>true</value> |
| <description> |
| Whether to log the plan's progress every time a job's progress is |
| checked. These logs are written to the location specified by |
| hive.querylog.location. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.querylog.plan.progress.interval</name> |
| <value>60000</value> |
| <description> |
| The interval to wait between logging the plan's progress, in |
| milliseconds. If there is a whole-number percentage change in the |
| progress of the mappers or the reducers, the progress is logged |
| regardless of this value. The actual interval will be the ceiling of |
| (this value divided by the value of hive.exec.counters.pull.interval) |
| multiplied by the value of hive.exec.counters.pull.interval, i.e. if |
| it does not divide evenly by the value of |
| hive.exec.counters.pull.interval it will be logged less frequently |
| than specified. This only has an effect if |
| hive.querylog.enable.plan.progress is set to true. |
| </description> |
| </property> |
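| <!-- |
| Worked example (illustrative only): with |
| hive.querylog.plan.progress.interval = 60000 and |
| hive.exec.counters.pull.interval = 35000, the effective logging |
| interval is ceil(60000 / 35000) * 35000 = 70000 ms, i.e. slightly |
| less frequent than requested. A minimal sketch of that rounding: |
| |
| long planInterval = 60000L; // hive.querylog.plan.progress.interval |
| long pullInterval = 35000L; // hive.exec.counters.pull.interval |
| long effective = |
|     ((planInterval + pullInterval - 1) / pullInterval) * pullInterval; |
| // effective == 70000 |
| --> |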
| |
| <property> |
| <name>hive.enforce.bucketing</name> |
| <value>false</value> |
| <description>Whether bucketing is enforced. If true, while inserting |
| into the table, bucketing is enforced. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.enforce.sorting</name> |
| <value>false</value> |
| <description>Whether sorting is enforced. If true, while inserting |
| into the table, sorting is enforced. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.bucketingsorting</name> |
| <value>true</value> |
| <description>If hive.enforce.bucketing or hive.enforce.sorting is |
| true, don't create a reducer for enforcing bucketing/sorting for |
| queries of the form: |
| insert overwrite table T2 select * from T1; |
| where T1 and T2 are bucketed/sorted by the same keys into the same |
| number of buckets. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.enforce.sortmergebucketmapjoin</name> |
| <value>false</value> |
| <description>If the user asked for sort-merge bucketed map-side join, |
| and it cannot be performed, should the query fail or not? |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.auto.convert.sortmerge.join</name> |
| <value>false</value> |
| <description>Whether the join will be automatically converted to a |
| sort-merge join if the joined tables pass the criteria for |
| sort-merge join. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.auto.convert.sortmerge.join.bigtable.selection.policy |
| </name> |
| <value>org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ |
| </value> |
| <description>The policy to choose the big table for automatic |
| conversion to sort-merge join. By default, the table with the |
| largest partitions is assigned as the big table. All policies are: |
| . based on position of the table - the leftmost table is selected: |
| org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSMJ. |
| . based on total size (all the partitions selected in the query) of |
| the table: |
| org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ. |
| . based on average size (all the partitions selected in the query) |
| of the table: |
| org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ. |
| New policies can be added in the future. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.ds.connection.url.hook</name> |
| <value></value> |
| <description>Name of the hook to use for retrieving the JDO connection |
| URL. If empty, the value in javax.jdo.option.ConnectionURL is used |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.ds.retry.attempts</name> |
| <value>1</value> |
| <description>The number of times to retry a metastore call if there |
| was a connection error. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.ds.retry.interval</name> |
| <value>1000</value> |
| <description>The number of milliseconds between metastore retry |
| attempts |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.server.min.threads</name> |
| <value>200</value> |
| <description>Minimum number of worker threads in the Thrift server's |
| pool. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.server.max.threads</name> |
| <value>100000</value> |
| <description>Maximum number of worker threads in the Thrift server's |
| pool. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.server.tcp.keepalive</name> |
| <value>true</value> |
| <description>Whether to enable TCP keepalive for the metastore server. |
| Keepalive will prevent accumulation of half-open connections. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.sasl.enabled</name> |
| <value>false</value> |
| <description>If true, the metastore thrift interface will be secured |
| with SASL. Clients must authenticate with Kerberos. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.thrift.framed.transport.enabled</name> |
| <value>false</value> |
| <description>If true, the metastore thrift interface will use |
| TFramedTransport. When false (default) a standard TTransport is used. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.kerberos.keytab.file</name> |
| <value></value> |
| <description>The path to the Kerberos Keytab file containing the |
| metastore thrift server's service principal. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.kerberos.principal</name> |
| <value>hive-metastore/_HOST@EXAMPLE.COM</value> |
| <description>The service principal for the metastore thrift server. |
| The special string _HOST will be replaced automatically with the |
| correct host name. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.cluster.delegation.token.store.class</name> |
| <value>org.apache.hadoop.hive.thrift.MemoryTokenStore</value> |
| <description>The delegation token store implementation. Set to |
| org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced |
| cluster. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.cluster.delegation.token.store.zookeeper.connectString |
| </name> |
| <value>localhost:2181</value> |
| <description>The ZooKeeper token store connect string.</description> |
| </property> |
| |
| <property> |
| <name>hive.cluster.delegation.token.store.zookeeper.znode</name> |
| <value>/hive/cluster/delegation</value> |
| <description>The root path for token store data.</description> |
| </property> |
| |
| <property> |
| <name>hive.cluster.delegation.token.store.zookeeper.acl</name> |
| <value>sasl:hive/host1@EXAMPLE.COM:cdrwa,sasl:hive/host2@EXAMPLE.COM:cdrwa |
| </value> |
| <description>ACL for token store entries. List comma separated all |
| server principals for the cluster. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.cache.pinobjtypes</name> |
| <value>Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order |
| </value> |
| <description>List of comma separated metastore object types that |
| should be pinned in the cache |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.reducededuplication</name> |
| <value>true</value> |
| <description>Remove extra map-reduce jobs if the data is already |
| clustered by the same key which needs to be used again. This should |
| always be set to true. Since it is a new feature, it has been made |
| configurable. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.reducededuplication.min.reducer</name> |
| <value>4</value> |
| <description>Reduce deduplication merges two RSs by moving |
| key/parts/reducer-num of the child RS to the parent RS. That means |
| if reducer-num of the child RS is fixed (order by or forced |
| bucketing) and small, it can result in a very slow, single MR job. |
| The optimization will be disabled if the number of reducers is less |
| than the specified value. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.dynamic.partition</name> |
| <value>true</value> |
| <description>Whether or not to allow dynamic partitions in DML/DDL. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.dynamic.partition.mode</name> |
| <value>strict</value> |
| <description>In strict mode, the user must specify at least one static |
| partition in case the user accidentally overwrites all partitions. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.max.dynamic.partitions</name> |
| <value>1000</value> |
| <description>Maximum number of dynamic partitions allowed to be |
| created in total. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.max.dynamic.partitions.pernode</name> |
| <value>100</value> |
| <description>Maximum number of dynamic partitions allowed to be |
| created in each mapper/reducer node. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.max.created.files</name> |
| <value>100000</value> |
| <description>Maximum number of HDFS files created by all |
| mappers/reducers in a MapReduce job. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.default.partition.name</name> |
| <value>__HIVE_DEFAULT_PARTITION__</value> |
| <description>The default partition name in case the dynamic partition |
| column value is null/empty string or any other value that cannot be |
| escaped. This value must not contain any special character used in |
| HDFS URIs (e.g., ':', '%', '/' etc). The user has to be aware that |
| the dynamic partition value should not contain this value, to avoid |
| confusion. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.dbclass</name> |
| <value>jdbc:derby</value> |
| <description>The default database that stores temporary hive |
| statistics. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.autogather</name> |
| <value>true</value> |
| <description>A flag to gather statistics automatically during the |
| INSERT OVERWRITE command. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.jdbcdriver</name> |
| <value>org.apache.derby.jdbc.EmbeddedDriver</value> |
| <description>The JDBC driver for the database that stores temporary |
| hive statistics. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.dbconnectionstring</name> |
| <value>jdbc:derby:;databaseName=TempStatsStore;create=true</value> |
| <description>The default connection string for the database that |
| stores temporary hive statistics. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.default.publisher</name> |
| <value></value> |
| <description>The Java class (implementing the StatsPublisher |
| interface) that is used by default if hive.stats.dbclass is not JDBC |
| or HBase. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.default.aggregator</name> |
| <value></value> |
| <description>The Java class (implementing the StatsAggregator |
| interface) that is used by default if hive.stats.dbclass is not JDBC |
| or HBase. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.jdbc.timeout</name> |
| <value>30</value> |
| <description>Timeout value (number of seconds) used by JDBC connection |
| and statements. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.retries.max</name> |
| <value>0</value> |
| <description>Maximum number of retries when stats publisher/aggregator |
| gets an exception updating the intermediate database. Default is no |
| retries on failures. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.retries.wait</name> |
| <value>3000</value> |
| <description>The base waiting window (in milliseconds) before the next |
| retry. The actual wait time is calculated by baseWindow * failures + |
| baseWindow * (failures + 1) * (random number between [0.0,1.0]). |
| </description> |
| </property> |
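| <!-- |
| Illustrative sketch (not Hive code): the backoff described above, |
| assuming the formula baseWindow * failures + baseWindow * |
| (failures + 1) * rand, with rand drawn from [0.0, 1.0]. The class |
| and method names are hypothetical. |
| |
| class StatsRetryBackoff { |
|   final long baseWindow = 3000L; // hive.stats.retries.wait |
| |
|   long waitMillis(int failures, java.util.Random rand) { |
|     return (long) (baseWindow * failures |
|         + baseWindow * (failures + 1) * rand.nextDouble()); |
|   } |
| } |
| --> |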
| |
| <property> |
| <name>hive.stats.reliable</name> |
| <value>false</value> |
| <description>Whether queries will fail because stats cannot be |
| collected completely accurately. If this is set to true, |
| reading/writing from/into a partition may fail because the stats |
| could not be computed accurately. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.collect.tablekeys</name> |
| <value>false</value> |
| <description>Whether join and group by keys on tables are derived and |
| maintained in the QueryPlan. This is useful to identify how tables |
| are accessed and to determine if they should be bucketed. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.collect.scancols</name> |
| <value>false</value> |
| <description>Whether column accesses are tracked in the QueryPlan. |
| This is useful to identify how tables are accessed and to determine |
| if there are wasted columns that can be trimmed. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.ndv.error</name> |
| <value>20.0</value> |
| <description>Standard error expressed in percentage. Provides a |
| tradeoff between accuracy and compute cost. A lower value for error |
| indicates higher accuracy and a higher compute cost. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.stats.key.prefix.max.length</name> |
| <value>200</value> |
| <description> |
| Determines whether, when the prefix of the key used for intermediate |
| stats collection exceeds a certain length, a hash of the key is used |
| instead. If the value < 0 then hashing is never used; if the value |
| >= 0 then hashing is used only when the key prefix's length exceeds |
| that value. The key prefix is defined as everything preceding the |
| task ID in the key. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.support.concurrency</name> |
| <value>false</value> |
| <description>Whether hive supports concurrency or not. A zookeeper |
| instance must be up and running for the default hive lock manager to |
| support read-write locks. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.lock.numretries</name> |
| <value>100</value> |
| <description>The number of times you want to try to get all the locks |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.unlock.numretries</name> |
| <value>10</value> |
| <description>The number of times you want to retry to do one unlock |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.lock.sleep.between.retries</name> |
| <value>60</value> |
| <description>The sleep time (in seconds) between various retries |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.zookeeper.quorum</name> |
| <value></value> |
| <description>The list of zookeeper servers to talk to. This is only |
| needed for read/write locks. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.zookeeper.client.port</name> |
| <value>2181</value> |
| <description>The port of zookeeper servers to talk to. This is only |
| needed for read/write locks. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.zookeeper.session.timeout</name> |
| <value>600000</value> |
| <description>Zookeeper client's session timeout. The client is |
| disconnected, and as a result all locks are released, if a heartbeat |
| is not sent within the timeout. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.zookeeper.namespace</name> |
| <value>hive_zookeeper_namespace</value> |
| <description>The parent node under which all zookeeper nodes are |
| created. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.zookeeper.clean.extra.nodes</name> |
| <value>false</value> |
| <description>Clean extra nodes at the end of the session. |
| </description> |
| </property> |
| |
| <property> |
| <name>fs.har.impl</name> |
| <value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value> |
| <description>The implementation for accessing Hadoop Archives. Note |
| that this won't be applicable to Hadoop versions less than 0.20. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.archive.enabled</name> |
| <value>false</value> |
| <description>Whether archiving operations are permitted</description> |
| </property> |
| |
| <property> |
| <name>hive.fetch.output.serde</name> |
| <value>org.apache.hadoop.hive.serde2.DelimitedJSONSerDe</value> |
| <description>The serde used by FetchTask to serialize the fetch |
| output. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.mode.local.auto</name> |
| <value>false</value> |
| <description> Let hive determine whether to run in local mode |
| automatically |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.drop.ignorenonexistent</name> |
| <value>true</value> |
| <description> |
| Do not report an error if DROP TABLE/VIEW specifies a |
| non-existent table/view. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.show.job.failure.debug.info</name> |
| <value>true</value> |
| <description> |
| If a job fails, whether to provide a link in the CLI to the task |
| with the most failures, along with debugging hints if applicable. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.auto.progress.timeout</name> |
| <value>0</value> |
| <description> |
| How long to run the autoprogressor for the script/UDTF operators (in |
| seconds). Set to 0 for forever. |
| </description> |
| </property> |
| |
| <!-- HBase Storage Handler Parameters --> |
| |
| <property> |
| <name>hive.hbase.wal.enabled</name> |
| <value>true</value> |
| <description>Whether writes to HBase should be forced to the |
| write-ahead log. Disabling this improves HBase write performance at |
| the risk of lost writes in case of a crash. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.table.parameters.default</name> |
| <value></value> |
| <description>Default property values for newly created tables |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.entity.separator</name> |
| <value>@</value> |
| <description>Separator used to construct names of tables and |
| partitions. For example, dbname@tablename@partitionname |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.ddl.createtablelike.properties.whitelist</name> |
| <value></value> |
| <description>Table Properties to copy over when executing a Create |
| Table Like. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.variable.substitute</name> |
| <value>true</value> |
| <description>This enables substitution using syntax like ${var}, |
| ${system:var} and ${env:var}. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.variable.substitute.depth</name> |
| <value>40</value> |
| <description>The maximum replacements the substitution engine will do. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.conf.validation</name> |
| <value>true</value> |
| <description>Enables type checking for registered hive configurations. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.security.authorization.enabled</name> |
| <value>false</value> |
| <description>Enable or disable Hive client authorization. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.security.authorization.createtable.user.grants</name> |
| <value></value> |
| <description>The privileges automatically granted to some users |
| whenever a table gets created. An example like |
| "userX,userY:select;userZ:create" will grant select privilege to |
| userX and userY, and grant create privilege to userZ whenever a new |
| table is created. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.security.authorization.createtable.group.grants</name> |
| <value></value> |
| <description>The privileges automatically granted to some groups |
| whenever a table gets created. An example like |
| "groupX,groupY:select;groupZ:create" will grant select privilege to |
| groupX and groupY, and grant create privilege to groupZ whenever a |
| new table is created. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.security.authorization.createtable.role.grants</name> |
| <value></value> |
| <description>The privileges automatically granted to some roles |
| whenever a table gets created. An example like |
| "roleX,roleY:select;roleZ:create" will grant select privilege to |
| roleX and roleY, and grant create privilege to roleZ whenever a new |
| table is created. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.security.authorization.createtable.owner.grants</name> |
| <value></value> |
| <description>The privileges automatically granted to the owner |
| whenever a table gets created. An example like "select,drop" will |
| grant select and drop privilege to the owner of the table. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.metastore.authorization.storage.checks</name> |
| <value>false</value> |
| <description>Should the metastore do authorization checks against the |
| underlying storage for operations like drop-partition (disallow the |
| drop-partition if the user in question doesn't have permissions to |
| delete the corresponding directory on the storage). |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.error.on.empty.partition</name> |
| <value>false</value> |
| <description>Whether to throw an exception if dynamic partition insert |
| generates empty results. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.index.compact.file.ignore.hdfs</name> |
| <value>false</value> |
| <description>If true, the HDFS location stored in the index file will |
| be ignored at runtime. If the data got moved or the name of the |
| cluster got changed, the index data should still be usable. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.index.filter.compact.minsize</name> |
| <value>5368709120</value> |
| <description>Minimum size (in bytes) of the inputs on which a compact |
| index is automatically used. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.optimize.index.filter.compact.maxsize</name> |
| <value>-1</value> |
| <description>Maximum size (in bytes) of the inputs on which a compact |
| index is automatically used. |
| A negative number is equivalent to |
| infinity. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.index.compact.query.max.size</name> |
| <value>10737418240</value> |
| <description>The maximum number of bytes that a query using the |
| compact index can read. Negative value is equivalent to infinity. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.index.compact.query.max.entries</name> |
| <value>10000000</value> |
| <description>The maximum number of index entries to read during a |
| query that uses the compact index. Negative value is equivalent to |
| infinity. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.index.compact.binary.search</name> |
| <value>true</value> |
| <description>Whether or not to use a binary search to find the entries |
| in an index table that match the filter, where possible |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exim.uri.scheme.whitelist</name> |
| <value>hdfs,pfile</value> |
| <description>A comma separated list of acceptable URI schemes for |
| import and export. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.lock.mapred.only.operation</name> |
| <value>false</value> |
| <description>This parameter controls whether locks are acquired only |
| for queries that need to execute at least one mapred job. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.limit.row.max.size</name> |
| <value>100000</value> |
| <description>When trying a smaller subset of data for simple LIMIT, |
| the minimum size each row is guaranteed to have. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.limit.optimize.limit.file</name> |
| <value>10</value> |
| <description>When trying a smaller subset of data for simple LIMIT, |
| maximum number of files we can sample. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.limit.optimize.enable</name> |
| <value>false</value> |
| <description>Whether to enable the optimization of trying a smaller |
| subset of data for simple LIMIT first. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.limit.optimize.fetch.max</name> |
| <value>50000</value> |
| <description>Maximum number of rows allowed for a smaller subset of |
| data for simple LIMIT, if it is a fetch query. Insert queries are |
| not restricted by this limit. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.rework.mapredwork</name> |
| <value>false</value> |
| <description>Whether to rework the mapred work or not. This was first |
| introduced by SymlinkTextInputFormat to replace symlink files with |
| real paths at compile time. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.concatenate.check.index</name> |
| <value>true</value> |
| <description>If this is set to true, hive will throw an error when |
| doing 'alter table tbl_name [partSpec] concatenate' on a |
| table/partition that has indexes on it. The reason a user would want |
| to set this to true is that it helps the user avoid handling all the |
| index drop, recreation, and rebuild work. This is very helpful for |
| tables with thousands of partitions. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.sample.seednumber</name> |
| <value>0</value> |
| <description>A number used for percentage sampling. By changing this |
| number, the user will change the subsets of data sampled. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.io.exception.handlers</name> |
| <value></value> |
| <description>A list of io exception handler class names. This is used |
| to construct a list of exception handlers to handle exceptions |
| thrown by record readers. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.autogen.columnalias.prefix.label</name> |
| <value>_c</value> |
| <description>String used as a prefix when auto generating column |
| alias. By default the prefix label will be appended with a column |
| position number to form the column alias. Auto generation would |
| happen if an aggregate function is used in a select clause without |
| an explicit alias. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.autogen.columnalias.prefix.includefuncname</name> |
| <value>false</value> |
| <description>Whether to include function name in the column alias auto |
| generated by hive. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.perf.logger</name> |
| <value>org.apache.hadoop.hive.ql.log.PerfLogger</value> |
| <description>The class responsible for logging client side performance |
| metrics. Must be a subclass of |
| org.apache.hadoop.hive.ql.log.PerfLogger |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.start.cleanup.scratchdir</name> |
| <value>false</value> |
| <description>Whether to clean up the hive scratchdir while starting |
| the hive server. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.output.file.extension</name> |
| <value></value> |
| <description>String used as a file extension for output files. If not |
| set, defaults to the codec extension for text files (e.g. ".gz"), or |
| no extension otherwise. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.insert.into.multilevel.dirs</name> |
| <value>false</value> |
| <description>Whether to allow insert into multilevel directories like |
| "insert directory '/HIVEFT25686/chinna/' from table" |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.warehouse.subdir.inherit.perms</name> |
| <value>false</value> |
| <description>Set this to true if the table directories should inherit |
| the permission of the warehouse or database directory instead of |
| being created with the permissions derived from dfs umask. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.job.debug.capture.stacktraces</name> |
| <value>true</value> |
| <description>Whether or not stack traces parsed from the task logs of |
| a sampled failed task for each failed job should be stored in the |
| SessionState. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.driver.run.hooks</name> |
| <value></value> |
| <description>A comma separated list of hooks which implement |
| HiveDriverRunHook and will be run at the beginning and end of |
| Driver.run; these will be run in the order specified. |
| </description> |
| </property> |
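| <!-- |
| Illustrative sketch (not part of this configuration): a driver run |
| hook for the property above. The preDriverRun/postDriverRun method |
| names are assumptions based on the HiveDriverRunHook interface; the |
| class and package names are hypothetical. |
| |
| package com.example.hooks; // hypothetical package |
| |
| import org.apache.hadoop.hive.ql.HiveDriverRunHook; |
| import org.apache.hadoop.hive.ql.HiveDriverRunHookContext; |
| |
| public class TimingDriverRunHook implements HiveDriverRunHook { |
|   private long start; |
| |
|   @Override |
|   public void preDriverRun(HiveDriverRunHookContext ctx) { |
|     start = System.currentTimeMillis(); // beginning of Driver.run |
|   } |
| |
|   @Override |
|   public void postDriverRun(HiveDriverRunHookContext ctx) { |
|     System.err.println("Driver.run took " |
|         + (System.currentTimeMillis() - start) + " ms"); |
|   } |
| } |
| --> |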
| |
| <property> |
| <name>hive.ddl.output.format</name> |
| <value>text</value> |
| <description> |
| The data format to use for DDL output. One of "text" (for |
| human-readable text) or "json" (for a json object). |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.transform.escape.input</name> |
| <value>false</value> |
| <description> |
| This adds an option to escape special chars (newlines, carriage |
| returns and tabs) when they are passed to the user script. This is |
| useful if the hive tables can contain data that contains special |
| characters. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.rcfile.use.explicit.header</name> |
| <value>true</value> |
| <description> |
| If this is set, the header for RC Files will simply be RCF. If this |
| is not set, the header will be the one borrowed from sequence files, |
| e.g. SEQ- followed by the input and output RC File formats. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.multi.insert.move.tasks.share.dependencies</name> |
| <value>false</value> |
| <description> |
| If this is set, all move tasks for tables/partitions (not |
| directories) at the end of a multi-insert query will only begin once |
| the dependencies for all these move tasks have been met. |
| Advantages: If concurrency is enabled, the locks will only be |
| released once the query has finished, so with this config enabled, |
| the time when the table/partition is generated will be much closer |
| to when the lock on it is released. |
| Disadvantages: If concurrency is not enabled, with this disabled, |
| the tables/partitions which are produced by this query and finish |
| earlier will be available for querying much earlier. Since the locks |
| are only released once the query finishes, this does not apply if |
| concurrency is enabled. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.fetch.task.conversion</name> |
| <value>minimal</value> |
| <description> |
| Some select queries can be converted to a single FETCH task, |
| minimizing latency. Currently the query should be single sourced, |
| not having any subquery, and should not have any aggregations or |
| distincts (which incur an RS), lateral views or joins. |
| 1. minimal : SELECT STAR, FILTER on partition columns, LIMIT only |
| 2. more : SELECT, FILTER, LIMIT only (TABLESAMPLE, virtual columns) |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.hmshandler.retry.attempts</name> |
| <value>1</value> |
| <description>The number of times to retry an HMSHandler call if there |
| was a connection error. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.hmshandler.retry.interval</name> |
| <value>1000</value> |
| <description>The number of milliseconds between HMSHandler retry |
| attempts |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.server.read.socket.timeout</name> |
| <value>10</value> |
| <description>Timeout for the HiveServer to close the connection if no |
| response from the client in N seconds, defaults to 10 seconds. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.server.tcp.keepalive</name> |
| <value>true</value> |
| <description>Whether to enable TCP keepalive for the Hive server. |
| Keepalive will prevent accumulation of half-open connections. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.decode.partition.name</name> |
| <value>false</value> |
| <description>Whether to show the unquoted partition names in query |
| results. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.log4j.file</name> |
| <value></value> |
| <description>Hive log4j configuration file. |
| If the property is not set, then logging will be initialized using |
| hive-log4j.properties found on the classpath. |
| If the property is set, the value must be a valid URI (java.net.URI, |
| e.g. "file:///tmp/my-logging.properties"), which you can then |
| extract a URL from and pass to PropertyConfigurator.configure(URL). |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.log4j.file</name> |
| <value></value> |
| <description>Hive log4j configuration file for execution mode (sub |
| command). |
| If the property is not set, then logging will be initialized using |
| hive-exec-log4j.properties found on the classpath. |
| If the property is set, the value must be a valid URI (java.net.URI, |
| e.g. "file:///tmp/my-logging.properties"), which you can then |
| extract a URL from and pass to PropertyConfigurator.configure(URL). |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.infer.bucket.sort</name> |
| <value>false</value> |
| <description> |
| If this is set, when writing partitions, the metadata will include |
| the bucketing/sorting properties with which the data was written, if |
| any (this will not overwrite the metadata inherited from the table |
| if the table is bucketed/sorted). |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.exec.infer.bucket.sort.num.buckets.power.two</name> |
| <value>false</value> |
| <description> |
| If this is set, when setting the number of reducers for the map |
| reduce task which writes the final output files, it will choose a |
| number which is a power of two, unless the user specifies the number |
| of reducers to use using mapred.reduce.tasks. The number of reducers |
| may be set to a power of two, only to be followed by a merge task, |
| which prevents anything from being inferred. |
| With hive.exec.infer.bucket.sort set to true: |
| Advantages: If this is not set, the number of buckets for partitions |
| will seem arbitrary, which means that the number of mappers used for |
| optimized joins, for example, will be very low. With this set, since |
| the number of buckets used for any partition is a power of two, the |
| number of mappers used for optimized joins will be the least number |
| of buckets used by any partition being joined. |
| Disadvantages: This may mean a much larger or much smaller number of |
| reducers being used in the final map reduce job, e.g. if a job was |
| originally going to take 257 reducers, it will now take 512 |
| reducers; similarly if the max number of reducers is 511, and a job |
| was going to use this many, it will now use 256 reducers. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.groupby.orderby.position.alias</name> |
| <value>false</value> |
| <description>Whether to enable using Column Position Alias in Group By |
| or Order By |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.server2.thrift.min.worker.threads</name> |
| <value>5</value> |
| <description>Minimum number of Thrift worker threads</description> |
| </property> |
| |
| <property> |
| <name>hive.server2.thrift.max.worker.threads</name> |
| <value>100</value> |
| <description>Maximum number of Thrift worker threads</description> |
| </property> |
| |
| <property> |
| <name>hive.server2.thrift.port</name> |
| <value>10000</value> |
| <description>Port number of HiveServer2 Thrift interface. |
| Can be overridden by setting $HIVE_SERVER2_THRIFT_PORT. |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.server2.thrift.bind.host</name> |
| <value>localhost</value> |
| <description>Bind host on which to run the HiveServer2 Thrift |
| interface. |
| Can be overridden by setting $HIVE_SERVER2_THRIFT_BIND_HOST |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.server2.authentication</name> |
| <value>NONE</value> |
| <description> |
| Client authentication types. |
| NONE: no authentication check |
| LDAP: LDAP/AD based authentication |
| KERBEROS: Kerberos/GSSAPI authentication |
| CUSTOM: Custom authentication provider |
| (Use with property hive.server2.custom.authentication.class) |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.server2.custom.authentication.class</name> |
| <value></value> |
| <description> |
| Custom authentication class. Used when property |
| 'hive.server2.authentication' is set to 'CUSTOM'. The provided class |
| must be a proper implementation of the interface |
| org.apache.hive.service.auth.PasswdAuthenticationProvider. |
| HiveServer2 will call its Authenticate(user, password) method to |
| authenticate requests. |
| The implementation may optionally extend Hadoop's |
| org.apache.hadoop.conf.Configured class to grab Hive's Configuration |
| object. |
| </property> |
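| <!-- |
| Illustrative sketch (not part of this configuration): a custom |
| password authentication provider as described above, assuming the |
| single Authenticate(user, password) method of |
| org.apache.hive.service.auth.PasswdAuthenticationProvider. The class |
| and package names are hypothetical. |
| |
| package com.example.auth; // hypothetical package |
| |
| import javax.security.sasl.AuthenticationException; |
| import org.apache.hive.service.auth.PasswdAuthenticationProvider; |
| |
| public class StaticPasswdAuthenticationProvider |
|     implements PasswdAuthenticationProvider { |
|   // Reject everything except one hard-coded credential pair; a real |
|   // provider would check an external user store instead. |
|   @Override |
|   public void Authenticate(String user, String password) |
|       throws AuthenticationException { |
|     if (!"hive".equals(user) || !"secret".equals(password)) { |
|       throw new AuthenticationException("Invalid credentials"); |
|     } |
|   } |
| } |
| --> |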
| |
| <property> |
| <name>hive.server2.authentication.kerberos.principal</name> |
| <value></value> |
| <description> |
| Kerberos server principal |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.server2.authentication.kerberos.keytab</name> |
| <value></value> |
| <description> |
| Kerberos keytab file for server principal |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.server2.authentication.ldap.url</name> |
| <value></value> |
| <description> |
| LDAP connection URL |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.server2.authentication.ldap.baseDN</name> |
| <value></value> |
| <description> |
| LDAP base DN |
| </description> |
| </property> |
| |
| <property> |
| <name>hive.server2.enable.doAs</name> |
| <value>true</value> |
| <description> |
| Setting this property to true will have HiveServer2 execute hive |
| operations as the user making the calls to it. |
| </description> |
| </property> |
| |
| |
| </configuration> |