<?xml version="1.0"?>
<!--
 ! Copyright 2009-2013 by The Regents of the University of California
 ! Licensed under the Apache License, Version 2.0 (the "License");
 ! you may not use this file except in compliance with the License.
 ! You may obtain a copy of the License from
 !
 !     http://www.apache.org/licenses/LICENSE-2.0
 !
 ! Unless required by applicable law or agreed to in writing, software
 ! distributed under the License is distributed on an "AS IS" BASIS,
 ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ! See the License for the specific language governing permissions and
 ! limitations under the License.
 !-->
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>

	<!-- Hivesterix Execution Parameters -->
	<property>
		<name>hive.hyracks.connectorpolicy</name>
		<value>PIPELINING</value>
	</property>

	<property>
		<name>hive.hyracks.parrallelism</name>
		<value>4</value>
	</property>

	<property>
		<name>hive.algebricks.groupby.external</name>
		<value>true</value>
	</property>

	<property>
		<name>hive.algebricks.groupby.external.memory</name>
		<value>33554432</value>
	</property>

	<property>
		<name>hive.algebricks.sort.memory</name>
		<value>33554432</value>
	</property>

	<property>
		<name>hive.algebricks.framesize</name>
		<value>32768</value>
	</property>
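
	<!-- Note: the two 33554432-byte (32 MB) budgets above and the 32768-byte
		(32 KB) frame size are all in bytes. Assuming the memory budget is
		carved into frames of hive.algebricks.framesize bytes, each external
		group-by or sort operator gets 33554432 / 32768 = 1024 frames. -->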

	<!-- Hive Execution Parameters -->
	<property>
		<name>mapred.reduce.tasks</name>
		<value>-1</value>
		<description>The default number of reduce tasks per job. Typically set
			to a prime close to the number of available hosts. Ignored when
			mapred.job.tracker is "local". Hadoop sets this to 1 by default,
			whereas Hive uses -1 as its default value. By setting this property
			to -1, Hive will automatically figure out what the number of
			reducers should be.
		</description>
	</property>

	<property>
		<name>hive.exec.reducers.bytes.per.reducer</name>
		<value>1000000000</value>
		<description>Size per reducer. The default is 1G, i.e. if the input
			size is 10G, it will use 10 reducers.
		</description>
	</property>
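
	<!-- Illustrative arithmetic, using the defaults above: with
		hive.exec.reducers.bytes.per.reducer = 1000000000 (~1 GB) and a 5 GB
		input, Hive estimates ceil(5000000000 / 1000000000) = 5 reducers,
		capped by hive.exec.reducers.max. -->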

	<property>
		<name>hive.exec.reducers.max</name>
		<value>999</value>
		<description>Maximum number of reducers that will be used. If the
			value specified in the configuration parameter mapred.reduce.tasks
			is negative, Hive will use this as the maximum number of reducers
			when automatically determining the number of reducers.
		</description>
	</property>

	<property>
		<name>hive.cli.print.header</name>
		<value>false</value>
		<description>Whether to print the names of the columns in query
			output.
		</description>
	</property>

	<property>
		<name>hive.cli.print.current.db</name>
		<value>false</value>
		<description>Whether to include the current database in the Hive
			prompt.
		</description>
	</property>

	<property>
		<name>hive.cli.prompt</name>
		<value>hive</value>
		<description>Command line prompt configuration value. Other hiveconf
			variables can be used in this configuration value. Variable
			substitution will only be invoked at Hive CLI startup.
		</description>
	</property>
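
	<!-- Illustrative example of hiveconf substitution in the prompt,
		assuming the standard ${hiveconf:...} substitution syntax: setting
		this property to "hive(${hiveconf:mapred.job.tracker})" would embed
		the job tracker address in the prompt, substituted once at CLI
		startup. -->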

	<property>
		<name>hive.cli.pretty.output.num.cols</name>
		<value>-1</value>
		<description>The number of columns to use when formatting output
			generated by the DESCRIBE PRETTY table_name command. If the value
			of this property is -1, then Hive will use the auto-detected
			terminal width.
		</description>
	</property>

	<property>
		<name>hive.exec.scratchdir</name>
		<value>/tmp/hive-${user.name}</value>
		<description>Scratch space for Hive jobs</description>
	</property>

	<property>
		<name>hive.exec.local.scratchdir</name>
		<value>/tmp/${user.name}</value>
		<description>Local scratch space for Hive jobs</description>
	</property>

	<property>
		<name>hive.test.mode</name>
		<value>false</value>
		<description>Whether Hive is running in test mode. If yes, it turns
			on sampling and prefixes the output table name.
		</description>
	</property>

	<property>
		<name>hive.test.mode.prefix</name>
		<value>test_</value>
		<description>If Hive is running in test mode, prefixes the output
			table by this string.
		</description>
	</property>

	<!-- If the input table is not bucketed, the denominator of the tablesample
		is determined by the parameter below -->
	<!-- For example, the following query: -->
	<!-- INSERT OVERWRITE TABLE dest -->
	<!-- SELECT col1 from src -->
	<!-- would be converted to -->
	<!-- INSERT OVERWRITE TABLE test_dest -->
	<!-- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1)) -->
	<property>
		<name>hive.test.mode.samplefreq</name>
		<value>32</value>
		<description>If Hive is running in test mode and the table is not
			bucketed, the sampling frequency.
		</description>
	</property>

	<property>
		<name>hive.test.mode.nosamplelist</name>
		<value></value>
		<description>If Hive is running in test mode, don't sample the above
			comma-separated list of tables.
		</description>
	</property>

	<property>
		<name>hive.metastore.uris</name>
		<value></value>
		<description>Thrift URI for the remote metastore. Used by the
			metastore client to connect to the remote metastore.
		</description>
	</property>
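
	<!-- Illustrative value, assuming the conventional metastore Thrift port
		and a placeholder host name:
		<value>thrift://metastore-host.example.com:9083</value> -->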

	<property>
		<name>javax.jdo.option.ConnectionURL</name>
		<value>jdbc:derby:;databaseName=metastore_db;create=true</value>
		<description>JDBC connect string for a JDBC metastore</description>
	</property>

	<property>
		<name>javax.jdo.option.ConnectionDriverName</name>
		<value>org.apache.derby.jdbc.EmbeddedDriver</value>
		<description>Driver class name for a JDBC metastore</description>
	</property>

	<property>
		<name>javax.jdo.PersistenceManagerFactoryClass</name>
		<value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
		<description>Class implementing the JDO persistence</description>
	</property>

	<property>
		<name>javax.jdo.option.DetachAllOnCommit</name>
		<value>true</value>
		<description>Detaches all objects from the session so that they can
			be used after the transaction is committed.
		</description>
	</property>

	<property>
		<name>javax.jdo.option.NonTransactionalRead</name>
		<value>true</value>
		<description>Reads outside of transactions</description>
	</property>

	<property>
		<name>javax.jdo.option.ConnectionUserName</name>
		<value>APP</value>
		<description>Username to use against the metastore database</description>
	</property>

	<property>
		<name>javax.jdo.option.ConnectionPassword</name>
		<value>mine</value>
		<description>Password to use against the metastore database</description>
	</property>

	<property>
		<name>javax.jdo.option.Multithreaded</name>
		<value>true</value>
		<description>Set this to true if multiple threads access the
			metastore through JDO concurrently.
		</description>
	</property>

	<property>
		<name>datanucleus.connectionPoolingType</name>
		<value>DBCP</value>
		<description>Uses a DBCP connection pool for the JDBC metastore.
		</description>
	</property>

	<property>
		<name>datanucleus.validateTables</name>
		<value>false</value>
		<description>Validates the existing schema against the code. Turn
			this on if you want to verify the existing schema.
		</description>
	</property>

	<property>
		<name>datanucleus.validateColumns</name>
		<value>false</value>
		<description>Validates the existing schema against the code. Turn
			this on if you want to verify the existing schema.
		</description>
	</property>

	<property>
		<name>datanucleus.validateConstraints</name>
		<value>false</value>
		<description>Validates the existing schema against the code. Turn
			this on if you want to verify the existing schema.
		</description>
	</property>

	<property>
		<name>datanucleus.storeManagerType</name>
		<value>rdbms</value>
		<description>Metadata store type</description>
	</property>

	<property>
		<name>datanucleus.autoCreateSchema</name>
		<value>true</value>
		<description>Creates the necessary schema on startup if one doesn't
			exist. Set this to false after creating it once.
		</description>
	</property>

	<property>
		<name>datanucleus.autoStartMechanismMode</name>
		<value>checked</value>
		<description>Throw an exception if metadata tables are incorrect.
		</description>
	</property>

	<property>
		<name>datanucleus.transactionIsolation</name>
		<value>read-committed</value>
		<description>Default transaction isolation level for identity
			generation.
		</description>
	</property>

	<property>
		<name>datanucleus.cache.level2</name>
		<value>false</value>
		<description>Use a level 2 cache. Turn this off if metadata is
			changed independently of the Hive metastore server.
		</description>
	</property>

	<property>
		<name>datanucleus.cache.level2.type</name>
		<value>SOFT</value>
		<description>SOFT=soft reference based cache, WEAK=weak reference
			based cache.
		</description>
	</property>

	<property>
		<name>datanucleus.identifierFactory</name>
		<value>datanucleus</value>
		<description>Name of the identifier factory to use when generating
			table/column names etc. 'datanucleus' is used for backward
			compatibility.
		</description>
	</property>

	<property>
		<name>datanucleus.plugin.pluginRegistryBundleCheck</name>
		<value>LOG</value>
		<description>Defines what happens when plugin bundles are found and
			are duplicated [EXCEPTION|LOG|NONE]
		</description>
	</property>

	<property>
		<name>hive.metastore.warehouse.dir</name>
		<value>/user/hive/warehouse</value>
		<description>Location of the default database for the warehouse.
		</description>
	</property>

	<property>
		<name>hive.metastore.execute.setugi</name>
		<value>false</value>
		<description>In unsecure mode, setting this property to true will
			cause the metastore to execute DFS operations using the client's
			reported user and group permissions. Note that this property must
			be set on both the client and server sides. Further note that it
			is best effort: if the client sets it to true and the server sets
			it to false, the client setting will be ignored.
		</description>
	</property>

	<property>
		<name>hive.metastore.event.listeners</name>
		<value></value>
		<description>List of comma-separated listeners for metastore events.
		</description>
	</property>

	<property>
		<name>hive.metastore.partition.inherit.table.properties</name>
		<value></value>
		<description>List of comma-separated keys occurring in table
			properties which will get inherited to newly created partitions. *
			implies all the keys will get inherited.
		</description>
	</property>

	<property>
		<name>hive.metadata.export.location</name>
		<value></value>
		<description>When used in conjunction with the
			org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre-event
			listener, it is the location to which the metadata will be
			exported. The default is an empty string, which results in the
			metadata being exported to the current user's home directory on
			HDFS.
		</description>
	</property>

	<property>
		<name>hive.metadata.move.exported.metadata.to.trash</name>
		<value></value>
		<description>When used in conjunction with the
			org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre-event
			listener, this setting determines if the metadata that is exported
			will subsequently be moved to the user's trash directory alongside
			the dropped table data. This ensures that the metadata will be
			cleaned up along with the dropped table data.
		</description>
	</property>

	<property>
		<name>hive.metastore.partition.name.whitelist.pattern</name>
		<value></value>
		<description>Partition names will be checked against this regex
			pattern and rejected if not matched.
		</description>
	</property>
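
	<!-- Illustrative pattern, assuming standard Java regex syntax: a value
		of [A-Za-z0-9_]* would reject any partition name containing
		characters other than letters, digits, and underscore. -->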

	<property>
		<name>hive.metastore.end.function.listeners</name>
		<value></value>
		<description>List of comma-separated listeners for the end of
			metastore functions.
		</description>
	</property>

	<property>
		<name>hive.metastore.event.expiry.duration</name>
		<value>0</value>
		<description>Duration after which events expire from the events table
			(in seconds).
		</description>
	</property>

	<property>
		<name>hive.metastore.event.clean.freq</name>
		<value>0</value>
		<description>Frequency at which the timer task runs to purge expired
			events in the metastore (in seconds).
		</description>
	</property>

	<property>
		<name>hive.metastore.connect.retries</name>
		<value>5</value>
		<description>Number of retries while opening a connection to the
			metastore.
		</description>
	</property>

	<property>
		<name>hive.metastore.failure.retries</name>
		<value>3</value>
		<description>Number of retries upon failure of Thrift metastore
			calls.
		</description>
	</property>

	<property>
		<name>hive.metastore.client.connect.retry.delay</name>
		<value>1</value>
		<description>Number of seconds for the client to wait between
			consecutive connection attempts.
		</description>
	</property>

	<property>
		<name>hive.metastore.client.socket.timeout</name>
		<value>20</value>
		<description>MetaStore Client socket timeout in seconds</description>
	</property>

	<property>
		<name>hive.metastore.rawstore.impl</name>
		<value>org.apache.hadoop.hive.metastore.ObjectStore</value>
		<description>Name of the class that implements the
			org.apache.hadoop.hive.metastore.rawstore interface. This class is
			used to store and retrieve raw metadata objects such as tables and
			databases.
		</description>
	</property>

	<property>
		<name>hive.metastore.batch.retrieve.max</name>
		<value>300</value>
		<description>Maximum number of objects (tables/partitions) that can
			be retrieved from the metastore in one batch. The higher the
			number, the fewer round trips are needed to the Hive metastore
			server, but it may also cause a higher memory requirement on the
			client side.
		</description>
	</property>

	<property>
		<name>hive.metastore.batch.retrieve.table.partition.max</name>
		<value>1000</value>
		<description>Maximum number of table partitions that the metastore
			internally retrieves in one batch.
		</description>
	</property>

	<property>
		<name>hive.default.fileformat</name>
		<value>TextFile</value>
		<description>Default file format for CREATE TABLE statements. Options
			are TextFile and SequenceFile. Users can explicitly say CREATE
			TABLE ... STORED AS &lt;TEXTFILE|SEQUENCEFILE&gt; to override.
		</description>
	</property>
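
	<!-- Illustrative override, with a made-up table for the example: -->
	<!-- CREATE TABLE page_views (url STRING, ts BIGINT) STORED AS SEQUENCEFILE; -->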

	<property>
		<name>hive.fileformat.check</name>
		<value>true</value>
		<description>Whether to check the file format or not when loading
			data files.
		</description>
	</property>

	<property>
		<name>hive.map.aggr</name>
		<value>true</value>
		<description>Whether to use map-side aggregation in Hive Group By
			queries.
		</description>
	</property>

	<property>
		<name>hive.groupby.skewindata</name>
		<value>false</value>
		<description>Whether there is skew in the data, to optimize group by
			queries.
		</description>
	</property>

	<property>
		<name>hive.optimize.multigroupby.common.distincts</name>
		<value>true</value>
		<description>Whether to optimize a multi-groupby query with the same
			distinct. Consider a query like:

			from src
			insert overwrite table dest1 select col1, count(distinct colx) group by col1
			insert overwrite table dest2 select col2, count(distinct colx) group by col2;

			With this parameter set to true, first we spray by the distinct
			value (colx), and then perform the 2 group bys. This makes sense
			if map-side aggregation is turned off. However, with map-side
			aggregation, it might be useful in some cases to treat the 2
			inserts independently, thereby performing the query above in 2 MR
			jobs instead of 3 (due to spraying by the distinct key first). If
			this parameter is turned off, we don't consider the fact that the
			distinct key is the same across different MR jobs.
		</description>
	</property>

	<property>
		<name>hive.groupby.mapaggr.checkinterval</name>
		<value>100000</value>
		<description>Number of rows after which the size of the grouping
			keys/aggregation classes is checked.
		</description>
	</property>

	<property>
		<name>hive.mapred.local.mem</name>
		<value>0</value>
		<description>For local mode, the memory of the mappers/reducers.
		</description>
	</property>

	<property>
		<name>hive.mapjoin.followby.map.aggr.hash.percentmemory</name>
		<value>0.3</value>
		<description>Portion of total memory to be used by the map-side group
			aggregation hash table, when this group by is followed by a map
			join.
		</description>
	</property>

	<property>
		<name>hive.map.aggr.hash.force.flush.memory.threshold</name>
		<value>0.9</value>
		<description>The max memory to be used by the map-side group
			aggregation hash table; if the memory usage is higher than this
			number, force flushing of the data.
		</description>
	</property>

	<property>
		<name>hive.map.aggr.hash.percentmemory</name>
		<value>0.5</value>
		<description>Portion of total memory to be used by the map-side group
			aggregation hash table.
		</description>
	</property>

	<property>
		<name>hive.map.aggr.hash.min.reduction</name>
		<value>0.5</value>
		<description>Hash aggregation will be turned off if the ratio between
			the hash table size and input rows is bigger than this number. Set
			to 1 to make sure hash aggregation is never turned off.
		</description>
	</property>

	<property>
		<name>hive.optimize.cp</name>
		<value>true</value>
		<description>Whether to enable the column pruner</description>
	</property>

	<property>
		<name>hive.optimize.index.filter</name>
		<value>false</value>
		<description>Whether to enable automatic use of indexes</description>
	</property>

	<property>
		<name>hive.optimize.index.groupby</name>
		<value>false</value>
		<description>Whether to enable optimization of group-by queries using
			Aggregate indexes.
		</description>
	</property>

	<property>
		<name>hive.optimize.ppd</name>
		<value>true</value>
		<description>Whether to enable predicate pushdown</description>
	</property>

	<property>
		<name>hive.optimize.ppd.storage</name>
		<value>true</value>
		<description>Whether to push predicates down into storage handlers.
			Ignored when hive.optimize.ppd is false.
		</description>
	</property>

	<property>
		<name>hive.ppd.recognizetransivity</name>
		<value>true</value>
		<description>Whether to transitively replicate predicate filters over
			equijoin conditions.
		</description>
	</property>

	<property>
		<name>hive.optimize.groupby</name>
		<value>true</value>
		<description>Whether to enable the bucketed group by from bucketed
			partitions/tables.
		</description>
	</property>

	<property>
		<name>hive.optimize.skewjoin.compiletime</name>
		<value>false</value>
		<description>Whether to create a separate plan for skewed keys for
			the tables in the join. This is based on the skewed keys stored in
			the metadata. At compile time, the plan is broken into different
			joins: one for the skewed keys, and the other for the remaining
			keys. And then, a union is performed for the 2 joins generated
			above. So unless the same skewed key is present in both the joined
			tables, the join for the skewed key will be performed as a
			map-side join.

			The main difference between this parameter and
			hive.optimize.skewjoin is that this parameter uses the skew
			information stored in the metastore to optimize the plan at
			compile time itself. If there is no skew information in the
			metadata, this parameter will not have any effect. Both
			hive.optimize.skewjoin.compiletime and hive.optimize.skewjoin
			should be set to true. Ideally, hive.optimize.skewjoin should be
			renamed as hive.optimize.skewjoin.runtime, but that is not being
			done for backward compatibility.

			If the skew information is correctly stored in the metadata,
			hive.optimize.skewjoin.compiletime would change the query plan to
			take care of it, and hive.optimize.skewjoin will be a no-op.
		</description>
	</property>

	<property>
		<name>hive.optimize.union.remove</name>
		<value>false</value>
		<description>
			Whether to remove the union and push the operators between the
			union and the filesink above the union. This avoids an extra scan
			of the output by the union. This is independently useful for union
			queries, and especially useful when
			hive.optimize.skewjoin.compiletime is set to true, since an extra
			union is inserted.

			The merge is triggered if either of hive.merge.mapfiles or
			hive.merge.mapredfiles is set to true. If the user has set
			hive.merge.mapfiles to true and hive.merge.mapredfiles to false,
			the idea was that the number of reducers is small, so the number
			of files is anyway small. However, with this optimization, we are
			increasing the number of files possibly by a big margin. So, we
			merge aggressively.
		</description>
	</property>

	<property>
		<name>hive.mapred.supports.subdirectories</name>
		<value>false</value>
		<description>Whether the version of Hadoop which is running supports
			sub-directories for tables/partitions. Many Hive optimizations can
			be applied if the Hadoop version supports sub-directories for
			tables/partitions. This support was added by MAPREDUCE-1501.
		</description>
	</property>

	<property>
		<name>hive.multigroupby.singlemr</name>
		<value>true</value>
		<description>Whether to optimize a multi group by query to generate a
			single M/R job plan. If the multi group by query has common group
			by keys, it will be optimized to generate a single M/R job.
		</description>
	</property>

	<property>
		<name>hive.map.groupby.sorted</name>
		<value>false</value>
		<description>If the bucketing/sorting properties of the table exactly
			match the grouping key, whether to perform the group by in the
			mapper by using BucketizedHiveInputFormat. The only downside to
			this is that it limits the number of mappers to the number of
			files.
		</description>
	</property>

	<property>
		<name>hive.map.groupby.sorted.testmode</name>
		<value>false</value>
		<description>If the bucketing/sorting properties of the table exactly
			match the grouping key, whether to perform the group by in the
			mapper by using BucketizedHiveInputFormat. If the test mode is
			set, the plan is not converted, but a query property is set to
			denote the same.
		</description>
	</property>

	<property>
		<name>hive.new.job.grouping.set.cardinality</name>
		<value>30</value>
		<description>
			Whether a new map-reduce job should be launched for grouping
			sets/rollups/cubes. For a query like: select a, b, c, count(1)
			from T group by a, b, c with rollup; 4 rows are created per row:
			(a, b, c), (a, b, null), (a, null, null), (null, null, null). This
			can lead to explosion across the map-reduce boundary if the
			cardinality of T is very high, and map-side aggregation does not
			do a very good job.

			This parameter decides if Hive should add an additional map-reduce
			job. If the grouping set cardinality (4 in the example above) is
			more than this value, a new MR job is added under the assumption
			that the original group by will reduce the data size.
		</description>
	</property>

	<property>
		<name>hive.join.emit.interval</name>
		<value>1000</value>
		<description>How many rows in the right-most join operand Hive should
			buffer before emitting the join result.
		</description>
	</property>

	<property>
		<name>hive.join.cache.size</name>
		<value>25000</value>
		<description>How many rows in the joining tables (except the
			streaming table) should be cached in memory.
		</description>
	</property>

	<property>
		<name>hive.mapjoin.bucket.cache.size</name>
		<value>100</value>
		<description>How many values in each key in the map-joined table
			should be cached in memory.
		</description>
	</property>

	<property>
		<name>hive.mapjoin.cache.numrows</name>
		<value>25000</value>
		<description>How many rows should be cached by jdbm for map join.
		</description>
	</property>

	<property>
		<name>hive.optimize.skewjoin</name>
		<value>false</value>
		<description>Whether to enable skew join optimization. The algorithm
			is as follows: at runtime, detect the keys with a large skew.
			Instead of processing those keys, store them temporarily in an
			HDFS directory. In a follow-up map-reduce job, process those
			skewed keys. The same key need not be skewed for all the tables,
			and so the follow-up map-reduce job (for the skewed keys) would be
			much faster, since it would be a map-join.
		</description>
	</property>

	<property>
		<name>hive.skewjoin.key</name>
		<value>100000</value>
		<description>Determines if we get a skew key in a join. If we see
			more than the specified number of rows with the same key in the
			join operator, we treat the key as a skew join key.
		</description>
	</property>

	<property>
		<name>hive.skewjoin.mapjoin.map.tasks</name>
		<value>10000</value>
		<description>Determines the number of map tasks used in the follow-up
			map join job for a skew join. It should be used together with
			hive.skewjoin.mapjoin.min.split to perform fine-grained control.
		</description>
	</property>

	<property>
		<name>hive.skewjoin.mapjoin.min.split</name>
		<value>33554432</value>
		<description>Determines the number of map tasks at most used in the
			follow-up map join job for a skew join by specifying the minimum
			split size. It should be used together with
			hive.skewjoin.mapjoin.map.tasks to perform fine-grained control.
		</description>
	</property>

	<property>
		<name>hive.mapred.mode</name>
		<value>nonstrict</value>
		<description>The mode in which the Hive operations are being
			performed. In strict mode, some risky queries are not allowed to
			run. They include:
			Cartesian products.
			No partition being picked up for a query.
			Comparing bigints and strings.
			Comparing bigints and doubles.
			Order by without limit.
		</description>
	</property>
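
	<!-- Illustrative strict-mode rejection, using the usual src example
		table: "SELECT * FROM src ORDER BY key;" would be rejected under
		strict mode because it has no LIMIT, whereas
		"SELECT * FROM src ORDER BY key LIMIT 100;" would be accepted. -->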

	<property>
		<name>hive.enforce.bucketmapjoin</name>
		<value>false</value>
		<description>If the user asked for a bucketed map-side join, and it
			cannot be performed, should the query fail or not? For example, if
			the buckets in the tables being joined are not a multiple of each
			other, a bucketed map-side join cannot be performed, and the query
			will fail if hive.enforce.bucketmapjoin is set to true.
		</description>
	</property>

	<property>
		<name>hive.exec.script.maxerrsize</name>
		<value>100000</value>
		<description>Maximum number of bytes a script is allowed to emit to
			standard error (per map-reduce task). This prevents runaway
			scripts from filling logs partitions to capacity.
		</description>
	</property>

	<property>
		<name>hive.exec.script.allow.partial.consumption</name>
		<value>false</value>
		<description>When enabled, this option allows a user script to exit
			successfully without consuming all the data from the standard
			input.
		</description>
	</property>

	<property>
		<name>hive.script.operator.id.env.var</name>
		<value>HIVE_SCRIPT_OPERATOR_ID</value>
		<description>Name of the environment variable that holds the unique
			script operator ID in the user's transform function (the custom
			mapper/reducer that the user has specified in the query).
		</description>
	</property>

	<property>
		<name>hive.script.operator.truncate.env</name>
		<value>false</value>
		<description>Truncate each environment variable for an external
			script in the script operator to 20KB (to fit system limits).
		</description>
	</property>

	<property>
		<name>hive.exec.compress.output</name>
		<value>false</value>
		<description>This controls whether the final outputs of a query (to a
			local/HDFS file or a Hive table) are compressed. The compression
			codec and other options are determined from the Hadoop config
			variables mapred.output.compress*.
		</description>
	</property>

	<property>
		<name>hive.exec.compress.intermediate</name>
		<value>false</value>
		<description>This controls whether intermediate files produced by
			Hive between multiple map-reduce jobs are compressed. The
			compression codec and other options are determined from the Hadoop
			config variables mapred.output.compress*.
		</description>
	</property>

	<property>
		<name>hive.exec.parallel</name>
		<value>false</value>
		<description>Whether to execute jobs in parallel</description>
	</property>

	<property>
		<name>hive.exec.parallel.thread.number</name>
		<value>8</value>
		<description>How many jobs at most can be executed in parallel.
		</description>
	</property>

	<property>
		<name>hive.exec.rowoffset</name>
		<value>false</value>
		<description>Whether to provide the row offset virtual column.
		</description>
	</property>

	<property>
		<name>hive.task.progress</name>
		<value>false</value>
		<description>Whether Hive should periodically update task progress
			counters during execution. Enabling this allows task progress to
			be monitored more closely in the job tracker, but may impose a
			performance penalty. This flag is automatically set to true for
			jobs with hive.exec.dynamic.partition set to true.
		</description>
	</property>

	<property>
		<name>hive.hwi.war.file</name>
		<value>lib/hive-hwi-@VERSION@.war</value>
		<description>This sets the path to the HWI war file, relative to
			${HIVE_HOME}.
		</description>
	</property>

	<property>
		<name>hive.hwi.listen.host</name>
		<value>0.0.0.0</value>
		<description>This is the host address the Hive Web Interface will
			listen on.
		</description>
	</property>

	<property>
		<name>hive.hwi.listen.port</name>
		<value>9999</value>
		<description>This is the port the Hive Web Interface will listen on.
		</description>
	</property>

	<property>
		<name>hive.exec.pre.hooks</name>
		<value></value>
		<description>Comma-separated list of pre-execution hooks to be
			invoked for each statement. A pre-execution hook is specified as
			the name of a Java class which implements the
			org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
		</description>
	</property>

	<property>
		<name>hive.exec.post.hooks</name>
		<value></value>
		<description>Comma-separated list of post-execution hooks to be
			invoked for each statement. A post-execution hook is specified as
			the name of a Java class which implements the
			org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
		</description>
	</property>

	<property>
		<name>hive.exec.failure.hooks</name>
		<value></value>
		<description>Comma-separated list of on-failure hooks to be invoked
			for each statement. An on-failure hook is specified as the name of
			a Java class which implements the
			org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
		</description>
	</property>

	<property>
		<name>hive.metastore.init.hooks</name>
		<value></value>
		<description>A comma-separated list of hooks to be invoked at the
			beginning of HMSHandler initialization. An init hook is specified
			as the name of a Java class which extends
			org.apache.hadoop.hive.metastore.MetaStoreInitListener.
		</description>
	</property>

	<property>
		<name>hive.client.stats.publishers</name>
		<value></value>
		<description>Comma-separated list of statistics publishers to be
			invoked on counters on each job. A client stats publisher is
			specified as the name of a Java class which implements the
			org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface.
		</description>
	</property>

	<property>
		<name>hive.client.stats.counters</name>
		<value></value>
		<description>Subset of counters that should be of interest for
			hive.client.stats.publishers (when one wants to limit their
			publishing). Non-display names should be used.
		</description>
	</property>

	<property>
		<name>hive.merge.mapfiles</name>
		<value>true</value>
		<description>Merge small files at the end of a map-only job.
		</description>
	</property>

	<property>
		<name>hive.merge.mapredfiles</name>
		<value>false</value>
		<description>Merge small files at the end of a map-reduce job.
		</description>
	</property>

	<property>
		<name>hive.heartbeat.interval</name>
		<value>1000</value>
		<description>Send a heartbeat after this interval; used by mapjoin
			and filter operators.
		</description>
	</property>

	<property>
		<name>hive.merge.size.per.task</name>
		<value>256000000</value>
		<description>Size of merged files at the end of the job</description>
	</property>

	<property>
		<name>hive.merge.smallfiles.avgsize</name>
		<value>16000000</value>
		<description>When the average output file size of a job is less than
			this number, Hive will start an additional map-reduce job to merge
			the output files into bigger files. This is only done for map-only
			jobs if hive.merge.mapfiles is true, and for map-reduce jobs if
			hive.merge.mapredfiles is true.
		</description>
	</property>

	<property>
		<name>hive.mapjoin.smalltable.filesize</name>
		<value>25000000</value>
		<description>The threshold for the input file size of the small
			tables; if the file size is smaller than this threshold, Hive will
			try to convert the common join into a map join.
		</description>
	</property>

	<property>
		<name>hive.ignore.mapjoin.hint</name>
		<value>true</value>
		<description>Ignore the mapjoin hint</description>
	</property>

	<property>
		<name>hive.mapjoin.localtask.max.memory.usage</name>
		<value>0.90</value>
		<description>How much memory the local task can take to hold the
			key/value pairs in the in-memory hash table. If the local task's
			memory usage is more than this number, the local task will abort
			itself; it means the data of the small table is too large to be
			held in memory.
		</description>
	</property>

	<property>
		<name>hive.mapjoin.followby.gby.localtask.max.memory.usage</name>
		<value>0.55</value>
		<description>How much memory the local task can take to hold the
			key/value pairs in the in-memory hash table when this map join is
			followed by a group by. If the local task's memory usage is more
			than this number, the local task will abort itself; it means the
			data of the small table is too large to be held in memory.
		</description>
	</property>

	<property>
		<name>hive.mapjoin.check.memory.rows</name>
		<value>100000</value>
		<description>After how many processed rows the memory usage is
			checked.
		</description>
	</property>

	<property>
		<name>hive.auto.convert.join</name>
		<value>false</value>
		<description>Whether Hive enables the optimization of converting a
			common join into a mapjoin based on the input file size.
		</description>
	</property>

	<property>
		<name>hive.auto.convert.join.noconditionaltask</name>
		<value>true</value>
		<description>Whether Hive enables the optimization of converting a
			common join into a mapjoin based on the input file size. If this
			parameter is on, and the sum of sizes for n-1 of the
			tables/partitions for an n-way join is smaller than the specified
			size, the join is directly converted to a mapjoin (there is no
			conditional task).
		</description>
	</property>

	<property>
		<name>hive.auto.convert.join.noconditionaltask.size</name>
		<value>10000000</value>
		<description>If hive.auto.convert.join.noconditionaltask is off, this
			parameter does not take effect. However, if it is on, and the sum
			of sizes for n-1 of the tables/partitions for an n-way join is
			smaller than this size, the join is directly converted to a
			mapjoin (there is no conditional task). The default is 10MB.
		</description>
	</property>
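
	<!-- Illustrative arithmetic, with made-up tables and sizes: in a 3-way
		join of A (4 MB), B (5 MB), and C (50 GB), the two smallest tables
		sum to 9 MB, which is under the 10000000-byte threshold above, so the
		join is converted directly to a mapjoin with C streamed. -->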

	<property>
		<name>hive.optimize.mapjoin.mapreduce</name>
		<value>false</value>
		<description>If hive.auto.convert.join is off, this parameter does
			not take effect. If it is on, and if there are map-join jobs
			followed by a map-reduce job (e.g. a group by), each map-only job
			is merged with the following map-reduce job.
		</description>
	</property>

	<property>
		<name>hive.script.auto.progress</name>
		<value>false</value>
		<description>Whether the Hive Transform/Map/Reduce clause should
			automatically send progress information to the TaskTracker to
			avoid the task getting killed because of inactivity. Hive sends
			progress information when the script is outputting to stderr. This
			option removes the need to periodically produce stderr messages,
			but users should be cautious because this may prevent infinite
			loops in the scripts from being killed by the TaskTracker.
		</description>
	</property>

	<property>
		<name>hive.script.serde</name>
		<value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value>
		<description>The default serde for transmitting input data to and
			reading output data from the user scripts.
		</description>
	</property>

	<property>
		<name>hive.binary.record.max.length</name>
		<value>1000</value>
		<description>Read from a binary stream and treat each
			hive.binary.record.max.length bytes as a record. The last record
			before the end of the stream can have fewer than
			hive.binary.record.max.length bytes.
		</description>
	</property>

	<property>
		<name>hive.script.recordreader</name>
		<value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value>
		<description>The default record reader for reading data from the user
			scripts.
		</description>
	</property>

	<property>
		<name>hive.script.recordwriter</name>
		<value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value>
		<description>The default record writer for writing data to the user
			scripts.
		</description>
	</property>

	<property>
		<name>hive.input.format</name>
		<value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
		<description>The default input format. Set this to HiveInputFormat if
			you encounter problems with CombineHiveInputFormat.
		</description>
	</property>

	<property>
		<name>hive.udtf.auto.progress</name>
		<value>false</value>
		<description>Whether Hive should automatically send progress
			information to the TaskTracker when using UDTFs, to prevent the
			task getting killed because of inactivity. Users should be
			cautious because this may prevent the TaskTracker from killing
			tasks with infinite loops.
		</description>
	</property>

	<property>
		<name>hive.mapred.reduce.tasks.speculative.execution</name>
		<value>true</value>
		<description>Whether speculative execution for reducers should be
			turned on.
		</description>
	</property>

	<property>
		<name>hive.exec.counters.pull.interval</name>
		<value>1000</value>
		<description>The interval with which to poll the JobTracker for the
			counters of the running job. The smaller it is, the more load
			there will be on the JobTracker; the higher it is, the less
			granular the captured counters will be.
		</description>
	</property>

	<property>
		<name>hive.querylog.location</name>
		<value>/tmp/${user.name}</value>
		<description>
			Location of the Hive run time structured log file.
		</description>
	</property>

	<property>
		<name>hive.querylog.enable.plan.progress</name>
		<value>true</value>
		<description>
			Whether to log the plan's progress every time a job's progress is
			checked. These logs are written to the location specified by
			hive.querylog.location.
		</description>
	</property>

	<property>
		<name>hive.querylog.plan.progress.interval</name>
		<value>60000</value>
		<description>
			The interval to wait between logging the plan's progress, in
			milliseconds. If there is a whole-number percentage change in the
			progress of the mappers or the reducers, the progress is logged
			regardless of this value. The actual interval will be the ceiling
			of (this value divided by the value of
			hive.exec.counters.pull.interval) multiplied by the value of
			hive.exec.counters.pull.interval, i.e. if it does not divide
			evenly by the value of hive.exec.counters.pull.interval it will be
			logged less frequently than specified. This only has an effect if
			hive.querylog.enable.plan.progress is set to true.
		</description>
	</property>
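
	<!-- Worked example of the rounding rule above: if this value were 2500
		and hive.exec.counters.pull.interval were 1000, the actual logging
		interval would be ceiling(2500 / 1000) * 1000 = 3000 ms. With the
		defaults (60000 and 1000), 60000 divides evenly and is used as-is. -->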

	<property>
		<name>hive.enforce.bucketing</name>
		<value>false</value>
		<description>Whether bucketing is enforced. If true, while inserting
			into the table, bucketing is enforced.
		</description>
	</property>

	<property>
		<name>hive.enforce.sorting</name>
		<value>false</value>
		<description>Whether sorting is enforced. If true, while inserting
			into the table, sorting is enforced.
		</description>
	</property>

	<property>
		<name>hive.optimize.bucketingsorting</name>
		<value>true</value>
		<description>If hive.enforce.bucketing or hive.enforce.sorting is
			true, don't create a reducer for enforcing bucketing/sorting for
			queries of the form:
			insert overwrite table T2 select * from T1;
			where T1 and T2 are bucketed/sorted by the same keys into the same
			number of buckets.
		</description>
	</property>

	<property>
		<name>hive.enforce.sortmergebucketmapjoin</name>
		<value>false</value>
		<description>If the user asked for a sort-merge bucketed map-side
			join, and it cannot be performed, should the query fail or not?
		</description>
	</property>

	<property>
		<name>hive.auto.convert.sortmerge.join</name>
		<value>false</value>
		<description>Whether the join will be automatically converted to a
			sort-merge join, if the joined tables pass the criteria for
			sort-merge join.
		</description>
	</property>

	<property>
		<name>hive.auto.convert.sortmerge.join.bigtable.selection.policy</name>
		<value>org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ</value>
		<description>The policy to choose the big table for automatic
			conversion to sort-merge join. By default, the table with the
			largest partitions is assigned as the big table. All policies are:
			. based on position of the table - the leftmost table is selected:
			org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSMJ.
			. based on total size (all the partitions selected in the query)
			of the table:
			org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ.
			. based on average size (all the partitions selected in the query)
			of the table:
			org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.
			New policies can be added in the future.
		</description>
	</property>

	<property>
		<name>hive.metastore.ds.connection.url.hook</name>
		<value></value>
		<description>Name of the hook to use for retrieving the JDO
			connection URL. If empty, the value in
			javax.jdo.option.ConnectionURL is used.
		</description>
	</property>

	<property>
		<name>hive.metastore.ds.retry.attempts</name>
		<value>1</value>
		<description>The number of times to retry a metastore call if there
			is a connection error.
		</description>
	</property>

	<property>
		<name>hive.metastore.ds.retry.interval</name>
		<value>1000</value>
		<description>The number of milliseconds between metastore retry
			attempts.
		</description>
	</property>

	<property>
		<name>hive.metastore.server.min.threads</name>
		<value>200</value>
		<description>Minimum number of worker threads in the Thrift server's
			pool.
		</description>
	</property>

	<property>
		<name>hive.metastore.server.max.threads</name>
		<value>100000</value>
		<description>Maximum number of worker threads in the Thrift server's
			pool.
		</description>
	</property>

	<property>
		<name>hive.metastore.server.tcp.keepalive</name>
		<value>true</value>
		<description>Whether to enable TCP keepalive for the metastore
			server. Keepalive will prevent accumulation of half-open
			connections.
		</description>
	</property>

	<property>
		<name>hive.metastore.sasl.enabled</name>
		<value>false</value>
		<description>If true, the metastore Thrift interface will be secured
			with SASL. Clients must authenticate with Kerberos.
		</description>
	</property>

	<property>
		<name>hive.metastore.thrift.framed.transport.enabled</name>
		<value>false</value>
		<description>If true, the metastore Thrift interface will use
			TFramedTransport. When false (default), a standard TTransport is
			used.
		</description>
	</property>

	<property>
		<name>hive.metastore.kerberos.keytab.file</name>
		<value></value>
		<description>The path to the Kerberos keytab file containing the
			metastore Thrift server's service principal.
		</description>
	</property>

	<property>
		<name>hive.metastore.kerberos.principal</name>
		<value>hive-metastore/_HOST@EXAMPLE.COM</value>
		<description>The service principal for the metastore Thrift server.
			The special string _HOST will be replaced automatically with the
			correct host name.
		</description>
	</property>

	<property>
		<name>hive.cluster.delegation.token.store.class</name>
		<value>org.apache.hadoop.hive.thrift.MemoryTokenStore</value>
		<description>The delegation token store implementation. Set to
			org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for a
			load-balanced cluster.
		</description>
	</property>

	<property>
		<name>hive.cluster.delegation.token.store.zookeeper.connectString</name>
		<value>localhost:2181</value>
		<description>The ZooKeeper token store connect string.</description>
	</property>

	<property>
		<name>hive.cluster.delegation.token.store.zookeeper.znode</name>
		<value>/hive/cluster/delegation</value>
		<description>The root path for token store data.</description>
	</property>

	<property>
		<name>hive.cluster.delegation.token.store.zookeeper.acl</name>
		<value>sasl:hive/host1@EXAMPLE.COM:cdrwa,sasl:hive/host2@EXAMPLE.COM:cdrwa</value>
		<description>ACL for token store entries. List comma-separated all
			server principals for the cluster.
		</description>
	</property>

	<property>
		<name>hive.metastore.cache.pinobjtypes</name>
		<value>Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order</value>
		<description>List of comma-separated metastore object types that
			should be pinned in the cache.
		</description>
	</property>

	<property>
		<name>hive.optimize.reducededuplication</name>
		<value>true</value>
		<description>Remove extra map-reduce jobs if the data is already
			clustered by the same key which needs to be used again. This
			should always be set to true. Since it is a new feature, it has
			been made configurable.
		</description>
	</property>

	<property>
		<name>hive.optimize.reducededuplication.min.reducer</name>
		<value>4</value>
		<description>Reduce deduplication merges two RSs by moving the
			key/parts/reducer-num of the child RS to the parent RS. That means
			if the reducer-num of the child RS is fixed (order by or forced
			bucketing) and small, it can produce a very slow, single-reducer
			MR job. The optimization will be disabled if the number of
			reducers is less than the specified value.
		</description>
	</property>

	<property>
		<name>hive.exec.dynamic.partition</name>
		<value>true</value>
		<description>Whether or not to allow dynamic partitions in DML/DDL.
		</description>
	</property>

	<property>
		<name>hive.exec.dynamic.partition.mode</name>
		<value>strict</value>
		<description>In strict mode, the user must specify at least one
			static partition, in case the user accidentally overwrites all
			partitions.
		</description>
	</property>
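
	<!-- Illustrative strict-mode dynamic-partition insert, with made-up
		table and column names. The static 'country' spec satisfies the
		at-least-one-static-partition rule, while 'state' stays dynamic:
		INSERT OVERWRITE TABLE sales PARTITION (country='US', state)
		SELECT sale_id, amount, state FROM staging_sales; -->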

	<property>
		<name>hive.exec.max.dynamic.partitions</name>
		<value>1000</value>
		<description>Maximum number of dynamic partitions allowed to be
			created in total.
		</description>
	</property>

	<property>
		<name>hive.exec.max.dynamic.partitions.pernode</name>
		<value>100</value>
		<description>Maximum number of dynamic partitions allowed to be
			created in each mapper/reducer node.
		</description>
	</property>

	<property>
		<name>hive.exec.max.created.files</name>
		<value>100000</value>
		<description>Maximum number of HDFS files created by all
			mappers/reducers in a MapReduce job.
		</description>
	</property>

	<property>
		<name>hive.exec.default.partition.name</name>
		<value>__HIVE_DEFAULT_PARTITION__</value>
		<description>The default partition name in case the dynamic partition
			column value is null/empty string or any other value that cannot
			be escaped. This value must not contain any special character used
			in HDFS URIs (e.g., ':', '%', '/' etc). The user has to be aware
			that the dynamic partition value should not contain this value, to
			avoid confusion.
		</description>
	</property>

	<property>
		<name>hive.stats.dbclass</name>
		<value>jdbc:derby</value>
		<description>The default database that stores temporary Hive
			statistics.
		</description>
	</property>

	<property>
		<name>hive.stats.autogather</name>
		<value>true</value>
		<description>A flag to gather statistics automatically during the
			INSERT OVERWRITE command.
		</description>
	</property>

	<property>
		<name>hive.stats.jdbcdriver</name>
		<value>org.apache.derby.jdbc.EmbeddedDriver</value>
		<description>The JDBC driver for the database that stores temporary
			Hive statistics.
		</description>
	</property>

	<property>
		<name>hive.stats.dbconnectionstring</name>
		<value>jdbc:derby:;databaseName=TempStatsStore;create=true</value>
		<description>The default connection string for the database that
			stores temporary Hive statistics.
		</description>
	</property>

	<property>
		<name>hive.stats.default.publisher</name>
		<value></value>
		<description>The Java class (implementing the StatsPublisher
			interface) that is used by default if hive.stats.dbclass is not
			JDBC or HBase.
		</description>
	</property>

	<property>
		<name>hive.stats.default.aggregator</name>
		<value></value>
		<description>The Java class (implementing the StatsAggregator
			interface) that is used by default if hive.stats.dbclass is not
			JDBC or HBase.
		</description>
	</property>

	<property>
		<name>hive.stats.jdbc.timeout</name>
		<value>30</value>
		<description>Timeout value (number of seconds) used by JDBC
			connections and statements.
		</description>
	</property>

	<property>
		<name>hive.stats.retries.max</name>
		<value>0</value>
		<description>Maximum number of retries when the stats
			publisher/aggregator gets an exception updating the intermediate
			database. The default is no retries on failure.
		</description>
	</property>

	<property>
		<name>hive.stats.retries.wait</name>
		<value>3000</value>
		<description>The base waiting window (in milliseconds) before the
			next retry. The actual wait time is calculated by baseWindow *
			failures + baseWindow * (failures + 1) * (random number between
			[0.0,1.0]).
		</description>
	</property>
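
	<!-- Worked example of the backoff formula, assuming the reconstructed
		reading above: with baseWindow = 3000 ms and failures = 2, the wait
		is 3000 * 2 + 3000 * 3 * r for r in [0.0,1.0], i.e. between 6 and 15
		seconds. -->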
1705
1706 <property>
1707 <name>hive.stats.reliable</name>
1708 <value>false</value>
1709 <description>Whether queries will fail because stats cannot be
1710 collected completely accurately.
1711 If this is set to true,
1712 reading/writing from/into a partition may fail
1713 becuase the stats
1714 could not be computed accurately.
1715 </description>
1716 </property>
1717
1718 <property>
1719 <name>hive.stats.collect.tablekeys</name>
1720 <value>false</value>
1721 <description>Whether join and group by keys on tables are derived and
1722 maintained in the QueryPlan.
1723 This is useful to identify how tables
1724 are accessed and to determine if
1725 they should be bucketed.
1726 </description>
1727 </property>
1728
1729 <property>
1730 <name>hive.stats.collect.scancols</name>
1731 <value>false</value>
1732 <description>Whether column accesses are tracked in the QueryPlan.
1733 This is useful to identify how tables are accessed and to determine
1734 if there are wasted columns that can be trimmed.
1735 </description>
1736 </property>
1737
1738 <property>
1739 <name>hive.stats.ndv.error</name>
1740 <value>20.0</value>
1741 <description>Standard error expressed in percentage. Provides a
1742 tradeoff between accuracy and compute cost.A lower value for error
1743 indicates higher accuracy and a higher compute cost.
1744 </description>
1745 </property>
1746
1747 <property>
1748 <name>hive.stats.key.prefix.max.length</name>
1749 <value>200</value>
1750 <description>
1751 Determines if when the prefix of the key used for
1752 intermediate stats
1753 collection
1754 exceeds a certain length, a hash of the
1755 key is used instead. If the value
1756 &lt; 0 then hashing
1757 is never used,
1758 if the value >= 0 then hashing is used only when the
1759 key prefixes
1760 length
1761 exceeds that value. The key prefix is defined as everything
1762 preceding the
1763 task ID in the key.
1764 </description>
1765 </property>
1766
1767 <property>
1768 <name>hive.support.concurrency</name>
1769 <value>false</value>
1770 <description>Whether hive supports concurrency or not. A zookeeper
1771 instance must be up and running for the default hive lock manager to
1772 support read-write locks.
1773 </description>
1774 </property>
1775
1776 <property>
1777 <name>hive.lock.numretries</name>
1778 <value>100</value>
1779 <description>The number of times you want to try to get all the locks
1780 </description>
1781 </property>
1782
1783 <property>
1784 <name>hive.unlock.numretries</name>
1785 <value>10</value>
  <description>The number of times you want to retry a single unlock operation
1787 </description>
1788 </property>
1789
1790 <property>
1791 <name>hive.lock.sleep.between.retries</name>
1792 <value>60</value>
1793 <description>The sleep time (in seconds) between various retries
1794 </description>
1795 </property>
1796
1797 <property>
1798 <name>hive.zookeeper.quorum</name>
1799 <value></value>
1800 <description>The list of zookeeper servers to talk to. This is only
1801 needed for read/write locks.
1802 </description>
1803 </property>
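
  <!-- A hypothetical example value for a three-node ensemble (host names -->
  <!-- are made up): zk1.example.com,zk2.example.com,zk3.example.com -->
  <!-- The port comes from hive.zookeeper.client.port below. -->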
1804
1805 <property>
1806 <name>hive.zookeeper.client.port</name>
1807 <value>2181</value>
1808 <description>The port of zookeeper servers to talk to. This is only
1809 needed for read/write locks.
1810 </description>
1811 </property>
1812
1813 <property>
1814 <name>hive.zookeeper.session.timeout</name>
1815 <value>600000</value>
1816 <description>Zookeeper client's session timeout. The client is
1817 disconnected, and as a result, all locks released, if a heartbeat is
1818 not sent in the timeout.
1819 </description>
1820 </property>
1821
1822 <property>
1823 <name>hive.zookeeper.namespace</name>
1824 <value>hive_zookeeper_namespace</value>
1825 <description>The parent node under which all zookeeper nodes are
1826 created.
1827 </description>
1828 </property>
1829
1830 <property>
1831 <name>hive.zookeeper.clean.extra.nodes</name>
1832 <value>false</value>
1833 <description>Clean extra nodes at the end of the session.
1834 </description>
1835 </property>
1836
1837 <property>
1838 <name>fs.har.impl</name>
1839 <value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
1840 <description>The implementation for accessing Hadoop Archives. Note
  that this won't be applicable to Hadoop versions earlier than 0.20
1842 </description>
1843 </property>
1844
1845 <property>
1846 <name>hive.archive.enabled</name>
1847 <value>false</value>
1848 <description>Whether archiving operations are permitted</description>
1849 </property>
1850
1851 <property>
1852 <name>hive.fetch.output.serde</name>
1853 <value>org.apache.hadoop.hive.serde2.DelimitedJSONSerDe</value>
1854 <description>The serde used by FetchTask to serialize the fetch
1855 output.
1856 </description>
1857 </property>
1858
1859 <property>
1860 <name>hive.exec.mode.local.auto</name>
1861 <value>false</value>
1862 <description> Let hive determine whether to run in local mode
1863 automatically
1864 </description>
1865 </property>
1866
1867 <property>
1868 <name>hive.exec.drop.ignorenonexistent</name>
1869 <value>true</value>
1870 <description>
1871 Do not report an error if DROP TABLE/VIEW specifies a
1872 non-existent
1873 table/view
1874 </description>
1875 </property>
1876
1877 <property>
1878 <name>hive.exec.show.job.failure.debug.info</name>
1879 <value>true</value>
1880 <description>
1881 If a job fails, whether to provide a link in the CLI to
1882 the task with
1883 the
1884 most failures, along with debugging hints if
1885 applicable.
1886 </description>
1887 </property>
1888
1889 <property>
1890 <name>hive.auto.progress.timeout</name>
1891 <value>0</value>
1892 <description>
  How long (in seconds) to run the auto-progressor for the script/UDTF
  operators. Set to 0 for forever.
1897 </description>
1898 </property>
1899
1900 <!-- HBase Storage Handler Parameters -->
1901
1902 <property>
1903 <name>hive.hbase.wal.enabled</name>
1904 <value>true</value>
1905 <description>Whether writes to HBase should be forced to the
1906 write-ahead log. Disabling this improves HBase write performance at
1907 the risk of lost writes in case of a crash.
1908 </description>
1909 </property>
1910
1911 <property>
1912 <name>hive.table.parameters.default</name>
1913 <value></value>
1914 <description>Default property values for newly created tables
1915 </description>
1916 </property>
1917
1918 <property>
1919 <name>hive.entity.separator</name>
1920 <value>@</value>
1921 <description>Separator used to construct names of tables and
1922 partitions. For example, dbname@tablename@partitionname
1923 </description>
1924 </property>
1925
1926 <property>
1927 <name>hive.ddl.createtablelike.properties.whitelist</name>
1928 <value></value>
1929 <description>Table Properties to copy over when executing a Create
1930 Table Like.
1931 </description>
1932 </property>
1933
1934 <property>
1935 <name>hive.variable.substitute</name>
1936 <value>true</value>
1937 <description>This enables substitution using syntax like ${var}
1938 ${system:var} and ${env:var}.
1939 </description>
1940 </property>
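
  <!-- A small usage sketch (myvar and dummy_table are made-up names): -->
  <!-- set myvar=dummy_table; -->
  <!-- select * from ${hiveconf:myvar}; -->
  <!-- which is substituted to: select * from dummy_table; -->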
1941
1942 <property>
1943 <name>hive.variable.substitute.depth</name>
1944 <value>40</value>
1945 <description>The maximum replacements the substitution engine will
1946 do.
1947 </description>
1948 </property>
1949
1950 <property>
1951 <name>hive.conf.validation</name>
1952 <value>true</value>
  <description>Enables type checking for registered hive configurations
1954 </description>
1955 </property>
1956
1957 <property>
1958 <name>hive.security.authorization.enabled</name>
1959 <value>false</value>
1960 <description>enable or disable the hive client authorization
1961 </description>
1962 </property>
1963
1964 <property>
1965 <name>hive.security.authorization.createtable.user.grants</name>
1966 <value></value>
1967 <description>the privileges automatically granted to some users
1968 whenever a table gets created.
1969 An example like
1970 "userX,userY:select;userZ:create" will grant select
1971 privilege to
1972 userX and userY,
  and grant create privilege to userZ whenever a new
  table is created.
1975 </description>
1976 </property>
1977
1978 <property>
1979 <name>hive.security.authorization.createtable.group.grants</name>
1980 <value></value>
1981 <description>the privileges automatically granted to some groups
1982 whenever a table gets created.
1983 An example like
1984 "groupX,groupY:select;groupZ:create" will grant select
1985 privilege to
1986 groupX and groupY,
  and grant create privilege to groupZ whenever a
  new table is created.
1989 </description>
1990 </property>
1991
1992 <property>
1993 <name>hive.security.authorization.createtable.role.grants</name>
1994 <value></value>
1995 <description>the privileges automatically granted to some roles
1996 whenever a table gets created.
1997 An example like
1998 "roleX,roleY:select;roleZ:create" will grant select
1999 privilege to
2000 roleX and roleY,
  and grant create privilege to roleZ whenever a new
  table is created.
2003 </description>
2004 </property>
2005
2006 <property>
2007 <name>hive.security.authorization.createtable.owner.grants</name>
2008 <value></value>
2009 <description>the privileges automatically granted to the owner
2010 whenever a table gets created.
2011 An example like "select,drop" will
  grant select and drop privileges to
  the owner of the table
2014 </description>
2015 </property>
2016
2017 <property>
2018 <name>hive.metastore.authorization.storage.checks</name>
2019 <value>false</value>
2020 <description>Should the metastore do authorization checks against the
2021 underlying storage
2022 for operations like drop-partition (disallow the
2023 drop-partition if the
2024 user in
2025 question doesn't have permissions to
2026 delete the corresponding directory
2027 on the storage).
2028 </description>
2029 </property>
2030
2031 <property>
2032 <name>hive.error.on.empty.partition</name>
2033 <value>false</value>
  <description>Whether to throw an exception if dynamic partition
2035 insert generates empty results.
2036 </description>
2037 </property>
2038
2039 <property>
2040 <name>hive.index.compact.file.ignore.hdfs</name>
2041 <value>false</value>
  <description>If true, the hdfs location stored in the index file will
  be ignored at runtime. If the data got moved or the name of the
  cluster got changed, the index data should still be usable.
2047 </description>
2048 </property>
2049
2050 <property>
2051 <name>hive.optimize.index.filter.compact.minsize</name>
2052 <value>5368709120</value>
2053 <description>Minimum size (in bytes) of the inputs on which a compact
2054 index is automatically used.
2055 </description>
2056 </property>
2057
2058 <property>
2059 <name>hive.optimize.index.filter.compact.maxsize</name>
2060 <value>-1</value>
2061 <description>Maximum size (in bytes) of the inputs on which a compact
2062 index is automatically used.
2063 A negative number is equivalent to
2064 infinity.
2065 </description>
2066 </property>
2067
2068 <property>
2069 <name>hive.index.compact.query.max.size</name>
2070 <value>10737418240</value>
2071 <description>The maximum number of bytes that a query using the
2072 compact index can read. Negative value is equivalent to infinity.
2073 </description>
2074 </property>
2075
2076 <property>
2077 <name>hive.index.compact.query.max.entries</name>
2078 <value>10000000</value>
2079 <description>The maximum number of index entries to read during a
2080 query that uses the compact index. Negative value is equivalent to
2081 infinity.
2082 </description>
2083 </property>
2084
2085 <property>
2086 <name>hive.index.compact.binary.search</name>
2087 <value>true</value>
2088 <description>Whether or not to use a binary search to find the
2089 entries in an index table that match the filter, where possible
2090 </description>
2091 </property>
2092
2093 <property>
2094 <name>hive.exim.uri.scheme.whitelist</name>
2095 <value>hdfs,pfile</value>
2096 <description>A comma separated list of acceptable URI schemes for
2097 import and export.
2098 </description>
2099 </property>
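
  <!-- For example, with the default whitelist an export to HDFS works -->
  <!-- (the table and path are illustrative): -->
  <!-- EXPORT TABLE employee TO '/exports/employee'; -->
  <!-- while a URI with a scheme outside the list would be rejected. -->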
2100
2101 <property>
2102 <name>hive.lock.mapred.only.operation</name>
2103 <value>false</value>
  <description>This parameter controls whether to lock only on queries
  that need to execute at least one mapred job.
2107 </description>
2108 </property>
2109
2110 <property>
2111 <name>hive.limit.row.max.size</name>
2112 <value>100000</value>
  <description>When trying a smaller subset of data for simple LIMIT,
  the minimum size we need to guarantee each row to have.
2116 </description>
2117 </property>
2118
2119 <property>
2120 <name>hive.limit.optimize.limit.file</name>
2121 <value>10</value>
2122 <description>When trying a smaller subset of data for simple LIMIT,
2123 maximum number of files we can
2124 sample.
2125 </description>
2126 </property>
2127
2128 <property>
2129 <name>hive.limit.optimize.enable</name>
2130 <value>false</value>
  <description>Whether to enable the optimization of trying a smaller
2132 subset of data for simple LIMIT first.
2133 </description>
2134 </property>
2135
2136 <property>
2137 <name>hive.limit.optimize.fetch.max</name>
2138 <value>50000</value>
2139 <description>Maximum number of rows allowed for a smaller subset of
2140 data for simple LIMIT, if it is a fetch query.
2141 Insert queries are not
2142 restricted by this limit.
2143 </description>
2144 </property>
2145
2146 <property>
2147 <name>hive.rework.mapredwork</name>
2148 <value>false</value>
  <description>Whether to rework the mapred work or not.
  This was first introduced by SymlinkTextInputFormat to replace
  symlink files with real paths at compile time.
2154 </description>
2155 </property>
2156
2157 <property>
2158 <name>hive.exec.concatenate.check.index</name>
2159 <value>true</value>
  <description>If this is set to true, hive will throw an error when
  doing 'alter table tbl_name [partSpec] concatenate' on a
  table/partition that has indexes on it. The reason to set this to
  true is that it can help the user avoid handling all the index drop,
  recreation and rebuild work. This is very helpful for tables with
  thousands of partitions.
2167 </description>
2168 </property>
2169
2170 <property>
2171 <name>hive.sample.seednumber</name>
2172 <value>0</value>
  <description>A number used for percentage sampling. By changing this
  number, the user will change the subsets
2175 of data sampled.
2176 </description>
2177 </property>
2178
2179 <property>
2180 <name>hive.io.exception.handlers</name>
2181 <value></value>
2182 <description>A list of io exception handler class names. This is used
  to construct a list of exception handlers to handle exceptions thrown
2184 by record readers
2185 </description>
2186 </property>
2187
2188 <property>
2189 <name>hive.autogen.columnalias.prefix.label</name>
2190 <value>_c</value>
2191 <description>String used as a prefix when auto generating column
2192 alias.
2193 By default the prefix label will be appended with a column
2194 position
2195 number to form the column alias. Auto generation would
2196 happen if an
2197 aggregate function is used in a select clause without an
2198 explicit
2199 alias.
2200 </description>
2201 </property>
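
  <!-- Illustration (assuming the default prefix "_c"; t is a made-up table): -->
  <!-- select count(key), sum(value) from t; -->
  <!-- yields result columns _c0 and _c1 unless explicit aliases are given. -->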
2202
2203 <property>
2204 <name>hive.autogen.columnalias.prefix.includefuncname</name>
2205 <value>false</value>
2206 <description>Whether to include function name in the column alias
2207 auto generated by hive.
2208 </description>
2209 </property>
2210
2211 <property>
2212 <name>hive.exec.perf.logger</name>
2213 <value>org.apache.hadoop.hive.ql.log.PerfLogger</value>
  <description>The class responsible for logging client-side performance
2215 metrics. Must be a subclass of
2216 org.apache.hadoop.hive.ql.log.PerfLogger
2217 </description>
2218 </property>
2219
2220 <property>
2221 <name>hive.start.cleanup.scratchdir</name>
2222 <value>false</value>
  <description>To clean up the hive scratchdir while starting the hive
2224 server
2225 </description>
2226 </property>
2227
2228 <property>
2229 <name>hive.output.file.extension</name>
2230 <value></value>
2231 <description>String used as a file extension for output files. If not
2232 set, defaults to the codec extension for text files (e.g. ".gz"), or
2233 no extension otherwise.
2234 </description>
2235 </property>
2236
2237 <property>
2238 <name>hive.insert.into.multilevel.dirs</name>
2239 <value>false</value>
  <description>Whether to insert into multilevel directories like
2241 "insert
2242 directory '/HIVEFT25686/chinna/' from table"
2243 </description>
2244 </property>
2245
2246 <property>
2247 <name>hive.warehouse.subdir.inherit.perms</name>
2248 <value>false</value>
  <description>Set this to true if the table directories should
2250 inherit the
2251 permission of the warehouse or database directory instead
2252 of being created
2253 with the permissions derived from dfs umask
2254 </description>
2255 </property>
2256
2257 <property>
2258 <name>hive.exec.job.debug.capture.stacktraces</name>
2259 <value>true</value>
2260 <description>Whether or not stack traces parsed from the task logs of
2261 a sampled failed task for
2262 each failed job should be stored in the
2263 SessionState
2264 </description>
2265 </property>
2266
2267 <property>
2268 <name>hive.exec.driver.run.hooks</name>
2269 <value></value>
2270 <description>A comma separated list of hooks which implement
2271 HiveDriverRunHook and will be run at the
2272 beginning and end of
  Driver.run; these will be run in the order specified
2274 </description>
2275 </property>
2276
2277 <property>
2278 <name>hive.ddl.output.format</name>
2279 <value>text</value>
2280 <description>
2281 The data format to use for DDL output. One of "text"
2282 (for human
2283 readable text) or "json" (for a json object).
2284 </description>
2285 </property>
2286
2287 <property>
2288 <name>hive.transform.escape.input</name>
2289 <value>false</value>
2290 <description>
2291 This adds an option to escape special chars (newlines,
2292 carriage returns
2293 and
2294 tabs) when they are passed to the user script.
2295 This is useful if the hive
2296 tables
2297 can contain data that contains
2298 special characters.
2299 </description>
2300 </property>
2301
2302 <property>
2303 <name>hive.exec.rcfile.use.explicit.header</name>
2304 <value>true</value>
2305 <description>
2306 If this is set the header for RC Files will simply be
2307 RCF. If this is
2308 not
  set the header will be the one borrowed from sequence
2310 files, e.g. SEQ-
2311 followed
2312 by the input and output RC File formats.
2313 </description>
2314 </property>
2315
2316 <property>
2317 <name>hive.multi.insert.move.tasks.share.dependencies</name>
2318 <value>false</value>
2319 <description>
2320 If this is set all move tasks for tables/partitions (not
2321 directories)
2322 at the end of a
2323 multi-insert query will only begin once
2324 the dependencies for all these move
2325 tasks have been
2326 met.
2327 Advantages: If
2328 concurrency is enabled, the locks will only be released once the
2329 query has
2330 finished, so with this config enabled, the time when the
2331 table/partition is
2332 generated will be much closer to when the lock on
2333 it is released.
2334 Disadvantages: If concurrency is not enabled, with
2335 this disabled,
2336 the tables/partitions which
2337 are produced by this query
2338 and finish earlier will be available for
2339 querying
2340 much earlier. Since
2341 the locks are only released once the query finishes,
2342 this
2343 does not
2344 apply if concurrency is enabled.
2345 </description>
2346 </property>
2347
2348 <property>
2349 <name>hive.fetch.task.conversion</name>
2350 <value>minimal</value>
2351 <description>
  Some select queries can be converted to a single FETCH task,
  minimizing latency. Currently the query should be single sourced,
  have no subquery, and have no aggregations or distincts (which incur
  an RS), lateral views or joins.
  1. minimal : SELECT STAR, FILTER on partition columns, LIMIT only
  2. more : SELECT, FILTER, LIMIT only (TABLESAMPLE, virtual columns)
2365 </description>
2366 </property>
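
  <!-- Illustrative queries (src is a made-up table): -->
  <!-- convertible under "minimal": select * from src limit 10; -->
  <!-- convertible under "more": select key from src where key > 100 limit 10; -->
  <!-- not convertible (aggregation): select key, count(1) from src group by key; -->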
2367
2368 <property>
2369 <name>hive.hmshandler.retry.attempts</name>
2370 <value>1</value>
  <description>The number of times to retry an HMSHandler call if there
  is a connection error
2373 </description>
2374 </property>
2375
2376 <property>
2377 <name>hive.hmshandler.retry.interval</name>
2378 <value>1000</value>
  <description>The number of milliseconds between HMSHandler retry
2380 attempts
2381 </description>
2382 </property>
2383
2384 <property>
2385 <name>hive.server.read.socket.timeout</name>
2386 <value>10</value>
2387 <description>Timeout for the HiveServer to close the connection if no
2388 response from the client in N seconds, defaults to 10 seconds.
2389 </description>
2390 </property>
2391
2392 <property>
2393 <name>hive.server.tcp.keepalive</name>
2394 <value>true</value>
2395 <description>Whether to enable TCP keepalive for the Hive server.
2396 Keepalive will prevent accumulation of half-open connections.
2397 </description>
2398 </property>
2399
2400 <property>
2401 <name>hive.decode.partition.name</name>
2402 <value>false</value>
2403 <description>Whether to show the unquoted partition names in query
2404 results.
2405 </description>
2406 </property>
2407
2408 <property>
2409 <name>hive.log4j.file</name>
2410 <value></value>
2411 <description>Hive log4j configuration file.
2412 If the property is not
2413 set, then logging will be initialized using
2414 hive-log4j.properties
2415 found on the classpath.
2416 If the property is set, the value must be a
2417 valid URI (java.net.URI,
2418 e.g. "file:///tmp/my-logging.properties"),
2419 which you can then
2420 extract a URL from and pass to
2421 PropertyConfigurator.configure(URL).
2422 </description>
2423 </property>
2424
2425 <property>
2426 <name>hive.exec.log4j.file</name>
2427 <value></value>
  <description>Hive log4j configuration file for execution mode (sub
2429 command).
2430 If the property is not set, then logging will be
2431 initialized using
2432 hive-exec-log4j.properties found on the classpath.
2433 If the property is set, the value must be a valid URI (java.net.URI,
2434 e.g. "file:///tmp/my-logging.properties"), which you can then
2435 extract a URL from and pass to PropertyConfigurator.configure(URL).
2436 </description>
2437 </property>
2438
2439 <property>
2440 <name>hive.exec.infer.bucket.sort</name>
2441 <value>false</value>
2442 <description>
2443 If this is set, when writing partitions, the metadata
2444 will include the
2445 bucketing/sorting
2446 properties with which the data was
2447 written if any (this will not overwrite the
2448 metadata
2449 inherited from
2450 the table if the table is bucketed/sorted)
2451 </description>
2452 </property>
2453
2454 <property>
2455 <name>hive.exec.infer.bucket.sort.num.buckets.power.two</name>
2456 <value>false</value>
2457 <description>
2458 If this is set, when setting the number of reducers for
2459 the map reduce
2460 task which writes the
2461 final output files, it will
2462 choose a number which is a power of two,
2463 unless the user specifies
  the number of reducers to use using mapred.reduce.tasks. The number
  of reducers may be set to a power of two, only to be followed by a
  merge task, preventing anything from being inferred.
2471 With
2472 hive.exec.infer.bucket.sort set to true:
2473 Advantages: If this is not
2474 set, the number of buckets for partitions will seem
2475 arbitrary,
2476 which
2477 means that the number of mappers used for optimized joins, for
2478 example, will
2479 be very low. With this set, since the number of buckets
2480 used for any
2481 partition is
2482 a power of two, the number of mappers used
2483 for optimized joins will
2484 be the least
2485 number of buckets used by any
2486 partition being joined.
2487 Disadvantages: This may mean a much larger or
2488 much smaller number of reducers
2489 being used in the
2490 final map reduce
2491 job, e.g. if a job was originally going to take 257
2492 reducers,
2493 it will
2494 now take 512 reducers, similarly if the max number of reducers
2495 is
2496 511,
2497 and a job was going to use this many, it will now use 256
2498 reducers.
2499
2500 </description>
2501 </property>
2502
2503 <property>
2504 <name>hive.groupby.orderby.position.alias</name>
2505 <value>false</value>
2506 <description>Whether to enable using Column Position Alias in Group
2507 By or Order By
2508 </description>
2509 </property>
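
  <!-- When enabled, a sketch like the following (src is illustrative): -->
  <!-- select key, count(1) from src group by 1 order by 2; -->
  <!-- is interpreted as group by key, order by count(1). -->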
2510
2511 <property>
2512 <name>hive.server2.thrift.min.worker.threads</name>
2513 <value>5</value>
2514 <description>Minimum number of Thrift worker threads</description>
2515 </property>
2516
2517 <property>
2518 <name>hive.server2.thrift.max.worker.threads</name>
2519 <value>100</value>
2520 <description>Maximum number of Thrift worker threads</description>
2521 </property>
2522
2523 <property>
2524 <name>hive.server2.thrift.port</name>
2525 <value>10000</value>
2526 <description>Port number of HiveServer2 Thrift interface.
2527 Can be
2528 overridden by setting $HIVE_SERVER2_THRIFT_PORT
2529 </description>
2530 </property>
2531
2532 <property>
2533 <name>hive.server2.thrift.bind.host</name>
2534 <value>localhost</value>
2535 <description>Bind host on which to run the HiveServer2 Thrift
2536 interface.
2537 Can be overridden by setting
2538 $HIVE_SERVER2_THRIFT_BIND_HOST
2539 </description>
2540 </property>
2541
2542 <property>
2543 <name>hive.server2.authentication</name>
2544 <value>NONE</value>
2545 <description>
  Client authentication types.
  NONE: no authentication check
  LDAP: LDAP/AD based authentication
  KERBEROS: Kerberos/GSSAPI authentication
  CUSTOM: Custom authentication provider
  (Use with property hive.server2.custom.authentication.class)
2555 </description>
2556 </property>
2557
2558 <property>
2559 <name>hive.server2.custom.authentication.class</name>
2560 <value></value>
2561 <description>
2562 Custom authentication class. Used when property
2563 'hive.server2.authentication' is set to 'CUSTOM'. Provided class
2564 must be a proper implementation of the interface
2565 org.apache.hive.service.auth.PasswdAuthenticationProvider.
2566 HiveServer2
  will call its Authenticate(user, password) method to authenticate
  requests. The implementation may optionally extend Hadoop's
2572 org.apache.hadoop.conf.Configured class to grab Hive's
2573 Configuration
2574 object.
2575 </description>
2576 </property>
2577
2578 <property>
  <name>hive.server2.authentication.kerberos.principal</name>
2580 <value></value>
2581 <description>
2582 Kerberos server principal
2583 </description>
2584 </property>
2585
2586 <property>
  <name>hive.server2.authentication.kerberos.keytab</name>
2588 <value></value>
2589 <description>
2590 Kerberos keytab file for server principal
2591 </description>
2592 </property>
2593
2594 <property>
2595 <name>hive.server2.authentication.ldap.url</name>
2596 <value></value>
2597 <description>
2598 LDAP connection URL
2599 </description>
2600 </property>
2601
2602 <property>
2603 <name>hive.server2.authentication.ldap.baseDN</name>
2604 <value></value>
2605 <description>
2606 LDAP base DN
2607 </description>
2608 </property>
2609
2610 <property>
2611 <name>hive.server2.enable.doAs</name>
2612 <value>true</value>
2613 <description>
  Setting this property to true will have HiveServer2
2615 execute
2616 hive operations as the user making the calls to it.
2617 </description>
2618 </property>
2619
2620
2621 </configuration>
2622
2623 <!-- Hive Execution Parameters -->
2699
2700 <property>
2701 <name>hive.exec.scratchdir</name>
2702 <value>/tmp/hive-${user.name}</value>
2703 <description>Scratch space for Hive jobs</description>
2704 </property>
2705
2706 <property>
2707 <name>hive.exec.local.scratchdir</name>
2708 <value>/tmp/${user.name}</value>
2709 <description>Local scratch space for Hive jobs</description>
2710 </property>
2711
2712 <property>
2713 <name>hive.test.mode</name>
2714 <value>false</value>
2715 <description>whether hive is running in test mode. If yes, it turns on
2716 sampling and prefixes the output tablename
2717 </description>
2718 </property>
2719
2720 <property>
2721 <name>hive.test.mode.prefix</name>
2722 <value>test_</value>
2723 <description>if hive is running in test mode, prefixes the output
2724 table by this string
2725 </description>
2726 </property>
2727
2728 <!-- If the input table is not bucketed, the denominator of the tablesample
  is determined by the parameter below -->
2730 <!-- For example, the following query: -->
2731 <!-- INSERT OVERWRITE TABLE dest -->
2732 <!-- SELECT col1 from src -->
2733 <!-- would be converted to -->
2734 <!-- INSERT OVERWRITE TABLE test_dest -->
2735 <!-- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1)) -->
2736 <property>
2737 <name>hive.test.mode.samplefreq</name>
2738 <value>32</value>
2739 <description>if hive is running in test mode and table is not
2740 bucketed, sampling frequency
2741 </description>
2742 </property>
2743
2744 <property>
2745 <name>hive.test.mode.nosamplelist</name>
2746 <value></value>
  <description>if hive is running in test mode, don't sample the above
  comma separated list of tables
2749 </description>
2750 </property>
2751
2752 <property>
2753 <name>hive.metastore.uris</name>
2754 <value></value>
2755 <description>Thrift uri for the remote metastore. Used by metastore
2756 client to connect to remote metastore.
2757 </description>
2758 </property>
2759
2760 <property>
2761 <name>javax.jdo.option.ConnectionURL</name>
2762 <value>jdbc:derby:;databaseName=metastore_db;create=true</value>
2763 <description>JDBC connect string for a JDBC metastore</description>
2764 </property>
2765
2766 <property>
2767 <name>javax.jdo.option.ConnectionDriverName</name>
2768 <value>org.apache.derby.jdbc.EmbeddedDriver</value>
2769 <description>Driver class name for a JDBC metastore</description>
2770 </property>
2771
2772 <property>
2773 <name>javax.jdo.PersistenceManagerFactoryClass</name>
2774 <value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
2775 <description>class implementing the jdo persistence</description>
2776 </property>
2777
2778 <property>
2779 <name>javax.jdo.option.DetachAllOnCommit</name>
2780 <value>true</value>
2781 <description>detaches all objects from session so that they can be
2782 used after transaction is committed
2783 </description>
2784 </property>
2785
2786 <property>
2787 <name>javax.jdo.option.NonTransactionalRead</name>
2788 <value>true</value>
2789 <description>reads outside of transactions</description>
2790 </property>
2791
2792 <property>
2793 <name>javax.jdo.option.ConnectionUserName</name>
2794 <value>APP</value>
2795 <description>username to use against metastore database</description>
2796 </property>
2797
2798 <property>
2799 <name>javax.jdo.option.ConnectionPassword</name>
2800 <value>mine</value>
2801 <description>password to use against metastore database</description>
2802 </property>
2803
2804 <property>
2805 <name>javax.jdo.option.Multithreaded</name>
2806 <value>true</value>
2807 <description>Set this to true if multiple threads access metastore
2808 through JDO concurrently.
2809 </description>
2810 </property>
2811
2812 <property>
2813 <name>datanucleus.connectionPoolingType</name>
2814 <value>DBCP</value>
2815 <description>Uses a DBCP connection pool for JDBC metastore
2816 </description>
2817 </property>
2818
2819 <property>
2820 <name>datanucleus.validateTables</name>
2821 <value>false</value>
2822 <description>validates existing schema against code. turn this on if
2823 you want to verify existing schema
2824 </description>
2825 </property>
2826
2827 <property>
2828 <name>datanucleus.validateColumns</name>
2829 <value>false</value>
2830 <description>validates existing schema against code. turn this on if
2831 you want to verify existing schema
2832 </description>
2833 </property>
2834
2835 <property>
2836 <name>datanucleus.validateConstraints</name>
2837 <value>false</value>
2838 <description>validates existing schema against code. turn this on if
2839 you want to verify existing schema
2840 </description>
2841 </property>
2842
2843 <property>
2844 <name>datanucleus.storeManagerType</name>
2845 <value>rdbms</value>
2846 <description>metadata store type</description>
2847 </property>
2848
2849 <property>
2850 <name>datanucleus.autoCreateSchema</name>
2851 <value>true</value>
  <description>creates necessary schema on startup if one doesn't
  exist. Set this to false after creating it once
2854 </description>
2855 </property>
2856
2857 <property>
2858 <name>datanucleus.autoStartMechanismMode</name>
2859 <value>checked</value>
2860 <description>throw exception if metadata tables are incorrect
2861 </description>
2862 </property>
2863
2864 <property>
2865 <name>datanucleus.transactionIsolation</name>
2866 <value>read-committed</value>
2867 <description>Default transaction isolation level for identity
2868 generation.
2869 </description>
2870 </property>
2871
2872 <property>
2873 <name>datanucleus.cache.level2</name>
2874 <value>false</value>
2875 <description>Use a level 2 cache. Turn this off if metadata is changed
2876 independently of hive metastore server
2877 </description>
2878 </property>
2879
2880 <property>
2881 <name>datanucleus.cache.level2.type</name>
2882 <value>SOFT</value>
2883 <description>SOFT=soft reference based cache, WEAK=weak reference
2884 based cache.
2885 </description>
2886 </property>
2887
2888 <property>
2889 <name>datanucleus.identifierFactory</name>
2890 <value>datanucleus</value>
2891 <description>Name of the identifier factory to use when generating
2892 table/column names etc. 'datanucleus' is used for backward
2893 compatibility
2894 </description>
2895 </property>
2896
2897 <property>
2898 <name>datanucleus.plugin.pluginRegistryBundleCheck</name>
2899 <value>LOG</value>
2900 <description>Defines what happens when plugin bundles are found and
2901 are duplicated [EXCEPTION|LOG|NONE]
2902 </description>
2903 </property>
2904
2905 <property>
2906 <name>hive.metastore.warehouse.dir</name>
2907 <value>/user/hive/warehouse</value>
2908 <description>location of default database for the warehouse
2909 </description>
2910 </property>
2911
2912 <property>
2913 <name>hive.metastore.execute.setugi</name>
2914 <value>false</value>
2915 <description>In unsecure mode, setting this property to true will
2916 cause the metastore to execute DFS operations using the client's
2917 reported user and group permissions. Note that this property must be
  set on both the client and server sides. Further note that it's best
  effort. If the client sets it to true and the server sets it to false,
  the client setting will be ignored.
2921 </description>
2922 </property>
2923
2924 <property>
2925 <name>hive.metastore.event.listeners</name>
2926 <value></value>
  <description>list of comma separated listeners for metastore events.
2928 </description>
2929 </property>
2930
2931 <property>
2932 <name>hive.metastore.partition.inherit.table.properties</name>
2933 <value></value>
  <description>list of comma separated keys occurring in table
2935 properties which will get inherited to newly created partitions. *
2936 implies all the keys will get inherited.
2937 </description>
2938 </property>
2939
2940 <property>
2941 <name>hive.metadata.export.location</name>
2942 <value></value>
2943 <description>When used in conjunction with the
2944 org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event
2945 listener, it is the location to which the metadata will be exported.
2946 The default is an empty string, which results in the metadata being
2947 exported to the current user's home directory on HDFS.
2948 </description>
2949 </property>
2950
2951 <property>
2952 <name>hive.metadata.move.exported.metadata.to.trash</name>
2953 <value></value>
2954 <description>When used in conjunction with the
2955 org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event
2956 listener, this setting determines if the metadata that is exported
2957 will subsequently be moved to the user's trash directory alongside
2958 the dropped table data. This ensures that the metadata will be
2959 cleaned up along with the dropped table data.
2960 </description>
2961 </property>
2962
2963 <property>
2964 <name>hive.metastore.partition.name.whitelist.pattern</name>
2965 <value></value>
2966 <description>Partition names will be checked against this regex
2967 pattern and rejected if not matched.
2968 </description>
2969 </property>
2970
2971 <property>
2972 <name>hive.metastore.end.function.listeners</name>
2973 <value></value>
2974 <description>list of comma separated listeners for the end of
2975 metastore functions.
2976 </description>
2977 </property>
2978
2979 <property>
2980 <name>hive.metastore.event.expiry.duration</name>
2981 <value>0</value>
2982 <description>Duration after which events expire from events table (in
2983 seconds)
2984 </description>
2985 </property>
2986
2987 <property>
2988 <name>hive.metastore.event.clean.freq</name>
2989 <value>0</value>
2990 <description>Frequency at which timer task runs to purge expired
  events in metastore (in seconds).
2992 </description>
2993 </property>
2994
2995 <property>
2996 <name>hive.metastore.connect.retries</name>
2997 <value>5</value>
2998 <description>Number of retries while opening a connection to metastore
2999 </description>
3000 </property>
3001
3002 <property>
3003 <name>hive.metastore.failure.retries</name>
3004 <value>3</value>
3005 <description>Number of retries upon failure of Thrift metastore calls
3006 </description>
3007 </property>
3008
3009 <property>
3010 <name>hive.metastore.client.connect.retry.delay</name>
3011 <value>1</value>
3012 <description>Number of seconds for the client to wait between
3013 consecutive connection attempts
3014 </description>
3015 </property>
3016
3017 <property>
3018 <name>hive.metastore.client.socket.timeout</name>
3019 <value>20</value>
3020 <description>MetaStore Client socket timeout in seconds</description>
3021 </property>
3022
3023 <property>
3024 <name>hive.metastore.rawstore.impl</name>
3025 <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
3026 <description>Name of the class that implements
3027 org.apache.hadoop.hive.metastore.rawstore interface. This class is
3028 used to store and retrieval of raw metadata objects such as table,
3029 database
3030 </description>
3031 </property>
3032
3033 <property>
3034 <name>hive.metastore.batch.retrieve.max</name>
3035 <value>300</value>
  <description>Maximum number of objects (tables/partitions) that can
  be retrieved from metastore in one batch. The higher the number, the
  fewer round trips are needed to the Hive metastore server, but it may
  also cause a higher memory requirement at the client
3040 side.
3041 </description>
3042 </property>
3043
3044 <property>
3045 <name>hive.metastore.batch.retrieve.table.partition.max</name>
3046 <value>1000</value>
3047 <description>Maximum number of table partitions that metastore
3048 internally retrieves in one batch.
3049 </description>
3050 </property>
3051
3052 <property>
3053 <name>hive.default.fileformat</name>
3054 <value>TextFile</value>
3055 <description>Default file format for CREATE TABLE statement. Options
3056 are TextFile and SequenceFile. Users can explicitly say CREATE TABLE
3057 ... STORED AS &lt;TEXTFILE|SEQUENCEFILE&gt; to override</description>
3058 </property>
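
  <!-- Per-table override sketch (table and column names are made up): -->
  <!-- CREATE TABLE logs (line STRING) STORED AS SEQUENCEFILE; -->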
3059
3060 <property>
3061 <name>hive.fileformat.check</name>
3062 <value>true</value>
3063 <description>Whether to check file format or not when loading data
3064 files
3065 </description>
3066 </property>
3067
3068 <property>
3069 <name>hive.map.aggr</name>
3070 <value>true</value>
3071 <description>Whether to use map-side aggregation in Hive Group By
3072 queries
3073 </description>
3074 </property>
3075
3076 <property>
3077 <name>hive.groupby.skewindata</name>
3078 <value>false</value>
3079 <description>Whether there is skew in data to optimize group by
3080 queries
3081 </description>
3082 </property>
3083
3084 <property>
3085 <name>hive.optimize.multigroupby.common.distincts</name>
3086 <value>true</value>
3087 <description>Whether to optimize a multi-groupby query with the same
3088 distinct.
3089 Consider a query like:
3090
3091 from src
3092 insert overwrite table dest1
3093 select col1, count(distinct colx) group by
3094 col1
3095 insert overwrite table
3096 dest2 select col2, count(distinct colx) group by
3097 col2;
3098
3099 With this
3100 parameter set to true, first we spray by the distinct value
3101 (colx),
3102 and then
  perform the 2 group bys. This makes sense if map-side
  aggregation is turned off. However, with map-side aggregation, it
  might be useful in some cases to treat the 2 inserts independently,
  thereby performing the query above in 2 MR jobs instead of 3 (due to
3110 spraying
3111 by distinct key first).
3112 If this parameter is turned off, we
  don't consider the fact that the
3114 distinct key is the same across
3115 different MR jobs.
3116 </description>
3117 </property>
3118
3119 <property>
3120 <name>hive.groupby.mapaggr.checkinterval</name>
3121 <value>100000</value>
  <description>Number of rows after which the size check of the grouping
  keys/aggregation classes is performed
3124 </description>
3125 </property>
3126
3127 <property>
3128 <name>hive.mapred.local.mem</name>
3129 <value>0</value>
3130 <description>For local mode, memory of the mappers/reducers
3131 </description>
3132 </property>
3133
3134 <property>
3135 <name>hive.mapjoin.followby.map.aggr.hash.percentmemory</name>
3136 <value>0.3</value>
  <description>Portion of total memory to be used by map-side group
3138 aggregation hash table, when this group by is followed by map join
3139 </description>
3140 </property>
3141
3142 <property>
3143 <name>hive.map.aggr.hash.force.flush.memory.threshold</name>
3144 <value>0.9</value>
  <description>The max memory to be used by map-side group aggregation
3146 hash table, if the memory usage is higher than this number, force to
3147 flush data
3148 </description>
3149 </property>
3150
3151 <property>
3152 <name>hive.map.aggr.hash.percentmemory</name>
3153 <value>0.5</value>
  <description>Portion of total memory to be used by map-side group
3155 aggregation hash table
3156 </description>
3157 </property>
3158
3159 <property>
3160 <name>hive.map.aggr.hash.min.reduction</name>
3161 <value>0.5</value>
3162 <description>Hash aggregation will be turned off if the ratio between
3163 hash
3164 table size and input rows is bigger than this number. Set to 1 to
3165 make
3166 sure
3167 hash aggregation is never turned off.
3168 </description>
3169 </property>
3170
3171 <property>
3172 <name>hive.optimize.cp</name>
3173 <value>true</value>
3174 <description>Whether to enable column pruner</description>
3175 </property>
3176
3177 <property>
3178 <name>hive.optimize.index.filter</name>
3179 <value>false</value>
3180 <description>Whether to enable automatic use of indexes</description>
3181 </property>
3182
3183 <property>
3184 <name>hive.optimize.index.groupby</name>
3185 <value>false</value>
3186 <description>Whether to enable optimization of group-by queries using
3187 Aggregate indexes.
3188 </description>
3189 </property>
3190
3191 <property>
3192 <name>hive.optimize.ppd</name>
3193 <value>true</value>
3194 <description>Whether to enable predicate pushdown</description>
3195 </property>
3196
3197 <property>
3198 <name>hive.optimize.ppd.storage</name>
3199 <value>true</value>
3200 <description>Whether to push predicates down into storage handlers.
3201 Ignored when hive.optimize.ppd is false.
3202 </description>
3203 </property>
3204
3205 <property>
3206 <name>hive.ppd.recognizetransivity</name>
3207 <value>true</value>
3208 <description>Whether to transitively replicate predicate filters over
3209 equijoin conditions.
3210 </description>
3211 </property>
3212
3213 <property>
3214 <name>hive.optimize.groupby</name>
3215 <value>true</value>
3216 <description>Whether to enable the bucketed group by from bucketed
3217 partitions/tables.
3218 </description>
3219 </property>
3220
3221 <property>
3222 <name>hive.optimize.skewjoin.compiletime</name>
3223 <value>false</value>
3224 <description>Whether to create a separate plan for skewed keys for the
3225 tables in the join.
3226 This is based on the skewed keys stored in the
3227 metadata. At compile time,
3228 the plan is broken
3229 into different joins: one
3230 for the skewed keys, and the other for the
3231 remaining keys. And then,
3232 a
3233 union is performed for the 2 joins generated above. So unless the
3234 same skewed key is present
3235 in both the joined tables, the join for the
3236 skewed key will be
3237 performed as a map-side join.
3238
3239 The main difference
  between this parameter and hive.optimize.skewjoin is
3241 that this
3242 parameter
3243 uses the skew information stored in the metastore to
3244 optimize the plan at
3245 compile time itself.
3246 If there is no skew
3247 information in the metadata, this parameter will
  not have any effect.
3249 Both hive.optimize.skewjoin.compiletime and hive.optimize.skewjoin
3250 should
3251 be set to true.
3252 Ideally, hive.optimize.skewjoin should be
3253 renamed as
3254 hive.optimize.skewjoin.runtime, but not doing
3255 so for
3256 backward compatibility.
3257
3258 If the skew information is correctly stored in
3259 the metadata,
3260 hive.optimize.skewjoin.compiletime
3261 would change the query
3262 plan to take care of it, and hive.optimize.skewjoin
3263 will be a no-op.
3264 </description>
3265 </property>
3266
3267 <property>
3268 <name>hive.optimize.union.remove</name>
3269 <value>false</value>
3270 <description>
3271 Whether to remove the union and push the operators
3272 between union and the
3273 filesink above
3274 union. This avoids an extra scan
3275 of the output by union. This is
3276 independently useful for union
3277 queries, and specially useful when hive.optimize.skewjoin.compiletime
3278 is set
3279 to true, since an
3280 extra union is inserted.
3281
3282 The merge is triggered
3283 if either of hive.merge.mapfiles or
3284 hive.merge.mapredfiles is set to
3285 true.
3286 If the user has set hive.merge.mapfiles to true and
3287 hive.merge.mapredfiles to false, the idea was the
3288 number of reducers
3289 are few, so the number of files anyway are small.
3290 However, with this
3291 optimization,
3292 we are increasing the number of files possibly by a big
3293 margin. So, we
  merge aggressively.
3295 </description>
3296 </property>
3297
3298 <property>
3299 <name>hive.mapred.supports.subdirectories</name>
3300 <value>false</value>
3301 <description>Whether the version of hadoop which is running supports
3302 sub-directories for tables/partitions.
3303 Many hive optimizations can be
3304 applied if the hadoop version supports
3305 sub-directories for
3306 tables/partitions. It was added by MAPREDUCE-1501
3307 </description>
3308 </property>
3309
3310 <property>
3311 <name>hive.multigroupby.singlemr</name>
3312 <value>false</value>
3313 <description>Whether to optimize multi group by query to generate
3314 single M/R
3315 job plan. If the multi group by query has common group by
3316 keys, it will
3317 be
3318 optimized to generate single M/R job.
3319 </description>
3320 </property>
3321
3322 <property>
3323 <name>hive.map.groupby.sorted</name>
3324 <value>false</value>
3325 <description>If the bucketing/sorting properties of the table exactly
3326 match the grouping key, whether to
3327 perform the group by in the mapper
3328 by using BucketizedHiveInputFormat. The
3329 only downside to this
3330 is that
3331 it limits the number of mappers to the number of files.
3332 </description>
3333 </property>
3334
3335 <property>
3336 <name>hive.map.groupby.sorted.testmode</name>
3337 <value>false</value>
3338 <description>If the bucketing/sorting properties of the table exactly
3339 match the grouping key, whether to
3340 perform the group by in the mapper
3341 by using BucketizedHiveInputFormat. If
3342 the test mode is set, the plan
3343 is not converted, but a query property is set to denote the same.
3344 </description>
3345 </property>
3346
3347 <property>
3348 <name>hive.new.job.grouping.set.cardinality</name>
3349 <value>30</value>
3350 <description>
3351 Whether a new map-reduce job should be launched for
3352 grouping
3353 sets/rollups/cubes.
3354 For a query like: select a, b, c, count(1)
3355 from T group by a, b, c with
3356 rollup;
3357 4 rows are created per row: (a, b,
3358 c), (a, b, null), (a, null, null),
3359 (null, null, null).
3360 This can lead to
3361 explosion across map-reduce boundary if the cardinality
3362 of T is very
3363 high,
3364 and map-side aggregation does not do a very good job.
3365
3366 This
3367 parameter decides if hive should add an additional map-reduce job.
3368 If
3369 the grouping set
  cardinality (4 in the example above) is more than
3371 this value, a new MR job is
3372 added under the
  assumption that the original
3374 group by will reduce the data size.
3375 </description>
3376 </property>
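
  <!-- To make the cardinality concrete for the rollup example above: -->
  <!-- select a, b, c, count(1) from T group by a, b, c with rollup; -->
  <!-- expands each input row into the 4 grouping sets (a, b, c), -->
  <!-- (a, b, null), (a, null, null) and (null, null, null), so the -->
  <!-- grouping set cardinality is 4, well below the default of 30. -->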
3377
3378 <property>
3379 <name>hive.join.emit.interval</name>
3380 <value>1000</value>
3381 <description>How many rows in the right-most join operand Hive should
3382 buffer before emitting the join result.
3383 </description>
3384 </property>
3385
3386 <property>
3387 <name>hive.join.cache.size</name>
3388 <value>25000</value>
3389 <description>How many rows in the joining tables (except the streaming
3390 table) should be cached in memory.
3391 </description>
3392 </property>
3393
3394 <property>
3395 <name>hive.mapjoin.bucket.cache.size</name>
3396 <value>100</value>
  <description>How many values for each key in the map-joined table
3398 should be cached in memory.
3399 </description>
3400 </property>
3401
3402 <property>
3403 <name>hive.mapjoin.cache.numrows</name>
3404 <value>25000</value>
3405 <description>How many rows should be cached by jdbm for map join.
3406 </description>
3407 </property>
3408
3409 <property>
3410 <name>hive.optimize.skewjoin</name>
3411 <value>false</value>
3412 <description>Whether to enable skew join optimization.
3413 The algorithm is
3414 as follows: At runtime, detect the keys with a large
3415 skew. Instead of
3416 processing those keys, store them temporarily in a hdfs directory. In
3417 a
3418 follow-up map-reduce
3419 job, process those skewed keys. The same key
3420 need not be skewed for all
3421 the tables, and so,
3422 the follow-up map-reduce
3423 job (for the skewed keys) would be much faster,
3424 since it would be a
3425 map-join.
3426 </description>
3427 </property>
3428
3429 <property>
3430 <name>hive.skewjoin.key</name>
3431 <value>100000</value>
3432 <description>Determine if we get a skew key in join. If we see more
3433 than the specified number of rows with the same key in join operator,
  we consider the key a skew join key.
3435 </description>
3436 </property>
3437
3438 <property>
3439 <name>hive.skewjoin.mapjoin.map.tasks</name>
3440 <value>10000</value>
  <description> Determine the number of map tasks used in the follow up
3442 map join job
3443 for a skew join. It should be used together with
3444 hive.skewjoin.mapjoin.min.split
3445 to perform a fine grained control.
3446 </description>
3447 </property>
3448
3449 <property>
3450 <name>hive.skewjoin.mapjoin.min.split</name>
3451 <value>33554432</value>
  <description> Determine the maximum number of map tasks used in the
3453 follow up map join job
3454 for a skew join by specifying the minimum split
3455 size. It should be used
3456 together with
3457 hive.skewjoin.mapjoin.map.tasks
3458 to perform a fine grained control.
3459 </description>
3460 </property>
3461
3462 <property>
3463 <name>hive.mapred.mode</name>
3464 <value>nonstrict</value>
3465 <description>The mode in which the hive operations are being
  performed. In strict mode, some risky queries are not allowed to run.
  They include:
  Cartesian Product.
  No partition being picked up for a query.
  Comparing bigints and strings.
  Comparing bigints and doubles.
  Orderby without limit.
3476 </description>
3477 </property>
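
  <!-- Examples rejected under strict mode (table names are illustrative): -->
  <!-- select * from partitioned_tbl; (no partition predicate) -->
  <!-- select * from src order by key; (order by without limit) -->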
3478
3479 <property>
3480 <name>hive.enforce.bucketmapjoin</name>
3481 <value>false</value>
3482 <description>If the user asked for bucketed map-side join, and it
3483 cannot be performed,
  should the query fail or not? For example, if the
3485 buckets in the tables being
3486 joined are
3487 not a multiple of each other,
3488 bucketed map-side join cannot be
3489 performed, and the
3490 query will fail if
3491 hive.enforce.bucketmapjoin is set to true.
3492 </description>
3493 </property>
3494
3495 <property>
3496 <name>hive.exec.script.maxerrsize</name>
3497 <value>100000</value>
3498 <description>Maximum number of bytes a script is allowed to emit to
3499 standard error (per map-reduce task). This prevents runaway scripts
  from filling log partitions to capacity
3501 </description>
3502 </property>
3503
3504 <property>
3505 <name>hive.exec.script.allow.partial.consumption</name>
3506 <value>false</value>
3507 <description> When enabled, this option allows a user script to exit
3508 successfully without consuming all the data from the standard input.
3509 </description>
3510 </property>
3511
3512 <property>
3513 <name>hive.script.operator.id.env.var</name>
3514 <value>HIVE_SCRIPT_OPERATOR_ID</value>
3515 <description> Name of the environment variable that holds the unique
3516 script operator ID in the user's transform function (the custom
3517 mapper/reducer that the user has specified in the query)
3518 </description>
3519 </property>
3520
3521 <property>
3522 <name>hive.script.operator.truncate.env</name>
3523 <value>false</value>
3524 <description>Truncate each environment variable for external script in
3525 scripts operator to 20KB (to fit system limits)
3526 </description>
3527 </property>
3528
3529 <property>
3530 <name>hive.exec.compress.output</name>
3531 <value>false</value>
3532 <description> This controls whether the final outputs of a query (to a
  local/hdfs file or a hive table) are compressed. The compression codec
3534 and other options are determined from hadoop config variables
3535 mapred.output.compress*
3536 </description>
3537 </property>
3538
3539 <property>
3540 <name>hive.exec.compress.intermediate</name>
3541 <value>false</value>
3542 <description> This controls whether intermediate files produced by
3543 hive between multiple map-reduce jobs are compressed. The compression
3544 codec and other options are determined from hadoop config variables
3545 mapred.output.compress*
3546 </description>
3547 </property>
3548
3549 <property>
3550 <name>hive.exec.parallel</name>
3551 <value>false</value>
3552 <description>Whether to execute jobs in parallel</description>
3553 </property>
3554
3555 <property>
3556 <name>hive.exec.parallel.thread.number</name>
3557 <value>8</value>
3558 <description>How many jobs at most can be executed in parallel
3559 </description>
3560 </property>
3561
3562 <property>
3563 <name>hive.exec.rowoffset</name>
3564 <value>false</value>
3565 <description>Whether to provide the row offset virtual column
3566 </description>
3567 </property>
3568
3569 <property>
3570 <name>hive.task.progress</name>
3571 <value>false</value>
3572 <description>Whether Hive should periodically update task progress
3573 counters during execution. Enabling this allows task progress to be
3574 monitored more closely in the job tracker, but may impose a
3575 performance penalty. This flag is automatically set to true for jobs
3576 with hive.exec.dynamic.partition set to true.
3577 </description>
3578 </property>
3579
3580 <property>
3581 <name>hive.hwi.war.file</name>
3582 <value>lib/hive-hwi-@VERSION@.war</value>
3583 <description>This sets the path to the HWI war file, relative to
3584 ${HIVE_HOME}.
3585 </description>
3586 </property>
3587
3588 <property>
3589 <name>hive.hwi.listen.host</name>
3590 <value>0.0.0.0</value>
3591 <description>This is the host address the Hive Web Interface will
3592 listen on
3593 </description>
3594 </property>
3595
3596 <property>
3597 <name>hive.hwi.listen.port</name>
3598 <value>9999</value>
3599 <description>This is the port the Hive Web Interface will listen on
3600 </description>
3601 </property>
3602
3603 <property>
3604 <name>hive.exec.pre.hooks</name>
3605 <value></value>
3606 <description>Comma-separated list of pre-execution hooks to be invoked
3607 for each statement. A pre-execution hook is specified as the name of
3608 a Java class which implements the
3609 org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
3610 </description>
3611 </property>
3612
3613 <property>
3614 <name>hive.exec.post.hooks</name>
3615 <value></value>
3616 <description>Comma-separated list of post-execution hooks to be
3617 invoked for each statement. A post-execution hook is specified as the
3618 name of a Java class which implements the
3619 org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
3620 </description>
3621 </property>
3622
3623 <property>
3624 <name>hive.exec.failure.hooks</name>
3625 <value></value>
3626 <description>Comma-separated list of on-failure hooks to be invoked
3627 for each statement. An on-failure hook is specified as the name of a
3628 Java class which implements the
3629 org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
3630 </description>
3631 </property>
3632
3633 <property>
3634 <name>hive.metastore.init.hooks</name>
3635 <value></value>
3636 <description>A comma separated list of hooks to be invoked at the
3637 beginning of HMSHandler initialization. An init hook is specified as
3638 the name of a Java class which extends
3639 org.apache.hadoop.hive.metastore.MetaStoreInitListener.
3640 </description>
3641 </property>
3642
3643 <property>
3644 <name>hive.client.stats.publishers</name>
3645 <value></value>
3646 <description>Comma-separated list of statistics publishers to be
3647 invoked on counters on each job. A client stats publisher is
3648 specified as the name of a Java class which implements the
3649 org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface.
3650 </description>
3651 </property>
3652
3653 <property>
3654 <name>hive.client.stats.counters</name>
3655 <value></value>
3656 <description>Subset of counters that should be of interest for
3657 hive.client.stats.publishers (when one wants to limit their
3658 publishing). Non-display names should be used
3659 </description>
3660 </property>
3661
3662 <property>
3663 <name>hive.merge.mapfiles</name>
3664 <value>true</value>
3665 <description>Merge small files at the end of a map-only job
3666 </description>
3667 </property>
3668
3669 <property>
3670 <name>hive.merge.mapredfiles</name>
3671 <value>false</value>
3672 <description>Merge small files at the end of a map-reduce job
3673 </description>
3674 </property>
3675
3676 <property>
3677 <name>hive.heartbeat.interval</name>
3678 <value>1000</value>
3679 <description>Send a heartbeat after this interval - used by mapjoin
3680 and filter operators
3681 </description>
3682 </property>
3683
3684 <property>
3685 <name>hive.merge.size.per.task</name>
3686 <value>256000000</value>
3687 <description>Size of merged files at the end of the job</description>
3688 </property>
3689
3690 <property>
3691 <name>hive.merge.smallfiles.avgsize</name>
3692 <value>16000000</value>
3693 <description>When the average output file size of a job is less than
3694 this number, Hive will start an additional map-reduce job to merge
3695 the output files into bigger files. This is only done for map-only
3696 jobs if hive.merge.mapfiles is true, and for map-reduce jobs if
3697 hive.merge.mapredfiles is true.
3698 </description>
3699 </property>
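<!-- Illustrative example: a session that merges the small outputs of both
 ! map-only and map-reduce jobs into roughly 256MB files could set:
 ! set hive.merge.mapfiles=true;
 ! set hive.merge.mapredfiles=true;
 ! set hive.merge.size.per.task=256000000;
 ! set hive.merge.smallfiles.avgsize=16000000; -->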
3700
3701 <property>
3702 <name>hive.mapjoin.smalltable.filesize</name>
3703 <value>25000000</value>
3704 <description>The threshold for the input file size of the small
3705 tables; if the file size is smaller than this threshold, Hive will try
3706 to convert the common join into a map join
3707 </description>
3708 </property>
3709
3710 <property>
3711 <name>hive.ignore.mapjoin.hint</name>
3712 <value>true</value>
3713 <description>Ignore the mapjoin hint</description>
3714 </property>
3715
3716 <property>
3717 <name>hive.mapjoin.localtask.max.memory.usage</name>
3718 <value>0.90</value>
3719 <description>How much memory the local task can take to hold the
3720 key/value pairs in the in-memory hash table; if the local task's
3721 memory usage exceeds this fraction, the local task will abort itself,
3722 meaning the data of the small table is too large to be held in
3723 memory.
3724 </description>
3725 </property>
3726
3727 <property>
3728 <name>hive.mapjoin.followby.gby.localtask.max.memory.usage</name>
3729 <value>0.55</value>
3730 <description>How much memory the local task can take to hold the
3731 key/value pairs in the in-memory hash table when this map join is
3732 followed by a group by; if the local task's memory usage exceeds this
3733 fraction, the local task will abort itself, meaning the data of the
3734 small table is too large to be held in memory.
3735 </description>
3736 </property>
3737
3738 <property>
3739 <name>hive.mapjoin.check.memory.rows</name>
3740 <value>100000</value>
3741 <description>The number of rows processed between checks of the
3742 memory usage
3743 </description>
3744 </property>
3745
3746 <property>
3747 <name>hive.auto.convert.join</name>
3748 <value>false</value>
3749 <description>Whether Hive enables the optimization that converts a
3750 common join into a map join based on the input file size
3751 </description>
3752 </property>
3753
3754 <property>
3755 <name>hive.auto.convert.join.noconditionaltask</name>
3756 <value>true</value>
3757 <description>Whether Hive enables the optimization that converts a
3758 common join into a map join based on the input file size. If this
3759 parameter is on, and the sum of sizes for n-1 of the
3760 tables/partitions for an n-way join is smaller than the specified
3761 size, the join is directly converted to a map join (there is no
3762 conditional task).
3763 </description>
3767 </property>
3768
3769 <property>
3770 <name>hive.auto.convert.join.noconditionaltask.size</name>
3771 <value>10000000</value>
3772 <description>If hive.auto.convert.join.noconditionaltask is off, this
3773 parameter does not take effect. However, if it is on, and the sum of
3774 sizes for n-1 of the tables/partitions for an n-way join is smaller
3775 than this size, the join is directly converted to a map join (there
3776 is no conditional task). The default is 10MB.
3777 </description>
3781 </property>
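<!-- Illustrative example: to let Hive compile joins against small tables
 ! directly into map joins, without a conditional task:
 ! set hive.auto.convert.join=true;
 ! set hive.auto.convert.join.noconditionaltask=true;
 ! set hive.auto.convert.join.noconditionaltask.size=10000000;
 ! With these settings, a join whose n-1 smaller inputs together stay
 ! under 10MB is converted to a map join at compile time. -->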
3782
3783 <property>
3784 <name>hive.optimize.mapjoin.mapreduce</name>
3785 <value>false</value>
3786 <description>If hive.auto.convert.join is off, this parameter does
3787 not take effect. If it is on, and if there are map-join jobs followed
3788 by a map-reduce job (e.g. a group by), each map-only job is merged
3789 with the following map-reduce job.
3790 </description>
3795 </property>
3796
3797 <property>
3798 <name>hive.script.auto.progress</name>
3799 <value>false</value>
3800 <description>Whether Hive Transform/Map/Reduce Clause should
3801 automatically send progress information to TaskTracker to avoid the
3802 task getting killed because of inactivity. Hive sends progress
3803 information when the script is outputting to stderr. This option
3804 removes the need of periodically producing stderr messages, but
3805 users should be cautious because this may prevent TaskTracker from
3806 killing scripts stuck in infinite loops.
3807 </description>
3808 </property>
3809
3810 <property>
3811 <name>hive.script.serde</name>
3812 <value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value>
3813 <description>The default serde for transmitting input data to and
3814 reading output data from the user scripts.
3815 </description>
3816 </property>
3817
3818 <property>
3819 <name>hive.binary.record.max.length</name>
3820 <value>1000</value>
3821 <description>Read from a binary stream and treat each
3822 hive.binary.record.max.length bytes as a record. The last record
3823 before the end of stream can have less than
3824 hive.binary.record.max.length bytes.
3825 </description>
3827 </property>
3828
3829
3830 <property>
3831 <name>hive.script.recordreader</name>
3832 <value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value>
3833 <description>The default record reader for reading data from the user
3834 scripts.
3835 </description>
3836 </property>
3837
3838 <property>
3839 <name>hive.script.recordwriter</name>
3840 <value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value>
3841 <description>The default record writer for writing data to the user
3842 scripts.
3843 </description>
3844 </property>
3845
3846 <property>
3847 <name>hive.input.format</name>
3848 <value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
3849 <description>The default input format. Set this to HiveInputFormat if
3850 you encounter problems with CombineHiveInputFormat.
3851 </description>
3852 </property>
3853
3854 <property>
3855 <name>hive.udtf.auto.progress</name>
3856 <value>false</value>
3857 <description>Whether Hive should automatically send progress
3858 information to TaskTracker when using UDTF's to prevent the task
3859 getting killed because of inactivity. Users should be cautious
3860 because this may prevent TaskTracker from killing tasks with
3861 infinite loops.
3862 </description>
3863 </property>
3864
3865 <property>
3866 <name>hive.mapred.reduce.tasks.speculative.execution</name>
3867 <value>true</value>
3868 <description>Whether speculative execution for reducers should be
3869 turned on.
3870 </description>
3871 </property>
3872
3873 <property>
3874 <name>hive.exec.counters.pull.interval</name>
3875 <value>1000</value>
3876 <description>The interval with which to poll the JobTracker for the
3877 counters of the running job. The smaller it is, the more load there
3878 will be on the jobtracker; the higher it is, the less granular the
3879 counter updates will be.
3880 </description>
3881 </property>
3882
3883 <property>
3884 <name>hive.querylog.location</name>
3885 <value>/tmp/${user.name}</value>
3886 <description>
3887 Location of Hive run time structured log file
3888 </description>
3889 </property>
3890
3891 <property>
3892 <name>hive.querylog.enable.plan.progress</name>
3893 <value>true</value>
3894 <description>
3895 Whether to log the plan's progress every time a job's progress is
3896 checked. These logs are written to the location specified by
3897 hive.querylog.location
3898 </description>
3901 </property>
3902
3903 <property>
3904 <name>hive.querylog.plan.progress.interval</name>
3905 <value>60000</value>
3906 <description>
3907 The interval to wait between logging the plan's progress, in
3908 milliseconds. If there is a whole number percentage change in the
3909 progress of the mappers or the reducers, the progress is logged
3910 regardless of this value. The actual interval will be the ceiling of
3911 (this value divided by the value of hive.exec.counters.pull.interval)
3912 multiplied by the value of hive.exec.counters.pull.interval, i.e. if
3913 it does not divide evenly by the value of
3914 hive.exec.counters.pull.interval it will be logged less frequently
3915 than specified. This only has an effect if
3916 hive.querylog.enable.plan.progress is set to true.
3917 </description>
3929 </property>
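<!-- Worked example of the rounding described above: with
 ! hive.querylog.plan.progress.interval=60000 and
 ! hive.exec.counters.pull.interval=25000, the actual logging interval is
 ! ceil(60000 / 25000) * 25000 = 3 * 25000 = 75000 ms, i.e. less frequent
 ! than the requested 60000 ms. -->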
3930
3931 <property>
3932 <name>hive.enforce.bucketing</name>
3933 <value>false</value>
3934 <description>Whether bucketing is enforced. If true, while inserting
3935 into the table, bucketing is enforced.
3936 </description>
3937 </property>
3938
3939 <property>
3940 <name>hive.enforce.sorting</name>
3941 <value>false</value>
3942 <description>Whether sorting is enforced. If true, while inserting
3943 into the table, sorting is enforced.
3944 </description>
3945 </property>
3946
3947 <property>
3948 <name>hive.optimize.bucketingsorting</name>
3949 <value>true</value>
3950 <description>If hive.enforce.bucketing or hive.enforce.sorting is
3951 true, don't create a reducer for enforcing bucketing/sorting for
3952 queries of the form:
3953 insert overwrite table T2 select * from T1;
3954 where T1 and T2 are bucketed/sorted by the same keys into the same
3955 number of buckets.
3956 </description>
3959 </property>
3960
3961 <property>
3962 <name>hive.enforce.sortmergebucketmapjoin</name>
3963 <value>false</value>
3964 <description>If the user asked for a sort-merge bucketed map-side
3965 join, and it cannot be performed, should the query fail or not?
3966 </description>
3968 </property>
3969
3970 <property>
3971 <name>hive.auto.convert.sortmerge.join</name>
3972 <value>false</value>
3973 <description>Whether the join will be automatically converted to a
3974 sort-merge join, if the joined tables pass the criteria for a
3975 sort-merge join.
3976 </description>
3977 </property>
3978
3979 <property>
3980 <name>hive.auto.convert.sortmerge.join.bigtable.selection.policy
3981 </name>
3982 <value>org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ
3983 </value>
3984 <description>The policy to choose the big table for automatic
3985 conversion to sort-merge join. By default, the table with the
3986 largest partitions is assigned as the big table. All policies are:
3987 . based on position of the table - the leftmost table is selected:
3988 org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSMJ.
3989 . based on total size (all the partitions selected in the query) of
3990 the table:
3991 org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ.
3992 . based on average size (all the partitions selected in the query)
3993 of the table:
3994 org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.
3995 New policies can be added in the future.
3996 </description>
4001 </property>
4002
4003 <property>
4004 <name>hive.metastore.ds.connection.url.hook</name>
4005 <value></value>
4006 <description>Name of the hook to use for retrieving the JDO connection
4007 URL. If empty, the value in javax.jdo.option.ConnectionURL is used
4008 </description>
4009 </property>
4010
4011 <property>
4012 <name>hive.metastore.ds.retry.attempts</name>
4013 <value>1</value>
4014 <description>The number of times to retry a metastore call if there
4015 is a connection error
4016 </description>
4017 </property>
4018
4019 <property>
4020 <name>hive.metastore.ds.retry.interval</name>
4021 <value>1000</value>
4022 <description>The number of milliseconds between metastore retry
4023 attempts
4024 </description>
4025 </property>
4026
4027 <property>
4028 <name>hive.metastore.server.min.threads</name>
4029 <value>200</value>
4030 <description>Minimum number of worker threads in the Thrift server's
4031 pool.
4032 </description>
4033 </property>
4034
4035 <property>
4036 <name>hive.metastore.server.max.threads</name>
4037 <value>100000</value>
4038 <description>Maximum number of worker threads in the Thrift server's
4039 pool.
4040 </description>
4041 </property>
4042
4043 <property>
4044 <name>hive.metastore.server.tcp.keepalive</name>
4045 <value>true</value>
4046 <description>Whether to enable TCP keepalive for the metastore server.
4047 Keepalive will prevent accumulation of half-open connections.
4048 </description>
4049 </property>
4050
4051 <property>
4052 <name>hive.metastore.sasl.enabled</name>
4053 <value>false</value>
4054 <description>If true, the metastore thrift interface will be secured
4055 with SASL. Clients must authenticate with Kerberos.
4056 </description>
4057 </property>
4058
4059 <property>
4060 <name>hive.metastore.thrift.framed.transport.enabled</name>
4061 <value>false</value>
4062 <description>If true, the metastore thrift interface will use
4063 TFramedTransport. When false (default) a standard TTransport is used.
4064 </description>
4065 </property>
4066
4067 <property>
4068 <name>hive.metastore.kerberos.keytab.file</name>
4069 <value></value>
4070 <description>The path to the Kerberos Keytab file containing the
4071 metastore thrift server's service principal.
4072 </description>
4073 </property>
4074
4075 <property>
4076 <name>hive.metastore.kerberos.principal</name>
4077 <value>hive-metastore/_HOST@EXAMPLE.COM</value>
4078 <description>The service principal for the metastore thrift server.
4079 The special string _HOST will be replaced automatically with the
4080 correct host name.
4081 </description>
4082 </property>
4083
4084 <property>
4085 <name>hive.cluster.delegation.token.store.class</name>
4086 <value>org.apache.hadoop.hive.thrift.MemoryTokenStore</value>
4087 <description>The delegation token store implementation. Set to
4088 org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced
4089 cluster.
4090 </description>
4091 </property>
4092
4093 <property>
4094 <name>hive.cluster.delegation.token.store.zookeeper.connectString
4095 </name>
4096 <value>localhost:2181</value>
4097 <description>The ZooKeeper token store connect string.</description>
4098 </property>
4099
4100 <property>
4101 <name>hive.cluster.delegation.token.store.zookeeper.znode</name>
4102 <value>/hive/cluster/delegation</value>
4103 <description>The root path for token store data.</description>
4104 </property>
4105
4106 <property>
4107 <name>hive.cluster.delegation.token.store.zookeeper.acl</name>
4108 <value>sasl:hive/host1@EXAMPLE.COM:cdrwa,sasl:hive/host2@EXAMPLE.COM:cdrwa
4109 </value>
4110 <description>ACL for token store entries. List all server principals
4111 for the cluster, comma separated.
4112 </description>
4113 </property>
4114
4115 <property>
4116 <name>hive.metastore.cache.pinobjtypes</name>
4117 <value>Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order
4118 </value>
4119 <description>List of comma separated metastore object types that
4120 should be pinned in the cache
4121 </description>
4122 </property>
4123
4124 <property>
4125 <name>hive.optimize.reducededuplication</name>
4126 <value>true</value>
4127 <description>Remove extra map-reduce jobs if the data is already
4128 clustered by the same key which needs to be used again. This should
4129 always be set to true. Since it is a new feature, it has been made
4130 configurable.
4131 </description>
4132 </property>
4133
4134 <property>
4135 <name>hive.optimize.reducededuplication.min.reducer</name>
4136 <value>4</value>
4137 <description>Reduce deduplication merges two RSs by moving the
4138 key/parts/reducer-num of the child RS to the parent RS. That means
4139 that if the reducer-num of the child RS is fixed (order by or forced
4140 bucketing) and small, it can produce a very slow, single MR job. The
4141 optimization will be disabled if the number of reducers is less than
4142 the specified value.
4143 </description>
4147 </property>
4148
4149 <property>
4150 <name>hive.exec.dynamic.partition</name>
4151 <value>true</value>
4152 <description>Whether or not to allow dynamic partitions in DML/DDL.
4153 </description>
4154 </property>
4155
4156 <property>
4157 <name>hive.exec.dynamic.partition.mode</name>
4158 <value>strict</value>
4159 <description>In strict mode, the user must specify at least one static
4160 partition in case the user accidentally overwrites all partitions.
4161 </description>
4162 </property>
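<!-- Illustrative example (table and column names are hypothetical): in
 ! strict mode at least one partition key must be static, e.g.
 ! INSERT OVERWRITE TABLE sales PARTITION (country='US', ds)
 ! SELECT amount, ds FROM staging;
 ! where country is static and ds is dynamic. Setting
 ! hive.exec.dynamic.partition.mode=nonstrict would allow both keys to be
 ! dynamic. -->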
4163
4164 <property>
4165 <name>hive.exec.max.dynamic.partitions</name>
4166 <value>1000</value>
4167 <description>Maximum number of dynamic partitions allowed to be
4168 created in total.
4169 </description>
4170 </property>
4171
4172 <property>
4173 <name>hive.exec.max.dynamic.partitions.pernode</name>
4174 <value>100</value>
4175 <description>Maximum number of dynamic partitions allowed to be
4176 created in each mapper/reducer node.
4177 </description>
4178 </property>
4179
4180 <property>
4181 <name>hive.exec.max.created.files</name>
4182 <value>100000</value>
4183 <description>Maximum number of HDFS files created by all
4184 mappers/reducers in a MapReduce job.
4185 </description>
4186 </property>
4187
4188 <property>
4189 <name>hive.exec.default.partition.name</name>
4190 <value>__HIVE_DEFAULT_PARTITION__</value>
4191 <description>The default partition name in case the dynamic partition
4192 column value is null/empty string or any other value that cannot be
4193 escaped. This value must not contain any special character used in
4194 HDFS URI (e.g., ':', '%', '/' etc). The user has to be aware that the
4195 dynamic partition value should not contain this value to avoid
4196 confusion.
4197 </description>
4198 </property>
4199
4200 <property>
4201 <name>hive.stats.dbclass</name>
4202 <value>jdbc:derby</value>
4203 <description>The default database that stores temporary hive
4204 statistics.
4205 </description>
4206 </property>
4207
4208 <property>
4209 <name>hive.stats.autogather</name>
4210 <value>true</value>
4211 <description>A flag to gather statistics automatically during the
4212 INSERT OVERWRITE command.
4213 </description>
4214 </property>
4215
4216 <property>
4217 <name>hive.stats.jdbcdriver</name>
4218 <value>org.apache.derby.jdbc.EmbeddedDriver</value>
4219 <description>The JDBC driver for the database that stores temporary
4220 hive statistics.
4221 </description>
4222 </property>
4223
4224 <property>
4225 <name>hive.stats.dbconnectionstring</name>
4226 <value>jdbc:derby:;databaseName=TempStatsStore;create=true</value>
4227 <description>The default connection string for the database that
4228 stores temporary hive statistics.
4229 </description>
4230 </property>
4231
4232 <property>
4233 <name>hive.stats.default.publisher</name>
4234 <value></value>
4235 <description>The Java class (implementing the StatsPublisher
4236 interface) that is used by default if hive.stats.dbclass is not JDBC
4237 or HBase.
4238 </description>
4239 </property>
4240
4241 <property>
4242 <name>hive.stats.default.aggregator</name>
4243 <value></value>
4244 <description>The Java class (implementing the StatsAggregator
4245 interface) that is used by default if hive.stats.dbclass is not JDBC
4246 or HBase.
4247 </description>
4248 </property>
4249
4250 <property>
4251 <name>hive.stats.jdbc.timeout</name>
4252 <value>30</value>
4253 <description>Timeout value (number of seconds) used by JDBC connection
4254 and statements.
4255 </description>
4256 </property>
4257
4258 <property>
4259 <name>hive.stats.retries.max</name>
4260 <value>0</value>
4261 <description>Maximum number of retries when the stats
4262 publisher/aggregator gets an exception updating the intermediate
4263 database. Default is no retries on failures.
4264 </description>
4265 </property>
4266
4267 <property>
4268 <name>hive.stats.retries.wait</name>
4269 <value>3000</value>
4270 <description>The base waiting window (in milliseconds) before the next
4271 retry. The actual wait time is calculated by baseWindow * failures +
4272 baseWindow * (failures + 1) * (random number between [0.0,1.0]).
4273 </description>
4274 </property>
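<!-- Worked example of the backoff formula above: with
 ! hive.stats.retries.wait=3000 and 2 previous failures, the next wait is
 ! 3000 * 2 + 3000 * 3 * r = 6000 + 9000 * r milliseconds, where r is a
 ! random number in [0.0,1.0]. -->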
4275
4276 <property>
4277 <name>hive.stats.reliable</name>
4278 <value>false</value>
4279 <description>Whether queries will fail because stats cannot be
4280 collected completely accurately. If this is set to true,
4281 reading/writing from/into a partition may fail because the stats
4282 could not be computed accurately.
4283 </description>
4287 </property>
4288
4289 <property>
4290 <name>hive.stats.collect.tablekeys</name>
4291 <value>false</value>
4292 <description>Whether join and group by keys on tables are derived and
4293 maintained in the QueryPlan. This is useful to identify how tables
4294 are accessed and to determine if they should be bucketed.
4295 </description>
4298 </property>
4299
4300 <property>
4301 <name>hive.stats.collect.scancols</name>
4302 <value>false</value>
4303 <description>Whether column accesses are tracked in the QueryPlan.
4304 This is useful to identify how tables are accessed and to determine
4305 if there are wasted columns that can be trimmed.
4306 </description>
4307 </property>
4308
4309 <property>
4310 <name>hive.stats.ndv.error</name>
4311 <value>20.0</value>
4312 <description>Standard error expressed in percentage. Provides a
4313 tradeoff between accuracy and compute cost. A lower value for error
4314 indicates higher accuracy and a higher compute cost.
4315 </description>
4316 </property>
4317
4318 <property>
4319 <name>hive.stats.key.prefix.max.length</name>
4320 <value>200</value>
4321 <description>
4322 Determines whether, when the prefix of the key used for intermediate
4323 stats collection exceeds a certain length, a hash of the key is used
4324 instead. If the value &lt; 0 then hashing is never used; if the value
4325 >= 0 then hashing is used only when the key prefix's length exceeds
4326 that value. The key prefix is defined as everything preceding the
4327 task ID in the key.
4328 </description>
4334 </property>
4335
4336 <property>
4337 <name>hive.support.concurrency</name>
4338 <value>false</value>
4339 <description>Whether hive supports concurrency or not. A zookeeper
4340 instance must be up and running for the default hive lock manager to
4341 support read-write locks.
4342 </description>
4343 </property>
4344
4345 <property>
4346 <name>hive.lock.numretries</name>
4347 <value>100</value>
4348 <description>The number of times you want to try to get all the locks
4349 </description>
4350 </property>
4351
4352 <property>
4353 <name>hive.unlock.numretries</name>
4354 <value>10</value>
4355 <description>The number of times you want to retry to do one unlock
4356 </description>
4357 </property>
4358
4359 <property>
4360 <name>hive.lock.sleep.between.retries</name>
4361 <value>60</value>
4362 <description>The sleep time (in seconds) between various retries
4363 </description>
4364 </property>
4365
4366 <property>
4367 <name>hive.zookeeper.quorum</name>
4368 <value></value>
4369 <description>The list of zookeeper servers to talk to. This is only
4370 needed for read/write locks.
4371 </description>
4372 </property>
4373
4374 <property>
4375 <name>hive.zookeeper.client.port</name>
4376 <value>2181</value>
4377 <description>The port of zookeeper servers to talk to. This is only
4378 needed for read/write locks.
4379 </description>
4380 </property>
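<!-- Illustrative example: read/write locking needs a reachable ZooKeeper
 ! ensemble (host names here are placeholders), e.g. in hive-site.xml:
 ! <property><name>hive.support.concurrency</name><value>true</value></property>
 ! <property><name>hive.zookeeper.quorum</name><value>zk1.example.com,zk2.example.com</value></property> -->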
4381
4382 <property>
4383 <name>hive.zookeeper.session.timeout</name>
4384 <value>600000</value>
4385 <description>Zookeeper client's session timeout. The client is
4386 disconnected, and as a result all locks are released, if a heartbeat
4387 is not sent within the timeout.
4388 </description>
4389 </property>
4390
4391 <property>
4392 <name>hive.zookeeper.namespace</name>
4393 <value>hive_zookeeper_namespace</value>
4394 <description>The parent node under which all zookeeper nodes are
4395 created.
4396 </description>
4397 </property>
4398
4399 <property>
4400 <name>hive.zookeeper.clean.extra.nodes</name>
4401 <value>false</value>
4402 <description>Clean extra nodes at the end of the session.
4403 </description>
4404 </property>
4405
4406 <property>
4407 <name>fs.har.impl</name>
4408 <value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
4409 <description>The implementation for accessing Hadoop Archives. Note
4410 that this won't be applicable to Hadoop versions less than 0.20
4411 </description>
4412 </property>
4413
4414 <property>
4415 <name>hive.archive.enabled</name>
4416 <value>false</value>
4417 <description>Whether archiving operations are permitted</description>
4418 </property>
4419
4420 <property>
4421 <name>hive.fetch.output.serde</name>
4422 <value>org.apache.hadoop.hive.serde2.DelimitedJSONSerDe</value>
4423 <description>The serde used by FetchTask to serialize the fetch
4424 output.
4425 </description>
4426 </property>
4427
4428 <property>
4429 <name>hive.exec.mode.local.auto</name>
4430 <value>false</value>
4431 <description> Let hive determine whether to run in local mode
4432 automatically
4433 </description>
4434 </property>
4435
4436 <property>
4437 <name>hive.exec.drop.ignorenonexistent</name>
4438 <value>true</value>
4439 <description>
4440 Do not report an error if DROP TABLE/VIEW specifies a non-existent
4441 table/view
4442 </description>
4444 </property>
4445
4446 <property>
4447 <name>hive.exec.show.job.failure.debug.info</name>
4448 <value>true</value>
4449 <description>
4450 If a job fails, whether to provide a link in the CLI to the task with
4451 the most failures, along with debugging hints if applicable.
4452 </description>
4456 </property>
4457
4458 <property>
4459 <name>hive.auto.progress.timeout</name>
4460 <value>0</value>
4461 <description>
4462 How long to run the autoprogressor for the script/UDTF operators (in
4463 seconds). Set to 0 for forever.
4464 </description>
4467 </property>
4468
4469 <!-- HBase Storage Handler Parameters -->
4470
4471 <property>
4472 <name>hive.hbase.wal.enabled</name>
4473 <value>true</value>
4474 <description>Whether writes to HBase should be forced to the
4475 write-ahead log. Disabling this improves HBase write performance at
4476 the risk of lost writes in case of a crash.
4477 </description>
4478 </property>
4479
4480 <property>
4481 <name>hive.table.parameters.default</name>
4482 <value></value>
4483 <description>Default property values for newly created tables
4484 </description>
4485 </property>
4486
4487 <property>
4488 <name>hive.entity.separator</name>
4489 <value>@</value>
4490 <description>Separator used to construct names of tables and
4491 partitions. For example, dbname@tablename@partitionname
4492 </description>
4493 </property>
4494
4495 <property>
4496 <name>hive.ddl.createtablelike.properties.whitelist</name>
4497 <value></value>
4498 <description>Table Properties to copy over when executing a Create
4499 Table Like.
4500 </description>
4501 </property>
4502
4503 <property>
4504 <name>hive.variable.substitute</name>
4505 <value>true</value>
4506 <description>This enables substitution using syntax like ${var},
4507 ${system:var} and ${env:var}.
4508 </description>
4509 </property>
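<!-- Illustrative example of variable substitution in a session (the
 ! table and variable are hypothetical):
 ! set hivevar:dt=2013-07-01;
 ! SELECT * FROM logs WHERE ds='${hivevar:dt}'; -->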
4510
4511 <property>
4512 <name>hive.variable.substitute.depth</name>
4513 <value>40</value>
4514 <description>The maximum replacements the substitution engine will do.
4515 </description>
4516 </property>
4517
4518 <property>
4519 <name>hive.conf.validation</name>
4520 <value>true</value>
4521 <description>Enables type checking for registered hive configurations
4522 </description>
4523 </property>
4524
4525 <property>
4526 <name>hive.security.authorization.enabled</name>
4527 <value>false</value>
4528 <description>enable or disable the hive client authorization
4529 </description>
4530 </property>
4531
4532 <property>
4533 <name>hive.security.authorization.createtable.user.grants</name>
4534 <value></value>
4535 <description>the privileges automatically granted to some users
4536 whenever a table gets created. An example like
4537 "userX,userY:select;userZ:create" will grant select privilege to
4538 userX and userY, and grant create privilege to userZ whenever a new
4539 table is created.
4540 </description>
4544 </property>
4545
4546 <property>
4547 <name>hive.security.authorization.createtable.group.grants</name>
4548 <value></value>
4549 <description>the privileges automatically granted to some groups
4550 whenever a table gets created. An example like
4551 "groupX,groupY:select;groupZ:create" will grant select privilege to
4552 groupX and groupY, and grant create privilege to groupZ whenever a
4553 new table is created.
4554 </description>
4558 </property>
4559
4560 <property>
4561 <name>hive.security.authorization.createtable.role.grants</name>
4562 <value></value>
4563 <description>the privileges automatically granted to some roles
4564 whenever a table gets created. An example like
4565 "roleX,roleY:select;roleZ:create" will grant select privilege to
4566 roleX and roleY, and grant create privilege to roleZ whenever a new
4567 table is created.
4568 </description>
4572 </property>
4573
4574 <property>
4575 <name>hive.security.authorization.createtable.owner.grants</name>
4576 <value></value>
4577 <description>the privileges automatically granted to the owner
4578 whenever a table gets created. An example like "select,drop" will
4579 grant select and drop privilege to the owner of the table
4580 </description>
4583 </property>
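<!-- Illustrative example: to grant the creating user select and drop on
 ! every new table, hive-site.xml could contain:
 ! <property>
 !   <name>hive.security.authorization.createtable.owner.grants</name>
 !   <value>select,drop</value>
 ! </property> -->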
4584
4585 <property>
4586 <name>hive.metastore.authorization.storage.checks</name>
4587 <value>false</value>
4588 <description>Should the metastore do authorization checks against the
4589 underlying storage for operations like drop-partition (disallow the
4590 drop-partition if the user in question doesn't have permissions to
4591 delete the corresponding directory on the storage).
4592 </description>
4597 </property>
4598
4599 <property>
4600 <name>hive.error.on.empty.partition</name>
4601 <value>false</value>
4602 <description>Whether to throw an exception if dynamic partition insert
4603 generates empty results.
4604 </description>
4605 </property>
4606
4607 <property>
4608 <name>hive.index.compact.file.ignore.hdfs</name>
4609 <value>false</value>
4610 <description>If true, the hdfs location stored in the index file will
4611 be ignored at runtime. If the data got moved or the name of the
4612 cluster got changed, the index data should still be usable.
4613 </description>
4616 </property>
4617
4618 <property>
4619 <name>hive.optimize.index.filter.compact.minsize</name>
4620 <value>5368709120</value>
4621 <description>Minimum size (in bytes) of the inputs on which a compact
4622 index is automatically used.
4623 </description>
4624 </property>
4625
4626 <property>
4627 <name>hive.optimize.index.filter.compact.maxsize</name>
4628 <value>-1</value>
4629 <description>Maximum size (in bytes) of the inputs on which a compact
4630 index is automatically used.
4631 A negative number is equivalent to
4632 infinity.
4633 </description>
4634 </property>
4635
4636 <property>
4637 <name>hive.index.compact.query.max.size</name>
4638 <value>10737418240</value>
4639 <description>The maximum number of bytes that a query using the
4640 compact index can read. Negative value is equivalent to infinity.
4641 </description>
4642 </property>
4643
4644 <property>
4645 <name>hive.index.compact.query.max.entries</name>
4646 <value>10000000</value>
4647 <description>The maximum number of index entries to read during a
4648 query that uses the compact index. Negative value is equivalent to
4649 infinity.
4650 </description>
4651 </property>
4652
4653 <property>
4654 <name>hive.index.compact.binary.search</name>
4655 <value>true</value>
4656 <description>Whether or not to use a binary search to find the entries
4657 in an index table that match the filter, where possible
4658 </description>
4659 </property>
4660
4661 <property>
4662 <name>hive.exim.uri.scheme.whitelist</name>
4663 <value>hdfs,pfile</value>
4664 <description>A comma separated list of acceptable URI schemes for
4665 import and export.
4666 </description>
4667 </property>
4668
4669 <property>
4670 <name>hive.lock.mapred.only.operation</name>
4671 <value>false</value>
4672 <description>This param controls whether locks are acquired only for
4673 queries that need to execute at least one mapred job.
4674 </description>
4676 </property>
4677
4678 <property>
4679 <name>hive.limit.row.max.size</name>
4680 <value>100000</value>
4681 <description>When trying a smaller subset of data for simple LIMIT,
4682 the minimum size we need to guarantee each row to have.
4683 </description>
4685 </property>
4686
4687 <property>
4688 <name>hive.limit.optimize.limit.file</name>
4689 <value>10</value>
4690 <description>When trying a smaller subset of data for simple LIMIT,
4691 the maximum number of files we can sample.
4692 </description>
4694 </property>
4695
4696 <property>
4697 <name>hive.limit.optimize.enable</name>
4698 <value>false</value>
4699 <description>Whether to enable the optimization of trying a smaller
4700 subset of data for simple LIMIT first.
4701 </description>
4702 </property>
4703
4704 <property>
4705 <name>hive.limit.optimize.fetch.max</name>
4706 <value>50000</value>
4707 <description>Maximum number of rows allowed for a smaller subset of
4708 data for simple LIMIT, if it is a fetch query. Insert queries are not
4709 restricted by this limit.
4710 </description>
4712 </property>
4713
4714 <property>
4715 <name>hive.rework.mapredwork</name>
4716 <value>false</value>
4717 <description>Whether to rework the mapred work or not. This was first
4718 introduced by SymlinkTextInputFormat to replace symlink files with
4719 real paths at compile time.
4720 </description>
4723 </property>
4724
4725 <property>
4726 <name>hive.exec.concatenate.check.index</name>
4727 <value>true</value>
4728 <description>If this is set to true, hive will throw an error when
4729 doing 'alter table tbl_name [partSpec] concatenate' on a
4730 table/partition that has indexes on it. The reason the user wants to
4731 set this to true is that it can help the user avoid handling all the
4732 index drop, recreation and rebuild work. This is very helpful for
4733 tables with thousands of partitions.
4734 </description>
4736 </property>
4737
4738 <property>
4739 <name>hive.sample.seednumber</name>
4740 <value>0</value>
4741 <description>A number used for percentage sampling. By changing this
4742 number, the user will change the subsets of data sampled.
4743 </description>
4745 </property>
4746
4747 <property>
4748 <name>hive.io.exception.handlers</name>
4749 <value></value>
4750 <description>A list of io exception handler class names. This is used
4751 to construct a list of exception handlers to handle exceptions thrown
4752 by record readers
4753 </description>
4755 </property>
4756
4757 <property>
4758 <name>hive.autogen.columnalias.prefix.label</name>
4759 <value>_c</value>
4760 <description>String used as a prefix when auto generating column
4761 aliases. By default the prefix label will be appended with a column
4762 position number to form the column alias. Auto generation happens
4763 if an aggregate function is used in a select clause without an
4764 explicit alias.
4765 </description>
4770 </property>
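<!-- Example of the auto-generated aliases described above: a query such
 ! as SELECT count(*), max(amount) FROM t; (t and amount are
 ! hypothetical) yields columns named _c0 and _c1. With
 ! hive.autogen.columnalias.prefix.includefuncname=true the function name
 ! is folded into the alias as well. -->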
4771
4772 <property>
4773 <name>hive.autogen.columnalias.prefix.includefuncname</name>
4774 <value>false</value>
4775 <description>Whether to include function name in the column alias auto
4776 generated by hive.
4777 </description>
4778 </property>
4779
4780 <property>
4781 <name>hive.exec.perf.logger</name>
4782 <value>org.apache.hadoop.hive.ql.log.PerfLogger</value>
4783 <description>The class responsible for logging client side
4784 performance metrics. Must be a subclass of
4785 org.apache.hadoop.hive.ql.log.PerfLogger
4786 </description>
4787 </property>
4788
4789 <property>
4790 <name>hive.start.cleanup.scratchdir</name>
4791 <value>false</value>
4792 <description>Whether to clean up the hive scratchdir while starting
4793 the hive server
4794 </description>
4795 </property>
4796
4797 <property>
4798 <name>hive.output.file.extension</name>
4799 <value></value>
4800 <description>String used as a file extension for output files. If not
4801 set, defaults to the codec extension for text files (e.g. ".gz"), or
4802 no extension otherwise.
4803 </description>
4804 </property>
4805
4806 <property>
4807 <name>hive.insert.into.multilevel.dirs</name>
4808 <value>false</value>
4809 <description>Whether to allow insert into multilevel directories like
4810 "insert directory '/HIVEFT25686/chinna/' from table"
4811 </description>
4813 </property>
4814
4815 <property>
4816 <name>hive.warehouse.subdir.inherit.perms</name>
4817 <value>false</value>
4818 <description>Set this to true if the table directories should inherit
4819 the permission of the warehouse or database directory instead of
4820 being created with the permissions derived from dfs umask
4821 </description>
4824 </property>
4825
4826 <property>
4827 <name>hive.exec.job.debug.capture.stacktraces</name>
4828 <value>true</value>
4829 <description>Whether or not stack traces parsed from the task logs of
4830 a sampled failed task for each failed job should be stored in the
4831 SessionState
4832 </description>
4834 </property>
4835
4836 <property>
4837 <name>hive.exec.driver.run.hooks</name>
4838 <value></value>
4839 <description>A comma separated list of hooks which implement
4840 HiveDriverRunHook and will be run at the beginning and end of
4841 Driver.run; these will be run in the order specified.
4842 </description>
4844 </property>
4845
4846 <property>
4847 <name>hive.ddl.output.format</name>
4848 <value>text</value>
4849 <description>
4850 The data format to use for DDL output. One of "text" (for human
4851 readable text) or "json" (for a json object).
4852 </description>
4854 </property>
4855
4856 <property>
4857 <name>hive.transform.escape.input</name>
4858 <value>false</value>
4859 <description>
4860 This adds an option to escape special chars (newlines, carriage
4861 returns and tabs) when they are passed to the user script. This is
4862 useful if the hive tables can contain data that contains special
4863 characters.
4864 </description>
4869 </property>
4870
4871 <property>
4872 <name>hive.exec.rcfile.use.explicit.header</name>
4873 <value>true</value>
4874 <description>
4875 If this is set, the header for RC Files will simply be RCF. If this
4876 is not set, the header will be that borrowed from sequence files,
4877 e.g. SEQ- followed by the input and output RC File formats.
4878 </description>
4883 </property>
4884
4885 <property>
4886 <name>hive.multi.insert.move.tasks.share.dependencies</name>
4887 <value>false</value>
4888 <description>
4889 If this is set, all move tasks for tables/partitions (not
4890 directories) at the end of a multi-insert query will only begin once
4891 the dependencies for all these move tasks have been met.
4892 Advantages: If concurrency is enabled, the locks will only be
4893 released once the query has finished, so with this config enabled,
4894 the time when the table/partition is generated will be much closer
4895 to when the lock on it is released.
4896 Disadvantages: If concurrency is not enabled, with this disabled,
4897 the tables/partitions which are produced by this query and finish
4898 earlier will be available for querying much earlier. Since the locks
4899 are only released once the query finishes, this does not apply if
4900 concurrency is enabled.
4901 </description>
4915 </property>
4916
4917 <property>
4918 <name>hive.fetch.task.conversion</name>
4919 <value>minimal</value>
4920 <description>
4921 Some select queries can be converted to a single FETCH task,
4922 minimizing latency. Currently the query should be single sourced,
4923 not having any subquery, and should not have any aggregations or
4924 distincts (which incur RS), lateral views or joins.
4925 1. minimal : SELECT STAR, FILTER on partition columns, LIMIT only
4926 2. more : SELECT, FILTER, LIMIT only (TABLESAMPLE, virtual columns)
4927 </description>
4935 </property>
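<!-- Illustrative example (t and col are hypothetical): with the default
 ! value "minimal", SELECT * FROM t LIMIT 10; on an unpartitioned table
 ! runs as a single FETCH task with no map-reduce job, while
 ! SELECT col FROM t; still launches a job; setting the value to "more"
 ! converts the latter as well. -->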
4936
4937 <property>
4938 <name>hive.hmshandler.retry.attempts</name>
4939 <value>1</value>
4940 <description>The number of times to retry a HMSHandler call if there
4941 is a connection error
4942 </description>
4943 </property>
4944
4945 <property>
4946 <name>hive.hmshandler.retry.interval</name>
4947 <value>1000</value>
4948 <description>The number of milliseconds between HMSHandler retry
4949 attempts
4950 </description>
4951 </property>
4952
4953 <property>
4954 <name>hive.server.read.socket.timeout</name>
4955 <value>10</value>
4956 <description>Timeout for the HiveServer to close the connection if no
4957 response from the client in N seconds, defaults to 10 seconds.
4958 </description>
4959 </property>
4960
4961 <property>
4962 <name>hive.server.tcp.keepalive</name>
4963 <value>true</value>
4964 <description>Whether to enable TCP keepalive for the Hive server.
4965 Keepalive will prevent accumulation of half-open connections.
4966 </description>
4967 </property>
4968
4969 <property>
4970 <name>hive.decode.partition.name</name>
4971 <value>false</value>
4972 <description>Whether to show the unquoted partition names in query
4973 results.
4974 </description>
4975 </property>
4976
4977 <property>
4978 <name>hive.log4j.file</name>
4979 <value></value>
4980 <description>Hive log4j configuration file.
4981 If the property is not set, then logging will be initialized using
4982 hive-log4j.properties found on the classpath.
4983 If the property is set, the value must be a valid URI (java.net.URI,
4984 e.g. "file:///tmp/my-logging.properties"), which you can then extract
4985 a URL from and pass to PropertyConfigurator.configure(URL).
4986 </description>
4992 </property>
4993
4994 <property>
4995 <name>hive.exec.log4j.file</name>
4996 <value></value>
4997 <description>Hive log4j configuration file for execution mode (sub
4998 command).
4999 If the property is not set, then logging will be initialized using
5000 hive-exec-log4j.properties found on the classpath.
5001 If the property is set, the value must be a valid URI (java.net.URI,
5002 e.g. "file:///tmp/my-logging.properties"), which you can then extract
5003 a URL from and pass to PropertyConfigurator.configure(URL).
5004 </description>
5009 </property>
5010
5011 <property>
5012 <name>hive.exec.infer.bucket.sort</name>
5013 <value>false</value>
5014 <description>
5015 If this is set, when writing partitions, the metadata will include
5016 the bucketing/sorting properties with which the data was written, if
5017 any (this will not overwrite the metadata inherited from the table
5018 if the table is bucketed/sorted)
5019 </description>
5024 </property>
5025
5026 <property>
5027 <name>hive.exec.infer.bucket.sort.num.buckets.power.two</name>
5028 <value>false</value>
5029 <description>
5030 If this is set, when setting the number of reducers for the map
5031 reduce task which writes the final output files, it will choose a
5032 number which is a power of two, unless the user specifies the number
5033 of reducers to use using mapred.reduce.tasks. The number of reducers
5034 may be set to a power of two, only to be followed by a merge task,
5035 preventing anything from being inferred.
5036 With hive.exec.infer.bucket.sort set to true:
5037 Advantages: If this is not set, the number of buckets for partitions
5038 will seem arbitrary, which means that the number of mappers used for
5039 optimized joins, for example, will be very low. With this set, since
5040 the number of buckets used for any partition is a power of two, the
5041 number of mappers used for optimized joins will be the least number
5042 of buckets used by any partition being joined.
5043 Disadvantages: This may mean a much larger or much smaller number of
5044 reducers being used in the final map reduce job, e.g. if a job was
5045 originally going to take 257 reducers, it will now take 512
5046 reducers; similarly, if the max number of reducers is 511, and a job
5047 was going to use this many, it will now use 256 reducers.
5048 </description>
5071 </property>
5072
5073 <property>
5074 <name>hive.groupby.orderby.position.alias</name>
5075 <value>false</value>
5076 <description>Whether to enable using Column Position Alias in Group By
5077 or Order By
5078 </description>
5079 </property>
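<!-- Illustrative example (sales and country are hypothetical): with
 ! hive.groupby.orderby.position.alias=true, positions can stand in for
 ! select-list columns:
 ! SELECT country, count(*) FROM sales GROUP BY 1 ORDER BY 2 DESC; -->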
5080
5081 <property>
5082 <name>hive.server2.thrift.min.worker.threads</name>
5083 <value>5</value>
5084 <description>Minimum number of Thrift worker threads</description>
5085 </property>
5086
5087 <property>
5088 <name>hive.server2.thrift.max.worker.threads</name>
5089 <value>100</value>
5090 <description>Maximum number of Thrift worker threads</description>
5091 </property>
5092
5093 <property>
5094 <name>hive.server2.thrift.port</name>
5095 <value>10000</value>
5096 <description>Port number of HiveServer2 Thrift interface.
5097 Can be overridden by setting $HIVE_SERVER2_THRIFT_PORT
5098 </description>
5100 </property>
5101
5102 <property>
5103 <name>hive.server2.thrift.bind.host</name>
5104 <value>localhost</value>
5105 <description>Bind host on which to run the HiveServer2 Thrift
5106 interface. Can be overridden by setting $HIVE_SERVER2_THRIFT_BIND_HOST
5107 </description>
5109 </property>
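<!-- Illustrative example: both settings above can be overridden from the
 ! environment before starting HiveServer2:
 ! export HIVE_SERVER2_THRIFT_PORT=10001
 ! export HIVE_SERVER2_THRIFT_BIND_HOST=0.0.0.0 -->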
5110
5111 <property>
5112 <name>hive.server2.authentication</name>
5113 <value>NONE</value>
5114 <description>
5115 Client authentication types.
5116 NONE: no authentication check
5117 LDAP: LDAP/AD based authentication
5118 KERBEROS: Kerberos/GSSAPI authentication
5119 CUSTOM: Custom authentication provider
5120 (Use with property hive.server2.custom.authentication.class)
5121 </description>
5124 </property>
5125
5126 <property>
5127 <name>hive.server2.custom.authentication.class</name>
5128 <value></value>
5129 <description>
5130 Custom authentication class. Used when property
5131 'hive.server2.authentication' is set to 'CUSTOM'. The provided class
5132 must be a proper implementation of the interface
5133 org.apache.hive.service.auth.PasswdAuthenticationProvider.
5134 HiveServer2 will call its Authenticate(user, passed) method to
5135 authenticate requests. The implementation may optionally extend
5136 Hadoop's org.apache.hadoop.conf.Configured class to grab Hive's
5137 Configuration object.
5138 </description>
5144 </property>
5145
5146 <property>
5147 <name>hive.server2.authentication.kerberos.principal</name>
5148 <value></value>
5149 <description>
5150 Kerberos server principal
5151 </description>
5152 </property>
5153
5154 <property>
5155 <name>hive.server2.authentication.kerberos.keytab</name>
5156 <value></value>
5157 <description>
5158 Kerberos keytab file for server principal
5159 </description>
5160 </property>
5161
5162 <property>
5163 <name>hive.server2.authentication.ldap.url</name>
5164 <value></value>
5165 <description>
5166 LDAP connection URL
5167 </description>
5168 </property>
5169
5170 <property>
5171 <name>hive.server2.authentication.ldap.baseDN</name>
5172 <value></value>
5173 <description>
5174 LDAP base DN
5175 </description>
5176 </property>
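<!-- Illustrative example of LDAP-backed authentication (the server URL
 ! and baseDN are placeholders):
 ! <property><name>hive.server2.authentication</name><value>LDAP</value></property>
 ! <property><name>hive.server2.authentication.ldap.url</name><value>ldap://ldap.example.com</value></property>
 ! <property><name>hive.server2.authentication.ldap.baseDN</name><value>ou=people,dc=example,dc=com</value></property> -->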
5177
5178 <property>
5179 <name>hive.server2.enable.doAs</name>
5180 <value>true</value>
5181 <description>
5182 Setting this property to true will have HiveServer2 execute hive
5183 operations as the user making the calls to it.
5185 </description>
5186 </property>
5187
5188
5189</configuration>