1<?xml version="1.0"?>
2<!--
3 ! Copyright 2009-2013 by The Regents of the University of California
4 ! Licensed under the Apache License, Version 2.0 (the "License");
5 ! you may not use this file except in compliance with the License.
6 ! you may obtain a copy of the License from
7 !
8 ! http://www.apache.org/licenses/LICENSE-2.0
9 !
10 ! Unless required by applicable law or agreed to in writing, software
11 ! distributed under the License is distributed on an "AS IS" BASIS,
12 ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 ! See the License for the specific language governing permissions and
14 ! limitations under the License.
15 !-->
16<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
17
18<configuration>
19
20 <!-- Hive Configuration can either be stored in this file or in the hadoop
21 configuration files -->
22 <!-- that are implied by Hadoop setup variables. -->
23 <!-- Aside from Hadoop setup variables - this file is provided as a convenience
24 so that Hive -->
25 <!-- users do not have to edit hadoop configuration files (that may be managed
26 as a centralized -->
27 <!-- resource). -->
28
29 <!-- Hive Execution Parameters -->
30 <property>
31 <name>mapred.reduce.tasks</name>
32 <value>-1</value>
33 <description>The default number of reduce tasks per job. Typically set
34 to a prime close to the number of available hosts. Ignored when
35 mapred.job.tracker is "local". Hadoop set this to 1 by default,
36 whereas hive uses -1 as its default value.
37 By setting this property to -1, Hive will automatically figure out what
38 should be the number of reducers.
39 </description>
40 </property>
41
42 <property>
43 <name>hive.hyracks.connectorpolicy</name>
44 <value>PIPELINING</value>
45 </property>
46
47 <property>
48 <name>hive.hyracks.parrallelism</name>
49 <value>4</value>
50 </property>
51
52 <property>
53 <name>hive.algebricks.groupby.external</name>
54 <value>true</value>
55 </property>
56
57 <property>
58 <name>hive.algebricks.groupby.external.memory</name>
59 <value>33554432</value>
60 </property>
61
62 <property>
63 <name>hive.algebricks.sort.memory</name>
64 <value>33554432</value>
65 </property>
66
67 <property>
68 <name>hive.exec.reducers.bytes.per.reducer</name>
69 <value>1000000000</value>
70 <description>size per reducer. The default is 1G, i.e. if the input size
71 is 10G, it will use 10 reducers.</description>
72 </property>
73
74 <property>
75 <name>hive.exec.reducers.max</name>
76 <value>999</value>
77 <description>max number of reducers will be used. If the one
78 specified in the configuration parameter mapred.reduce.tasks is
79 negative, hive will use this one as the max number of reducers when
80 automatically determining the number of reducers.</description>
81 </property>
82
83 <property>
84 <name>hive.exec.scratchdir</name>
85 <value>/hive-${user.name}</value>
86 <description>Scratch space for Hive jobs</description>
87 </property>
88
89 <property>
90 <name>hive.test.mode</name>
91 <value>false</value>
92 <description>whether hive is running in test mode. If yes, it turns on
93 sampling and prefixes the output tablename</description>
94 </property>
95
96 <property>
97 <name>hive.test.mode.prefix</name>
98 <value>test_</value>
99 <description>if hive is running in test mode, prefixes the output
100 table by this string</description>
101 </property>
102
103 <!-- If the input table is not bucketed, the denominator of the tablesample
104 is determined by the parameter below -->
105 <!-- For example, the following query: -->
106 <!-- INSERT OVERWRITE TABLE dest -->
107 <!-- SELECT col1 from src -->
108 <!-- would be converted to -->
109 <!-- INSERT OVERWRITE TABLE test_dest -->
110 <!-- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1)) -->
111 <property>
112 <name>hive.test.mode.samplefreq</name>
113 <value>32</value>
114 <description>if hive is running in test mode and table is not
115 bucketed, sampling frequency</description>
116 </property>
117
118 <property>
119 <name>hive.test.mode.nosamplelist</name>
120 <value></value>
121 <description>if hive is running in test mode, don't sample the above
122 comma separated list of tables</description>
123 </property>
124
125 <property>
126 <name>hive.metastore.local</name>
127 <value>true</value>
128 <description>controls whether to connect to remote metastore server or
129 open a new metastore server in Hive Client JVM</description>
130 </property>
131
132 <property>
133 <name>javax.jdo.option.ConnectionURL</name>
134 <value>jdbc:derby:;databaseName=metastore_db;create=true</value>
135 <description>JDBC connect string for a JDBC metastore</description>
136 </property>
137
138 <property>
139 <name>javax.jdo.option.ConnectionDriverName</name>
140 <value>org.apache.derby.jdbc.EmbeddedDriver</value>
141 <description>Driver class name for a JDBC metastore</description>
142 </property>
143
144 <property>
145 <name>javax.jdo.PersistenceManagerFactoryClass</name>
146 <value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
147 <description>class implementing the jdo persistence</description>
148 </property>
149
150 <property>
151 <name>datanucleus.connectionPoolingType</name>
152 <value>DBCP</value>
153 <description>Uses a DBCP connection pool for JDBC metastore
154 </description>
155 </property>
156
157 <property>
158 <name>javax.jdo.option.DetachAllOnCommit</name>
159 <value>true</value>
160 <description>detaches all objects from session so that they can be
161 used after transaction is committed</description>
162 </property>
163
164 <property>
165 <name>javax.jdo.option.NonTransactionalRead</name>
166 <value>true</value>
167 <description>reads outside of transactions</description>
168 </property>
169
170 <property>
171 <name>javax.jdo.option.ConnectionUserName</name>
172 <value>APP</value>
173 <description>username to use against metastore database</description>
174 </property>
175
176 <property>
177 <name>javax.jdo.option.ConnectionPassword</name>
178 <value>mine</value>
179 <description>password to use against metastore database</description>
180 </property>
181
182 <property>
183 <name>datanucleus.validateTables</name>
184 <value>false</value>
185 <description>validates existing schema against code. turn this on if
186 you want to verify existing schema </description>
187 </property>
188
189 <property>
190 <name>datanucleus.validateColumns</name>
191 <value>false</value>
192 <description>validates existing schema against code. turn this on if
193 you want to verify existing schema </description>
194 </property>
195
196 <property>
197 <name>datanucleus.validateConstraints</name>
198 <value>false</value>
199 <description>validates existing schema against code. turn this on if
200 you want to verify existing schema </description>
201 </property>
202
203 <property>
204 <name>datanucleus.storeManagerType</name>
205 <value>rdbms</value>
206 <description>metadata store type</description>
207 </property>
208
209 <property>
210 <name>datanucleus.autoCreateSchema</name>
211 <value>true</value>
212 <description>creates necessary schema on a startup if one doesn't
213 exist. set this to false, after creating it once</description>
214 </property>
215
216 <property>
217 <name>datanucleus.autoStartMechanismMode</name>
218 <value>checked</value>
219 <description>throw exception if metadata tables are incorrect
220 </description>
221 </property>
222
223 <property>
224 <name>datanucleus.transactionIsolation</name>
225 <value>read-committed</value>
226 <description>Default transaction isolation level for identity
227 generation. </description>
228 </property>
229
230 <property>
231 <name>datanucleus.cache.level2</name>
232 <value>false</value>
233 <description>Use a level 2 cache. Turn this off if metadata is changed
234 independently of hive metastore server</description>
235 </property>
236
237 <property>
238 <name>datanucleus.cache.level2.type</name>
239 <value>SOFT</value>
240 <description>SOFT=soft reference based cache, WEAK=weak reference
241 based cache.</description>
242 </property>
243
244 <property>
245 <name>datanucleus.identifierFactory</name>
246 <value>datanucleus</value>
247 <description>Name of the identifier factory to use when generating
248 table/column names etc. 'datanucleus' is used for backward
249 compatibility</description>
250 </property>
251
252 <property>
253 <name>hive.metastore.warehouse.dir</name>
254 <value>/user/hivesterix</value>
255 <description>location of default database for the warehouse
256 </description>
257 </property>
258
259 <property>
260 <name>hive.metastore.connect.retries</name>
261 <value>5</value>
262 <description>Number of retries while opening a connection to metastore
263 </description>
264 </property>
265
266 <property>
267 <name>hive.metastore.rawstore.impl</name>
268 <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
269 <description>Name of the class that implements
270 org.apache.hadoop.hive.metastore.rawstore interface. This class is
271 used for storage and retrieval of raw metadata objects such as table,
272 database</description>
273 </property>
274
275 <property>
276 <name>hive.default.fileformat</name>
277 <value>TextFile</value>
278 <description>Default file format for CREATE TABLE statement. Options
279 are TextFile and SequenceFile. Users can explicitly say CREATE TABLE
280 ... STORED AS &lt;TEXTFILE|SEQUENCEFILE&gt; to override</description>
281 </property>
282
283 <property>
284 <name>hive.fileformat.check</name>
285 <value>true</value>
286 <description>Whether to check file format or not when loading data
287 files</description>
288 </property>
289
290 <property>
291 <name>hive.map.aggr</name>
292 <value>true</value>
293 <description>Whether to use map-side aggregation in Hive Group By
294 queries</description>
295 </property>
296
297 <property>
298 <name>hive.groupby.skewindata</name>
299 <value>false</value>
300 <description>Whether there is skew in data to optimize group by
301 queries</description>
302 </property>
303
304 <property>
305 <name>hive.groupby.mapaggr.checkinterval</name>
306 <value>100000</value>
307 <description>Number of rows after which a size check of the grouping
308 keys/aggregation classes is performed</description>
309 </property>
310
311 <property>
312 <name>hive.mapred.local.mem</name>
313 <value>0</value>
314 <description>For local mode, memory of the mappers/reducers
315 </description>
316 </property>
317
318 <property>
319 <name>hive.map.aggr.hash.percentmemory</name>
320 <value>0.5</value>
321 <description>Portion of total memory to be used by map-side group
322 aggregation hash table</description>
323 </property>
324
325 <property>
326 <name>hive.map.aggr.hash.min.reduction</name>
327 <value>0.5</value>
328 <description>Hash aggregation will be turned off if the ratio between
329 hash
330 table size and input rows is bigger than this number. Set to 1 to make
331 sure
332 hash aggregation is never turned off.</description>
333 </property>
334
335 <property>
336 <name>hive.optimize.cp</name>
337 <value>true</value>
338 <description>Whether to enable column pruner</description>
339 </property>
340
341 <property>
342 <name>hive.optimize.ppd</name>
343 <value>true</value>
344 <description>Whether to enable predicate pushdown</description>
345 </property>
346
347 <property>
348 <name>hive.optimize.pruner</name>
349 <value>true</value>
350 <description>Whether to enable the new partition pruner which depends
351 on predicate pushdown. If this is disabled,
352 the old partition pruner which is based on AST will be enabled.
353 </description>
354 </property>
355
356 <property>
357 <name>hive.optimize.groupby</name>
358 <value>true</value>
359 <description>Whether to enable the bucketed group by from bucketed
360 partitions/tables.</description>
361 </property>
362
363 <property>
364 <name>hive.join.emit.interval</name>
365 <value>1000</value>
366 <description>How many rows in the right-most join operand Hive should
367 buffer before emitting the join result. </description>
368 </property>
369
370 <property>
371 <name>hive.join.cache.size</name>
372 <value>25000</value>
373 <description>How many rows in the joining tables (except the streaming
374 table) should be cached in memory. </description>
375 </property>
376
377 <property>
378 <name>hive.mapjoin.bucket.cache.size</name>
379 <value>100</value>
380 <description>How many values in each keys in the map-joined table
381 should be cached in memory. </description>
382 </property>
383
384 <property>
385 <name>hive.mapjoin.maxsize</name>
386 <value>100000</value>
387 <description>Maximum # of rows of the small table that can be handled
388 by map-side join. If the size is reached and hive.task.progress is
389 set, a fatal error counter is set and the job will be killed.
390 </description>
391 </property>
392
393 <property>
394 <name>hive.mapjoin.cache.numrows</name>
395 <value>25000</value>
396 <description>How many rows should be cached by jdbm for map join.
397 </description>
398 </property>
399
400 <property>
401 <name>hive.optimize.skewjoin</name>
402 <value>false</value>
403 <description>Whether to enable skew join optimization. </description>
404 </property>
405
406 <property>
407 <name>hive.skewjoin.key</name>
408 <value>100000</value>
409 <description>Determine if we get a skew key in join. If we see more
410 than the specified number of rows with the same key in join operator,
411 we think the key as a skew join key. </description>
412 </property>
413
414 <property>
415 <name>hive.skewjoin.mapjoin.map.tasks</name>
416 <value>10000</value>
417 <description> Determine the number of map task used in the follow up
418 map join job
419 for a skew join. It should be used together with
420 hive.skewjoin.mapjoin.min.split
421 to perform a fine grained control.</description>
422 </property>
423
424 <property>
425 <name>hive.skewjoin.mapjoin.min.split</name>
426 <value>33554432</value>
427 <description> Determine the number of map task at most used in the
428 follow up map join job
429 for a skew join by specifying the minimum split size. It should be used
430 together with
431 hive.skewjoin.mapjoin.map.tasks to perform a fine grained control.</description>
432 </property>
433
434 <property>
435 <name>hive.mapred.mode</name>
436 <value>nonstrict</value>
437 <description>The mode in which the hive operations are being
438 performed. In strict mode, some risky queries are not allowed to run
439 </description>
440 </property>
441
442 <property>
443 <name>hive.exec.script.maxerrsize</name>
444 <value>100000</value>
445 <description>Maximum number of bytes a script is allowed to emit to
446 standard error (per map-reduce task). This prevents runaway scripts
447 from filling logs partitions to capacity </description>
448 </property>
449
450 <property>
451 <name>hive.exec.script.allow.partial.consumption</name>
452 <value>false</value>
453 <description> When enabled, this option allows a user script to exit
454 successfully without consuming all the data from the standard input.
455 </description>
456 </property>
457
458 <property>
459 <name>hive.script.operator.id.env.var</name>
460 <value>HIVE_SCRIPT_OPERATOR_ID</value>
461 <description> Name of the environment variable that holds the unique
462 script operator ID in the user's transform function (the custom
463 mapper/reducer that the user has specified in the query)
464 </description>
465 </property>
466
467 <property>
468 <name>hive.exec.compress.output</name>
469 <value>false</value>
470 <description> This controls whether the final outputs of a query (to a
471 local/hdfs file or a hive table) is compressed. The compression codec
472 and other options are determined from hadoop config variables
473 mapred.output.compress* </description>
474 </property>
475
476 <property>
477 <name>hive.exec.compress.intermediate</name>
478 <value>false</value>
479 <description> This controls whether intermediate files produced by
480 hive between multiple map-reduce jobs are compressed. The compression
481 codec and other options are determined from hadoop config variables
482 mapred.output.compress* </description>
483 </property>
484
485 <property>
486 <name>hive.exec.parallel</name>
487 <value>false</value>
488 <description>Whether to execute jobs in parallel</description>
489 </property>
490
491 <property>
492 <name>hive.exec.parallel.thread.number</name>
493 <value>8</value>
494 <description>How many jobs at most can be executed in parallel
495 </description>
496 </property>
497
498 <property>
499 <name>hive.hwi.war.file</name>
500 <value>lib\hive-hwi-0.7.0.war</value>
501 <description>This sets the path to the HWI war file, relative to
502 ${HIVE_HOME}. </description>
503 </property>
504
505 <property>
506 <name>hive.hwi.listen.host</name>
507 <value>0.0.0.0</value>
508 <description>This is the host address the Hive Web Interface will
509 listen on</description>
510 </property>
511
512 <property>
513 <name>hive.hwi.listen.port</name>
514 <value>9999</value>
515 <description>This is the port the Hive Web Interface will listen on
516 </description>
517 </property>
518
519 <property>
520 <name>hive.exec.pre.hooks</name>
521 <value></value>
522 <description>Pre Execute Hook for Tests</description>
523 </property>
524
525 <property>
526 <name>hive.merge.mapfiles</name>
527 <value>true</value>
528 <description>Merge small files at the end of a map-only job
529 </description>
530 </property>
531
532 <property>
533 <name>hive.merge.mapredfiles</name>
534 <value>false</value>
535 <description>Merge small files at the end of a map-reduce job
536 </description>
537 </property>
538
539 <property>
540 <name>hive.heartbeat.interval</name>
541 <value>1000</value>
542 <description>Send a heartbeat after this interval - used by mapjoin
543 and filter operators</description>
544 </property>
545
546 <property>
547 <name>hive.merge.size.per.task</name>
548 <value>256000000</value>
549 <description>Size of merged files at the end of the job</description>
550 </property>
551
552 <property>
553 <name>hive.merge.size.smallfiles.avgsize</name>
554 <value>16000000</value>
555 <description>When the average output file size of a job is less than
556 this number, Hive will start an additional map-reduce job to merge
557 the output files into bigger files. This is only done for map-only
558 jobs if hive.merge.mapfiles is true, and for map-reduce jobs if
559 hive.merge.mapredfiles is true.</description>
560 </property>
561
562 <property>
563 <name>hive.script.auto.progress</name>
564 <value>false</value>
565 <description>Whether Hive Transform/Map/Reduce Clause should
566 automatically send progress information to TaskTracker to avoid the
567 task getting killed because of inactivity. Hive sends progress
568 information when the script is outputting to stderr. This option
569 removes the need of periodically producing stderr messages, but users
570 should be cautious because this may prevent infinite loops in the
571 scripts from being killed by TaskTracker. </description>
572 </property>
573
574 <property>
575 <name>hive.script.serde</name>
576 <value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value>
577 <description>The default serde for transmitting input data to and
578 reading output data from the user scripts. </description>
579 </property>
580
581 <property>
582 <name>hive.script.recordreader</name>
583 <value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value>
584 <description>The default record reader for reading data from the user
585 scripts. </description>
586 </property>
587
588 <property>
589 <name>hive.script.recordwriter</name>
590 <value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value>
591 <description>The default record writer for writing data to the user
592 scripts. </description>
593 </property>
594
595 <property>
596 <name>hive.input.format</name>
597 <value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
598 <description>The default input format, if it is not specified, the
599 system assigns it. It is set to HiveInputFormat for hadoop versions
600 17, 18 and 19, whereas it is set to CombinedHiveInputFormat for
601 hadoop 20. The user can always overwrite it - if there is a bug in
602 CombinedHiveInputFormat, it can always be manually set to
603 HiveInputFormat. </description>
604 </property>
605
606 <property>
607 <name>hive.udtf.auto.progress</name>
608 <value>false</value>
609 <description>Whether Hive should automatically send progress
610 information to TaskTracker when using UDTF's to prevent the task
611 getting killed because of inactivity. Users should be cautious
612 because this may prevent TaskTracker from killing tasks with infinite
613 loops. </description>
614 </property>
615
616 <property>
617 <name>hive.mapred.reduce.tasks.speculative.execution</name>
618 <value>true</value>
619 <description>Whether speculative execution for reducers should be
620 turned on. </description>
621 </property>
622
623 <property>
624 <name>hive.exec.counters.pull.interval</name>
625 <value>1000</value>
626 <description>The interval with which to poll the JobTracker for the
627 counters of the running job. The smaller it is the more load there will
628 be on the jobtracker, the higher it is the less granular the captured
629 data will be.</description>
630 </property>
631
632 <property>
633 <name>hive.enforce.bucketing</name>
634 <value>false</value>
635 <description>Whether bucketing is enforced. If true, while inserting
636 into the table, bucketing is enforced. </description>
637 </property>
638
639 <property>
640 <name>hive.enforce.sorting</name>
641 <value>false</value>
642 <description>Whether sorting is enforced. If true, while inserting
643 into the table, sorting is enforced. </description>
644 </property>
645
646 <property>
647 <name>hive.metastore.ds.connection.url.hook</name>
648 <value></value>
649 <description>Name of the hook to use for retrieving the JDO connection
650 URL. If empty, the value in javax.jdo.option.ConnectionURL is used
651 </description>
652 </property>
653
654 <property>
655 <name>hive.metastore.ds.retry.attempts</name>
656 <value>1</value>
657 <description>The number of times to retry a metastore call if there
658 is a connection error</description>
659 </property>
660
661 <property>
662 <name>hive.metastore.ds.retry.interval</name>
663 <value>1000</value>
664 <description>The number of milliseconds between metastore retry
665 attempts</description>
666 </property>
667
668 <property>
669 <name>hive.metastore.server.min.threads</name>
670 <value>200</value>
671 <description>Minimum number of worker threads in the Thrift server's
672 pool.</description>
673 </property>
674
675 <property>
676 <name>hive.metastore.server.max.threads</name>
677 <value>100000</value>
678 <description>Maximum number of worker threads in the Thrift server's
679 pool.</description>
680 </property>
681
682 <property>
683 <name>hive.metastore.server.tcp.keepalive</name>
684 <value>true</value>
685 <description>Whether to enable TCP keepalive for the metastore server.
686 Keepalive will prevent accumulation of half-open connections.
687 </description>
688 </property>
689
690 <property>
691 <name>hive.optimize.reducededuplication</name>
692 <value>true</value>
693 <description>Remove extra map-reduce jobs if the data is already
694 clustered by the same key which needs to be used again. This should
695 always be set to true. Since it is a new feature, it has been made
696 configurable.</description>
697 </property>
698
699 <property>
700 <name>hive.exec.dynamic.partition</name>
701 <value>false</value>
702 <description>Whether or not to allow dynamic partitions in DML/DDL.
703 </description>
704 </property>
705
706 <property>
707 <name>hive.exec.dynamic.partition.mode</name>
708 <value>strict</value>
709 <description>In strict mode, the user must specify at least one static
710 partition in case the user accidentally overwrites all partitions.
711 </description>
712 </property>
713
714 <property>
715 <name>hive.exec.max.dynamic.partitions</name>
716 <value>1000</value>
717 <description>Maximum number of dynamic partitions allowed to be
718 created in total.</description>
719 </property>
720
721 <property>
722 <name>hive.exec.max.dynamic.partitions.pernode</name>
723 <value>100</value>
724 <description>Maximum number of dynamic partitions allowed to be
725 created in each mapper/reducer node.</description>
726 </property>
727
728 <property>
729 <name>hive.default.partition.name</name>
730 <value>__HIVE_DEFAULT_PARTITION__</value>
731 <description>The default partition name in case the dynamic partition
732 column value is null/empty string or any other values that cannot be
733 escaped. This value must not contain any special character used in
734 HDFS URI (e.g., ':', '%', '/' etc). The user has to be aware that the
735 dynamic partition value should not contain this value to avoid
736 confusions.</description>
737 </property>
738
739 <property>
740 <name>fs.har.impl</name>
741 <value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
742 <description>The implementation for accessing Hadoop Archives. Note
743 that this won't be applicable to Hadoop vers less than 0.20
744 </description>
745 </property>
746
747 <property>
748 <name>hive.archive.enabled</name>
749 <value>false</value>
750 <description>Whether archiving operations are permitted</description>
751 </property>
752
753 <property>
754 <name>hive.archive.har.parentdir.settable</name>
755 <value>false</value>
756 <description>In new Hadoop versions, the parent directory must be set
757 while
758 creating a HAR. Because this functionality is hard to detect with just
759 version
760 numbers, this conf var needs to be set manually.</description>
761 </property>
762
763 <!-- HBase Storage Handler Parameters -->
764
765 <property>
766 <name>hive.hbase.wal.enabled</name>
767 <value>true</value>
768 <description>Whether writes to HBase should be forced to the
769 write-ahead log. Disabling this improves HBase write performance at
770 the risk of lost writes in case of a crash.</description>
771 </property>
772
773</configuration>