blob: 587eede6745b6545783fe4e2d996e5167c0a32a2 [file] [log] [blame]
buyingyic73348c2012-11-02 00:31:31 +00001<?xml version="1.0"?>
2<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3
4<configuration>
5
6 <!-- Hive Configuration can either be stored in this file or in the hadoop
7 configuration files -->
8 <!-- that are implied by Hadoop setup variables. -->
9 <!-- Aside from Hadoop setup variables - this file is provided as a convenience
10 so that Hive -->
11 <!-- users do not have to edit hadoop configuration files (that may be managed
12 as a centralized -->
13 <!-- resource). -->
14
15 <!-- Hive Execution Parameters -->
16 <property>
17 <name>mapred.reduce.tasks</name>
18 <value>-1</value>
19 <description>The default number of reduce tasks per job. Typically set
20 to a prime close to the number of available hosts. Ignored when
21 mapred.job.tracker is "local". Hadoop sets this to 1 by default,
22 whereas hive uses -1 as its default value.
23 By setting this property to -1, Hive will automatically figure out what
24 should be the number of reducers.
25 </description>
buyingyic73348c2012-11-02 00:31:31 +000026 </property>
buyingyi657ce582013-03-11 06:49:18 +000027 <property>
28 <name>hive.hyracks.connectorpolicy</name>
29 <value>PIPELINING</value>
30 </property>
buyingyic73348c2012-11-02 00:31:31 +000031
32 <property>
33 <name>hive.hyracks.parrallelism</name>
34 <value>4</value>
35 </property>
36
37 <property>
38 <name>hive.algebricks.groupby.external</name>
39 <value>true</value>
40 </property>
41
42 <property>
43 <name>hive.algebricks.groupby.external.memory</name>
buyingyi657ce582013-03-11 06:49:18 +000044 <value>33554432</value>
buyingyic73348c2012-11-02 00:31:31 +000045 </property>
46
47 <property>
48 <name>hive.algebricks.sort.memory</name>
buyingyi657ce582013-03-11 06:49:18 +000049 <value>33554432</value>
buyingyic73348c2012-11-02 00:31:31 +000050 </property>
51
52 <property>
53 <name>hive.exec.reducers.bytes.per.reducer</name>
54 <value>1000000000</value>
55 <description>Size per reducer. The default is 1G, i.e. if the input size
56 is 10G, it will use 10 reducers.</description>
57 </property>
58
59 <property>
60 <name>hive.exec.reducers.max</name>
61 <value>999</value>
62 <description>The maximum number of reducers that will be used. If the value
63 specified in the configuration parameter mapred.reduce.tasks is
64 negative, hive will use this one as the max number of reducers when
65 automatically determining the number of reducers.</description>
66 </property>
67
68 <property>
69 <name>hive.exec.scratchdir</name>
70 <value>/hive-${user.name}</value>
71 <description>Scratch space for Hive jobs</description>
72 </property>
73
74 <property>
75 <name>hive.test.mode</name>
76 <value>false</value>
77 <description>whether hive is running in test mode. If yes, it turns on
78 sampling and prefixes the output tablename</description>
79 </property>
80
81 <property>
82 <name>hive.test.mode.prefix</name>
83 <value>test_</value>
84 <description>if hive is running in test mode, prefixes the output
85 table by this string</description>
86 </property>
87
88 <!-- If the input table is not bucketed, the denominator of the tablesample
89 is determined by the parameter below -->
90 <!-- For example, the following query: -->
91 <!-- INSERT OVERWRITE TABLE dest -->
92 <!-- SELECT col1 from src -->
93 <!-- would be converted to -->
94 <!-- INSERT OVERWRITE TABLE test_dest -->
95 <!-- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1)) -->
96 <property>
97 <name>hive.test.mode.samplefreq</name>
98 <value>32</value>
99 <description>if hive is running in test mode and table is not
100 bucketed, sampling frequency</description>
101 </property>
102
103 <property>
104 <name>hive.test.mode.nosamplelist</name>
105 <value></value>
106 <description>if hive is running in test mode, don't sample the above
107 comma-separated list of tables</description>
108 </property>
109
110 <property>
111 <name>hive.metastore.local</name>
112 <value>true</value>
113 <description>controls whether to connect to remote metastore server or
114 open a new metastore server in Hive Client JVM</description>
115 </property>
116
117 <property>
118 <name>javax.jdo.option.ConnectionURL</name>
119 <value>jdbc:derby:;databaseName=metastore_db;create=true</value>
120 <description>JDBC connect string for a JDBC metastore</description>
121 </property>
122
123 <property>
124 <name>javax.jdo.option.ConnectionDriverName</name>
125 <value>org.apache.derby.jdbc.EmbeddedDriver</value>
126 <description>Driver class name for a JDBC metastore</description>
127 </property>
128
129 <property>
130 <name>javax.jdo.PersistenceManagerFactoryClass</name>
131 <value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
132 <description>class implementing the jdo persistence</description>
133 </property>
134
135 <property>
136 <name>datanucleus.connectionPoolingType</name>
137 <value>DBCP</value>
138 <description>Uses a DBCP connection pool for JDBC metastore
139 </description>
140 </property>
141
142 <property>
143 <name>javax.jdo.option.DetachAllOnCommit</name>
144 <value>true</value>
145 <description>detaches all objects from session so that they can be
146 used after transaction is committed</description>
147 </property>
148
149 <property>
150 <name>javax.jdo.option.NonTransactionalRead</name>
151 <value>true</value>
152 <description>reads outside of transactions</description>
153 </property>
154
155 <property>
156 <name>javax.jdo.option.ConnectionUserName</name>
157 <value>APP</value>
158 <description>username to use against metastore database</description>
159 </property>
160
161 <property>
162 <name>javax.jdo.option.ConnectionPassword</name>
163 <value>mine</value>
164 <description>password to use against metastore database</description>
165 </property>
166
167 <property>
168 <name>datanucleus.validateTables</name>
169 <value>false</value>
170 <description>validates existing schema against code. turn this on if
171 you want to verify existing schema </description>
172 </property>
173
174 <property>
175 <name>datanucleus.validateColumns</name>
176 <value>false</value>
177 <description>validates existing schema against code. turn this on if
178 you want to verify existing schema </description>
179 </property>
180
181 <property>
182 <name>datanucleus.validateConstraints</name>
183 <value>false</value>
184 <description>validates existing schema against code. turn this on if
185 you want to verify existing schema </description>
186 </property>
187
188 <property>
189 <name>datanucleus.storeManagerType</name>
190 <value>rdbms</value>
191 <description>metadata store type</description>
192 </property>
193
194 <property>
195 <name>datanucleus.autoCreateSchema</name>
196 <value>true</value>
197 <description>creates necessary schema on a startup if one doesn't
198 exist. set this to false, after creating it once</description>
199 </property>
200
201 <property>
202 <name>datanucleus.autoStartMechanismMode</name>
203 <value>checked</value>
204 <description>throw exception if metadata tables are incorrect
205 </description>
206 </property>
207
208 <property>
209 <name>datanucleus.transactionIsolation</name>
210 <value>read-committed</value>
211 <description>Default transaction isolation level for identity
212 generation. </description>
213 </property>
214
215 <property>
216 <name>datanucleus.cache.level2</name>
217 <value>false</value>
218 <description>Use a level 2 cache. Turn this off if metadata is changed
219 independently of hive metastore server</description>
220 </property>
221
222 <property>
223 <name>datanucleus.cache.level2.type</name>
224 <value>SOFT</value>
225 <description>SOFT=soft reference based cache, WEAK=weak reference
226 based cache.</description>
227 </property>
228
229 <property>
230 <name>datanucleus.identifierFactory</name>
231 <value>datanucleus</value>
232 <description>Name of the identifier factory to use when generating
233 table/column names etc. 'datanucleus' is used for backward
234 compatibility</description>
235 </property>
236
237 <property>
238 <name>hive.metastore.warehouse.dir</name>
239 <value>/user/hivesterix</value>
240 <description>location of default database for the warehouse
241 </description>
242 </property>
243
244 <property>
245 <name>hive.metastore.connect.retries</name>
246 <value>5</value>
247 <description>Number of retries while opening a connection to metastore
248 </description>
249 </property>
250
251 <property>
252 <name>hive.metastore.rawstore.impl</name>
253 <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
254 <description>Name of the class that implements
255 org.apache.hadoop.hive.metastore.rawstore interface. This class is
256 used for storage and retrieval of raw metadata objects such as table,
257 database</description>
258 </property>
259
260 <property>
261 <name>hive.default.fileformat</name>
262 <value>TextFile</value>
263 <description>Default file format for CREATE TABLE statement. Options
264 are TextFile and SequenceFile. Users can explicitly say CREATE TABLE
265 ... STORED AS &lt;TEXTFILE|SEQUENCEFILE&gt; to override</description>
266 </property>
267
268 <property>
269 <name>hive.fileformat.check</name>
270 <value>true</value>
271 <description>Whether to check file format or not when loading data
272 files</description>
273 </property>
274
275 <property>
276 <name>hive.map.aggr</name>
277 <value>true</value>
278 <description>Whether to use map-side aggregation in Hive Group By
279 queries</description>
280 </property>
281
282 <property>
283 <name>hive.groupby.skewindata</name>
284 <value>false</value>
285 <description>Whether there is skew in data to optimize group by
286 queries</description>
287 </property>
288
289 <property>
290 <name>hive.groupby.mapaggr.checkinterval</name>
291 <value>100000</value>
292 <description>Number of rows after which size of the grouping
293 keys/aggregation classes is performed</description>
294 </property>
295
296 <property>
297 <name>hive.mapred.local.mem</name>
298 <value>0</value>
299 <description>For local mode, memory of the mappers/reducers
300 </description>
301 </property>
302
303 <property>
304 <name>hive.map.aggr.hash.percentmemory</name>
305 <value>0.5</value>
306 <description>Portion of total memory to be used by map-side group
307 aggregation hash table</description>
308 </property>
309
310 <property>
311 <name>hive.map.aggr.hash.min.reduction</name>
312 <value>0.5</value>
313 <description>Hash aggregation will be turned off if the ratio between
314 hash
315 table size and input rows is bigger than this number. Set to 1 to make
316 sure
317 hash aggregation is never turned off.</description>
318 </property>
319
320 <property>
321 <name>hive.optimize.cp</name>
322 <value>true</value>
323 <description>Whether to enable column pruner</description>
324 </property>
325
326 <property>
327 <name>hive.optimize.ppd</name>
328 <value>true</value>
329 <description>Whether to enable predicate pushdown</description>
330 </property>
331
332 <property>
333 <name>hive.optimize.pruner</name>
334 <value>true</value>
335 <description>Whether to enable the new partition pruner which depends
336 on predicate pushdown. If this is disabled,
337 the old partition pruner which is based on AST will be enabled.
338 </description>
339 </property>
340
341 <property>
342 <name>hive.optimize.groupby</name>
343 <value>true</value>
344 <description>Whether to enable the bucketed group by from bucketed
345 partitions/tables.</description>
346 </property>
347
348 <property>
349 <name>hive.join.emit.interval</name>
350 <value>1000</value>
351 <description>How many rows in the right-most join operand Hive should
352 buffer before emitting the join result. </description>
353 </property>
354
355 <property>
356 <name>hive.join.cache.size</name>
357 <value>25000</value>
358 <description>How many rows in the joining tables (except the streaming
359 table) should be cached in memory. </description>
360 </property>
361
362 <property>
363 <name>hive.mapjoin.bucket.cache.size</name>
364 <value>100</value>
365 <description>How many values in each keys in the map-joined table
366 should be cached in memory. </description>
367 </property>
368
369 <property>
370 <name>hive.mapjoin.maxsize</name>
371 <value>100000</value>
372 <description>Maximum # of rows of the small table that can be handled
373 by map-side join. If the size is reached and hive.task.progress is
374 set, a fatal error counter is set and the job will be killed.
375 </description>
376 </property>
377
378 <property>
379 <name>hive.mapjoin.cache.numrows</name>
380 <value>25000</value>
381 <description>How many rows should be cached by jdbm for map join.
382 </description>
383 </property>
384
385 <property>
386 <name>hive.optimize.skewjoin</name>
387 <value>false</value>
388 <description>Whether to enable skew join optimization. </description>
389 </property>
390
391 <property>
392 <name>hive.skewjoin.key</name>
393 <value>100000</value>
394 <description>Determine if we get a skew key in join. If we see more
395 than the specified number of rows with the same key in join operator,
396 we treat the key as a skew join key. </description>
397 </property>
398
399 <property>
400 <name>hive.skewjoin.mapjoin.map.tasks</name>
401 <value>10000</value>
402 <description> Determine the number of map tasks used in the follow up
403 map join job
404 for a skew join. It should be used together with
405 hive.skewjoin.mapjoin.min.split
406 to perform a fine grained control.</description>
407 </property>
408
409 <property>
410 <name>hive.skewjoin.mapjoin.min.split</name>
411 <value>33554432</value>
412 <description> Determine the maximum number of map tasks used in the
413 follow up map join job
414 for a skew join by specifying the minimum split size. It should be used
415 together with
416 hive.skewjoin.mapjoin.map.tasks to perform a fine grained control.</description>
417 </property>
418
419 <property>
420 <name>hive.mapred.mode</name>
421 <value>nonstrict</value>
422 <description>The mode in which the hive operations are being
423 performed. In strict mode, some risky queries are not allowed to run
424 </description>
425 </property>
426
427 <property>
428 <name>hive.exec.script.maxerrsize</name>
429 <value>100000</value>
430 <description>Maximum number of bytes a script is allowed to emit to
431 standard error (per map-reduce task). This prevents runaway scripts
432 from filling log partitions to capacity </description>
433 </property>
434
435 <property>
436 <name>hive.exec.script.allow.partial.consumption</name>
437 <value>false</value>
438 <description> When enabled, this option allows a user script to exit
439 successfully without consuming all the data from the standard input.
440 </description>
441 </property>
442
443 <property>
444 <name>hive.script.operator.id.env.var</name>
445 <value>HIVE_SCRIPT_OPERATOR_ID</value>
446 <description> Name of the environment variable that holds the unique
447 script operator ID in the user's transform function (the custom
448 mapper/reducer that the user has specified in the query)
449 </description>
450 </property>
451
452 <property>
453 <name>hive.exec.compress.output</name>
454 <value>false</value>
455 <description> This controls whether the final outputs of a query (to a
456 local/hdfs file or a hive table) are compressed. The compression codec
457 and other options are determined from hadoop config variables
458 mapred.output.compress* </description>
459 </property>
460
461 <property>
462 <name>hive.exec.compress.intermediate</name>
463 <value>false</value>
464 <description> This controls whether intermediate files produced by
465 hive between multiple map-reduce jobs are compressed. The compression
466 codec and other options are determined from hadoop config variables
467 mapred.output.compress* </description>
468 </property>
469
470 <property>
471 <name>hive.exec.parallel</name>
472 <value>false</value>
473 <description>Whether to execute jobs in parallel</description>
474 </property>
475
476 <property>
477 <name>hive.exec.parallel.thread.number</name>
478 <value>8</value>
479 <description>How many jobs at most can be executed in parallel
480 </description>
481 </property>
482
483 <property>
484 <name>hive.hwi.war.file</name>
485 <value>lib\hive-hwi-0.7.0.war</value>
486 <description>This sets the path to the HWI war file, relative to
487 ${HIVE_HOME}. </description>
488 </property>
489
490 <property>
491 <name>hive.hwi.listen.host</name>
492 <value>0.0.0.0</value>
493 <description>This is the host address the Hive Web Interface will
494 listen on</description>
495 </property>
496
497 <property>
498 <name>hive.hwi.listen.port</name>
499 <value>9999</value>
500 <description>This is the port the Hive Web Interface will listen on
501 </description>
502 </property>
503
504 <property>
505 <name>hive.exec.pre.hooks</name>
506 <value></value>
507 <description>Pre Execute Hook for Tests</description>
508 </property>
509
510 <property>
511 <name>hive.merge.mapfiles</name>
512 <value>true</value>
513 <description>Merge small files at the end of a map-only job
514 </description>
515 </property>
516
517 <property>
518 <name>hive.merge.mapredfiles</name>
519 <value>false</value>
520 <description>Merge small files at the end of a map-reduce job
521 </description>
522 </property>
523
524 <property>
525 <name>hive.heartbeat.interval</name>
526 <value>1000</value>
527 <description>Send a heartbeat after this interval - used by mapjoin
528 and filter operators</description>
529 </property>
530
531 <property>
532 <name>hive.merge.size.per.task</name>
533 <value>256000000</value>
534 <description>Size of merged files at the end of the job</description>
535 </property>
536
537 <property>
538 <name>hive.merge.size.smallfiles.avgsize</name>
539 <value>16000000</value>
540 <description>When the average output file size of a job is less than
541 this number, Hive will start an additional map-reduce job to merge
542 the output files into bigger files. This is only done for map-only
543 jobs if hive.merge.mapfiles is true, and for map-reduce jobs if
544 hive.merge.mapredfiles is true.</description>
545 </property>
546
547 <property>
548 <name>hive.script.auto.progress</name>
549 <value>false</value>
550 <description>Whether Hive Transform/Map/Reduce Clause should
551 automatically send progress information to TaskTracker to avoid the
552 task getting killed because of inactivity. Hive sends progress
553 information when the script is outputting to stderr. This option
554 removes the need of periodically producing stderr messages, but users
555 should be cautious because this may prevent infinite loops in the
556 scripts from being killed by TaskTracker. </description>
557 </property>
558
559 <property>
560 <name>hive.script.serde</name>
561 <value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value>
562 <description>The default serde for transmitting input data to and
563 reading output data from the user scripts. </description>
564 </property>
565
566 <property>
567 <name>hive.script.recordreader</name>
568 <value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value>
569 <description>The default record reader for reading data from the user
570 scripts. </description>
571 </property>
572
573 <property>
574 <name>hive.script.recordwriter</name>
575 <value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value>
576 <description>The default record writer for writing data to the user
577 scripts. </description>
578 </property>
579
580 <property>
581 <name>hive.input.format</name>
582 <value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
583 <description>The default input format, if it is not specified, the
584 system assigns it. It is set to HiveInputFormat for hadoop versions
585 17, 18 and 19, whereas it is set to CombinedHiveInputFormat for
586 hadoop 20. The user can always overwrite it - if there is a bug in
587 CombinedHiveInputFormat, it can always be manually set to
588 HiveInputFormat. </description>
589 </property>
590
591 <property>
592 <name>hive.udtf.auto.progress</name>
593 <value>false</value>
594 <description>Whether Hive should automatically send progress
595 information to TaskTracker when using UDTF's to prevent the task
596 getting killed because of inactivity. Users should be cautious
597 because this may prevent TaskTracker from killing tasks with infinite
598 loops. </description>
599 </property>
600
601 <property>
602 <name>hive.mapred.reduce.tasks.speculative.execution</name>
603 <value>true</value>
604 <description>Whether speculative execution for reducers should be
605 turned on. </description>
606 </property>
607
608 <property>
609 <name>hive.exec.counters.pull.interval</name>
610 <value>1000</value>
611 <description>The interval with which to poll the JobTracker for the
612 counters of the running job. The smaller it is the more load there will
613 be on the jobtracker, the higher it is the less granular the captured data
614 will be.</description>
615 </property>
616
617 <property>
618 <name>hive.enforce.bucketing</name>
619 <value>false</value>
620 <description>Whether bucketing is enforced. If true, while inserting
621 into the table, bucketing is enforced. </description>
622 </property>
623
624 <property>
625 <name>hive.enforce.sorting</name>
626 <value>false</value>
627 <description>Whether sorting is enforced. If true, while inserting
628 into the table, sorting is enforced. </description>
629 </property>
630
631 <property>
632 <name>hive.metastore.ds.connection.url.hook</name>
633 <value></value>
634 <description>Name of the hook to use for retrieving the JDO connection
635 URL. If empty, the value in javax.jdo.option.ConnectionURL is used
636 </description>
637 </property>
638
639 <property>
640 <name>hive.metastore.ds.retry.attempts</name>
641 <value>1</value>
642 <description>The number of times to retry a metastore call if there
643 were a connection error</description>
644 </property>
645
646 <property>
647 <name>hive.metastore.ds.retry.interval</name>
648 <value>1000</value>
649 <description>The number of milliseconds between metastore retry
650 attempts</description>
651 </property>
652
653 <property>
654 <name>hive.metastore.server.min.threads</name>
655 <value>200</value>
656 <description>Minimum number of worker threads in the Thrift server's
657 pool.</description>
658 </property>
659
660 <property>
661 <name>hive.metastore.server.max.threads</name>
662 <value>100000</value>
663 <description>Maximum number of worker threads in the Thrift server's
664 pool.</description>
665 </property>
666
667 <property>
668 <name>hive.metastore.server.tcp.keepalive</name>
669 <value>true</value>
670 <description>Whether to enable TCP keepalive for the metastore server.
671 Keepalive will prevent accumulation of half-open connections.
672 </description>
673 </property>
674
675 <property>
676 <name>hive.optimize.reducededuplication</name>
677 <value>true</value>
678 <description>Remove extra map-reduce jobs if the data is already
679 clustered by the same key which needs to be used again. This should
680 always be set to true. Since it is a new feature, it has been made
681 configurable.</description>
682 </property>
683
684 <property>
685 <name>hive.exec.dynamic.partition</name>
686 <value>false</value>
687 <description>Whether or not to allow dynamic partitions in DML/DDL.
688 </description>
689 </property>
690
691 <property>
692 <name>hive.exec.dynamic.partition.mode</name>
693 <value>strict</value>
694 <description>In strict mode, the user must specify at least one static
695 partition in case the user accidentally overwrites all partitions.
696 </description>
697 </property>
698
699 <property>
700 <name>hive.exec.max.dynamic.partitions</name>
701 <value>1000</value>
702 <description>Maximum number of dynamic partitions allowed to be
703 created in total.</description>
704 </property>
705
706 <property>
707 <name>hive.exec.max.dynamic.partitions.pernode</name>
708 <value>100</value>
709 <description>Maximum number of dynamic partitions allowed to be
710 created in each mapper/reducer node.</description>
711 </property>
712
713 <property>
714 <name>hive.default.partition.name</name>
715 <value>__HIVE_DEFAULT_PARTITION__</value>
716 <description>The default partition name in case the dynamic partition
717 column value is null/empty string or any other values that cannot be
718 escaped. This value must not contain any special character used in
719 HDFS URI (e.g., ':', '%', '/' etc). The user has to be aware that the
720 dynamic partition value should not contain this value to avoid
721 confusions.</description>
722 </property>
723
724 <property>
725 <name>fs.har.impl</name>
726 <value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
727 <description>The implementation for accessing Hadoop Archives. Note
728 that this won't be applicable to Hadoop versions less than 0.20
729 </description>
730 </property>
731
732 <property>
733 <name>hive.archive.enabled</name>
734 <value>false</value>
735 <description>Whether archiving operations are permitted</description>
736 </property>
737
738 <property>
739 <name>hive.archive.har.parentdir.settable</name>
740 <value>false</value>
741 <description>In new Hadoop versions, the parent directory must be set
742 while
743 creating a HAR. Because this functionality is hard to detect with just
744 version
745 numbers, this conf var needs to be set manually.</description>
746 </property>
747
748 <!-- HBase Storage Handler Parameters -->
749
750 <property>
751 <name>hive.hbase.wal.enabled</name>
752 <value>true</value>
753 <description>Whether writes to HBase should be forced to the
754 write-ahead log. Disabling this improves HBase write performance at
755 the risk of lost writes in case of a crash.</description>
756 </property>
757
758</configuration>