<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>

  <!-- Hive configuration can either be stored in this file or in the Hadoop -->
  <!-- configuration files implied by the Hadoop setup variables. -->
  <!-- Aside from the Hadoop setup variables, this file is provided as a -->
  <!-- convenience so that Hive users do not have to edit Hadoop configuration -->
  <!-- files (which may be managed as a centralized resource). -->

  <!-- Hive Execution Parameters -->
  <property>
    <name>mapred.reduce.tasks</name>
    <value>-1</value>
    <description>The default number of reduce tasks per job, typically set
      to a prime close to the number of available hosts. Ignored when
      mapred.job.tracker is "local". Hadoop sets this to 1 by default,
      whereas Hive uses -1 as its default value. By setting this property
      to -1, Hive will automatically figure out the number of reducers.
    </description>
  </property>

  <property>
    <name>hive.hyracks.host</name>
    <value>127.0.0.1</value>
  </property>

  <property>
    <name>hive.hyracks.port</name>
    <value>13099</value>
  </property>

  <property>
    <name>hive.hyracks.app</name>
    <value>hivesterix</value>
  </property>

  <property>
    <name>hive.hyracks.parrallelism</name>
    <value>2</value>
  </property>

  <property>
    <name>hive.algebricks.groupby.external</name>
    <value>true</value>
  </property>

  <property>
    <name>hive.algebricks.groupby.external.memory</name>
    <value>3072</value>
  </property>

  <property>
    <name>hive.algebricks.sort.memory</name>
    <value>3072</value>
  </property>

  <property>
    <name>hive.algebricks.framesize</name>
    <value>768</value>
  </property>

  <property>
    <name>hive.exec.reducers.bytes.per.reducer</name>
    <value>1000000000</value>
    <description>Size per reducer. The default is 1G, i.e. if the input
      size is 10G, Hive will use 10 reducers.</description>
  </property>

  <property>
    <name>hive.exec.reducers.max</name>
    <value>999</value>
    <description>The maximum number of reducers that will be used. If the
      value specified in the configuration parameter mapred.reduce.tasks
      is negative, Hive will use this as the cap when automatically
      determining the number of reducers.</description>
  </property>
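
  <!-- A hedged worked example of the two reducer settings above (assuming
       the usual Hive heuristic): with mapred.reduce.tasks = -1, the
       reducer count is roughly
         min(ceil(input size / hive.exec.reducers.bytes.per.reducer),
             hive.exec.reducers.max)
       so a 25G input with the values above gets min(ceil(25G / 1G), 999)
       = 25 reducers. -->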

  <property>
    <name>hive.exec.scratchdir</name>
    <value>/tmp/hive-${user.name}</value>
    <description>Scratch space for Hive jobs</description>
  </property>

  <property>
    <name>hive.test.mode</name>
    <value>false</value>
    <description>Whether Hive is running in test mode. If yes, it turns on
      sampling and prefixes the output table name.</description>
  </property>

  <property>
    <name>hive.test.mode.prefix</name>
    <value>test_</value>
    <description>If Hive is running in test mode, prefixes the output
      table names with this string.</description>
  </property>

  <!-- If the input table is not bucketed, the denominator of the tablesample -->
  <!-- is determined by the parameter below. -->
  <!-- For example, the following query: -->
  <!--   INSERT OVERWRITE TABLE dest -->
  <!--   SELECT col1 FROM src -->
  <!-- would be converted to -->
  <!--   INSERT OVERWRITE TABLE test_dest -->
  <!--   SELECT col1 FROM src TABLESAMPLE (BUCKET 1 OUT OF 32 ON rand(1)) -->
  <property>
    <name>hive.test.mode.samplefreq</name>
    <value>32</value>
    <description>If Hive is running in test mode and the table is not
      bucketed, the sampling frequency.</description>
  </property>

  <property>
    <name>hive.test.mode.nosamplelist</name>
    <value></value>
    <description>If Hive is running in test mode, don't sample the above
      comma-separated list of tables.</description>
  </property>

  <property>
    <name>hive.metastore.local</name>
    <value>true</value>
    <description>Controls whether to connect to a remote metastore server
      or open a new metastore server in the Hive client JVM.</description>
  </property>

  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:derby:;databaseName=metastore_db;create=true</value>
    <description>JDBC connect string for a JDBC metastore</description>
  </property>
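
  <!-- Illustrative only, not part of this setup: a remote MySQL-backed
       metastore would instead use a URL along the lines of
       jdbc:mysql://metastore-host:3306/metastore?createDatabaseIfNotExist=true
       (the host and database names here are hypothetical) and set
       javax.jdo.option.ConnectionDriverName to com.mysql.jdbc.Driver. -->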

  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>org.apache.derby.jdbc.EmbeddedDriver</value>
    <description>Driver class name for a JDBC metastore</description>
  </property>

  <property>
    <name>javax.jdo.PersistenceManagerFactoryClass</name>
    <value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
    <description>Class implementing the JDO persistence layer</description>
  </property>

  <property>
    <name>datanucleus.connectionPoolingType</name>
    <value>DBCP</value>
    <description>Uses a DBCP connection pool for the JDBC metastore
    </description>
  </property>

  <property>
    <name>javax.jdo.option.DetachAllOnCommit</name>
    <value>true</value>
    <description>Detaches all objects from the session so that they can be
      used after the transaction is committed.</description>
  </property>

  <property>
    <name>javax.jdo.option.NonTransactionalRead</name>
    <value>true</value>
    <description>Allows reads outside of transactions.</description>
  </property>

  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>APP</value>
    <description>Username to use against the metastore database</description>
  </property>

  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>mine</value>
    <description>Password to use against the metastore database</description>
  </property>

  <property>
    <name>datanucleus.validateTables</name>
    <value>false</value>
    <description>Validates existing schema against code. Turn this on if
      you want to verify the existing schema.</description>
  </property>

  <property>
    <name>datanucleus.validateColumns</name>
    <value>false</value>
    <description>Validates existing schema against code. Turn this on if
      you want to verify the existing schema.</description>
  </property>

  <property>
    <name>datanucleus.validateConstraints</name>
    <value>false</value>
    <description>Validates existing schema against code. Turn this on if
      you want to verify the existing schema.</description>
  </property>

  <property>
    <name>datanucleus.storeManagerType</name>
    <value>rdbms</value>
    <description>Metadata store type</description>
  </property>

  <property>
    <name>datanucleus.autoCreateSchema</name>
    <value>true</value>
    <description>Creates the necessary schema on startup if one doesn't
      exist. Set this to false after creating it once.</description>
  </property>

  <property>
    <name>datanucleus.autoStartMechanismMode</name>
    <value>checked</value>
    <description>Throw an exception if the metadata tables are incorrect.
    </description>
  </property>

  <property>
    <name>datanucleus.transactionIsolation</name>
    <value>read-committed</value>
    <description>Default transaction isolation level for identity
      generation.</description>
  </property>

  <property>
    <name>datanucleus.cache.level2</name>
    <value>false</value>
    <description>Use a level 2 cache. Turn this off if metadata is changed
      independently of the Hive metastore server.</description>
  </property>

  <property>
    <name>datanucleus.cache.level2.type</name>
    <value>SOFT</value>
    <description>SOFT=soft reference based cache, WEAK=weak reference
      based cache.</description>
  </property>

  <property>
    <name>datanucleus.identifierFactory</name>
    <value>datanucleus</value>
    <description>Name of the identifier factory to use when generating
      table/column names etc. 'datanucleus' is used for backward
      compatibility.</description>
  </property>

  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/tmp/hivesterix</value>
    <description>Location of the default database for the warehouse
    </description>
  </property>

  <property>
    <name>hive.metastore.connect.retries</name>
    <value>5</value>
    <description>Number of retries while opening a connection to the
      metastore</description>
  </property>

  <property>
    <name>hive.metastore.rawstore.impl</name>
    <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
    <description>Name of the class that implements the
      org.apache.hadoop.hive.metastore.rawstore interface. This class is
      used to store and retrieve raw metadata objects such as tables and
      databases.</description>
  </property>

  <property>
    <name>hive.default.fileformat</name>
    <value>TextFile</value>
    <description>Default file format for CREATE TABLE statements. Options
      are TextFile and SequenceFile. Users can explicitly say CREATE TABLE
      ... STORED AS &lt;TEXTFILE|SEQUENCEFILE&gt; to override.</description>
  </property>
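
  <!-- For example (table and column names are illustrative):
         CREATE TABLE page_views (url STRING, hits INT) STORED AS SEQUENCEFILE;
       overrides the TextFile default for that one table. -->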

  <property>
    <name>hive.fileformat.check</name>
    <value>true</value>
    <description>Whether to check the file format or not when loading data
      files</description>
  </property>

  <property>
    <name>hive.map.aggr</name>
    <value>true</value>
    <description>Whether to use map-side aggregation in Hive GROUP BY
      queries</description>
  </property>

  <property>
    <name>hive.groupby.skewindata</name>
    <value>false</value>
    <description>Whether there is skew in the data, to optimize GROUP BY
      queries</description>
  </property>

  <property>
    <name>hive.groupby.mapaggr.checkinterval</name>
    <value>100000</value>
    <description>Number of rows after which the size check of the grouping
      keys/aggregation classes is performed</description>
  </property>

  <property>
    <name>hive.mapred.local.mem</name>
    <value>0</value>
    <description>For local mode, the memory of the mappers/reducers
    </description>
  </property>

  <property>
    <name>hive.map.aggr.hash.percentmemory</name>
    <value>0.5</value>
    <description>Portion of total memory to be used by the map-side group
      aggregation hash table</description>
  </property>

  <property>
    <name>hive.map.aggr.hash.min.reduction</name>
    <value>0.5</value>
    <description>Hash aggregation will be turned off if the ratio between
      hash table size and input rows is bigger than this number. Set to 1
      to make sure hash aggregation is never turned off.</description>
  </property>
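
  <!-- A hedged illustration of how the two hash-aggregation knobs above
       interact: after the first hive.groupby.mapaggr.checkinterval (100000)
       input rows, if the hash table holds more than 100000 * 0.5 = 50000
       entries, map-side hash aggregation is abandoned for that task. -->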

  <property>
    <name>hive.optimize.cp</name>
    <value>true</value>
    <description>Whether to enable the column pruner</description>
  </property>

  <property>
    <name>hive.optimize.ppd</name>
    <value>true</value>
    <description>Whether to enable predicate pushdown</description>
  </property>

  <property>
    <name>hive.optimize.pruner</name>
    <value>true</value>
    <description>Whether to enable the new partition pruner, which depends
      on predicate pushdown. If this is disabled, the old partition pruner,
      which is based on the AST, will be enabled.</description>
  </property>

  <property>
    <name>hive.optimize.groupby</name>
    <value>true</value>
    <description>Whether to enable the bucketed GROUP BY from bucketed
      partitions/tables.</description>
  </property>

  <property>
    <name>hive.join.emit.interval</name>
    <value>1000</value>
    <description>How many rows in the right-most join operand Hive should
      buffer before emitting the join result.</description>
  </property>

  <property>
    <name>hive.join.cache.size</name>
    <value>25000</value>
    <description>How many rows in the joining tables (except the streaming
      table) should be cached in memory.</description>
  </property>

  <property>
    <name>hive.mapjoin.bucket.cache.size</name>
    <value>100</value>
    <description>How many values for each key in the map-joined table
      should be cached in memory.</description>
  </property>

  <property>
    <name>hive.mapjoin.maxsize</name>
    <value>100000</value>
    <description>Maximum number of rows of the small table that can be
      handled by map-side join. If this size is reached and
      hive.task.progress is set, a fatal error counter is set and the job
      will be killed.</description>
  </property>

  <property>
    <name>hive.mapjoin.cache.numrows</name>
    <value>25000</value>
    <description>How many rows should be cached by JDBM for map join.
    </description>
  </property>

  <property>
    <name>hive.optimize.skewjoin</name>
    <value>false</value>
    <description>Whether to enable the skew join optimization.</description>
  </property>

  <property>
    <name>hive.skewjoin.key</name>
    <value>100000</value>
    <description>Determines whether we have a skew key in a join. If we
      see more than the specified number of rows with the same key in the
      join operator, we treat the key as a skew join key.</description>
  </property>

  <property>
    <name>hive.skewjoin.mapjoin.map.tasks</name>
    <value>10000</value>
    <description>Determines the number of map tasks used in the follow-up
      map join job for a skew join. It should be used together with
      hive.skewjoin.mapjoin.min.split to perform fine-grained control.
    </description>
  </property>

  <property>
    <name>hive.skewjoin.mapjoin.min.split</name>
    <value>33554432</value>
    <description>Determines the maximum number of map tasks used in the
      follow-up map join job for a skew join, by specifying the minimum
      split size. It should be used together with
      hive.skewjoin.mapjoin.map.tasks to perform fine-grained control.
    </description>
  </property>

  <property>
    <name>hive.mapred.mode</name>
    <value>nonstrict</value>
    <description>The mode in which Hive operations are performed. In
      strict mode, some risky queries are not allowed to run.</description>
  </property>
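
  <!-- For reference: in strict mode Hive rejects, among others, queries
       that scan a partitioned table without a partition filter, ORDER BY
       without a LIMIT, and unconstrained Cartesian-product joins. -->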

  <property>
    <name>hive.exec.script.maxerrsize</name>
    <value>100000</value>
    <description>Maximum number of bytes a script is allowed to emit to
      standard error (per map-reduce task). This prevents runaway scripts
      from filling log partitions to capacity.</description>
  </property>

  <property>
    <name>hive.exec.script.allow.partial.consumption</name>
    <value>false</value>
    <description>When enabled, this option allows a user script to exit
      successfully without consuming all the data from standard input.
    </description>
  </property>

  <property>
    <name>hive.script.operator.id.env.var</name>
    <value>HIVE_SCRIPT_OPERATOR_ID</value>
    <description>Name of the environment variable that holds the unique
      script operator ID in the user's transform function (the custom
      mapper/reducer that the user has specified in the query)
    </description>
  </property>

  <property>
    <name>hive.exec.compress.output</name>
    <value>false</value>
    <description>This controls whether the final outputs of a query (to a
      local/HDFS file or a Hive table) are compressed. The compression
      codec and other options are determined from the Hadoop config
      variables mapred.output.compress*.</description>
  </property>

  <property>
    <name>hive.exec.compress.intermediate</name>
    <value>false</value>
    <description>This controls whether intermediate files produced by Hive
      between multiple map-reduce jobs are compressed. The compression
      codec and other options are determined from the Hadoop config
      variables mapred.output.compress*.</description>
  </property>

  <property>
    <name>hive.exec.parallel</name>
    <value>false</value>
    <description>Whether to execute jobs in parallel</description>
  </property>

  <property>
    <name>hive.exec.parallel.thread.number</name>
    <value>8</value>
    <description>How many jobs at most can be executed in parallel
    </description>
  </property>

  <property>
    <name>hive.hwi.war.file</name>
    <value>lib/hive-hwi-0.7.0.war</value>
    <description>This sets the path to the HWI war file, relative to
      ${HIVE_HOME}.</description>
  </property>

  <property>
    <name>hive.hwi.listen.host</name>
    <value>0.0.0.0</value>
    <description>This is the host address the Hive Web Interface will
      listen on</description>
  </property>

  <property>
    <name>hive.hwi.listen.port</name>
    <value>9999</value>
    <description>This is the port the Hive Web Interface will listen on
    </description>
  </property>

  <property>
    <name>hive.exec.pre.hooks</name>
    <value></value>
    <description>Pre-execute hook for tests</description>
  </property>

  <property>
    <name>hive.merge.mapfiles</name>
    <value>true</value>
    <description>Merge small files at the end of a map-only job
    </description>
  </property>

  <property>
    <name>hive.merge.mapredfiles</name>
    <value>false</value>
    <description>Merge small files at the end of a map-reduce job
    </description>
  </property>

  <property>
    <name>hive.heartbeat.interval</name>
    <value>1000</value>
    <description>Send a heartbeat after this interval; used by the mapjoin
      and filter operators</description>
  </property>

  <property>
    <name>hive.merge.size.per.task</name>
    <value>256000000</value>
    <description>Size of merged files at the end of the job</description>
  </property>

  <property>
    <name>hive.merge.size.smallfiles.avgsize</name>
    <value>16000000</value>
    <description>When the average output file size of a job is less than
      this number, Hive will start an additional map-reduce job to merge
      the output files into bigger files. This is only done for map-only
      jobs if hive.merge.mapfiles is true, and for map-reduce jobs if
      hive.merge.mapredfiles is true.</description>
  </property>
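
  <!-- Illustration of the merge settings above: if a map-only job writes
       output files averaging 10MB (below the 16000000-byte threshold) and
       hive.merge.mapfiles is true, Hive launches an extra merge job whose
       target output size is hive.merge.size.per.task (256000000) bytes. -->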

  <property>
    <name>hive.script.auto.progress</name>
    <value>false</value>
    <description>Whether the Hive Transform/Map/Reduce clause should
      automatically send progress information to the TaskTracker to avoid
      the task getting killed because of inactivity. Hive sends progress
      information when the script is outputting to stderr. This option
      removes the need to periodically produce stderr messages, but users
      should be cautious because it may prevent the TaskTracker from
      killing scripts stuck in infinite loops.</description>
  </property>

  <property>
    <name>hive.script.serde</name>
    <value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value>
    <description>The default SerDe for transmitting input data to and
      reading output data from the user scripts.</description>
  </property>

  <property>
    <name>hive.script.recordreader</name>
    <value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value>
    <description>The default record reader for reading data from the user
      scripts.</description>
  </property>

  <property>
    <name>hive.script.recordwriter</name>
    <value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value>
    <description>The default record writer for writing data to the user
      scripts.</description>
  </property>

  <property>
    <name>hive.input.format</name>
    <value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
    <description>The default input format. If it is not specified, the
      system assigns it: it is set to HiveInputFormat for Hadoop versions
      17, 18 and 19, whereas it is set to CombinedHiveInputFormat for
      Hadoop 20. The user can always override it; if there is a bug in
      CombinedHiveInputFormat, it can always be manually set to
      HiveInputFormat.</description>
  </property>

  <property>
    <name>hive.udtf.auto.progress</name>
    <value>false</value>
    <description>Whether Hive should automatically send progress
      information to the TaskTracker when using UDTFs, to prevent the task
      getting killed because of inactivity. Users should be cautious
      because this may prevent the TaskTracker from killing tasks with
      infinite loops.</description>
  </property>

  <property>
    <name>hive.mapred.reduce.tasks.speculative.execution</name>
    <value>true</value>
    <description>Whether speculative execution for reducers should be
      turned on.</description>
  </property>

  <property>
    <name>hive.exec.counters.pull.interval</name>
    <value>1000</value>
    <description>The interval at which to poll the JobTracker for the
      counters of the running job. The smaller it is, the more load there
      will be on the JobTracker; the higher it is, the less granular the
      fetched counters will be.</description>
  </property>

  <property>
    <name>hive.enforce.bucketing</name>
    <value>false</value>
    <description>Whether bucketing is enforced. If true, bucketing is
      enforced while inserting into the table.</description>
  </property>

  <property>
    <name>hive.enforce.sorting</name>
    <value>false</value>
    <description>Whether sorting is enforced. If true, sorting is enforced
      while inserting into the table.</description>
  </property>

  <property>
    <name>hive.metastore.ds.connection.url.hook</name>
    <value></value>
    <description>Name of the hook to use for retrieving the JDO connection
      URL. If empty, the value in javax.jdo.option.ConnectionURL is used.
    </description>
  </property>

  <property>
    <name>hive.metastore.ds.retry.attempts</name>
    <value>1</value>
    <description>The number of times to retry a metastore call if there is
      a connection error</description>
  </property>

  <property>
    <name>hive.metastore.ds.retry.interval</name>
    <value>1000</value>
    <description>The number of milliseconds between metastore retry
      attempts</description>
  </property>

  <property>
    <name>hive.metastore.server.min.threads</name>
    <value>200</value>
    <description>Minimum number of worker threads in the Thrift server's
      pool.</description>
  </property>

  <property>
    <name>hive.metastore.server.max.threads</name>
    <value>100000</value>
    <description>Maximum number of worker threads in the Thrift server's
      pool.</description>
  </property>

  <property>
    <name>hive.metastore.server.tcp.keepalive</name>
    <value>true</value>
    <description>Whether to enable TCP keepalive for the metastore server.
      Keepalive will prevent accumulation of half-open connections.
    </description>
  </property>

  <property>
    <name>hive.optimize.reducededuplication</name>
    <value>true</value>
    <description>Remove extra map-reduce jobs if the data is already
      clustered by the same key that needs to be used again. This should
      always be set to true. Since it is a new feature, it has been made
      configurable.</description>
  </property>

  <property>
    <name>hive.exec.dynamic.partition</name>
    <value>false</value>
    <description>Whether or not to allow dynamic partitions in DML/DDL.
    </description>
  </property>

  <property>
    <name>hive.exec.dynamic.partition.mode</name>
    <value>strict</value>
    <description>In strict mode, the user must specify at least one static
      partition, in case the user accidentally overwrites all partitions.
    </description>
  </property>
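
  <!-- For example (table and column names are illustrative), strict mode
       accepts
         INSERT OVERWRITE TABLE dest PARTITION (ds='2012-11-02', hr)
         SELECT col1, hr FROM src;
       because the ds partition is static, but it would reject the same
       statement written with PARTITION (ds, hr), where both are dynamic. -->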

  <property>
    <name>hive.exec.max.dynamic.partitions</name>
    <value>1000</value>
    <description>Maximum number of dynamic partitions allowed to be
      created in total.</description>
  </property>

  <property>
    <name>hive.exec.max.dynamic.partitions.pernode</name>
    <value>100</value>
    <description>Maximum number of dynamic partitions allowed to be
      created on each mapper/reducer node.</description>
  </property>

  <property>
    <name>hive.default.partition.name</name>
    <value>__HIVE_DEFAULT_PARTITION__</value>
    <description>The default partition name, used when the dynamic
      partition column value is null, an empty string, or any other value
      that cannot be escaped. This value must not contain any special
      character used in HDFS URIs (e.g., ':', '%', '/', etc.). The user
      has to be aware that dynamic partition values should not contain
      this value, to avoid confusion.</description>
  </property>

  <property>
    <name>fs.har.impl</name>
    <value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
    <description>The implementation for accessing Hadoop Archives. Note
      that this won't be applicable to Hadoop versions less than 0.20.
    </description>
  </property>

  <property>
    <name>hive.archive.enabled</name>
    <value>false</value>
    <description>Whether archiving operations are permitted</description>
  </property>

  <property>
    <name>hive.archive.har.parentdir.settable</name>
    <value>false</value>
    <description>In new Hadoop versions, the parent directory must be set
      while creating a HAR. Because this functionality is hard to detect
      with just version numbers, this conf var needs to be set manually.
    </description>
  </property>

  <!-- HBase Storage Handler Parameters -->

  <property>
    <name>hive.hbase.wal.enabled</name>
    <value>true</value>
    <description>Whether writes to HBase should be forced to the
      write-ahead log. Disabling this improves HBase write performance at
      the risk of lost writes in case of a crash.</description>
  </property>

  <property>
    <name>hive.exec.drop.ignorenonexistent</name>
    <value>true</value>
    <description>Drop table always works.</description>
  </property>

</configuration>