<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>

	<!-- Hive configuration can either be stored in this file or in the Hadoop
		configuration files that are implied by Hadoop setup variables. -->
	<!-- Aside from the Hadoop setup variables, this file is provided as a
		convenience so that Hive users do not have to edit Hadoop configuration
		files (which may be managed as a centralized resource). -->

	<!-- Hive Execution Parameters -->
	<property>
		<name>mapred.reduce.tasks</name>
		<value>-1</value>
		<description>The default number of reduce tasks per job, typically set
			to a prime close to the number of available hosts. Ignored when
			mapred.job.tracker is "local". Hadoop sets this to 1 by default,
			whereas Hive uses -1 as its default value. By setting this property
			to -1, Hive will automatically determine the number of reducers.
		</description>
	</property>

	<property>
		<name>hive.hyracks.host</name>
		<value>128.195.14.4</value>
	</property>

	<property>
		<name>hive.hyracks.port</name>
		<value>3099</value>
	</property>

	<property>
		<name>hive.hyracks.app</name>
		<value>hivesterix</value>
	</property>

	<property>
		<name>hive.hyracks.parrallelism</name>
		<value>4</value>
	</property>
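
	<!-- A reading of the hive.hyracks.* properties above, inferred from their
		names: they point the compiler at the Hyracks cluster controller (host
		and port), name the deployed Hyracks application, and set the degree
		of parallelism for Hivesterix jobs. -->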

	<property>
		<name>hive.algebricks.groupby.external</name>
		<value>true</value>
	</property>

	<property>
		<name>hive.algebricks.groupby.external.memory</name>
		<value>536870912</value>
	</property>

	<property>
		<name>hive.algebricks.sort.memory</name>
		<value>536870912</value>
	</property>

	<property>
		<name>hive.exec.reducers.bytes.per.reducer</name>
		<value>1000000000</value>
		<description>Size per reducer. The default is 1G; i.e., if the input
			size is 10G, 10 reducers will be used.</description>
	</property>

	<property>
		<name>hive.exec.reducers.max</name>
		<value>999</value>
		<description>Maximum number of reducers that will be used. If the value
			specified in the configuration parameter mapred.reduce.tasks is
			negative, Hive will use this as the maximum number of reducers when
			automatically determining the number of reducers.</description>
	</property>
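
	<!-- Worked example: with mapred.reduce.tasks = -1, Hive estimates the
		reducer count as roughly
		min(ceil(input_bytes / hive.exec.reducers.bytes.per.reducer),
		hive.exec.reducers.max);
		with the defaults above, a 10 GB input yields min(10, 999) = 10
		reducers. -->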

	<property>
		<name>hive.exec.scratchdir</name>
		<value>/hive-${user.name}</value>
		<description>Scratch space for Hive jobs</description>
	</property>
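
	<!-- ${user.name} is substituted from the Java system property of the same
		name, so for a hypothetical user "alice" the scratch directory
		resolves to /hive-alice. -->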

	<property>
		<name>hive.test.mode</name>
		<value>false</value>
		<description>Whether Hive is running in test mode. If yes, it turns on
			sampling and prefixes the output table name.</description>
	</property>

	<property>
		<name>hive.test.mode.prefix</name>
		<value>test_</value>
		<description>If Hive is running in test mode, prefixes the output
			table name by this string.</description>
	</property>

	<!-- If the input table is not bucketed, the denominator of the tablesample
		is determined by the parameter below -->
	<!-- For example, the following query: -->
	<!-- INSERT OVERWRITE TABLE dest -->
	<!-- SELECT col1 from src -->
	<!-- would be converted to -->
	<!-- INSERT OVERWRITE TABLE test_dest -->
	<!-- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1)) -->
	<property>
		<name>hive.test.mode.samplefreq</name>
		<value>32</value>
		<description>If Hive is running in test mode and the table is not
			bucketed, the sampling frequency.</description>
	</property>

	<property>
		<name>hive.test.mode.nosamplelist</name>
		<value></value>
		<description>If Hive is running in test mode, don't sample the
			comma-separated list of tables given here.</description>
	</property>

	<property>
		<name>hive.metastore.local</name>
		<value>true</value>
		<description>Controls whether to connect to a remote metastore server
			or open a new metastore server in the Hive client JVM.</description>
	</property>

	<property>
		<name>javax.jdo.option.ConnectionURL</name>
		<value>jdbc:derby:;databaseName=metastore_db;create=true</value>
		<description>JDBC connect string for a JDBC metastore</description>
	</property>
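
	<!-- For a shared metastore, this URL can point at an external RDBMS
		instead of embedded Derby. An illustrative (not prescriptive) MySQL
		setup, where HOST is a placeholder:
		jdbc:mysql://HOST:3306/metastore?createDatabaseIfNotExist=true
		with javax.jdo.option.ConnectionDriverName set to
		com.mysql.jdbc.Driver. -->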

	<property>
		<name>javax.jdo.option.ConnectionDriverName</name>
		<value>org.apache.derby.jdbc.EmbeddedDriver</value>
		<description>Driver class name for a JDBC metastore</description>
	</property>

	<property>
		<name>javax.jdo.PersistenceManagerFactoryClass</name>
		<value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
		<description>class implementing the jdo persistence</description>
	</property>

	<property>
		<name>datanucleus.connectionPoolingType</name>
		<value>DBCP</value>
		<description>Uses a DBCP connection pool for JDBC metastore
		</description>
	</property>

	<property>
		<name>javax.jdo.option.DetachAllOnCommit</name>
		<value>true</value>
		<description>detaches all objects from session so that they can be
			used after transaction is committed</description>
	</property>

	<property>
		<name>javax.jdo.option.NonTransactionalRead</name>
		<value>true</value>
		<description>reads outside of transactions</description>
	</property>

	<property>
		<name>javax.jdo.option.ConnectionUserName</name>
		<value>APP</value>
		<description>username to use against metastore database</description>
	</property>

	<property>
		<name>javax.jdo.option.ConnectionPassword</name>
		<value>mine</value>
		<description>password to use against metastore database</description>
	</property>

	<property>
		<name>datanucleus.validateTables</name>
		<value>false</value>
		<description>validates existing schema against code. turn this on if
			you want to verify existing schema</description>
	</property>

	<property>
		<name>datanucleus.validateColumns</name>
		<value>false</value>
		<description>validates existing schema against code. turn this on if
			you want to verify existing schema</description>
	</property>

	<property>
		<name>datanucleus.validateConstraints</name>
		<value>false</value>
		<description>validates existing schema against code. turn this on if
			you want to verify existing schema</description>
	</property>

	<property>
		<name>datanucleus.storeManagerType</name>
		<value>rdbms</value>
		<description>metadata store type</description>
	</property>

	<property>
		<name>datanucleus.autoCreateSchema</name>
		<value>true</value>
		<description>creates necessary schema on a startup if one doesn't
			exist. set this to false, after creating it once</description>
	</property>

	<property>
		<name>datanucleus.autoStartMechanismMode</name>
		<value>checked</value>
		<description>throw exception if metadata tables are incorrect
		</description>
	</property>

	<property>
		<name>datanucleus.transactionIsolation</name>
		<value>read-committed</value>
		<description>Default transaction isolation level for identity
			generation.</description>
	</property>

	<property>
		<name>datanucleus.cache.level2</name>
		<value>false</value>
		<description>Use a level 2 cache. Turn this off if metadata is changed
			independently of hive metastore server</description>
	</property>

	<property>
		<name>datanucleus.cache.level2.type</name>
		<value>SOFT</value>
		<description>SOFT=soft reference based cache, WEAK=weak reference
			based cache.</description>
	</property>

	<property>
		<name>datanucleus.identifierFactory</name>
		<value>datanucleus</value>
		<description>Name of the identifier factory to use when generating
			table/column names etc. 'datanucleus' is used for backward
			compatibility</description>
	</property>

	<property>
		<name>hive.metastore.warehouse.dir</name>
		<value>/user/hivesterix</value>
		<description>location of default database for the warehouse
		</description>
	</property>

	<property>
		<name>hive.metastore.connect.retries</name>
		<value>5</value>
		<description>Number of retries while opening a connection to metastore
		</description>
	</property>

	<property>
		<name>hive.metastore.rawstore.impl</name>
		<value>org.apache.hadoop.hive.metastore.ObjectStore</value>
		<description>Name of the class that implements the
			org.apache.hadoop.hive.metastore.rawstore interface. This class is
			used to store and retrieve raw metadata objects such as tables and
			databases.</description>
	</property>

	<property>
		<name>hive.default.fileformat</name>
		<value>TextFile</value>
		<description>Default file format for CREATE TABLE statement. Options
			are TextFile and SequenceFile. Users can explicitly say CREATE TABLE
			... STORED AS &lt;TEXTFILE|SEQUENCEFILE&gt; to override</description>
	</property>
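
	<!-- For example, to override the default for a single table (table and
		column names are illustrative):
		CREATE TABLE page_views (viewTime INT, userid BIGINT)
		STORED AS SEQUENCEFILE; -->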

	<property>
		<name>hive.fileformat.check</name>
		<value>true</value>
		<description>Whether to check file format or not when loading data
			files</description>
	</property>

	<property>
		<name>hive.map.aggr</name>
		<value>true</value>
		<description>Whether to use map-side aggregation in Hive Group By
			queries</description>
	</property>

	<property>
		<name>hive.groupby.skewindata</name>
		<value>false</value>
		<description>Whether there is skew in data to optimize group by
			queries</description>
	</property>

	<property>
		<name>hive.groupby.mapaggr.checkinterval</name>
		<value>100000</value>
		<description>Number of rows after which the size of the grouping
			keys/aggregation classes is checked</description>
	</property>

	<property>
		<name>hive.mapred.local.mem</name>
		<value>0</value>
		<description>For local mode, memory of the mappers/reducers
		</description>
	</property>

	<property>
		<name>hive.map.aggr.hash.percentmemory</name>
		<value>0.5</value>
		<description>Portion of total memory to be used by map-side group
			aggregation hash table</description>
	</property>

	<property>
		<name>hive.map.aggr.hash.min.reduction</name>
		<value>0.5</value>
		<description>Hash aggregation will be turned off if the ratio between
			hash table size and input rows is bigger than this number. Set to 1
			to make sure hash aggregation is never turned off.</description>
	</property>
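
	<!-- Worked example with the values above: after the first
		hive.groupby.mapaggr.checkinterval (100000) input rows, if the hash
		table holds more than 0.5 * 100000 = 50000 distinct grouping keys,
		map-side hash aggregation is turned off for that task. -->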

	<property>
		<name>hive.optimize.cp</name>
		<value>true</value>
		<description>Whether to enable column pruner</description>
	</property>

	<property>
		<name>hive.optimize.ppd</name>
		<value>true</value>
		<description>Whether to enable predicate pushdown</description>
	</property>

	<property>
		<name>hive.optimize.pruner</name>
		<value>true</value>
		<description>Whether to enable the new partition pruner, which depends
			on predicate pushdown. If this is disabled, the old partition
			pruner, which is based on the AST, will be enabled.
		</description>
	</property>

	<property>
		<name>hive.optimize.groupby</name>
		<value>true</value>
		<description>Whether to enable the bucketed group by from bucketed
			partitions/tables.</description>
	</property>

	<property>
		<name>hive.join.emit.interval</name>
		<value>1000</value>
		<description>How many rows in the right-most join operand Hive should
			buffer before emitting the join result.</description>
	</property>

	<property>
		<name>hive.join.cache.size</name>
		<value>25000</value>
		<description>How many rows in the joining tables (except the streaming
			table) should be cached in memory.</description>
	</property>

	<property>
		<name>hive.mapjoin.bucket.cache.size</name>
		<value>100</value>
		<description>How many values for each key in the map-joined table
			should be cached in memory.</description>
	</property>

	<property>
		<name>hive.mapjoin.maxsize</name>
		<value>100000</value>
		<description>Maximum # of rows of the small table that can be handled
			by map-side join. If this size is reached and hive.task.progress is
			set, a fatal error counter is set and the job will be killed.
		</description>
	</property>

	<property>
		<name>hive.mapjoin.cache.numrows</name>
		<value>25000</value>
		<description>How many rows should be cached by jdbm for map join.
		</description>
	</property>

	<property>
		<name>hive.optimize.skewjoin</name>
		<value>false</value>
		<description>Whether to enable skew join optimization.</description>
	</property>

	<property>
		<name>hive.skewjoin.key</name>
		<value>100000</value>
		<description>Determines whether we have a skew key in a join. If we
			see more than the specified number of rows with the same key in the
			join operator, we consider the key a skew join key.</description>
	</property>

	<property>
		<name>hive.skewjoin.mapjoin.map.tasks</name>
		<value>10000</value>
		<description>Determines the number of map tasks used in the follow-up
			map join job for a skew join. It should be used together with
			hive.skewjoin.mapjoin.min.split to perform fine-grained control.
		</description>
	</property>

	<property>
		<name>hive.skewjoin.mapjoin.min.split</name>
		<value>33554432</value>
		<description>Determines the maximum number of map tasks used in the
			follow-up map join job for a skew join, by specifying the minimum
			split size. It should be used together with
			hive.skewjoin.mapjoin.map.tasks to perform fine-grained control.
		</description>
	</property>
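
	<!-- A reading of the two settings above: with hive.skewjoin.key = 100000,
		any join key seen more than 100000 times is treated as skewed; its
		rows go to a follow-up map join whose splits are at least
		33554432 bytes (32 MB), using at most 10000 map tasks. -->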

	<property>
		<name>hive.mapred.mode</name>
		<value>nonstrict</value>
		<description>The mode in which Hive operations are performed. In
			strict mode, some risky queries are not allowed to run.
		</description>
	</property>
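
	<!-- Examples of queries rejected in strict mode include scanning a
		partitioned table without a partition predicate, ORDER BY without a
		LIMIT clause, and Cartesian-product joins; e.g. (table names are
		illustrative):
		SELECT * FROM page_views;          (partitioned table, no filter)
		SELECT * FROM src ORDER BY key;    (no LIMIT) -->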

	<property>
		<name>hive.exec.script.maxerrsize</name>
		<value>100000</value>
		<description>Maximum number of bytes a script is allowed to emit to
			standard error (per map-reduce task). This prevents runaway scripts
			from filling log partitions to capacity.</description>
	</property>

	<property>
		<name>hive.exec.script.allow.partial.consumption</name>
		<value>false</value>
		<description>When enabled, this option allows a user script to exit
			successfully without consuming all the data from the standard
			input.</description>
	</property>

	<property>
		<name>hive.script.operator.id.env.var</name>
		<value>HIVE_SCRIPT_OPERATOR_ID</value>
		<description>Name of the environment variable that holds the unique
			script operator ID in the user's transform function (the custom
			mapper/reducer that the user has specified in the query)
		</description>
	</property>

	<property>
		<name>hive.exec.compress.output</name>
		<value>false</value>
		<description>This controls whether the final outputs of a query (to a
			local/hdfs file or a Hive table) are compressed. The compression
			codec and other options are determined from the hadoop config
			variables mapred.output.compress*</description>
	</property>

	<property>
		<name>hive.exec.compress.intermediate</name>
		<value>false</value>
		<description>This controls whether intermediate files produced by Hive
			between multiple map-reduce jobs are compressed. The compression
			codec and other options are determined from the hadoop config
			variables mapred.output.compress*</description>
	</property>

	<property>
		<name>hive.exec.parallel</name>
		<value>false</value>
		<description>Whether to execute jobs in parallel</description>
	</property>

	<property>
		<name>hive.exec.parallel.thread.number</name>
		<value>8</value>
		<description>How many jobs at most can be executed in parallel
		</description>
	</property>

	<property>
		<name>hive.hwi.war.file</name>
		<value>lib/hive-hwi-0.7.0.war</value>
		<description>This sets the path to the HWI war file, relative to
			${HIVE_HOME}.</description>
	</property>

	<property>
		<name>hive.hwi.listen.host</name>
		<value>0.0.0.0</value>
		<description>This is the host address the Hive Web Interface will
			listen on</description>
	</property>

	<property>
		<name>hive.hwi.listen.port</name>
		<value>9999</value>
		<description>This is the port the Hive Web Interface will listen on
		</description>
	</property>

	<property>
		<name>hive.exec.pre.hooks</name>
		<value></value>
		<description>Pre Execute Hook for Tests</description>
	</property>

	<property>
		<name>hive.merge.mapfiles</name>
		<value>true</value>
		<description>Merge small files at the end of a map-only job
		</description>
	</property>

	<property>
		<name>hive.merge.mapredfiles</name>
		<value>false</value>
		<description>Merge small files at the end of a map-reduce job
		</description>
	</property>

	<property>
		<name>hive.heartbeat.interval</name>
		<value>1000</value>
		<description>Send a heartbeat after this interval - used by mapjoin
			and filter operators</description>
	</property>

	<property>
		<name>hive.merge.size.per.task</name>
		<value>256000000</value>
		<description>Size of merged files at the end of the job</description>
	</property>

	<property>
		<name>hive.merge.size.smallfiles.avgsize</name>
		<value>16000000</value>
		<description>When the average output file size of a job is less than
			this number, Hive will start an additional map-reduce job to merge
			the output files into bigger files. This is only done for map-only
			jobs if hive.merge.mapfiles is true, and for map-reduce jobs if
			hive.merge.mapredfiles is true.</description>
	</property>
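
	<!-- Worked example: if a map-only job (hive.merge.mapfiles = true) writes
		40 files averaging 8 MB each (below the 16000000-byte threshold), an
		extra merge job combines the 320 MB of output into files of about
		hive.merge.size.per.task = 256000000 bytes, i.e. roughly two files. -->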

	<property>
		<name>hive.script.auto.progress</name>
		<value>false</value>
		<description>Whether Hive Transform/Map/Reduce Clause should
			automatically send progress information to TaskTracker to avoid the
			task getting killed because of inactivity. Hive sends progress
			information when the script is outputting to stderr. This option
			removes the need to periodically produce stderr messages, but users
			should be cautious because it may prevent TaskTracker from killing
			scripts stuck in infinite loops.</description>
	</property>

	<property>
		<name>hive.script.serde</name>
		<value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value>
		<description>The default serde for transmitting input data to and
			reading output data from the user scripts.</description>
	</property>

	<property>
		<name>hive.script.recordreader</name>
		<value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value>
		<description>The default record reader for reading data from the user
			scripts.</description>
	</property>

	<property>
		<name>hive.script.recordwriter</name>
		<value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value>
		<description>The default record writer for writing data to the user
			scripts.</description>
	</property>

	<property>
		<name>hive.input.format</name>
		<value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
		<description>The default input format. If it is not specified, the
			system assigns it. It is set to HiveInputFormat for Hadoop versions
			17, 18 and 19, and to CombinedHiveInputFormat for Hadoop 20. The
			user can always overwrite it; if there is a bug in
			CombinedHiveInputFormat, it can always be manually set to
			HiveInputFormat.</description>
	</property>

	<property>
		<name>hive.udtf.auto.progress</name>
		<value>false</value>
		<description>Whether Hive should automatically send progress
			information to TaskTracker when using UDTFs, to prevent the task
			getting killed because of inactivity. Users should be cautious
			because this may prevent TaskTracker from killing tasks with
			infinite loops.</description>
	</property>

	<property>
		<name>hive.mapred.reduce.tasks.speculative.execution</name>
		<value>true</value>
		<description>Whether speculative execution for reducers should be
			turned on.</description>
	</property>

	<property>
		<name>hive.exec.counters.pull.interval</name>
		<value>1000</value>
		<description>The interval at which to poll the JobTracker for the
			counters of the running job. The smaller it is, the more load there
			will be on the JobTracker; the higher it is, the less granular the
			fetched counters will be.</description>
	</property>

	<property>
		<name>hive.enforce.bucketing</name>
		<value>false</value>
		<description>Whether bucketing is enforced. If true, while inserting
			into the table, bucketing is enforced.</description>
	</property>

	<property>
		<name>hive.enforce.sorting</name>
		<value>false</value>
		<description>Whether sorting is enforced. If true, while inserting
			into the table, sorting is enforced.</description>
	</property>
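
	<!-- Illustrative example (table and column names are made up): given
		CREATE TABLE user_info (id INT, name STRING)
		CLUSTERED BY (id) SORTED BY (id) INTO 32 BUCKETS;
		setting hive.enforce.bucketing = true (and hive.enforce.sorting = true)
		makes an INSERT into the table use 32 reducers, so each bucket file is
		hashed (and sorted) on id as declared. -->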

	<property>
		<name>hive.metastore.ds.connection.url.hook</name>
		<value></value>
		<description>Name of the hook to use for retrieving the JDO connection
			URL. If empty, the value in javax.jdo.option.ConnectionURL is used.
		</description>
	</property>

	<property>
		<name>hive.metastore.ds.retry.attempts</name>
		<value>1</value>
		<description>The number of times to retry a metastore call if there is
			a connection error</description>
	</property>

	<property>
		<name>hive.metastore.ds.retry.interval</name>
		<value>1000</value>
		<description>The number of milliseconds between metastore retry
			attempts</description>
	</property>

	<property>
		<name>hive.metastore.server.min.threads</name>
		<value>200</value>
		<description>Minimum number of worker threads in the Thrift server's
			pool.</description>
	</property>

	<property>
		<name>hive.metastore.server.max.threads</name>
		<value>100000</value>
		<description>Maximum number of worker threads in the Thrift server's
			pool.</description>
	</property>

	<property>
		<name>hive.metastore.server.tcp.keepalive</name>
		<value>true</value>
		<description>Whether to enable TCP keepalive for the metastore server.
			Keepalive will prevent accumulation of half-open connections.
		</description>
	</property>

	<property>
		<name>hive.optimize.reducededuplication</name>
		<value>true</value>
		<description>Remove extra map-reduce jobs if the data is already
			clustered by the same key which needs to be used again. This should
			always be set to true. Since it is a new feature, it has been made
			configurable.</description>
	</property>

	<property>
		<name>hive.exec.dynamic.partition</name>
		<value>false</value>
		<description>Whether or not to allow dynamic partitions in DML/DDL.
		</description>
	</property>

	<property>
		<name>hive.exec.dynamic.partition.mode</name>
		<value>strict</value>
		<description>In strict mode, the user must specify at least one static
			partition in case the user accidentally overwrites all partitions.
		</description>
	</property>
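
	<!-- Example: with dynamic partitions enabled, strict mode accepts
		INSERT OVERWRITE TABLE dest PARTITION (ds='2012-11-02', hr)
		SELECT col1, hr FROM src;
		(ds is static, hr is dynamic), but rejects PARTITION (ds, hr), where
		every partition column is dynamic. Table and column names are
		illustrative. -->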

	<property>
		<name>hive.exec.max.dynamic.partitions</name>
		<value>1000</value>
		<description>Maximum number of dynamic partitions allowed to be
			created in total.</description>
	</property>

	<property>
		<name>hive.exec.max.dynamic.partitions.pernode</name>
		<value>100</value>
		<description>Maximum number of dynamic partitions allowed to be
			created in each mapper/reducer node.</description>
	</property>

	<property>
		<name>hive.default.partition.name</name>
		<value>__HIVE_DEFAULT_PARTITION__</value>
		<description>The default partition name in case the dynamic partition
			column value is null/empty string or any other value that cannot be
			escaped. This value must not contain any special character used in
			HDFS URIs (e.g., ':', '%', '/' etc). The user has to be aware that
			the dynamic partition value should not contain this value, to avoid
			confusion.</description>
	</property>

	<property>
		<name>fs.har.impl</name>
		<value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
		<description>The implementation for accessing Hadoop Archives. Note
			that this won't be applicable to Hadoop versions less than 0.20.
		</description>
	</property>

	<property>
		<name>hive.archive.enabled</name>
		<value>false</value>
		<description>Whether archiving operations are permitted</description>
	</property>

	<property>
		<name>hive.archive.har.parentdir.settable</name>
		<value>false</value>
		<description>In new Hadoop versions, the parent directory must be set
			while creating a HAR. Because this functionality is hard to detect
			with just version numbers, this conf var needs to be set manually.
		</description>
	</property>

	<!-- HBase Storage Handler Parameters -->

	<property>
		<name>hive.hbase.wal.enabled</name>
		<value>true</value>
		<description>Whether writes to HBase should be forced to the
			write-ahead log. Disabling this improves HBase write performance at
			the risk of lost writes in case of a crash.</description>
	</property>

</configuration>