blob: a0d481ee2269ee9ee647b633d515a8dadb9d44bf [file] [log] [blame]
Ian Maxonbf2c56b2017-01-24 14:14:49 -08001<!DOCTYPE html>
2<!--
3 | Generated by Apache Maven Doxia at 2017-01-24
4 | Rendered using Apache Maven Fluido Skin 1.3.0
5-->
6<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7 <head>
8 <meta charset="UTF-8" />
9 <meta name="viewport" content="width=device-width, initial-scale=1.0" />
10 <meta name="Date-Revision-yyyymmdd" content="20170124" />
11 <meta http-equiv="Content-Language" content="en" />
12 <title>AsterixDB &#x2013; Table of Contents</title>
13 <link rel="stylesheet" href="./css/apache-maven-fluido-1.3.0.min.css" />
14 <link rel="stylesheet" href="./css/site.css" />
15 <link rel="stylesheet" href="./css/print.css" media="print" />
16
17
18 <script type="text/javascript" src="./js/apache-maven-fluido-1.3.0.min.js"></script>
19
20
21
22<script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
23 (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
24 m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
25 })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
26
27 ga('create', 'UA-41536543-1', 'uci.edu');
28 ga('send', 'pageview');</script>
29
30 </head>
31 <body class="topBarDisabled">
32
33
34
35
36 <div class="container-fluid">
37 <div id="banner">
38 <div class="pull-left">
39 <a href="./" id="bannerLeft">
40 <img src="images/asterixlogo.png" alt="AsterixDB"/>
41 </a>
42 </div>
43 <div class="pull-right"> </div>
44 <div class="clear"><hr/></div>
45 </div>
46
47 <div id="breadcrumbs">
48 <ul class="breadcrumb">
49
50
51 <li id="publishDate">Last Published: 2017-01-24</li>
52
53
54
55 <li id="projectVersion" class="pull-right">Version: 0.9.0</li>
56
57 <li class="divider pull-right">|</li>
58
59 <li class="pull-right"> <a href="index.html" title="Documentation Home">
60 Documentation Home</a>
61 </li>
62
63 </ul>
64 </div>
65
66
67 <div class="row-fluid">
68 <div id="leftColumn" class="span3">
69 <div class="well sidebar-nav">
70
71
72 <ul class="nav nav-list">
73 <li class="nav-header">Get Started - Installation</li>
74
75 <li class="active">
76
77 <a href="#"><i class="none"></i>Option 1: using NCService</a>
78 </li>
79
80 <li>
81
82 <a href="install.html" title="Option 2: using Managix">
83 <i class="none"></i>
84 Option 2: using Managix</a>
85 </li>
86
87 <li>
88
89 <a href="yarn.html" title="Option 3: using YARN">
90 <i class="none"></i>
91 Option 3: using YARN</a>
92 </li>
93 <li class="nav-header">AsterixDB Primer</li>
94
95 <li>
96
97 <a href="sqlpp/primer-sqlpp.html" title="Option 1: using SQL++">
98 <i class="none"></i>
99 Option 1: using SQL++</a>
100 </li>
101
102 <li>
103
104 <a href="aql/primer.html" title="Option 2: using AQL">
105 <i class="none"></i>
106 Option 2: using AQL</a>
107 </li>
108 <li class="nav-header">Data Model</li>
109
110 <li>
111
112 <a href="datamodel.html" title="The Asterix Data Model">
113 <i class="none"></i>
114 The Asterix Data Model</a>
115 </li>
116 <li class="nav-header">Queries - SQL++</li>
117
118 <li>
119
120 <a href="sqlpp/manual.html" title="The SQL++ Query Language">
121 <i class="none"></i>
122 The SQL++ Query Language</a>
123 </li>
124
125 <li>
126
127 <a href="sqlpp/builtins.html" title="Builtin Functions">
128 <i class="none"></i>
129 Builtin Functions</a>
130 </li>
131 <li class="nav-header">Queries - AQL</li>
132
133 <li>
134
135 <a href="aql/manual.html" title="The Asterix Query Language (AQL)">
136 <i class="none"></i>
137 The Asterix Query Language (AQL)</a>
138 </li>
139
140 <li>
141
142 <a href="aql/builtins.html" title="Builtin Functions">
143 <i class="none"></i>
144 Builtin Functions</a>
145 </li>
146 <li class="nav-header">Advanced Features</li>
147
148 <li>
149
150 <a href="aql/similarity.html" title="Support of Similarity Queries">
151 <i class="none"></i>
152 Support of Similarity Queries</a>
153 </li>
154
155 <li>
156
157 <a href="aql/fulltext.html" title="Support of Full-text Queries">
158 <i class="none"></i>
159 Support of Full-text Queries</a>
160 </li>
161
162 <li>
163
164 <a href="aql/externaldata.html" title="Accessing External Data">
165 <i class="none"></i>
166 Accessing External Data</a>
167 </li>
168
169 <li>
170
171 <a href="feeds/tutorial.html" title="Support for Data Ingestion">
172 <i class="none"></i>
173 Support for Data Ingestion</a>
174 </li>
175
176 <li>
177
178 <a href="udf.html" title="User Defined Functions">
179 <i class="none"></i>
180 User Defined Functions</a>
181 </li>
182
183 <li>
184
185 <a href="aql/filters.html" title="Filter-Based LSM Index Acceleration">
186 <i class="none"></i>
187 Filter-Based LSM Index Acceleration</a>
188 </li>
189 <li class="nav-header">API/SDK</li>
190
191 <li>
192
193 <a href="api.html" title="HTTP API">
194 <i class="none"></i>
195 HTTP API</a>
196 </li>
197 </ul>
198
199
200
201 <hr class="divider" />
202
203 <div id="poweredBy">
204 <div class="clear"></div>
205 <div class="clear"></div>
206 <div class="clear"></div>
207 <a href="./" title="AsterixDB" class="builtBy">
208 <img class="builtBy" alt="AsterixDB" src="images/asterixlogo.png" />
209 </a>
210 </div>
211 </div>
212 </div>
213
214
215 <div id="bodyColumn" class="span9" >
216
217 <!-- ! Licensed to the Apache Software Foundation (ASF) under one
218 ! or more contributor license agreements. See the NOTICE file
219 ! distributed with this work for additional information
220 ! regarding copyright ownership. The ASF licenses this file
221 ! to you under the Apache License, Version 2.0 (the
222 ! "License"); you may not use this file except in compliance
223 ! with the License. You may obtain a copy of the License at
224 !
225 ! http://www.apache.org/licenses/LICENSE-2.0
226 !
227 ! Unless required by applicable law or agreed to in writing,
228 ! software distributed under the License is distributed on an
229 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
230 ! KIND, either express or implied. See the License for the
231 ! specific language governing permissions and limitations
232 ! under the License.
233 ! --><div class="section">
234<h2><a name="Table_of_Contents"></a><a name="toc" id="toc">Table of Contents</a></h2>
235
236<ul>
237
238<li><a href="#Small_cluster">Starting a small cluster using the NCService</a></li>
239
240<li><a href="#Parameters">Parameter settings</a></li>
241</ul>
242<h1><a name="Small_cluster" id="Small_cluster">Starting a small cluster using the NCService</a></h1>
243<p>When running a cluster using the <tt>NCService</tt> there are 3 different kinds of processes involved:</p>
244
245<ol style="list-style-type: decimal">
246
247<li><tt>NCDriver</tt> does the work of a NodeController</li>
248
249<li><tt>NCService</tt> configures and starts an <tt>NCDriver</tt></li>
250
251<li><tt>CCDriver</tt> does the work of a ClusterController and sends the configuration to the <tt>NCServices</tt></li>
252</ol>
253<p>To start a small cluster consisting of 2 NodeControllers (<tt>red</tt> and <tt>blue</tt>) and 1 ClusterController (<tt>cc</tt>) on a single machine only 2 configuration files are required. The first one is</p>
254<p><tt>blue.conf</tt>:</p>
255
256<div class="source">
257<div class="source">
258<pre>[ncservice]
259port=9091
260</pre></div></div>
261<p>It is a configuration file for the second <tt>NCService</tt>. It contains only the port that the <tt>NCService</tt> of the second NodeController listens on, as it is non-standard. The first <tt>NCService</tt> does not need a configuration file, as it only uses default parameters. In a distributed environment with 1 NodeController per machine, no <tt>NCService</tt> needs a configuration file.</p>
262<p>The second configuration file is</p>
263<p><tt>cc.conf</tt>:</p>
264
265<div class="source">
266<div class="source">
267<pre>[nc/red]
268txnlogdir=/tmp/asterix/red/txnlog
269coredumpdir=/tmp/asterix/red/coredump
270iodevices=/tmp/asterix/red
271
272[nc/blue]
273port=9091
274txnlogdir=/tmp/asterix/blue/txnlog
275coredumpdir=/tmp/asterix/blue/coredump
276iodevices=/tmp/asterix/blue
277
278[nc]
279app.class=org.apache.asterix.hyracks.bootstrap.NCApplicationEntryPoint
280storagedir=storage
281address=127.0.0.1
282command=asterixnc
283
284[cc]
285cluster.address = 127.0.0.1
286http.port = 12345
287</pre></div></div>
288<p>This is the configuration file for the cluster and it contains information that each <tt>NCService</tt> will use when starting the corresponding <tt>NCDriver</tt> as well as information for the <tt>CCDriver</tt>.</p>
289<p>To start the cluster simply use the following steps:</p>
290
291<ol style="list-style-type: decimal">
292
293<li>
294<p>Set BASEDIR to the location of an unzipped asterix-server binary assembly (in the source tree that&#x2019;s at <tt>asterixdb/asterix-server/target</tt>).</p>
295
296<div class="source">
297<div class="source">
298<pre>$ export BASEDIR=[..]/asterix-server-0.8.9-SNAPSHOT-binary-assembly
299</pre></div></div></li>
300
301<li>
302<p>Start the 2 <tt>NCServices</tt> for <tt>red</tt> and <tt>blue</tt>.</p>
303
304<div class="source">
305<div class="source">
306<pre>$ $BASEDIR/bin/asterixncservice -config-file blue.conf &gt; blue-service.log 2&gt;&amp;1 &amp;
307$ $BASEDIR/bin/asterixncservice &gt;red-service.log 2&gt;&amp;1 &amp;
308</pre></div></div></li>
309
310<li>
311<p>Start the <tt>CCDriver</tt>.</p>
312
313<div class="source">
314<div class="source">
315<pre>$ $BASEDIR/bin/asterixcc -config-file cc.conf &gt; cc.log 2&gt;&amp;1 &amp;
316</pre></div></div></li>
317</ol>
318<p>The <tt>CCDriver</tt> will connect to the <tt>NCServices</tt> and thus initiate the configuration and the start of the <tt>NCDrivers</tt>. After running these scripts, <tt>jps</tt> should show a result similar to this:</p>
319
320<div class="source">
321<div class="source">
322<pre>$ jps
32313184 NCService
32413200 NCDriver
32513185 NCService
32613186 CCDriver
32713533 Jps
32813198 NCDriver
329</pre></div></div>
330<p>The logs for the <tt>NCDrivers</tt> will be in <tt>$BASEDIR/logs</tt>.</p>
331<p>To stop the cluster again simply run</p>
332
333<div class="source">
334<div class="source">
335<pre>$ kill `jps | egrep '(CDriver|NCService)' | awk '{print $1}'`
336</pre></div></div>
337<p>to kill all processes.</p>
338<h1><a name="Parameters" id="Parameters">Parameter settings</a></h1>
339<p>The following parameters are for the master process, under the &#x201c;[cc]&#x201d; section.</p>
340
341<table border="0" class="table table-striped">
342 <thead>
343
344<tr class="a">
345
346<th>Parameter </th>
347
348<th>Meaning </th>
349
350<th>Default </th>
351 </tr>
352 </thead>
353 <tbody>
354
355<tr class="b">
356
357<td>instance.name </td>
358
359<td>The name of the AsterixDB instance </td>
360
361<td>&#x201c;DEFAULT_INSTANCE&#x201d; </td>
362 </tr>
363
364<tr class="a">
365
366<td>max.wait.active.cluster </td>
367
368<td>The max pending time (in seconds) for cluster startup. After the threshold, if the cluster still is not up and running, it is considered unavailable. </td>
369
370<td>60 </td>
371 </tr>
372
373<tr class="b">
374
375<td>metadata.callback.port </td>
376
377<td>The port for metadata communication </td>
378
379<td>0 </td>
380 </tr>
381
382<tr class="a">
383
384<td>cluster.address </td>
385
386<td>The binding IP address for the AsterixDB instance </td>
387
388<td>N/A </td>
389 </tr>
390 </tbody>
391</table>
392<p>The following parameters are for slave processes, under the &#x201c;[nc]&#x201d; sections.</p>
393
394<table border="0" class="table table-striped">
395 <thead>
396
397<tr class="a">
398
399<th>Parameter </th>
400
401<th>Meaning </th>
402
403<th>Default </th>
404 </tr>
405 </thead>
406 <tbody>
407
408<tr class="b">
409
410<td>address </td>
411
412<td>The binding IP address for the slave process </td>
413
414<td>N/A </td>
415 </tr>
416
417<tr class="a">
418
419<td>command </td>
420
421<td>The command for the slave process </td>
422
423<td>N/A (for AsterixDB, it should be &#x201c;asterixnc&#x201d;) </td>
424 </tr>
425
426<tr class="b">
427
428<td>coredumpdir </td>
429
430<td>The path for core dump </td>
431
432<td>N/A </td>
433 </tr>
434
435<tr class="a">
436
437<td>iodevices </td>
438
439<td>Comma separated directory paths for both storage files and temporary files </td>
440
441<td>N/A </td>
442 </tr>
443
444<tr class="b">
445
446<td>jvm.args </td>
447
448<td>The JVM arguments </td>
449
450<td>-Xmx1536m </td>
451 </tr>
452
453<tr class="a">
454
455<td>metadata.port </td>
456
457<td>The metadata communication port on the metadata node. This parameter should only be present in the section of the metadata NC </td>
458
459<td>0 </td>
460 </tr>
461
462<tr class="b">
463
464<td>metadata.registration.timeout.secs </td>
465
466<td>The time out threshold (in seconds) for metadata node registration </td>
467
468<td>60 </td>
469 </tr>
470
471<tr class="a">
472
473<td>port </td>
474
475<td>The port for the NCService that starts the slave process </td>
476
477<td>N/A </td>
478 </tr>
479
480<tr class="b">
481
482<td>storagedir </td>
483
484<td>The directory for storage files </td>
485
486<td>N/A </td>
487 </tr>
488
489<tr class="a">
490
491<td>storage.buffercache.maxopenfiles </td>
492
493<td>The maximum number of open files for the buffer cache. Note that this parameter only applies to AsterixDB; setting the corresponding operating system parameter is still required. </td>
494
495<td>2147483647 </td>
496 </tr>
497
498<tr class="b">
499
500<td>storage.buffercache.pagesize </td>
501
502<td>The page size (in bytes) for the disk buffer cache (for reads) </td>
503
504<td>131072 </td>
505 </tr>
506
507<tr class="a">
508
509<td>storage.buffercache.size </td>
510
511<td>The overall budget (in bytes) of the disk buffer cache (for reads) </td>
512
513<td>536870912 </td>
514 </tr>
515
516<tr class="b">
517
518<td>storage.lsm.bloomfilter.falsepositiverate </td>
519
520<td>The false positive rate for the bloom filter for each memory/disk component </td>
521
522<td>0.01 </td>
523 </tr>
524
525<tr class="a">
526
527<td>storage.memorycomponent.globalbudget </td>
528
529<td>The global budget (in bytes) for all memory components of all datasets and indexes (for writes) </td>
530
531<td>536870912 </td>
532 </tr>
533
534<tr class="b">
535
536<td>storage.memorycomponent.numcomponents </td>
537
538<td>The number of memory components per data partition per index </td>
539
540<td>2 </td>
541 </tr>
542
543<tr class="a">
544
545<td>storage.memorycomponent.numpages </td>
546
547<td>The number of pages for all memory components of a dataset, including those for secondary indexes </td>
548
549<td>256 </td>
550 </tr>
551
552<tr class="b">
553
554<td>storage.memorycomponent.pagesize </td>
555
556<td>The page size (in bytes) of memory components </td>
557
558<td>131072 </td>
559 </tr>
560
561<tr class="a">
562
563<td>storage.metadata.memorycomponent.numpages </td>
564
565<td>The number of pages for all memory components of a metadata dataset </td>
566
567<td>256 </td>
568 </tr>
569
570<tr class="b">
571
572<td>txnlogdir </td>
573
574<td>The directory for transaction logs </td>
575
576<td>N/A </td>
577 </tr>
578
579<tr class="a">
580
581<td>txn.commitprofiler.reportinterval </td>
582
583<td>The interval for reporting commit statistics </td>
584
585<td>5 </td>
586 </tr>
587
588<tr class="b">
589
590<td>txn.job.recovery.memorysize </td>
591
592<td>The memory budget (in bytes) used for recovery </td>
593
594<td>67108864 </td>
595 </tr>
596
597<tr class="a">
598
599<td>txn.lock.timeout.sweepthreshold </td>
600
601<td>Interval (in milliseconds) for checking lock timeout </td>
602
603<td>10000 </td>
604 </tr>
605
606<tr class="b">
607
608<td>txn.lock.timeout.waitthreshold </td>
609
610<td>Time out (in milliseconds) of waiting for a lock </td>
611
612<td>60000 </td>
613 </tr>
614
615<tr class="a">
616
617<td>txn.log.buffer.numpages </td>
618
619<td>The number of pages in the transaction log tail </td>
620
621<td>8 </td>
622 </tr>
623
624<tr class="b">
625
626<td>txn.log.buffer.pagesize </td>
627
628<td>The page size (in bytes) for transaction log buffer. </td>
629
630<td>131072 </td>
631 </tr>
632
633<tr class="a">
634
635<td>txn.log.checkpoint.history </td>
636
637<td>The number of checkpoints to keep in the transaction log </td>
638
639<td>0 </td>
640 </tr>
641
642<tr class="b">
643
644<td>txn.log.checkpoint.lsnthreshold </td>
645
646<td>The checkpoint threshold (in terms of LSNs (log sequence numbers) that have been written to the transaction log, i.e., the length of the transaction log) for transaction logs </td>
647
648<td>67108864 </td>
649 </tr>
650 </tbody>
651</table>
652<p>The following parameters are for both master and slave processes, under the &#x201c;[app]&#x201d; section.</p>
653
654<table border="0" class="table table-striped">
655 <thead>
656
657<tr class="a">
658
659<th>Parameter </th>
660
661<th>Meaning </th>
662
663<th>Default </th>
664 </tr>
665 </thead>
666 <tbody>
667
668<tr class="b">
669
670<td>log.level </td>
671
672<td>The logging level for master and slave processes </td>
673
674<td>&#x201c;INFO&#x201d; </td>
675 </tr>
676
677<tr class="a">
678
679<td>compiler.framesize </td>
680
681<td>The page size (in bytes) for computation </td>
682
683<td>32768 </td>
684 </tr>
685
686<tr class="b">
687
688<td>compiler.groupmemory </td>
689
690<td>The memory budget (in bytes) for a group by operator instance in a partition </td>
691
692<td>33554432 </td>
693 </tr>
694
695<tr class="a">
696
697<td>compiler.joinmemory </td>
698
699<td>The memory budget (in bytes) for a join operator instance in a partition </td>
700
701<td>33554432 </td>
702 </tr>
703
704<tr class="b">
705
706<td>compiler.sortmemory </td>
707
708<td>The memory budget (in bytes) for a sort operator instance in a partition </td>
709
710<td>33554432 </td>
711 </tr>
712
713<tr class="a">
714
715<td>compiler.parallelism </td>
716
717<td>The degree of parallelism for query execution. Zero means to use the storage parallelism as the query execution parallelism, while other integer values dictate the number of query execution parallel partitions. The system will fall back to use the number of all available CPU cores in the cluster as the degree of parallelism if the number set by a user is too large or too small. </td>
718
719<td>0 </td>
720 </tr>
721 </tbody>
722</table></div>
723 </div>
724 </div>
725 </div>
726
727 <hr/>
728
729 <footer>
730 <div class="container-fluid">
731 <div class="row span12">Copyright &copy; 2017
732 <a href="https://www.apache.org/">The Apache Software Foundation</a>.
733 All Rights Reserved.
734
735 </div>
736
737 <?xml version="1.0" encoding="UTF-8"?>
738<div class="row-fluid">Apache AsterixDB, AsterixDB, Apache, the Apache
739 feather logo, and the Apache AsterixDB project logo are either
740 registered trademarks or trademarks of The Apache Software
741 Foundation in the United States and other countries.
742 All other marks mentioned may be trademarks or registered
743 trademarks of their respective owners.</div>
744
745
746 </div>
747 </footer>
748 </body>
749</html>