Fixed issues with Replication tests
- Changed the way LogRecord writes the newValue: remote log tuples are now re-serialized via SimpleTupleWriter instead of copied verbatim
- Fixed a bug in recovery plan creation that broke out of the replica loop before an active replica had been added
- Updated the ReplicationIT query files: cluster state checks now issue HTTP GETs against /admin/cluster
Change-Id: I204a41311012dc348d4d41ae8bf6ec8554ab3dc3
Reviewed-on: https://asterix-gerrit.ics.uci.edu/1334
Sonar-Qube: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Murtadha Hubail <hubailmor@gmail.com>
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/transactions/LogRecord.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/transactions/LogRecord.java
index 306b888..361ac7c 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/transactions/LogRecord.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/transactions/LogRecord.java
@@ -208,12 +208,7 @@
}
private void writeTuple(ByteBuffer buffer, ITupleReference tuple, int size) {
- if (logSource == LogSource.LOCAL) {
- SimpleTupleWriter.INSTANCE.writeTuple(tuple, buffer.array(), buffer.position());
- } else {
- //since the tuple is already serialized in remote logs, just copy it from beginning to end.
- System.arraycopy(tuple.getFieldData(0), 0, buffer.array(), buffer.position(), size);
- }
+ SimpleTupleWriter.INSTANCE.writeTuple(tuple, buffer.array(), buffer.position());
buffer.position(buffer.position() + size);
}
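
The writeTuple change above removes the LogSource branch: remote log tuples are now re-serialized through SimpleTupleWriter just like local ones, instead of being copied byte-for-byte with System.arraycopy. To see why the two paths are only equivalent when source and destination agree on the exact tuple layout, here is a minimal, hypothetical slot-array writer (a sketch for illustration only; the real Hyracks SimpleTupleWriter layout differs):

    import java.nio.ByteBuffer;

    // Hypothetical tuple layout: field-end offsets (a "slot array") followed
    // by the concatenated field data. A verbatim byte copy of a tuple only
    // round-trips if the source buffer was built with this same layout.
    public class TupleWriteSketch {
        static int writeTuple(byte[][] fields, ByteBuffer buffer) {
            int start = buffer.position();
            int end = 0;
            for (byte[] f : fields) {   // slot array: running end offsets
                end += f.length;
                buffer.putInt(end);
            }
            for (byte[] f : fields) {   // then the raw field bytes
                buffer.put(f);
            }
            return buffer.position() - start;
        }

        public static void main(String[] args) {
            ByteBuffer buf = ByteBuffer.allocate(64);
            int size = writeTuple(new byte[][] { { 1, 2 }, { 3 } }, buf);
            System.out.println("bytes written: " + size); // 2 offsets (8) + 3 data = 11
        }
    }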
diff --git a/asterixdb/asterix-installer/src/test/resources/integrationts/replication/queries/failback/node_failback/node_failback.10.cstate.aql b/asterixdb/asterix-installer/src/test/resources/integrationts/replication/queries/failback/node_failback/node_failback.10.get.http
similarity index 98%
copy from asterixdb/asterix-installer/src/test/resources/integrationts/replication/queries/failback/node_failback/node_failback.10.cstate.aql
copy to asterixdb/asterix-installer/src/test/resources/integrationts/replication/queries/failback/node_failback/node_failback.10.get.http
index bd01d99..2bd5e45 100644
--- a/asterixdb/asterix-installer/src/test/resources/integrationts/replication/queries/failback/node_failback/node_failback.10.cstate.aql
+++ b/asterixdb/asterix-installer/src/test/resources/integrationts/replication/queries/failback/node_failback/node_failback.10.get.http
@@ -26,4 +26,5 @@
query cluster state, query data.
* Expected Result : Success
* Date : February 3 2016
- */
\ No newline at end of file
+ */
+/admin/cluster
diff --git a/asterixdb/asterix-installer/src/test/resources/integrationts/replication/queries/failback/node_failback/node_failback.5.cstate.aql b/asterixdb/asterix-installer/src/test/resources/integrationts/replication/queries/failback/node_failback/node_failback.5.cstate.aql
deleted file mode 100644
index bd01d99..0000000
--- a/asterixdb/asterix-installer/src/test/resources/integrationts/replication/queries/failback/node_failback/node_failback.5.cstate.aql
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-/*
- * Test case Name : node_failback.aql
- * Description : Make sure node failback completes as expected.
- The test goes as follows:
- start 2 nodes, bulkload a dataset, copy it to in-memory dataset,
- kill one node and wait until the failover complete, query cluster state,
- query data, insert new data, start the killed node and wait for failback,
- query cluster state, query data.
- * Expected Result : Success
- * Date : February 3 2016
- */
\ No newline at end of file
diff --git a/asterixdb/asterix-installer/src/test/resources/integrationts/replication/queries/failback/node_failback/node_failback.10.cstate.aql b/asterixdb/asterix-installer/src/test/resources/integrationts/replication/queries/failback/node_failback/node_failback.5.get.http
similarity index 98%
rename from asterixdb/asterix-installer/src/test/resources/integrationts/replication/queries/failback/node_failback/node_failback.10.cstate.aql
rename to asterixdb/asterix-installer/src/test/resources/integrationts/replication/queries/failback/node_failback/node_failback.5.get.http
index bd01d99..9d19b69 100644
--- a/asterixdb/asterix-installer/src/test/resources/integrationts/replication/queries/failback/node_failback/node_failback.10.cstate.aql
+++ b/asterixdb/asterix-installer/src/test/resources/integrationts/replication/queries/failback/node_failback/node_failback.5.get.http
@@ -26,4 +26,5 @@
query cluster state, query data.
* Expected Result : Success
* Date : February 3 2016
- */
\ No newline at end of file
+ */
+admin/cluster
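
Both cstate.aql steps become .get.http steps whose body is the endpoint path, so the test driver fetches cluster state over the HTTP API instead of running an AQL query. Assuming the driver simply resolves that path against the CC endpoint (the host and port below are the ones that appear in the expected results), each step amounts to something like:

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.HttpURLConnection;
    import java.net.URL;

    // Sketch of what a *.get.http test step performs: an HTTP GET whose JSON
    // response is compared against the corresponding *.adm expected result.
    public class ClusterStateGet {
        public static void main(String[] args) throws Exception {
            URL url = new URL("http://10.10.0.2:19002/admin/cluster");
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("GET");
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(conn.getInputStream()))) {
                String line;
                while ((line = in.readLine()) != null) {
                    System.out.println(line);
                }
            }
        }
    }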
diff --git a/asterixdb/asterix-installer/src/test/resources/integrationts/replication/results/failback/node_failback/node_failback.cluster_state.10.adm b/asterixdb/asterix-installer/src/test/resources/integrationts/replication/results/failback/node_failback/node_failback.cluster_state.10.adm
index 61322c9..2c3c039 100644
--- a/asterixdb/asterix-installer/src/test/resources/integrationts/replication/results/failback/node_failback/node_failback.cluster_state.10.adm
+++ b/asterixdb/asterix-installer/src/test/resources/integrationts/replication/results/failback/node_failback/node_failback.cluster_state.10.adm
@@ -1 +1,131 @@
-{"State":"ACTIVE","Metadata_Node":"asterix_nc1","partition_0":"asterix_nc1","partition_1":"asterix_nc1","partition_2":"asterix_nc2","partition_3":"asterix_nc2"}
\ No newline at end of file
+{
+ "cc": {
+ "configUri": "http://10.10.0.2:19002/admin/cluster/cc/config",
+ "statsUri": "http://10.10.0.2:19002/admin/cluster/cc/stats",
+ "threadDumpUri": "http://10.10.0.2:19002/admin/cluster/cc/threaddump"
+ },
+ "config": {
+ "api.port": 19002,
+ "cluster.partitions": {
+ "0": "ID:0, Original Node: asterix_nc1, IODevice: 0, Active Node: asterix_nc1",
+ "1": "ID:1, Original Node: asterix_nc1, IODevice: 1, Active Node: asterix_nc1",
+ "2": "ID:2, Original Node: asterix_nc2, IODevice: 0, Active Node: asterix_nc2",
+ "3": "ID:3, Original Node: asterix_nc2, IODevice: 1, Active Node: asterix_nc2"
+ },
+ "compiler.framesize": 131072,
+ "compiler.groupmemory": 33554432,
+ "compiler.joinmemory": 33554432,
+ "compiler.pregelix.home": "~/pregelix",
+ "compiler.sortmemory": 33554432,
+ "core.dump.paths": {
+ "asterix_nc1": "/home/vagrant/asterix/logs//asterix_nc1",
+ "asterix_nc2": "/home/vagrant/asterix/logs//asterix_nc2"
+ },
+ "feed.central.manager.port": 4500,
+ "feed.max.threshold.period": 5,
+ "feed.memory.available.wait.timeout": 10,
+ "feed.memory.global.budget": 67108864,
+ "feed.pending.work.threshold": 50,
+ "feed.port": 19003,
+ "instance.name": "asterix",
+ "log.level": "INFO",
+ "max.wait.active.cluster": 60,
+ "metadata.callback.port": 0,
+ "metadata.node": "asterix_nc1",
+ "metadata.partition": "ID:0, Original Node: asterix_nc1, IODevice: 0, Active Node: asterix_nc1",
+ "metadata.port": 0,
+ "metadata.registration.timeout.secs": 60,
+ "node.partitions": {
+ "asterix_nc1": [
+ "ID:0, Original Node: asterix_nc1, IODevice: 0, Active Node: asterix_nc1",
+ "ID:1, Original Node: asterix_nc1, IODevice: 1, Active Node: asterix_nc1"
+ ],
+ "asterix_nc2": [
+ "ID:2, Original Node: asterix_nc2, IODevice: 0, Active Node: asterix_nc2",
+ "ID:3, Original Node: asterix_nc2, IODevice: 1, Active Node: asterix_nc2"
+ ]
+ },
+ "node.stores": {
+ "asterix_nc1": [
+ "/home/vagrant/asterix/p1/storage",
+ "/home/vagrant/asterix/p2/storage"
+ ],
+ "asterix_nc2": [
+ "/home/vagrant/asterix/p1/storage",
+ "/home/vagrant/asterix/p2/storage"
+ ]
+ },
+ "plot.activate": false,
+ "storage.buffercache.maxopenfiles": 214748364,
+ "storage.buffercache.pagesize": 131072,
+ "storage.buffercache.size": 536870912,
+ "storage.lsm.bloomfilter.falsepositiverate": 0.01,
+ "storage.memorycomponent.globalbudget": 1073741824,
+ "storage.memorycomponent.numcomponents": 2,
+ "storage.memorycomponent.numpages": 256,
+ "storage.memorycomponent.pagesize": 131072,
+ "storage.metadata.memorycomponent.numpages": 64,
+ "transaction.log.dirs": {
+ "asterix_nc1": "/home/vagrant/asterix/tx_logs",
+ "asterix_nc2": "/home/vagrant/asterix/tx_logs"
+ },
+ "txn.commitprofiler.reportinterval": 5,
+ "txn.job.recovery.memorysize": 67108864,
+ "txn.lock.escalationthreshold": 1000,
+ "txn.lock.shrinktimer": 5000,
+ "txn.lock.timeout.sweepthreshold": 10000,
+ "txn.lock.timeout.waitthreshold": 60000,
+ "txn.log.buffer.numpages": 8,
+ "txn.log.buffer.pagesize": 524288,
+ "txn.log.checkpoint.history": 0,
+ "txn.log.checkpoint.lsnthreshold": 67108864,
+ "txn.log.checkpoint.pollfrequency": 120,
+ "txn.log.partitionsize": 2147483648,
+ "web.port": 19001,
+ "web.queryinterface.port": 19006,
+ "web.secondary.port": 19005
+ },
+ "diagnosticsUri": "http://10.10.0.2:19002/admin/diagnostics",
+ "fullShutdownUri": "http://10.10.0.2:19002/admin/shutdown?all=true",
+ "metadata_node": "asterix_nc1",
+ "ncs": [
+ {
+ "configUri": "http://10.10.0.2:19002/admin/cluster/node/asterix_nc1/config",
+ "node_id": "asterix_nc1",
+ "partitions": [
+ {
+ "active": true,
+ "partition_id": "partition_0"
+ },
+ {
+ "active": true,
+ "partition_id": "partition_1"
+ }
+ ],
+ "state": "ACTIVE",
+ "statsUri": "http://10.10.0.2:19002/admin/cluster/node/asterix_nc1/stats",
+ "threadDumpUri": "http://10.10.0.2:19002/admin/cluster/node/asterix_nc1/threaddump"
+ },
+ {
+ "configUri": "http://10.10.0.2:19002/admin/cluster/node/asterix_nc2/config",
+ "node_id": "asterix_nc2",
+ "partitions": [
+ {
+ "active": true,
+ "partition_id": "partition_2"
+ },
+ {
+ "active": true,
+ "partition_id": "partition_3"
+ }
+ ],
+ "state": "ACTIVE",
+ "statsUri": "http://10.10.0.2:19002/admin/cluster/node/asterix_nc2/stats",
+ "threadDumpUri": "http://10.10.0.2:19002/admin/cluster/node/asterix_nc2/threaddump"
+ }
+ ],
+ "replicationUri": "http://10.10.0.2:19002/admin/cluster/replication",
+ "shutdownUri": "http://10.10.0.2:19002/admin/shutdown",
+ "state": "ACTIVE",
+ "versionUri": "http://10.10.0.2:19002/admin/version"
+}
diff --git a/asterixdb/asterix-installer/src/test/resources/integrationts/replication/results/failback/node_failback/node_failback.cluster_state.5.adm b/asterixdb/asterix-installer/src/test/resources/integrationts/replication/results/failback/node_failback/node_failback.cluster_state.5.adm
index 587a97a..076c05b 100644
--- a/asterixdb/asterix-installer/src/test/resources/integrationts/replication/results/failback/node_failback/node_failback.cluster_state.5.adm
+++ b/asterixdb/asterix-installer/src/test/resources/integrationts/replication/results/failback/node_failback/node_failback.cluster_state.5.adm
@@ -1 +1,131 @@
-{"State":"ACTIVE","Metadata_Node":"asterix_nc2","partition_0":"asterix_nc2","partition_1":"asterix_nc2","partition_2":"asterix_nc2","partition_3":"asterix_nc2"}
\ No newline at end of file
+{
+ "cc": {
+ "configUri": "http://10.10.0.2:19002/admin/cluster/cc/config",
+ "statsUri": "http://10.10.0.2:19002/admin/cluster/cc/stats",
+ "threadDumpUri": "http://10.10.0.2:19002/admin/cluster/cc/threaddump"
+ },
+ "config": {
+ "api.port": 19002,
+ "cluster.partitions": {
+ "0": "ID:0, Original Node: asterix_nc1, IODevice: 0, Active Node: asterix_nc2",
+ "1": "ID:1, Original Node: asterix_nc1, IODevice: 1, Active Node: asterix_nc2",
+ "2": "ID:2, Original Node: asterix_nc2, IODevice: 0, Active Node: asterix_nc2",
+ "3": "ID:3, Original Node: asterix_nc2, IODevice: 1, Active Node: asterix_nc2"
+ },
+ "compiler.framesize": 131072,
+ "compiler.groupmemory": 33554432,
+ "compiler.joinmemory": 33554432,
+ "compiler.pregelix.home": "~/pregelix",
+ "compiler.sortmemory": 33554432,
+ "core.dump.paths": {
+ "asterix_nc1": "/home/vagrant/asterix/logs//asterix_nc1",
+ "asterix_nc2": "/home/vagrant/asterix/logs//asterix_nc2"
+ },
+ "feed.central.manager.port": 4500,
+ "feed.max.threshold.period": 5,
+ "feed.memory.available.wait.timeout": 10,
+ "feed.memory.global.budget": 67108864,
+ "feed.pending.work.threshold": 50,
+ "feed.port": 19003,
+ "instance.name": "asterix",
+ "log.level": "INFO",
+ "max.wait.active.cluster": 60,
+ "metadata.callback.port": 0,
+ "metadata.node": "asterix_nc1",
+ "metadata.partition": "ID:0, Original Node: asterix_nc1, IODevice: 0, Active Node: asterix_nc2",
+ "metadata.port": 0,
+ "metadata.registration.timeout.secs": 60,
+ "node.partitions": {
+ "asterix_nc1": [
+ "ID:0, Original Node: asterix_nc1, IODevice: 0, Active Node: asterix_nc2",
+ "ID:1, Original Node: asterix_nc1, IODevice: 1, Active Node: asterix_nc2"
+ ],
+ "asterix_nc2": [
+ "ID:2, Original Node: asterix_nc2, IODevice: 0, Active Node: asterix_nc2",
+ "ID:3, Original Node: asterix_nc2, IODevice: 1, Active Node: asterix_nc2"
+ ]
+ },
+ "node.stores": {
+ "asterix_nc1": [
+ "/home/vagrant/asterix/p1/storage",
+ "/home/vagrant/asterix/p2/storage"
+ ],
+ "asterix_nc2": [
+ "/home/vagrant/asterix/p1/storage",
+ "/home/vagrant/asterix/p2/storage"
+ ]
+ },
+ "plot.activate": false,
+ "storage.buffercache.maxopenfiles": 214748364,
+ "storage.buffercache.pagesize": 131072,
+ "storage.buffercache.size": 536870912,
+ "storage.lsm.bloomfilter.falsepositiverate": 0.01,
+ "storage.memorycomponent.globalbudget": 1073741824,
+ "storage.memorycomponent.numcomponents": 2,
+ "storage.memorycomponent.numpages": 256,
+ "storage.memorycomponent.pagesize": 131072,
+ "storage.metadata.memorycomponent.numpages": 64,
+ "transaction.log.dirs": {
+ "asterix_nc1": "/home/vagrant/asterix/tx_logs",
+ "asterix_nc2": "/home/vagrant/asterix/tx_logs"
+ },
+ "txn.commitprofiler.reportinterval": 5,
+ "txn.job.recovery.memorysize": 67108864,
+ "txn.lock.escalationthreshold": 1000,
+ "txn.lock.shrinktimer": 5000,
+ "txn.lock.timeout.sweepthreshold": 10000,
+ "txn.lock.timeout.waitthreshold": 60000,
+ "txn.log.buffer.numpages": 8,
+ "txn.log.buffer.pagesize": 524288,
+ "txn.log.checkpoint.history": 0,
+ "txn.log.checkpoint.lsnthreshold": 67108864,
+ "txn.log.checkpoint.pollfrequency": 120,
+ "txn.log.partitionsize": 2147483648,
+ "web.port": 19001,
+ "web.queryinterface.port": 19006,
+ "web.secondary.port": 19005
+ },
+ "diagnosticsUri": "http://10.10.0.2:19002/admin/diagnostics",
+ "fullShutdownUri": "http://10.10.0.2:19002/admin/shutdown?all=true",
+ "metadata_node": "asterix_nc2",
+ "ncs": [
+ {
+ "configUri": "http://10.10.0.2:19002/admin/cluster/node/asterix_nc1/config",
+ "node_id": "asterix_nc1",
+ "partitions": [
+ {
+ "active": true,
+ "partition_id": "partition_0"
+ },
+ {
+ "active": true,
+ "partition_id": "partition_1"
+ }
+ ],
+ "state": "FAILED",
+ "statsUri": "http://10.10.0.2:19002/admin/cluster/node/asterix_nc1/stats",
+ "threadDumpUri": "http://10.10.0.2:19002/admin/cluster/node/asterix_nc1/threaddump"
+ },
+ {
+ "configUri": "http://10.10.0.2:19002/admin/cluster/node/asterix_nc2/config",
+ "node_id": "asterix_nc2",
+ "partitions": [
+ {
+ "active": true,
+ "partition_id": "partition_2"
+ },
+ {
+ "active": true,
+ "partition_id": "partition_3"
+ }
+ ],
+ "state": "ACTIVE",
+ "statsUri": "http://10.10.0.2:19002/admin/cluster/node/asterix_nc2/stats",
+ "threadDumpUri": "http://10.10.0.2:19002/admin/cluster/node/asterix_nc2/threaddump"
+ }
+ ],
+ "replicationUri": "http://10.10.0.2:19002/admin/cluster/replication",
+ "shutdownUri": "http://10.10.0.2:19002/admin/shutdown",
+ "state": "ACTIVE",
+ "versionUri": "http://10.10.0.2:19002/admin/version"
+}
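
The two expected results capture the scenario's phases: after failover (step 5) the cluster is ACTIVE with asterix_nc1 FAILED, asterix_nc2 serving as metadata node, and every partition active on asterix_nc2; after failback (step 10) both NCs are ACTIVE, each partition is back on its original node, and metadata is back on asterix_nc1. A hedged sketch of reading exactly those fields (Jackson is an assumption here; the JSON literal is a trimmed excerpt of the step-5 result above):

    import com.fasterxml.jackson.databind.JsonNode;
    import com.fasterxml.jackson.databind.ObjectMapper;

    // Reads the fields on which the step-5 and step-10 expected results differ.
    public class ClusterStateCheck {
        public static void main(String[] args) throws Exception {
            String json = "{\"metadata_node\":\"asterix_nc2\",\"state\":\"ACTIVE\","
                    + "\"ncs\":[{\"node_id\":\"asterix_nc1\",\"state\":\"FAILED\"},"
                    + "{\"node_id\":\"asterix_nc2\",\"state\":\"ACTIVE\"}]}";
            JsonNode root = new ObjectMapper().readTree(json);
            System.out.println("cluster: " + root.get("state").asText());
            System.out.println("metadata: " + root.get("metadata_node").asText());
            for (JsonNode nc : root.get("ncs")) { // per-node states
                System.out.println(nc.get("node_id").asText() + " -> "
                        + nc.get("state").asText());
            }
        }
    }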
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/util/ClusterStateManager.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/util/ClusterStateManager.java
index 942abe3..376c82a 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/util/ClusterStateManager.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/util/ClusterStateManager.java
@@ -319,9 +319,9 @@
for (String replica : partitionReplicas) {
//TODO (mhubail) currently this assigns the partition to the first found active replica.
//It needs to be modified to consider load balancing.
- addActiveReplica(replica, partition, partitionRecoveryPlan);
- // bug? will always break on first loop execution
- break;
+ if (addActiveReplica(replica, partition, partitionRecoveryPlan)) {
+ break;
+ }
}
}
@@ -356,7 +356,7 @@
}
}
- private void addActiveReplica(String replica, ClusterPartition partition,
+ private boolean addActiveReplica(String replica, ClusterPartition partition,
Map<String, List<Integer>> partitionRecoveryPlan) {
if (activeNcConfiguration.containsKey(replica) && !failedNodes.contains(replica)) {
if (!partitionRecoveryPlan.containsKey(replica)) {
@@ -366,7 +366,9 @@
} else {
partitionRecoveryPlan.get(replica).add(partition.getPartitionId());
}
+ return true;
}
+ return false;
}
private synchronized List<ClusterPartition> getNodeAssignedPartitions(String nodeId) {
@@ -660,4 +662,4 @@
}
return stateDescription;
}
-}
\ No newline at end of file
+}
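
The ClusterStateManager fix ties the two hunks together: addActiveReplica now reports whether the replica actually accepted the partition, so the loop breaks only after a successful assignment instead of unconditionally on the first replica. A simplified, self-contained sketch of the corrected control flow (names and types reduced from the real class):

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;

    public class RecoveryPlanSketch {
        // Returns true only if the replica is active and was added to the plan.
        static boolean addActiveReplica(String replica, int partitionId,
                Set<String> activeNodes, Set<String> failedNodes,
                Map<String, List<Integer>> plan) {
            if (activeNodes.contains(replica) && !failedNodes.contains(replica)) {
                plan.computeIfAbsent(replica, k -> new ArrayList<>()).add(partitionId);
                return true;
            }
            return false; // inactive or failed: caller tries the next replica
        }

        public static void main(String[] args) {
            Set<String> active = Set.of("nc2", "nc3");
            Set<String> failed = Set.of("nc1");
            Map<String, List<Integer>> plan = new HashMap<>();
            for (String replica : List.of("nc1", "nc2", "nc3")) {
                if (addActiveReplica(replica, 0, active, failed, plan)) {
                    break; // first replica that actually took the partition
                }
            }
            System.out.println(plan); // {nc2=[0]}; the old code broke after nc1 and assigned nothing
        }
    }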