1. add tests for rack-aware test; 2. add svn ignore files

git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_asterix_stabilization@3156 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/RackAwareNcCollectionBuilder.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/RackAwareNcCollectionBuilder.java
new file mode 100644
index 0000000..5371c84
--- /dev/null
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/RackAwareNcCollectionBuilder.java
@@ -0,0 +1,167 @@
+package edu.uci.ics.hyracks.hdfs.scheduler;
+
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.TreeMap;
+
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.mapred.InputSplit;
+
+import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
+import edu.uci.ics.hyracks.api.topology.ClusterTopology;
+import edu.uci.ics.hyracks.hdfs.api.INcCollection;
+import edu.uci.ics.hyracks.hdfs.api.INcCollectionBuilder;
+
+@SuppressWarnings("deprecation")
+public class RackAwareNcCollectionBuilder implements INcCollectionBuilder {
+    private ClusterTopology topology;
+
+    public RackAwareNcCollectionBuilder(ClusterTopology topology) {
+        this.topology = topology;
+    }
+
+    @Override
+    public INcCollection build(Map<String, NodeControllerInfo> ncNameToNcInfos,
+            final Map<String, List<String>> ipToNcMapping, final Map<String, Integer> ncNameToIndex, String[] NCs,
+            final int[] workloads, final int slotLimit) {
+        try {
+            final Map<List<Integer>, String> pathToNCs = new HashMap<List<Integer>, String>();
+            for (int i = 0; i < NCs.length; i++) {
+                List<Integer> path = new ArrayList<Integer>();
+                String ipAddress = InetAddress.getByAddress(
+                        ncNameToNcInfos.get(NCs[i]).getNetworkAddress().getIpAddress()).getHostAddress();
+                topology.lookupNetworkTerminal(ipAddress, path);
+                pathToNCs.put(path, NCs[i]);
+            }
+
+            final TreeMap<List<Integer>, IntWritable> availableIpsToSlots = new TreeMap<List<Integer>, IntWritable>(
+                    new Comparator<List<Integer>>() {
+
+                        @Override
+                        public int compare(List<Integer> l1, List<Integer> l2) {
+                            int commonLength = Math.min(l1.size(), l2.size());
+                            for (int i = 0; i < commonLength; i++) {
+                                Integer value1 = l1.get(i);
+                                Integer value2 = l2.get(i);
+                                int cmp = value1 > value2 ? 1 : (value1 < value2 ? -1 : 0);
+                                if (cmp != 0) {
+                                    return cmp;
+                                }
+                            }
+                            return l1.size() > l2.size() ? 1 : (l1.size() < l2.size() ? -1 : 0);
+                        }
+
+                    });
+            for (int i = 0; i < workloads.length; i++) {
+                if (workloads[i] < slotLimit) {
+                    List<Integer> path = new ArrayList<Integer>();
+                    String ipAddress = InetAddress.getByAddress(
+                            ncNameToNcInfos.get(NCs[i]).getNetworkAddress().getIpAddress()).getHostAddress();
+                    topology.lookupNetworkTerminal(ipAddress, path);
+                    IntWritable availableSlot = availableIpsToSlots.get(path);
+                    if (availableSlot == null) {
+                        availableSlot = new IntWritable(slotLimit - workloads[i]);
+                        availableIpsToSlots.put(path, availableSlot);
+                    } else {
+                        availableSlot.set(slotLimit - workloads[i] + availableSlot.get());
+                    }
+                }
+            }
+            return new INcCollection() {
+
+                @Override
+                public String findNearestAvailableSlot(InputSplit split) {
+                    try {
+                        String[] locs = split.getLocations();
+                        int minDistance = Integer.MAX_VALUE;
+                        List<Integer> currentCandidatePath = null;
+                        if (locs == null || locs.length > 0) {
+                            for (int j = 0; j < locs.length; j++) {
+                                /**
+                                 * get all the IP addresses from the name
+                                 */
+                                InetAddress[] allIps = InetAddress.getAllByName(locs[j]);
+                                for (InetAddress ip : allIps) {
+                                    List<Integer> splitPath = new ArrayList<Integer>();
+                                    boolean inCluster = topology.lookupNetworkTerminal(ip.getHostAddress(), splitPath);
+                                    if (!inCluster) {
+                                        continue;
+                                    }
+                                    /**
+                                     * if the node controller exists
+                                     */
+                                    List<Integer> candidatePath = availableIpsToSlots.floorKey(splitPath);
+                                    if (candidatePath == null) {
+                                        candidatePath = availableIpsToSlots.ceilingKey(splitPath);
+                                    }
+                                    if (candidatePath != null) {
+                                        if (availableIpsToSlots.get(candidatePath).get() > 0) {
+                                            int distance = distance(splitPath, candidatePath);
+                                            if (minDistance > distance) {
+                                                minDistance = distance;
+                                                currentCandidatePath = candidatePath;
+                                            }
+                                        }
+
+                                    }
+                                }
+                            }
+                        } else {
+                            for (Entry<List<Integer>, IntWritable> entry : availableIpsToSlots.entrySet()) {
+                                if (entry.getValue().get() > 0) {
+                                    currentCandidatePath = entry.getKey();
+                                    break;
+                                }
+                            }
+                        }
+
+                        if (currentCandidatePath.size() > 0) {
+                            /**
+                             * Update the entry of the selected IP
+                             */
+                            IntWritable availableSlot = availableIpsToSlots.get(currentCandidatePath);
+                            availableSlot.set(availableSlot.get() - 1);
+                            if (availableSlot.get() == 0) {
+                                availableIpsToSlots.remove(currentCandidatePath);
+                            }
+                            /**
+                             * Update the entry of the selected NC
+                             */
+                            String candidateNc = pathToNCs.get(currentCandidatePath);
+                            int ncIndex = ncNameToIndex.get(candidateNc);
+                            if (workloads[ncIndex] < slotLimit) {
+                                return candidateNc;
+                            }
+                        }
+                        /** not scheduled */
+                        return null;
+                    } catch (Exception e) {
+                        throw new IllegalStateException(e);
+                    }
+                }
+
+                @Override
+                public int numAvailableSlots() {
+                    return availableIpsToSlots.size();
+                }
+
+                private int distance(List<Integer> splitPath, List<Integer> candidatePath) {
+                    int commonLength = Math.min(splitPath.size(), candidatePath.size());
+                    int distance = 0;
+                    for (int i = 0; i < commonLength; i++) {
+                        distance = distance * 10 + Math.abs(splitPath.get(i) - candidatePath.get(i));
+                    }
+                    return distance;
+                }
+            };
+        } catch (Exception e) {
+            throw new IllegalStateException(e);
+        }
+    }
+
+}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/Scheduler.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/Scheduler.java
index a6320e7..6d31855 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/Scheduler.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/Scheduler.java
@@ -64,8 +64,6 @@
      * the nc collection builder
      */
     private INcCollectionBuilder ncCollectionBuilder;
-    
-    private ClusterTopology topology;
 
     /**
      * The constructor of the scheduler.
@@ -74,7 +72,16 @@
      * @throws HyracksException
      */
     public Scheduler(String ipAddress, int port) throws HyracksException {
-        this(ipAddress, port, new IPProximityNcCollectionBuilder());
+        try {
+            IHyracksClientConnection hcc = new HyracksConnection(ipAddress, port);
+            this.ncNameToNcInfos = hcc.getNodeControllerInfos();
+            ClusterTopology topology = hcc.getClusterTopology();
+            this.ncCollectionBuilder = topology == null ? new IPProximityNcCollectionBuilder()
+                    : new RackAwareNcCollectionBuilder(topology);
+            loadIPAddressToNCMap(ncNameToNcInfos);
+        } catch (Exception e) {
+            throw new HyracksException(e);
+        }
     }
 
     /**
@@ -87,7 +94,6 @@
         try {
             IHyracksClientConnection hcc = new HyracksConnection(ipAddress, port);
             this.ncNameToNcInfos = hcc.getNodeControllerInfos();
-            this.topology = hcc.getClusterTopology();
             this.ncCollectionBuilder = ncCollectionBuilder;
             loadIPAddressToNCMap(ncNameToNcInfos);
         } catch (Exception e) {
@@ -100,21 +106,6 @@
      * 
      * @param ncNameToNcInfos
      *            the mapping from nc names to nc infos
-     * @param topology
-     *            the hyracks cluster toplogy
-     * @throws HyracksException
-     */
-    public Scheduler(Map<String, NodeControllerInfo> ncNameToNcInfos, INcCollectionBuilder ncCollectionBuilder,
-            ClusterTopology topology) throws HyracksException {
-        this(ncNameToNcInfos, ncCollectionBuilder);
-        this.topology = topology;
-    }
-
-    /**
-     * The constructor of the scheduler.
-     * 
-     * @param ncNameToNcInfos
-     *            the mapping from nc names to nc infos
      * @throws HyracksException
      */
     public Scheduler(Map<String, NodeControllerInfo> ncNameToNcInfos) throws HyracksException {
@@ -128,6 +119,21 @@
      * 
      * @param ncNameToNcInfos
      *            the mapping from nc names to nc infos
+     * @param topology
+     *            the hyracks cluster toplogy
+     * @throws HyracksException
+     */
+    public Scheduler(Map<String, NodeControllerInfo> ncNameToNcInfos, ClusterTopology topology) throws HyracksException {
+        this(ncNameToNcInfos);
+        this.ncCollectionBuilder = topology == null ? new IPProximityNcCollectionBuilder()
+                : new RackAwareNcCollectionBuilder(topology);
+    }
+
+    /**
+     * The constructor of the scheduler.
+     * 
+     * @param ncNameToNcInfos
+     *            the mapping from nc names to nc infos
      * @throws HyracksException
      */
     public Scheduler(Map<String, NodeControllerInfo> ncNameToNcInfos, INcCollectionBuilder ncCollectionBuilder)
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs/scheduler/SchedulerTest.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs/scheduler/SchedulerTest.java
index f392293..90967a0 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs/scheduler/SchedulerTest.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs/scheduler/SchedulerTest.java
@@ -15,6 +15,8 @@
 
 package edu.uci.ics.hyracks.hdfs.scheduler;
 
+import java.io.FileReader;
+import java.io.IOException;
 import java.net.InetAddress;
 import java.util.HashMap;
 import java.util.Map;
@@ -25,13 +27,28 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.InputSplit;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
 
 import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
 import edu.uci.ics.hyracks.api.client.NodeStatus;
 import edu.uci.ics.hyracks.api.comm.NetworkAddress;
+import edu.uci.ics.hyracks.api.topology.ClusterTopology;
+import edu.uci.ics.hyracks.api.topology.TopologyDefinitionParser;
 
 @SuppressWarnings("deprecation")
 public class SchedulerTest extends TestCase {
+    private static String TOPOLOGY_PATH = "src/test/resources/topology.xml";
+
+    private ClusterTopology parseTopology() throws IOException, SAXException {
+        FileReader fr = new FileReader(TOPOLOGY_PATH);
+        InputSource in = new InputSource(fr);
+        try {
+            return TopologyDefinitionParser.parse(in);
+        } finally {
+            fr.close();
+        }
+    }
 
     /**
      * Test the scheduler for the case when the Hyracks cluster is the HDFS cluster
@@ -67,11 +84,17 @@
         fileSplits[4] = new FileSplit(new Path("part-5"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" });
         fileSplits[5] = new FileSplit(new Path("part-6"), 0, 0, new String[] { "10.0.0.2", "10.0.0.3", "10.0.0.5" });
 
-        Scheduler scheduler = new Scheduler(ncNameToNcInfos);
-        String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
-
         String[] expectedResults = new String[] { "nc1", "nc4", "nc6", "nc2", "nc3", "nc5" };
 
+        Scheduler scheduler = new Scheduler(ncNameToNcInfos);
+        String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
+        for (int i = 0; i < locationConstraints.length; i++) {
+            Assert.assertEquals(locationConstraints[i], expectedResults[i]);
+        }
+
+        ClusterTopology topology = parseTopology();
+        scheduler = new Scheduler(ncNameToNcInfos, topology);
+        locationConstraints = scheduler.getLocationConstraints(fileSplits);
         for (int i = 0; i < locationConstraints.length; i++) {
             Assert.assertEquals(locationConstraints[i], expectedResults[i]);
         }
@@ -125,6 +148,15 @@
         for (int i = 0; i < locationConstraints.length; i++) {
             Assert.assertEquals(locationConstraints[i], expectedResults[i]);
         }
+
+        expectedResults = new String[] { "nc1", "nc4", "nc4", "nc1", "nc3", "nc2", "nc2", "nc3", "nc7", "nc12", "nc7",
+                "nc12" };
+        ClusterTopology topology = parseTopology();
+        scheduler = new Scheduler(ncNameToNcInfos, topology);
+        locationConstraints = scheduler.getLocationConstraints(fileSplits);
+        for (int i = 0; i < locationConstraints.length; i++) {
+            Assert.assertEquals(locationConstraints[i], expectedResults[i]);
+        }
     }
 
     /**
@@ -167,12 +199,19 @@
         fileSplits[10] = new FileSplit(new Path("part-11"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" });
         fileSplits[11] = new FileSplit(new Path("part-12"), 0, 0, new String[] { "10.0.0.2", "10.0.0.3", "10.0.0.5" });
 
-        Scheduler scheduler = new Scheduler(ncNameToNcInfos);
-        String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
-
         String[] expectedResults = new String[] { "nc1", "nc4", "nc4", "nc1", "nc3", "nc2", "nc2", "nc3", "nc5", "nc6",
                 "nc5", "nc6" };
 
+        Scheduler scheduler = new Scheduler(ncNameToNcInfos);
+        String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
+
+        for (int i = 0; i < locationConstraints.length; i++) {
+            Assert.assertEquals(locationConstraints[i], expectedResults[i]);
+        }
+
+        ClusterTopology topology = parseTopology();
+        scheduler = new Scheduler(ncNameToNcInfos, topology);
+        locationConstraints = scheduler.getLocationConstraints(fileSplits);
         for (int i = 0; i < locationConstraints.length; i++) {
             Assert.assertEquals(locationConstraints[i], expectedResults[i]);
         }
@@ -219,15 +258,23 @@
         fileSplits[11] = new FileSplit(new Path("part-12"), 0, 0, new String[] { "10.0.0.2", "10.0.0.3", "10.0.0.5" });
         fileSplits[12] = new FileSplit(new Path("part-13"), 0, 0, new String[] { "10.0.0.2", "10.0.0.4", "10.0.0.5" });
 
-        Scheduler scheduler = new Scheduler(ncNameToNcInfos);
-        String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
-
         String[] expectedResults = new String[] { "nc1", "nc4", "nc4", "nc1", "nc3", "nc2", "nc2", "nc3", "nc5", "nc1",
                 "nc5", "nc2", "nc4" };
 
+        Scheduler scheduler = new Scheduler(ncNameToNcInfos);
+        String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
+
         for (int i = 0; i < locationConstraints.length; i++) {
             Assert.assertEquals(locationConstraints[i], expectedResults[i]);
         }
+
+        ClusterTopology topology = parseTopology();
+        scheduler = new Scheduler(ncNameToNcInfos, topology);
+        locationConstraints = scheduler.getLocationConstraints(fileSplits);
+        for (int i = 0; i < locationConstraints.length; i++) {
+            Assert.assertEquals(locationConstraints[i], expectedResults[i]);
+        }
+
     }
 
 }