#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Compares the output of the ppjoinplus binary with the output of the
# fuzzyjoin-core jar on the same input file and prints any differences.
#
# Usage:   <script> dataset
# Example: <script> dblp-small
#
# Uses DATA, IN, OUT and SSJOIN, which are not set in this script;
# presumably they come from conf.sh -- TODO confirm.
#
# Exit status: 85 when the dataset argument is missing, otherwise the exit
# status of the final diff.

# Absolute directory of this script. Resolved with cd/pwd so that relative
# invocations such as "../dir/script.sh" work too; CDPATH is cleared so that
# cd can neither search other directories nor print anything to stdout.
DIR=$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd) || exit 1
source "$DIR/conf.sh"

ARGS=1        # Required number of arguments
E_BADARGS=85  # Wrong number of arguments passed to script.
if (( $# < ARGS )); then
  echo "Usage: $(basename -- "$0") dataset"
  echo "Example: $(basename -- "$0") dblp-small"
  exit "$E_BADARGS"
fi

dataset=$1

# Threshold passed to both programs; dblp-small uses a lower value.
THR="0.80"
if [[ "$dataset" == "dblp-small" ]]; then
  THR="0.50"
fi

input_file="$DATA/$dataset/$IN/part-00000"
expected_dir="$DATA/$dataset.expected/$OUT"
actual_dir="$DATA/$dataset/$OUT"

# -p: create missing parents and do not fail when the directory is left over
# from a previous run.
mkdir -p -- "$expected_dir" || exit 1
# The sed step rewrites 0.812 to 0.813 in the ppjoinplus output.
# NOTE(review): presumably this papers over a rounding difference between
# the two implementations -- confirm.
"$SSJOIN/ppjoinplus" j "$THR" "$input_file" \
  | sed 's/0\.812/0\.813/' \
  | sort > "$expected_dir/expected.txt"

mkdir -p -- "$actual_dir" || exit 1
java \
  -Xmx2g \
  -jar "$DIR/../../../target/fuzzyjoin-core-0.0.1.jar" \
  "$THR" "$input_file" \
  | sort > "$actual_dir/part-00000"

# Last command: its exit status (0 = identical) becomes the script's status.
diff -- "$expected_dir/expected.txt" "$actual_dir/part-00000"