blob: b2b82ff91cf3b65725325edd71ea667d5936a75a [file] [log] [blame]
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.


# Runs the tokenizer over one dataset's raw partition and moves the resulting
# ".bin" file into the dataset's input directory.
#
# Usage:   <script> dataset [tokenizer-arg]
# Example: <script> dblp-small
#
# Arguments:
#   $1 - dataset name (a sub-directory of $DATA)
#   $2 - optional; forwarded unchanged to the tokenizer when non-empty
#
# SSJOIN, DATA and IN are not defined in this script; presumably they are set
# by conf.sh -- TODO confirm against conf.sh.

# Absolute directory containing this script. Going through cd/pwd also handles
# invocations such as "../script.sh", where merely swapping a leading "." for
# the current directory would yield a bogus path like "/cwd.".
DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) || {
  echo "$(basename "$0"): cannot resolve script directory" >&2
  exit 1
}

# Check readability up front rather than testing the status of 'source', which
# reflects the last command inside conf.sh and not whether loading succeeded.
if [[ ! -r "$DIR/conf.sh" ]]; then
  echo "$(basename "$0"): cannot read $DIR/conf.sh" >&2
  exit 1
fi
source "$DIR/conf.sh"

ARGS=1        # Required number of arguments
E_BADARGS=85  # Wrong number of arguments passed to script.
if (( $# < ARGS )); then
  echo "Usage: $(basename "$0") dataset" >&2
  echo "Example: $(basename "$0") dblp-small" >&2
  exit "$E_BADARGS"
fi

# Refuse to build paths from missing configuration; an empty DATA would
# otherwise make the commands below operate relative to "/".
: "${SSJOIN:?SSJOIN is not set (expected from conf.sh)}"
: "${DATA:?DATA is not set (expected from conf.sh)}"
: "${IN:?IN is not set (expected from conf.sh)}"

dataset_dir="$DATA/$1"
raw_part="$dataset_dir/raw-000/part-00000"

# ${2:+"$2"} expands to nothing when $2 is unset or empty, so the tokenizer
# receives exactly the arguments it did before, minus word splitting.
"$SSJOIN/tokenizer" "$raw_part" ${2:+"$2"} || exit

# -p keeps re-runs from failing when the input directory already exists.
mkdir -p -- "$dataset_dir/$IN" || exit

# The ".bin" file is presumably written by the tokenizer next to its input
# -- TODO confirm.
mv -- "$raw_part.bin" "$dataset_dir/$IN/part-00000" || exit
35