/*
 * Decompiled with CFR 0.152.
 */
package org.apache.pig.test;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Iterator;
import junit.framework.Assert;
import junit.framework.TestCase;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.test.MiniCluster;
import org.apache.pig.test.Util;
import org.apache.pig.test.utils.TestHelper;
import org.junit.After;
import org.junit.Before;

/**
 * Tests for Pig joins declared with {@code using "skewed"}, run through a
 * {@link PigServer} in {@link ExecType#MAPREDUCE} mode against a {@link MiniCluster}.
 *
 * <p>Before each test seven small tab-separated input files are generated in the
 * working directory and copied to the cluster; after each test they are deleted
 * from both places. The constructor sets {@code pig.skewedjoin.reduce.maxtuple}
 * to {@code 5} and {@code pig.skewedjoin.reduce.memusage} to {@code 0.01} on the
 * server used by the tests.
 */
public class TestSkewedJoin
extends TestCase {
    private static final String INPUT_FILE1 = "SkewedJoinInput1.txt";
    private static final String INPUT_FILE2 = "SkewedJoinInput2.txt";
    private static final String INPUT_FILE3 = "SkewedJoinInput3.txt";
    private static final String INPUT_FILE4 = "SkewedJoinInput4.txt";
    private static final String INPUT_FILE5 = "SkewedJoinInput5.txt";
    private static final String INPUT_FILE6 = "SkewedJoinInput6.txt";
    private static final String INPUT_FILE7 = "SkewedJoinInput7.txt";

    /** Every generated input file, in the order it is uploaded and later removed. */
    private static final String[] INPUT_FILES = {
        INPUT_FILE1, INPUT_FILE2, INPUT_FILE3, INPUT_FILE4, INPUT_FILE5, INPUT_FILE6, INPUT_FILE7
    };

    /** Join output directory name, used both on the cluster and for the local copy. */
    private static final String OUTPUT_DIR = "skewedjoin";

    private PigServer pigServer;
    private final MiniCluster cluster = MiniCluster.buildCluster();

    /**
     * Creates the Pig server for this test instance and sets the two
     * {@code pig.skewedjoin.reduce.*} properties on it.
     *
     * @throws ExecException if the server cannot be created
     * @throws IOException if the server cannot be created
     */
    public TestSkewedJoin() throws ExecException, IOException {
        this.pigServer = new PigServer(ExecType.MAPREDUCE, this.cluster.getProperties());
        this.pigServer.getPigContext().getProperties().setProperty("pig.skewedjoin.reduce.maxtuple", "5");
        this.pigServer.getPigContext().getProperties().setProperty("pig.skewedjoin.reduce.memusage", "0.01");
    }

    /** Generates the input files and uploads them to the cluster. */
    @Before
    public void setUp() throws Exception {
        this.createFiles();
    }

    /**
     * Writes the seven input files locally, then copies each of them to the cluster
     * under the same name.
     *
     * @throws IOException if a file cannot be written or copied
     */
    private void createFiles() throws IOException {
        // File 1: 120 repetitions of the keys 100/200/300; the third column carries a
        // running counter so every row is distinct.
        String[] skewedRows = new String[3 * 120];
        for (int k = 0; k < skewedRows.length; k += 3) {
            skewedRows[k] = "100\tapple1\taaa" + k;
            skewedRows[k + 1] = "200\torange1\tbbb" + (k + 1);
            skewedRows[k + 2] = "300\tstrawberry\tccc" + (k + 2);
        }
        writeLines(INPUT_FILE1, skewedRows);

        writeLines(INPUT_FILE2,
                "100\tapple1",
                "100\tapple2",
                "100\tapple2",
                "200\torange1",
                "200\torange2",
                "300\tstrawberry",
                "400\tpear");

        writeLines(INPUT_FILE3,
                "100\tapple1",
                "100\tapple2",
                "200\torange1",
                "200\torange2",
                "300\tstrawberry",
                "300\tstrawberry2",
                "400\tpear");

        // File 4: the same map literal repeated 100 times.
        String[] mapRows = new String[100];
        for (int i = 0; i < mapRows.length; ++i) {
            mapRows[i] = "[a100#apple1,a100#apple2,a200#orange1,a200#orange2,a300#strawberry,a300#strawberry2,a400#pear]";
        }
        writeLines(INPUT_FILE4, mapRows);

        // File 5: mostly rows whose first (id) column is empty, plus two rows with id 100.
        String[] emptyIdRows = new String[10 + 1 + 10 + 1 + 1];
        int next = 0;
        for (int i = 0; i < 10; ++i) {
            emptyIdRows[next++] = "\tapple1";
        }
        emptyIdRows[next++] = "100\tapple2";
        for (int i = 0; i < 10; ++i) {
            emptyIdRows[next++] = "\torange1";
        }
        emptyIdRows[next++] = "\t";
        emptyIdRows[next++] = "100\t";
        writeLines(INPUT_FILE5, emptyIdRows);

        // File 6: ids 0..299, five rows per id.
        String[] denseRows = new String[300 * 5];
        next = 0;
        for (int id = 0; id < 300; ++id) {
            for (int value = 0; value < 5; ++value) {
                denseRows[next++] = id + "\t" + value;
            }
        }
        writeLines(INPUT_FILE6, denseRows);

        // File 7: every third id in 0..299 (100 ids), two rows per id.
        String[] sparseRows = new String[(300 / 3) * 2];
        next = 0;
        for (int id = 0; id < 300; id += 3) {
            for (int value = 0; value < 2; ++value) {
                sparseRows[next++] = id + "\t" + value;
            }
        }
        writeLines(INPUT_FILE7, sparseRows);

        for (String name : INPUT_FILES) {
            Util.copyFromLocalToCluster(this.cluster, name, name);
        }
    }

    /**
     * Removes the local input files, the local copy of the join output directory,
     * and the input files on the cluster.
     */
    @After
    public void tearDown() throws Exception {
        for (String name : INPUT_FILES) {
            new File(name).delete();
        }
        Util.deleteDirectory(new File(OUTPUT_DIR));
        for (String name : INPUT_FILES) {
            Util.deleteFile(this.cluster, name);
        }
    }

    /**
     * Joins two grouped relations on their group key, once with {@code using "skewed"}
     * and once without, and expects both joins to return the same non-empty bag.
     */
    public void testSkewedJoinWithGroup() throws IOException {
        this.pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);");
        this.pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (id, name);");
        this.pigServer.registerQuery("C = GROUP A by id;");
        this.pigServer.registerQuery("D = GROUP B by id;");

        this.pigServer.registerQuery("E = join C by group, D by group using \"skewed\" parallel 5;");
        DataBag skewedResult = this.collect("E");
        this.pigServer.registerQuery("E = join C by group, D by group;");
        DataBag regularResult = this.collect("E");

        Assert.assertTrue(skewedResult.size() > 0L && regularResult.size() > 0L);
        Assert.assertTrue(TestHelper.compareBags(skewedResult, regularResult));
    }

    /**
     * Replaces the server with one on which the {@code pig.skewedjoin.reduce.*}
     * properties were not set by this class, then expects a two-column skewed join
     * to return the same non-empty bag as the plain join.
     */
    public void testSkewedJoinWithNoProperties() throws IOException {
        this.pigServer = new PigServer(ExecType.MAPREDUCE, this.cluster.getProperties());
        this.pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);");
        this.pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (id, name);");
        try {
            this.pigServer.registerQuery("C = join A by (id, name), B by (id, name) using \"skewed\" parallel 5;");
            DataBag skewedResult = this.collect("C");
            this.pigServer.registerQuery("E = join A by(id, name), B by (id, name);");
            DataBag regularResult = this.collect("E");

            Assert.assertTrue(skewedResult.size() > 0L && regularResult.size() > 0L);
            Assert.assertTrue(TestHelper.compareBags(skewedResult, regularResult));
        }
        catch (Exception e) {
            failWith("Skewed join without the skewed join properties set failed", e);
        }
    }

    /** Expects a skewed join requested with {@code parallel 1} to run without throwing. */
    public void testSkewedJoinReducers() throws IOException {
        this.pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);");
        this.pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (id, name);");
        try {
            this.pigServer.registerQuery("C = join A by id, B by id using \"skewed\" parallel 1;");
            this.collect("C");
        }
        catch (Exception e) {
            failWith("Should not throw exception, should continue execution", e);
        }
    }

    /** Expects a three-way join declared as skewed to be rejected with an exception. */
    public void testSkewedJoin3Way() throws IOException {
        this.pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);");
        this.pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (id, name);");
        this.pigServer.registerQuery("C = LOAD '" + INPUT_FILE3 + "' as (id, name);");
        try {
            this.pigServer.registerQuery("D = join A by id, B by id, C by id using \"skewed\" parallel 5;");
            this.collect("D");
        }
        catch (Exception expected) {
            // Any exception counts as the rejection this test is looking for.
            return;
        }
        Assert.fail("Should throw exception, do not support 3 way join");
    }

    /** Expects a skewed join keyed on a cast map lookup expression to run without throwing. */
    public void testSkewedJoinMapKey() throws IOException {
        this.pigServer.registerQuery("A = LOAD '" + INPUT_FILE4 + "' as (m:[]);");
        this.pigServer.registerQuery("B = LOAD '" + INPUT_FILE4 + "' as (n:[]);");
        try {
            this.pigServer.registerQuery("C = join A by (chararray)m#'a100', B by (chararray)n#'a100' using \"skewed\" parallel 20;");
            this.collect("C");
        }
        catch (Exception e) {
            failWith("Should support maps and expression operators as keys", e);
        }
    }

    /**
     * Stores a skewed join run with {@code parallel 7}, copies the seven part files
     * back, and counts for each of the keys 100/200/300 which part files contain it.
     * With three keys, more than three non-empty (key, part file) pairs means at
     * least one key was written to more than one part file.
     */
    public void testSkewedJoinKeyPartition() throws IOException {
        final int keyCount = 3;
        final int partCount = 7;
        try {
            Util.deleteFile(this.cluster, OUTPUT_DIR);
        }
        catch (Exception ignored) {
            // Best effort: presumably clears output left by an earlier run; a failure
            // here must not stop the test.
        }
        this.pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);");
        this.pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (id, name);");
        this.pigServer.registerQuery("E = join A by id, B by id using \"skewed\" parallel 7;");
        this.pigServer.store("E", OUTPUT_DIR);

        int[][] lineCount = new int[keyCount][partCount];
        new File(OUTPUT_DIR).mkdir();
        for (int part = 0; part < partCount; ++part) {
            String partFile = OUTPUT_DIR + "/part-r-0000" + part;
            Util.copyFromClusterToLocal(this.cluster, partFile, partFile);
            BufferedReader reader = new BufferedReader(new FileReader(partFile));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    String[] cols = line.split("\t");
                    // Keys 100, 200, 300 map to rows 0, 1, 2.
                    int key = Integer.parseInt(cols[0]) / 100 - 1;
                    lineCount[key][part]++;
                }
            }
            finally {
                // Close before tearDown tries to delete the local output directory.
                reader.close();
            }
        }

        int nonEmptyPairs = 0;
        for (int key = 0; key < keyCount; ++key) {
            for (int part = 0; part < partCount; ++part) {
                if (lineCount[key][part] > 0) {
                    ++nonEmptyPairs;
                }
            }
        }
        Assert.assertTrue(nonEmptyPairs > keyCount);
    }

    /**
     * Expects a skewed self-join over input whose id column is mostly empty to run
     * without throwing.
     */
    public void testSkewedJoinNullKeys() throws IOException {
        this.pigServer.registerQuery("A = LOAD '" + INPUT_FILE5 + "' as (id,name);");
        this.pigServer.registerQuery("B = LOAD '" + INPUT_FILE5 + "' as (id,name);");
        try {
            this.pigServer.registerQuery("C = join A by id, B by id using \"skewed\";");
            this.collect("C");
        }
        catch (Exception e) {
            failWith("Should support null keys in skewed join", e);
        }
    }

    /** Expects left, right and full outer skewed joins to run without throwing. */
    public void testSkewedJoinOuter() throws IOException {
        this.pigServer.registerQuery("A = LOAD '" + INPUT_FILE5 + "' as (id,name);");
        this.pigServer.registerQuery("B = LOAD '" + INPUT_FILE5 + "' as (id,name);");
        try {
            this.pigServer.registerQuery("C = join A by id left, B by id using \"skewed\";");
            this.collect("C");
            this.pigServer.registerQuery("C = join A by id right, B by id using \"skewed\";");
            this.collect("C");
            this.pigServer.registerQuery("C = join A by id full, B by id using \"skewed\";");
            this.collect("C");
        }
        catch (Exception e) {
            failWith("Should support outer join in skewed join", e);
        }
    }

    /**
     * Filters both inputs down to id 400 and expects the skewed join and the plain
     * join of the filtered relations to return the same bag.
     */
    public void testSkewedJoinOneValue() throws IOException {
        this.pigServer.registerQuery("A = LOAD '" + INPUT_FILE3 + "' as (id,name);");
        this.pigServer.registerQuery("B = LOAD '" + INPUT_FILE3 + "' as (id,name);");
        this.pigServer.registerQuery("C = FILTER A by id == 400;");
        this.pigServer.registerQuery("D = FILTER B by id == 400;");

        this.pigServer.registerQuery("E = join C by id, D by id using \"skewed\";");
        DataBag skewedResult = this.collect("E");
        this.pigServer.registerQuery("E = join C by id, D by id;");
        DataBag regularResult = this.collect("E");

        Assert.assertEquals(skewedResult.size(), regularResult.size());
        Assert.assertTrue(TestHelper.compareBags(skewedResult, regularResult));
    }

    /**
     * Sets {@code pig.skewedjoin.reduce.maxtuple} to {@code 2} and expects a skewed
     * join run with {@code parallel 300} to return the same bag as the plain join.
     */
    public void testSkewedJoinManyReducers() throws IOException {
        this.pigServer.getPigContext().getProperties().setProperty("pig.skewedjoin.reduce.maxtuple", "2");
        this.pigServer.registerQuery("A = LOAD '" + INPUT_FILE6 + "' as (id,name);");
        this.pigServer.registerQuery("B = LOAD '" + INPUT_FILE7 + "' as (id,name);");

        this.pigServer.registerQuery("E = join A by id, B by id using \"skewed\" parallel 300;");
        DataBag skewedResult = this.collect("E");
        this.pigServer.registerQuery("E = join A by id, B by id;");
        DataBag regularResult = this.collect("E");

        Assert.assertEquals(skewedResult.size(), regularResult.size());
        Assert.assertTrue(TestHelper.compareBags(skewedResult, regularResult));
    }

    /**
     * Left-outer skewed join whose left side is filtered on a value ('7') that is not
     * in the input, so it is empty; the join is expected to return no tuples.
     */
    public void testSkewedJoinEmptyInput() throws IOException {
        String leftInputFile = "left.dat";
        String rightInputFile = "right.dat";
        try {
            writeLines(leftInputFile, "1", "2", "3", "5");
            Util.copyFromLocalToCluster(this.cluster, leftInputFile, leftInputFile);
            writeLines(rightInputFile, "1\tone", "2\ttwo", "3\tthree");
            Util.copyFromLocalToCluster(this.cluster, rightInputFile, rightInputFile);

            this.pigServer.registerQuery("a = load 'left.dat' as (nums:chararray);");
            this.pigServer.registerQuery("b = load 'right.dat' as (number:chararray,text:chararray);");
            this.pigServer.registerQuery("c = filter a by nums == '7';");
            this.pigServer.registerQuery("d = join c by nums LEFT OUTER, b by number USING 'skewed';");
            Iterator<Tuple> iter = this.pigServer.openIterator("d");
            Assert.assertFalse(iter.hasNext());
        }
        finally {
            // Clean up even when the test fails so later runs start from a clean state.
            new File(leftInputFile).delete();
            Util.deleteFile(this.cluster, leftInputFile);
            new File(rightInputFile).delete();
            Util.deleteFile(this.cluster, rightInputFile);
        }
    }

    /**
     * Loads the directory {@code foo} while the only data file sits two levels down
     * at {@code foo/bar/test.dat}, and expects the skewed self-join on its four
     * distinct values to return four tuples.
     */
    public void testRecursiveFileListing() throws IOException {
        String localInputFile = "test.dat";
        String clusterInputFile = "foo/bar/test.dat";
        try {
            writeLines(localInputFile, "1", "2", "3", "5");
            Util.copyFromLocalToCluster(this.cluster, localInputFile, clusterInputFile);

            this.pigServer.registerQuery("a = load 'foo' as (nums:chararray);");
            this.pigServer.registerQuery("b = load 'foo' as (nums:chararray);");
            this.pigServer.registerQuery("d = join a by nums, b by nums USING 'skewed';");
            Iterator<Tuple> iter = this.pigServer.openIterator("d");
            int count = 0;
            while (iter.hasNext()) {
                iter.next();
                ++count;
            }
            Assert.assertEquals(4, count);
        }
        finally {
            // Clean up even when the test fails so later runs start from a clean state.
            new File(localInputFile).delete();
            Util.deleteFile(this.cluster, clusterInputFile);
        }
    }

    /**
     * Opens an iterator on {@code alias} and reads every tuple it yields into a new bag.
     *
     * @param alias name of a relation already registered on the current server
     * @return a bag holding every tuple the iterator returned
     * @throws IOException if the iterator cannot be opened
     */
    private DataBag collect(String alias) throws IOException {
        DataBag bag = BagFactory.getInstance().newDefaultBag();
        Iterator<Tuple> iter = this.pigServer.openIterator(alias);
        while (iter.hasNext()) {
            bag.add(iter.next());
        }
        return bag;
    }

    /**
     * Writes each string as one line of a new local file, replacing any existing file.
     * The writer is always closed, and a write error that {@link PrintWriter} would
     * otherwise hide is reported as an exception.
     *
     * @param path local file to create
     * @param lines lines to write, in order
     * @throws IOException if the file cannot be opened or a write fails
     */
    private static void writeLines(String path, String... lines) throws IOException {
        PrintWriter out = new PrintWriter(new FileWriter(path));
        try {
            for (String line : lines) {
                out.println(line);
            }
            // PrintWriter never throws on write; checkError() flushes and reports failures.
            if (out.checkError()) {
                throw new IOException("Failed to write test input file " + path);
            }
        }
        finally {
            out.close();
        }
    }

    /**
     * Prints the stack trace of {@code cause} and fails the current test with a
     * message that includes the exception itself.
     *
     * @param message what the test expected to work
     * @param cause the exception that was thrown instead
     */
    private static void failWith(String message, Exception cause) {
        cause.printStackTrace();
        Assert.fail(message + ": " + cause);
    }
}

