org.apache.pig.PigServer - Java examples

Here are examples of the Java API org.apache.pig.PigServer, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.

155 Examples 7

16 View Complete Implementation : TestBlackAndWhitelistValidator.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Puts {@code explain} on the Pig blacklist and registers a script that
 * contains an {@code EXPLAIN} statement; the registration is expected to
 * fail with a {@code FrontendException} carrying the "not permitted" message.
 *
 * @throws Exception declared for the test harness; the expected failure is caught below
 */
@Test
public void testExplain() throws Exception {
    try {
        ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "explain");
        PigServer pigServer = new PigServer(ctx);
        Data data = resetData(pigServer);
        data.set("foo", tuple("a", 1, "b"), tuple("b", 2, "c"), tuple("c", 3, "d"));
        StringBuilder script = new StringBuilder();
        script.append("A = LOAD 'foo' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);")
                .append("B = order A by f1,f2,f3 DESC;")
                .append("EXPLAIN B;")
                .append("STORE B INTO 'bar' USING mock.Storage();");
        pigServer.registerScript(IOUtils.toInputStream(script));
        // Reaching this line means the blacklisted command was accepted.
        fail();
    } catch (Exception e) {
        Util.assertExceptionAndMessage(FrontendException.class, e, "EXPLAIN command is not permitted. ");
    }
}

16 View Complete Implementation : TestProject.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Loads rows whose tuple field has fewer columns than the declared schema and
 * projects the trailing tuple columns; each row is expected to come back as a
 * tuple of nulls.
 *
 * @throws Exception if the input file cannot be created or the query fails
 */
@Test
public void testMissingCols2() throws Exception {
    String inputFileName = "TestProject-testMissingCols2-input.txt";
    String[] input = { "1\t(hello,world)", "2\t(good,bye)" };
    Util.createLocalInputFile(inputFileName, input);
    // in the script, PigStorage will return a null for the tuple field
    // since it does not comply with the schema
    String query = "a = load '" + inputFileName + "' as (i:int, " + "t:tuple(s1:chararray, s2:chararray, s3:chararray));" + "b = foreach a generate t.(s2,s3);";
    PigServer ps = new PigServer(ExecType.LOCAL);
    Util.registerMultiLineQuery(ps, query);
    Iterator<Tuple> it = ps.openIterator("b");
    Tuple[] expectedResults = new Tuple[] { (Tuple) Util.getPigConstant("((null, null))"), (Tuple) Util.getPigConstant("((null, null))") };
    int i = 0;
    while (it.hasNext()) {
        assertEquals(expectedResults[i++], it.next());
    }
    // Without this check an empty result would pass the loop above vacuously.
    assertEquals(expectedResults.length, i);
}

16 View Complete Implementation : TestHiveColumnarStorage.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Stores a relation containing a bag built with {@code TOBAG} through
 * {@code HiveColumnarStorage} and reads the first row back, checking that the
 * bag column is materialised as a {@code LazyArray} of {@code LazyString}
 * values.
 *
 * @throws IOException          if reading or writing the data fails
 * @throws InterruptedException propagated from the helpers used to read the row
 * @throws SerDeException       if the stored row cannot be deserialised
 */
@Test
public void testShouldStoreBagAsHiveArray() throws IOException, InterruptedException, SerDeException {
    String loadString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";
    String storeString = "org.apache.pig.piggybank.storage.HiveColumnarStorage()";
    String singlePartitionedFile = simpleDataFile.getAbsolutePath();
    File outputFile = new File("testhiveColumnarStore");
    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerQuery("a = LOAD '" + Util.encodeEscape(singlePartitionedFile) + "' using " + loadString + ";");
    server.registerQuery("b = FOREACH a GENERATE f1, TOBAG(f2,f3);");
    // when
    server.store("b", outputFile.getAbsolutePath(), storeString);
    // then
    Path outputPath = new Path(outputFile.getAbsolutePath() + "/part-m-00000.rc");
    ColumnarStruct struct = readRow(outputFile, outputPath, "f1 string,f2 array<string>");
    assertEquals(2, struct.getFieldsAsList().size());
    Object o = struct.getField(0);
    assertEquals(LazyString.class, o.getClass());
    o = struct.getField(1);
    assertEquals(LazyArray.class, o.getClass());
    LazyArray arr = (LazyArray) o;
    List<Object> values = arr.getList();
    for (Object value : values) {
        assertEquals(LazyString.class, value.getClass());
        String valueStr = ((LazyString) value).getWritableObject().toString();
        assertEquals("Sample value", valueStr);
    }
}

16 View Complete Implementation : TestProjectStarExpander.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Checks the schema produced by {@code generate * as (...)} when aliases are
 * given for only the first fields and when they are given for all fields,
 * then verifies the rows returned for the fully aliased relation.
 *
 * @throws IOException     if the query cannot be executed
 * @throws ParserException if a schema string cannot be parsed
 */
@Test
public void testProjectStarForeach() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    // specifying the new aliases only for initial set of fields
    String query = "  l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int, d : int, e : int);" + "f = foreach l1 generate * as (aa, bb, cc);";
    Util.registerMultiLineQuery(pig, query);
    Schema expectedSch = Utils.getSchemaFromString("aa : int, bb : int, cc : int, d : int, e : int");
    Schema sch = pig.dumpSchema("f");
    assertEquals("Checking expected schema", expectedSch, sch);
    // specifying aliases for all fields
    query = "  l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int, d : int, e : int);" + "f = foreach l1 generate * as (aa, bb, cc, dd, ee);";
    Util.registerMultiLineQuery(pig, query);
    expectedSch = Utils.getSchemaFromString("aa : int, bb : int, cc : int, dd : int, ee : int");
    sch = pig.dumpSchema("f");
    assertEquals("Checking expected schema", expectedSch, sch);
    Iterator<Tuple> it = pig.openIterator("f");
    List<Tuple> expectedRes = Util.getTuplesFromConstantTupleStrings(new String[] { "(10,20,30,40,50)", "(11,21,31,41,51)" });
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}

16 View Complete Implementation : TestXMLLoader.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Loads an indented XML document with multi-line content through
 * {@code XMLLoader('page')} and expects exactly one tuple whose first field is
 * the flattened {@code <page>} element.
 *
 * @throws Exception if the temp file cannot be created or the query fails
 */
public void testXMLLoaderShouldWorkWithIndentedXmlWithMultilineContent() throws Exception {
    String filename = TestHelper.createTempFile(indentedXmlWithMultilineLineContent, "");
    PigServer pig = new PigServer(LOCAL);
    // Double the backslashes so the path survives being embedded in the Pig string literal.
    filename = filename.replace("\\", "\\\\");
    String query = "A = LOAD '" + filename + "' USING org.apache.pig.piggybank.storage.XMLLoader('page') as (doc:chararray);";
    pig.registerQuery(query);
    Iterator<?> it = pig.openIterator("A");
    int tupleCount = 0;
    while (it.hasNext()) {
        Tuple tuple = (Tuple) it.next();
        if (tuple == null) {
            break;
        }
        String doc = (String) tuple.get(0);
        System.out.println(doc);
        // assertEquals reports the actual value on failure, unlike assertTrue(equals).
        assertEquals("<page>You have not missed it</page>", doc);
        tupleCount++;
    }
    assertEquals(1, tupleCount);
}

16 View Complete Implementation : TestBuiltInBagToTupleOrString.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Runs {@code BagToTuple} over a single row holding an empty bag and checks
 * that the stored result contains an empty tuple.
 *
 * @throws Exception if the script cannot be registered or executed
 */
@Test
public void testPigScriptEmptyBagForBagToTupleUDF() throws Exception {
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    Data data = resetData(pigServer);
    data.set("foo", "myBag:bag{t:(l:chararray)}", tuple(bag()));
    pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage();");
    pigServer.registerQuery("B = FOREACH A GENERATE BagToTuple(myBag) as myBag;");
    pigServer.registerQuery("STORE B INTO 'bar' USING mock.Storage();");
    List<Tuple> out = data.get("bar");
    // empty bag will generate empty tuple
    assertEquals(tuple(), out.get(0).get(0));
}

16 View Complete Implementation : TestBlackAndWhitelistValidator.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * A few commands such as DECLARE, DEFAULT go via
 * {@link PreprocessorContext}. This step basically parses commands and
 * substitutes parameters. The parameters can be evaluated using shell
 * commands, which need to be validated if specified in the white or blacklist.
 * This test handles that scenario: it blacklists {@code declare} (in mixed
 * case) and expects a script using {@code %declare} to be rejected.
 *
 * @throws Exception declared for the test harness; the expected failure is caught below
 */
@Test
public void testPreprocessorCommands() throws Exception {
    try {
        ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "dEclAre");
        PigServer pigServer = new PigServer(ctx);
        Data data = resetData(pigServer);
        data.set("foo", tuple("a", 1, "b"), tuple("b", 2, "c"), tuple("c", 3, "d"));
        StringBuilder script = new StringBuilder();
        script.append("set io.sort.mb 1000;")
                .append("%declare X `echo`; ")
                .append("%default input 'foo';")
                .append("A = LOAD '$input' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);")
                .append("B = order A by f1,f2,f3 DESC;")
                .append("STORE B INTO 'bar' USING mock.Storage();");
        pigServer.registerScript(IOUtils.toInputStream(script));
        // Reaching this line means the blacklisted command was accepted.
        fail();
    } catch (Exception e) {
        // We check RuntimeException here and not FrontendException as Pig wraps the error from Preprocessor
        // within RuntimeException
        Util.assertExceptionAndMessage(RuntimeException.class, e, "DECLARE command is not permitted. ");
    }
}

16 View Complete Implementation : TestSchemaResetter.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Runs a multi-statement script that groups and counts the same projected
 * edges twice, left-outer-joins the two counts, and checks the two stored
 * result rows after sorting.
 *
 * @throws IOException if the script cannot be registered or executed
 */
@Test
public void testSchemaResetterExec() throws IOException {
    PigServer pigServer = new PigServer(LOCAL);
    Data data = Storage.resetData(pigServer);
    data.set("input", tuple(tuple("1", "2")), tuple(tuple("2", "3")), tuple(tuple("2", "4")));
    pigServer.registerQuery("A = LOAD 'input' USING mock.Storage() AS (group:tuple(uid, dst_id));"
            + "edges_both = FOREACH A GENERATE"
            + "    group.uid AS src_id,"
            + "    group.dst_id AS dst_id;"
            + "both_counts = GROUP edges_both BY src_id;"
            + "both_counts = FOREACH both_counts GENERATE"
            + "    group AS src_id, SIZE(edges_both) AS size_both;"
            + "edges_bq = FOREACH A GENERATE"
            + "    group.uid AS src_id,"
            + "    group.dst_id AS dst_id;"
            + "bq_counts = GROUP edges_bq BY src_id;"
            + "bq_counts = FOREACH bq_counts GENERATE"
            + "    group AS src_id, SIZE(edges_bq) AS size_bq;"
            + "per_user_set_sizes = JOIN bq_counts BY src_id LEFT OUTER, both_counts BY src_id;"
            + "store per_user_set_sizes into 'output' USING mock.Storage();");
    List<Tuple> list = data.get("output");
    // Sort so the positional assertions below do not depend on output order.
    Collections.sort(list);
    assertEquals("list: " + list, 2, list.size());
    assertEquals("(1,1,1,1)", list.get(0).toString());
    assertEquals("(2,2,2,2)", list.get(1).toString());
}

16 View Complete Implementation : TestPigServer.java
Copyright Apache License 2.0
Author : sigmoidanalytics
// PIG-2059
/**
 * With per-statement validation switched on, projecting column {@code $2}
 * from a two-column relation must be rejected as soon as the statement is
 * registered.
 *
 * @throws Throwable if anything other than the expected failure occurs
 */
@Test
public void test1() throws Throwable {
    final String expectedMessage = "Out of bound access. " + "Trying to access non-existent column: 2";
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.setValidateEachStatement(true);
    server.registerQuery("A = load 'x' as (u, v);");
    try {
        server.registerQuery("B = foreach A generate $2;");
        fail("Query is supposed to fail.");
    } catch (FrontendException expected) {
        Util.checkMessageInException(expected, expectedMessage);
    }
}

16 View Complete Implementation : TestProjectStarExpander.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Test projecting multiple *: one aliased and one plain star in the same
 * {@code generate}, checking both the resulting schema and the rows.
 *
 * @throws IOException     if the query cannot be executed
 * @throws ParserException if the expected schema string cannot be parsed
 */
@Test
public void testProjectStarMulti() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query = "  l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int);" + "f = foreach l1 generate * as (aa, bb, cc), *;";
    Util.registerMultiLineQuery(pig, query);
    Schema expectedSch = Utils.getSchemaFromString("aa : int, bb : int, cc : int, a : int, b : int, c : int");
    Schema sch = pig.dumpSchema("f");
    assertEquals("Checking expected schema", expectedSch, sch);
    List<Tuple> expectedRes = Util.getTuplesFromConstantTupleStrings(new String[] { "(10,20,30,10,20,30)", "(11,21,31,11,21,31)" });
    Iterator<Tuple> it = pig.openIterator("f");
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}

16 View Complete Implementation : TestBlackAndWhitelistValidator.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Puts {@code import} on the Pig blacklist and registers a script that starts
 * with an {@code import} statement; the registration is expected to fail with
 * a {@code FrontendException} carrying the "not permitted" message.
 *
 * @throws Exception declared for the test harness; the expected failure is caught below
 */
@Test
public void testImport() throws Exception {
    try {
        ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "import");
        PigServer pigServer = new PigServer(ctx);
        Data data = resetData(pigServer);
        data.set("foo", tuple("a", 1, "b"), tuple("b", 2, "c"), tuple("c", 3, "d"));
        StringBuilder script = new StringBuilder();
        script.append("import 'piggybank.jar';")
                .append("A = LOAD 'foo' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);")
                .append("B = order A by f1,f2,f3 DESC;")
                .append("run evil.pig;")
                .append("STORE B INTO 'bar' USING mock.Storage();");
        pigServer.registerScript(IOUtils.toInputStream(script));
        // Reaching this line means the blacklisted command was accepted.
        fail();
    } catch (Exception e) {
        Util.assertExceptionAndMessage(FrontendException.class, e, "Error during parsing. IMPORT command is not permitted. ");
    }
}

16 View Complete Implementation : TestProject.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Loads two-column rows under a three-column schema and projects all three
 * columns; the column missing from the input is expected to be null.
 *
 * @throws Exception if the input file cannot be created or the query fails
 */
@Test
public void testMissingCols1() throws Exception {
    String inputFileName = "TestProject-testMissingCols1-input.txt";
    String[] input = { "hello\tworld", "good\tbye" };
    Util.createLocalInputFile(inputFileName, input);
    String query = "a = load '" + inputFileName + "' as (s1:chararray, s2:chararray, extra:chararray);" + "b = foreach a generate s1, s2, extra;";
    PigServer ps = new PigServer(ExecType.LOCAL);
    Util.registerMultiLineQuery(ps, query);
    Iterator<Tuple> it = ps.openIterator("b");
    Tuple[] expectedResults = new Tuple[] { (Tuple) Util.getPigConstant("('hello', 'world', null)"), (Tuple) Util.getPigConstant("('good', 'bye', null)") };
    int i = 0;
    while (it.hasNext()) {
        assertEquals(expectedResults[i++], it.next());
    }
    // Without this check an empty result would pass the loop above vacuously.
    assertEquals(expectedResults.length, i);
}

16 View Complete Implementation : TestXMLLoader.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Loads a document through {@code XMLLoader('event')} and parses the first
 * field of every returned tuple with a JAXP {@code DocumentBuilder}; a
 * malformed fragment makes {@code parse} throw and so fails the test.
 *
 * @throws Exception if the temp file cannot be created, the query fails, or a
 *                   returned fragment is not parseable XML
 */
public void testXMLLoaderShouldReturnValidXML() throws Exception {
    String filename = TestHelper.createTempFile(inlineClosedTags, "");
    PigServer pig = new PigServer(LOCAL);
    // Double the backslashes so the path survives being embedded in the Pig string literal.
    filename = filename.replace("\\", "\\\\");
    String query = "A = LOAD '" + filename + "' USING org.apache.pig.piggybank.storage.XMLLoader('event') as (doc:chararray);";
    pig.registerQuery(query);
    Iterator<?> it = pig.openIterator("A");
    while (it.hasNext()) {
        Tuple tuple = (Tuple) it.next();
        if (tuple == null) {
            break;
        }
        // Test it returns a valid XML
        DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        docBuilder.parse(new ByteArrayInputStream(((String) tuple.get(0)).getBytes()));
    }
}

15 View Complete Implementation : TestExampleGenerator.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Registers a filter / group / count / store pipeline in batch mode and
 * checks that {@code getExamples} returns a non-null result for it.
 *
 * @throws Exception if the temp file cannot be created or the queries fail
 */
@Test
public void testFilterGroupCountStore() throws Exception {
    File out = File.createTempFile("testFilterGroupCountStoreOutput", "");
    out.deleteOnExit();
    // Only the unique path is needed; remove the file so the store target does not already exist.
    out.delete();
    PigServer pigServer = new PigServer(pigContext);
    pigServer.setBatchOn();
    pigServer.registerQuery("A = load " + A.toString() + " as (x, y);");
    pigServer.registerQuery("B = filter A by x < 5;");
    pigServer.registerQuery("C = group B by x;");
    pigServer.registerQuery("D = foreach C generate group as x, COUNT(B) as the_count;");
    pigServer.registerQuery("store D into '" + Util.encodeEscape(out.getAbsolutePath()) + "';");
    Map<Operator, DataBag> derivedData = pigServer.getExamples(null);
    assertNotNull(derivedData);
}

15 View Complete Implementation : TestUnionOnSchema.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Test UNION ONSCHEMA with input relation having udfs: checks both the
 * schema of the union and the rows it returns.
 *
 * @throws IOException     if the query cannot be executed
 * @throws ParserException if the query cannot be parsed
 */
@Test
public void testUnionOnSchemaInputUdfs() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query = "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : chararray);" + "l2 = load '" + INP_FILE_2NUMS + "' as (i : int, j : chararray);" + "f1 = foreach l1 generate i, CONCAT(j,j) as cj, " + "org.apache.pig.test.TestUnionOnSchema\\$UDFTupleNullSchema(i,j) as uo;" + "u = union onschema f1, l2;";
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("u");
    String expectedSch = "{i: int,cj: chararray,uo: (),j: chararray}";
    // assertEquals reports the actual schema on failure, unlike assertTrue(equals).
    Assert.assertEquals(expectedSch, sch.toString());
    Iterator<Tuple> it = pig.openIterator("u");
    List<Tuple> expectedRes = Util.getTuplesFromConstantTupleStrings(new String[] { "(1,null,null,'2')", "(5,null,null,'3')", "(1,'22',(1,'2'),null)", "(5,'33',(5,'3'),null)" });
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}

15 View Complete Implementation : TestPigServer.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Checks the schema reported for a {@code cross} of two relations: every
 * field is prefixed with its source alias, and the untyped field is reported
 * as {@code bytearray}.
 *
 * @throws Throwable if the queries cannot be registered or the schema cannot be dumped
 */
@Test
public void testDescribeCross() throws Throwable {
    PigServer pig = new PigServer(cluster.getExecType(), properties);
    pig.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    pig.registerQuery("b = load 'b' as (field4, field5: double, field6: chararray );");
    pig.registerQuery("c = cross a, b;");
    Schema dumpedSchema = pig.dumpSchema("c");
    Schema expectedSchema = Utils.getSchemaFromString("a::field1: int,a::field2: float,a::field3: chararray,b::field4: bytearray,b::field5: double,b::field6: chararray");
    assertEquals(expectedSchema, dumpedSchema);
}

15 View Complete Implementation : TestBZip.java
Copyright Apache License 2.0
Author : sigmoidanalytics
// See PIG-1714
/**
 * Writes a script that enables BZip2 output compression via {@code set}
 * statements and stores the same relation twice, runs it through
 * {@code registerScript}, and checks that both outputs are non-empty.
 *
 * @throws Exception if input or script files cannot be written or the script fails
 */
@Test
public void testBzipStoreInMultiQuery3() throws Exception {
    String[] inputData = new String[] { "1\t2\r3\t4" };
    String inputFileName = "input3.txt";
    Util.createInputFile(cluster, inputFileName, inputData);
    String inputScript = "set mapred.output.compress true\n"
            + "set mapreduce.output.fileoutputformat.compress true\n"
            + "set mapred.output.compression.codec org.apache.hadoop.io.compress.BZip2Codec\n"
            + "set mapreduce.output.fileoutputformat.compress.codec org.apache.hadoop.io.compress.BZip2Codec\n"
            + "a = load '" + inputFileName + "';\n"
            + "store a into 'output3.bz2';\n"
            + "store a into 'output3';";
    String inputScriptName = "script3.txt";
    PrintWriter pw = new PrintWriter(new FileWriter(inputScriptName));
    try {
        pw.println(inputScript);
    } finally {
        // Close even if writing fails so the file handle is not leaked.
        pw.close();
    }
    PigServer pig = new PigServer(cluster.getExecType(), properties);
    FileInputStream fis = new FileInputStream(inputScriptName);
    try {
        pig.registerScript(fis);
    } finally {
        // The original never closed this stream.
        fis.close();
    }
    FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration(pig.getPigContext().getProperties()));
    FileStatus[] outputFiles = fs.listStatus(new Path("output3"), Util.getSuccessMarkerPathFilter());
    assertTrue(outputFiles[0].getLen() > 0);
    outputFiles = fs.listStatus(new Path("output3.bz2"), Util.getSuccessMarkerPathFilter());
    assertTrue(outputFiles[0].getLen() > 0);
}

15 View Complete Implementation : TestIn.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Verify that IN operator works with FILTER BY: of the values 1..5 only
 * 1, 2 and 3 are expected in the stored output.
 *
 * @throws Exception if the queries cannot be registered or executed
 */
@Test
public void testWithFilter() throws Exception {
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    Data data = resetData(pigServer);
    data.set("foo", tuple(1), tuple(2), tuple(3), tuple(4), tuple(5));
    pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    pigServer.registerQuery("B = FILTER A BY i IN (1, 2, 3);");
    pigServer.registerQuery("STORE B INTO 'bar' USING mock.Storage();");
    List<Tuple> out = data.get("bar");
    assertEquals(3, out.size());
    assertEquals(tuple(1), out.get(0));
    assertEquals(tuple(2), out.get(1));
    assertEquals(tuple(3), out.get(2));
}

15 View Complete Implementation : TestHadoopJobHistoryLoader.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Loads job history from {@code INPUT_DIR} with {@code HadoopJobHistoryLoader}
 * and checks a set of entries in the job map of the first returned tuple.
 *
 * @throws Exception if the query cannot be registered or executed
 */
@SuppressWarnings("unchecked")
@Test
public void testHadoopJHLoader() throws Exception {
    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.registerQuery("a = load '" + INPUT_DIR + "' using org.apache.pig.piggybank.storage.HadoopJobHistoryLoader() " + "as (j:map[], m:map[], r:map[]);");
    Iterator<Tuple> iter = pig.openIterator("a");
    assertTrue(iter.hasNext());
    Tuple t = iter.next();
    // Field 0 is the job-level map (declared as j:map[] in the load statement).
    Map<String, Object> job = (Map<String, Object>) t.get(0);
    assertEquals("3eb62180-5473-4301-aa22-467bd685d466", (String) job.get("PIG_SCRIPT_ID"));
    assertEquals("job_201004271216_9998", (String) job.get("JOBID"));
    assertEquals("job_201004271216_9995", (String) job.get("PIG_JOB_PARENTS"));
    assertEquals("0.8.0-dev", (String) job.get("PIG_VERSION"));
    assertEquals("0.20.2", (String) job.get("HADOOP_VERSION"));
    assertEquals("d", (String) job.get("PIG_JOB_ALIAS"));
    assertEquals("PigLatin:Test.pig", job.get("JOBNAME"));
    assertEquals("ORDER_BY", (String) job.get("PIG_JOB_FEATURE"));
    assertEquals("1", (String) job.get("TOTAL_MAPS"));
    assertEquals("1", (String) job.get("TOTAL_REDUCES"));
}

15 View Complete Implementation : TestPigServer.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Registers a script whose nested FOREACH references alias {@code C} and
 * compares the dumped schema of {@code B} with the expected one.
 *
 * @param useScalar         when {@code true}, an inner
 *                          {@code C = FILTER A BY b % 2 == 0;} is added to the
 *                          nested block, redefining {@code C} there
 * @param expectedSchemaStr schema string that {@code B} is expected to have
 * @throws IOException if a query cannot be registered or the schema cannot be dumped
 */
private void registerScalarScript(boolean useScalar, String expectedSchemaStr) throws IOException {
    PigServer pig = new PigServer(cluster.getExecType(), properties);
    pig.registerQuery("A = load 'adata' AS (a: int, b: int);");
    // scalar
    pig.registerQuery("C = FOREACH A GENERATE *;");
    String overrideScalar = useScalar ? "C = FILTER A BY b % 2 == 0; " : "";
    pig.registerQuery("B = FOREACH (GROUP A BY a) { " + overrideScalar + "D = FILTER A BY b % 2 == 1;" + "GENERATE group AS a, A.b AS every, C.b AS even, D.b AS odd;" + "};");
    Schema dumpedSchema = pig.dumpSchema("B");
    Schema expectedSchema = Utils.getSchemaFromString(expectedSchemaStr);
    assertEquals(expectedSchema, dumpedSchema);
}

15 View Complete Implementation : TestUnionOnSchema.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Runs UNION ONSCHEMA over three inputs with increasing column counts in
 * batch mode while also storing one of the inputs on its own, then checks
 * both stored outputs.
 *
 * @throws IOException if the script cannot be registered or executed
 */
@Test
public void testUnionOnSchemaAdditionalColumnsWithImplicitSplit() throws IOException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    Data data = Storage.resetData(pig);
    // Use batch to force multiple outputs from relation l3. This causes
    // ImplicitSplitInsertVisitor to call SchemaResetter.
    pig.setBatchOn();
    String query = "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j: int);" + "l2 = load '" + INP_FILE_3NUMS + "' as (i : int, j : int, k : int);" + "l3 = load '" + INP_FILE_EMPTY + "' as (i : int, j : int, k : int, l :int);" + "u = union onschema l1, l2, l3;" + "store u into 'out1' using mock.Storage;" + "store l3 into 'out2' using mock.Storage;";
    Util.registerMultiLineQuery(pig, query);
    pig.executeBatch();
    List<Tuple> list1 = data.get("out1");
    List<Tuple> list2 = data.get("out2");
    List<Tuple> expectedRes = Util.getTuplesFromConstantTupleStrings(new String[] { "(1,2,null,null)", "(5,3,null,null)", "(1,2,3,null)", "(4,5,6,null)" });
    Util.checkQueryOutputsAfterSort(list1, expectedRes);
    assertEquals(0, list2.size());
}

15 View Complete Implementation : TestUnionOnSchema.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Exercises UNION ONSCHEMA over three relations where the second and third
 * declare columns the first one lacks, then compares the sorted output with
 * the expected rows.
 *
 * @throws IOException     if the query cannot be executed
 * @throws ParserException if the query cannot be parsed
 */
@Test
public void testUnionOnSchemaAdditionalColumn() throws IOException, ParserException {
    PigServer server = new PigServer(ExecType.LOCAL);

    // Assemble the script one statement at a time; the concatenation is unchanged.
    String loadFirst = "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int);";
    String loadSecond = "l2 = load '" + INP_FILE_2NUM_1CHAR_1BAG + "' as "
            + "  (i : long, c : chararray, j : int "
            + ", b : bag { t : tuple (c1 : int, c2 : chararray)}"
            + ", t : tuple (tc1 : int, tc2 : chararray) );";
    String loadThird = "l3 = load '" + INP_FILE_EMPTY + "' as (i : int, x : long);";
    String unionAll = "u = union onschema l1, l2, l3;";
    Util.registerMultiLineQuery(server, loadFirst + loadSecond + loadThird + unionAll);

    server.explain("u", System.out);
    Iterator<Tuple> actualRows = server.openIterator("u");

    String[] expectedRowStrings = {
        "(1L,2,null,null,null,null)",
        "(5L,3,null,null,null,null)",
        "(1L,2,'abc',{(1,'a'),(1,'b')},(1,'c'),null)",
        "(5L,3,'def',{(2,'a'),(2,'b')},(2,'c'),null)"
    };
    List<Tuple> expectedRows = Util.getTuplesFromConstantTupleStrings(expectedRowStrings);
    Util.checkQueryOutputsAfterSort(actualRows, expectedRows);
}

15 View Complete Implementation : TestJobSubmission.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Builds the MapReduce plan for a skewed join declared with
 * {@code parallel 100} and checks that the skew-join operator requests that
 * parallelism and that the plan has three operators.
 *
 * @throws Throwable if the plan cannot be built
 */
@Test
public void testDefaultParallelInSkewJoin() throws Throwable {
    // default_parallel is considered only at runtime, so here we only test requested parallel
    // more thorough tests can be found in TestNumberOfReducers.java
    String query = "a = load 'input';" + "b = load 'input';" + "c = join a by $0, b by $0 using 'skewed' parallel 100;" + "store c into 'output';";
    try {
        PigServer ps = new PigServer(cluster.getExecType(), cluster.getProperties());
        PhysicalPlan pp = Util.buildPp(ps, query);
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
        // Get the skew join job
        Iterator<MapReduceOper> iter = mrPlan.getKeys().values().iterator();
        int counter = 0;
        while (iter.hasNext()) {
            MapReduceOper op = iter.next();
            counter++;
            if (op.isSkewedJoin()) {
                // assertEquals reports the actual parallelism on failure.
                assertEquals(100, op.getRequestedParallelism());
            }
        }
        assertEquals(3, counter);
    } finally {
        // Reset the shared context even when an assertion above fails.
        pc.defaultParallel = -1;
    }
}

15 View Complete Implementation : TestCase.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Verify that CASE statement with else branch works correctly: values 1..7
 * are mapped by {@code i % 4}, with remainder 3 falling through to the
 * {@code ELSE} branch.
 *
 * @throws Exception if the queries cannot be registered or executed
 */
@Test
public void testWithElseBranch() throws Exception {
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    Data data = resetData(pigServer);
    data.set("foo", tuple(1), tuple(2), tuple(3), tuple(4), tuple(5), tuple(6), tuple(7));
    pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    pigServer.registerQuery("B = FOREACH A GENERATE i, ("
            + "  CASE i % 4"
            + "    WHEN 0 THEN '4n'"
            + "    WHEN 1 THEN '4n+1'"
            + "    WHEN 2 THEN '4n+2'"
            + "    ELSE        '4n+3'"
            + "  END"
            + ");");
    pigServer.registerQuery("STORE B INTO 'bar' USING mock.Storage();");
    List<Tuple> out = data.get("bar");
    assertEquals(7, out.size());
    assertEquals(tuple(1, "4n+1"), out.get(0));
    assertEquals(tuple(2, "4n+2"), out.get(1));
    assertEquals(tuple(3, "4n+3"), out.get(2));
    assertEquals(tuple(4, "4n"), out.get(3));
    assertEquals(tuple(5, "4n+1"), out.get(4));
    assertEquals(tuple(6, "4n+2"), out.get(5));
    assertEquals(tuple(7, "4n+3"), out.get(6));
}

15 View Complete Implementation : TestMyRegExLoader.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Loads the test data through {@code MyRegExLoader} with the configured
 * pattern and compares every returned tuple, and the tuple count, with the
 * expectation computed by {@code TestHelper}.
 *
 * @throws Exception if the temp file cannot be created or the query fails
 */
public void testLoadMyRegExFromPigServer() throws Exception {
    ArrayList<DataByteArray[]> expected = TestHelper.getExpected(data, pattern);
    String filename = TestHelper.createTempFile(data, "");
    PigServer pig = new PigServer(LOCAL);
    // Double the backslashes so both values survive being embedded in Pig string literals.
    filename = filename.replace("\\", "\\\\");
    patternString = patternString.replace("\\", "\\\\");
    String query = "A = LOAD '" + filename + "' USING org.apache.pig.piggybank.storage.MyRegExLoader('" + patternString + "');";
    pig.registerQuery(query);
    Iterator<?> it = pig.openIterator("A");
    int tupleCount = 0;
    while (it.hasNext()) {
        Tuple tuple = (Tuple) it.next();
        if (tuple == null) {
            break;
        }
        TestHelper.examineTuple(expected, tuple, tupleCount);
        tupleCount++;
    }
    assertEquals(data.size(), tupleCount);
}

15 View Complete Implementation : TestPigScriptParser.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Defines an alias for {@code RegexGroupCount} with a constructor argument
 * via {@code define}, applies it to five input lines, and checks the value
 * returned for each line.
 *
 * @throws Exception if the input file cannot be created or a query fails
 */
@Test
public void testDefineUDF() throws Exception {
    PigServer ps = new PigServer(ExecType.LOCAL);
    String[] inputData = { "dshfdskfwww.xyz.com/sportsjoadfjdslpdshfdskfwww.xyz.com/sportsjoadfjdsl", "kas;dka;sd", "jsjsjwww.xyz.com/sports", "jsdLSJDcom/sports", "wwwJxyzMcom/sports" };
    File f = Util.createFile(inputData);
    String[] queryLines = new String[] {
        // the reason we have 4 backslashes below is we really want to put two backslashes but
        // since this is to be represented in a Java String, we escape each backslash with one more
        // backslash - hence 4. In a pig script in a file, this would be
        // www\\.xyz\\.com
        "define minelogs org.apache.pig.test.RegexGroupCount('www\\\\.xyz\\\\.com/sports');",
        "A = load '" + Util.generateURI(f.getAbsolutePath(), ps.getPigContext()) + "'  using PigStorage() as (source : chararray);",
        "B = foreach A generate minelogs(source) as sportslogs;" };
    for (String line : queryLines) {
        ps.registerQuery(line);
    }
    Iterator<Tuple> it = ps.openIterator("B");
    int[] expectedResults = new int[] { 2, 0, 1, 0, 0 };
    int i = 0;
    while (it.hasNext()) {
        Tuple t = it.next();
        assertEquals(expectedResults[i++], t.get(0));
    }
    // Without this check an empty result would pass the loop above vacuously.
    assertEquals(expectedResults.length, i);
}

15 View Complete Implementation : TestSchema.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Runs {@code InputSchemaUDF} over a grouped relation on a mini cluster and
 * checks that every returned tuple carries the expected input-schema string.
 *
 * @throws IOException if the input cannot be created or the script fails
 */
@Test
public void testSchemaSerialization() throws IOException {
    MiniGenericCluster cluster = MiniGenericCluster.buildCluster();
    try {
        PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
        String inputFileName = "testSchemaSerialization-input.txt";
        String[] inputData = new String[] { "foo\t1", "hello\t2" };
        Util.createInputFile(cluster, inputFileName, inputData);
        String script = "a = load '" + inputFileName + "' as (f1:chararray, f2:int);" + " b = group a all; c = foreach b generate org.apache.pig.test.InputSchemaUDF(a);";
        Util.registerMultiLineQuery(pigServer, script);
        Iterator<Tuple> it = pigServer.openIterator("c");
        while (it.hasNext()) {
            Tuple t = it.next();
            assertEquals("{a: {(f1: chararray,f2: int)}}", t.get(0));
        }
    } finally {
        // Shut the cluster down even when the script or an assertion fails.
        cluster.shutDown();
    }
}

15 View Complete Implementation : TestJobSubmission.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Builds the MapReduce plan for an {@code order by} declared with
 * {@code parallel 100} and checks that the global-sort operator requests that
 * parallelism and that the plan has three operators.
 *
 * @throws Throwable if the plan cannot be built
 */
@Test
public void testDefaultParallelInSort() throws Throwable {
    // default_parallel is considered only at runtime, so here we only test requested parallel
    // more thorough tests can be found in TestNumberOfReducers.java
    String query = "a = load 'input';" + "b = order a by $0 parallel 100;" + "store b into 'output';";
    try {
        PigServer ps = new PigServer(cluster.getExecType(), cluster.getProperties());
        PhysicalPlan pp = Util.buildPp(ps, query);
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
        // Get the sort job
        Iterator<MapReduceOper> iter = mrPlan.getKeys().values().iterator();
        int counter = 0;
        while (iter.hasNext()) {
            MapReduceOper op = iter.next();
            counter++;
            if (op.isGlobalSort()) {
                // assertEquals reports the actual parallelism on failure.
                assertEquals(100, op.getRequestedParallelism());
            }
        }
        assertEquals(3, counter);
    } finally {
        // Reset the shared context even when an assertion above fails.
        pc.defaultParallel = -1;
    }
}

15 View Complete Implementation : TestPigServer.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Checks the schema reported for a {@code cogroup} of two relations: a
 * {@code group} key followed by one bag per input relation.
 *
 * @throws Throwable if the queries cannot be registered or the schema cannot be dumped
 */
@Test
public void testDescribeCogroup() throws Throwable {
    PigServer pig = new PigServer(cluster.getExecType(), properties);
    pig.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    pig.registerQuery("b = load 'b' as (field4, field5: double, field6: chararray );");
    pig.registerQuery("c = cogroup a by field1, b by field4;");
    Schema dumpedSchema = pig.dumpSchema("c");
    Schema expectedSchema = Utils.getSchemaFromString("group:int,a:{(field1:int,field2:float,field3:chararray)},b:{(field4:bytearray,field5:double,field6:chararray)}");
    assertEquals(expectedSchema, dumpedSchema);
}

15 View Complete Implementation : TestBZip.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Enables BZip2 output compression through the Pig context properties,
 * stores the same relation twice in one batch, and checks that both outputs
 * are non-empty.
 *
 * @throws Exception if the input cannot be created or the batch fails
 */
@Test
public void testBzipStoreInMultiQuery2() throws Exception {
    String[] inputData = new String[] { "1\t2\r3\t4" };
    String inputFileName = "input2.txt";
    Util.createInputFile(cluster, inputFileName, inputData);
    PigServer pig = new PigServer(cluster.getExecType(), properties);
    PigContext pigContext = pig.getPigContext();
    pigContext.getProperties().setProperty("output.compression.enabled", "true");
    pigContext.getProperties().setProperty("output.compression.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    pig.setBatchOn();
    pig.registerQuery("a = load '" + inputFileName + "';");
    pig.registerQuery("store a into 'output2.bz2';");
    pig.registerQuery("store a into 'output2';");
    pig.executeBatch();
    FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration(pig.getPigContext().getProperties()));
    FileStatus[] outputFiles = fs.listStatus(new Path("output2"), Util.getSuccessMarkerPathFilter());
    assertTrue(outputFiles[0].getLen() > 0);
    outputFiles = fs.listStatus(new Path("output2.bz2"), Util.getSuccessMarkerPathFilter());
    assertTrue(outputFiles[0].getLen() > 0);
}

15 View Complete Implementation : TestBZip.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Round-trips bzip2 data through Pig: writes a local {@code .bz2} file, copies it to the
 * cluster, loads it, stores it to {@code intermediate.bz}, loads that back and drains the
 * resulting iterator. See PIG-2391.
 *
 * <p>No tuple contents are asserted; the test passes when the whole pipeline runs without
 * throwing.
 *
 * @throws Exception if writing the input, copying it to the cluster or running the queries fails
 */
@Test
public void testBz2() throws Exception {
    // each entry gets a trailing '\n' appended when it is written below
    String[] inputData = new String[] {
        // '\r' case - this will be split into two tuples
        "1\t2\r3\t4",
        // '\r\n' case
        "5\t6\r",
        // '\n' case
        "7\t8",
        // '\r\n' at the end of file
        "9\t10\r" };
    // bzip compressed input
    File in = File.createTempFile("junit", ".bz2");
    String compressedInputFileName = in.getAbsolutePath();
    String clusterCompressedFilePath = Util.removeColon(compressedInputFileName);
    in.deleteOnExit();
    try {
        CBZip2OutputStream cos = new CBZip2OutputStream(new FileOutputStream(in));
        try {
            for (String line : inputData) {
                // explicit charset keeps the written bytes independent of the platform default
                cos.write((line + "\n").getBytes("UTF-8"));
            }
        } finally {
            // close even when a write fails so the temp file handle is not leaked
            cos.close();
        }
        Util.copyFromLocalToCluster(cluster, compressedInputFileName, clusterCompressedFilePath);
        // pig script to read compressed input
        PigServer pig = new PigServer(cluster.getExecType(), properties);
        String script = "a = load '" + Util.encodeEscape(clusterCompressedFilePath) + "';";
        pig.registerQuery(script);
        pig.registerQuery("store a into 'intermediate.bz';");
        pig.registerQuery("b = load 'intermediate.bz';");
        // drain the iterator; reading every tuple without an exception is the success criterion
        Iterator<Tuple> it2 = pig.openIterator("b");
        while (it2.hasNext()) {
            it2.next();
        }
    } finally {
        in.delete();
        Util.deleteFile(cluster, "intermediate.bz");
        Util.deleteFile(cluster, "final.bz");
    }
}

15 View Complete Implementation : TestUDF.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Runs the {@code IntToBool} UDF over two rows, the first of which has an empty {@code i1}
 * field and the second of which has {@code i1 = 2}, and expects {@code (false)} followed by
 * {@code (true)} and nothing else. See PIG-4184.
 *
 * @throws Exception if the input file cannot be created or the query fails
 */
@Test
public void testUDFNullInput() throws Exception {
    PigServer pig = new PigServer(ExecType.LOCAL);
    File inputFile = Util.createInputFile("tmp", "", new String[] { "\t", "2\t3" });
    pig.registerQuery("a = load '" + Util.generateURI(inputFile.toString(), pig.getPigContext()) + "' as (i1:int, i2:int);");
    pig.registerQuery("b = foreach a generate " + IntToBool.clreplaced.getName() + "(i1);");
    Iterator<Tuple> iter = pig.openIterator("b");
    // expected value goes first so a failure message labels expected/actual correctly
    replacedertEquals("(false)", iter.next().toString());
    replacedertEquals("(true)", iter.next().toString());
    replacedertFalse(iter.hasNext());
}

15 View Complete Implementation : TestCase.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Checks a CASE expression whose WHEN branches each carry their own condition and which has
 * no ELSE branch. The input row 4 matches no branch and is expected to be absent from the
 * output after the {@code IS NOT NULL} filter.
 *
 * @throws Exception if a query cannot be registered or executed
 */
@Test
public void testConditionalWithNoElse() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data storage = resetData(server);
    storage.set("foo", tuple(1), tuple(2), tuple(3), tuple(4), tuple(5), tuple(6), tuple(7));

    server.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    // Conditional expression in when branch
    String caseQuery = "B = FOREACH A GENERATE i, ("
            + "  CASE "
            + "    WHEN i % 5 == 0 THEN '5n'"
            + "    WHEN i % 5 == 1 THEN '5n+1'"
            + "    WHEN i % 5 == 2 THEN '5n+2'"
            + "    WHEN i % 5 == 3 THEN '5n+3'"
            + "  END"
            + ") AS s;";
    server.registerQuery(caseQuery);
    server.registerQuery("C = FILTER B BY s IS NOT NULL;");
    server.registerQuery("STORE C INTO 'bar' USING mock.Storage();");

    List<Tuple> stored = storage.get("bar");
    Tuple[] expected = {
        tuple(1, "5n+1"), tuple(2, "5n+2"), tuple(3, "5n+3"),
        tuple(5, "5n"), tuple(6, "5n+1"), tuple(7, "5n+2")
    };
    replacedertEquals(expected.length, stored.size());
    for (int idx = 0; idx < expected.length; idx++) {
        replacedertEquals(expected[idx], stored.get(idx));
    }
}

15 View Complete Implementation : TestStoreInstances.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Test that putnext is able to communicate to outputcommitter. The same store script is run
 * once with the cluster's exec type and once in local mode; each run must produce exactly one
 * job whose status is COMPLETED.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testBackendStoreCommunication() throws IOException, ParserException {
    final String outFile = "TestStoreInst1";
    for (ExecType execType : new ExecType[] { cluster.getExecType(), ExecType.LOCAL }) {
        Util.resetStateForExecModeSwitch();
        System.err.println("Starting test mode " + execType);

        // the cluster run needs the cluster's properties; local mode uses the defaults
        boolean onCluster = execType == cluster.getExecType();
        PigServer pig = onCluster
                ? new PigServer(cluster.getExecType(), cluster.getProperties())
                : new PigServer(execType);

        Util.deleteFile(pig.getPigContext(), outFile);
        pig.setBatchOn();
        String query = "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int);"
                + " store l1 into '" + outFile + "' using " + CHECK_INSTANCE_STORE_FUNC + ";";
        Util.registerMultiLineQuery(pig, query);

        List<ExecJob> jobs = pig.executeBatch();
        replacedertEquals("num jobs", 1, jobs.size());
        replacedertEquals("status ", JOB_STATUS.COMPLETED, jobs.get(0).getStatus());
    }
}

15 View Complete Implementation : TestXMLLoader.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Loads the shared test data with every {@code </ignoreProperty>} row removed and expects
 * XMLLoader, configured for the {@code ignoreProperty} tag, to yield no non-empty tuples.
 *
 * @throws Exception if the temp file cannot be created or the query fails
 */
public void testShouldReturn0TupleCountIfNoEndTagIsFound() throws Exception {
    // modify the data content to avoid end tag for </ignoreProperty>
    ArrayList<String[]> withoutEndTag = new ArrayList<String[]>();
    for (String[] row : data) {
        if (row[0].equals("</ignoreProperty>")) {
            continue;
        }
        withoutEndTag.add(row);
    }
    String filename = TestHelper.createTempFile(withoutEndTag, "");
    PigServer pig = new PigServer(LOCAL);
    // double every backslash so the path survives inside the quoted Pig string
    filename = filename.replace("\\", "\\\\");
    pig.registerQuery("A = LOAD '" + filename + "' USING org.apache.pig.piggybank.storage.XMLLoader('ignoreProperty') as (doc:chararray);");

    int nonEmptyTuples = 0;
    for (Iterator<?> rows = pig.openIterator("A"); rows.hasNext();) {
        Tuple current = (Tuple) rows.next();
        if (current == null) {
            break;
        }
        if (current.size() > 0) {
            nonEmptyTuples++;
        }
    }
    replacedertEquals(0, nonEmptyTuples);
}

15 View Complete Implementation : TestCase.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Checks a {@code CASE i % 5 WHEN ...} expression that has no ELSE branch. The input row 4
 * matches no WHEN value and is expected to be absent from the output after the
 * {@code IS NOT NULL} filter.
 *
 * @throws Exception if a query cannot be registered or executed
 */
@Test
public void testNoElseBranch() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data storage = resetData(server);
    storage.set("foo", tuple(1), tuple(2), tuple(3), tuple(4), tuple(5), tuple(6), tuple(7));

    server.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    String caseQuery = "B = FOREACH A GENERATE i, ("
            + "  CASE i % 5"
            + "    WHEN 0 THEN '5n'"
            + "    WHEN 1 THEN '5n+1'"
            + "    WHEN 2 THEN '5n+2'"
            + "    WHEN 3 THEN '5n+3'"
            + "  END"
            + ") AS s;";
    server.registerQuery(caseQuery);
    server.registerQuery("C = FILTER B BY s IS NOT NULL;");
    server.registerQuery("STORE C INTO 'bar' USING mock.Storage();");

    List<Tuple> stored = storage.get("bar");
    Tuple[] expected = {
        tuple(1, "5n+1"), tuple(2, "5n+2"), tuple(3, "5n+3"),
        tuple(5, "5n"), tuple(6, "5n+1"), tuple(7, "5n+2")
    };
    replacedertEquals(expected.length, stored.size());
    for (int idx = 0; idx < expected.length; idx++) {
        replacedertEquals(expected[idx], stored.get(idx));
    }
}

15 View Complete Implementation : TestRegExLoader.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Loads the shared test data through {@code DummyRegExLoader2} and expects exactly one tuple,
 * which is compared against the row {@code 3,three;iii} as prepared by
 * {@code TestHelper.getExpected} with {@code pattern2}.
 *
 * @throws Exception if the temp file cannot be created or the query fails
 */
@Test
public void testOnlyLastMatch() throws Exception {
    PigServer server = new PigServer(LOCAL);
    String filename = TestHelper.createTempFile(data, "");

    ArrayList<String[]> expectedRows = new ArrayList<String[]>();
    expectedRows.add(new String[] { "3,three;iii" });
    ArrayList<DataByteArray[]> expected = TestHelper.getExpected(expectedRows, pattern2);

    server.registerQuery("A = LOAD '" + Util.encodeEscape(filename) + "' USING " + DummyRegExLoader2.clreplaced.getName() + "() AS (key, val);");

    int seen = 0;
    for (Iterator<?> rows = server.openIterator("A"); rows.hasNext();) {
        Tuple current = (Tuple) rows.next();
        if (current == null) {
            break;
        }
        // the running count doubles as the index of the expected row to compare against
        TestHelper.examineTuple(expected, current, seen);
        seen++;
    }
    replacedertEquals(1, seen);
}

15 View Complete Implementation : TestCase.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Checks a CASE expression whose WHEN branches each carry their own condition and which ends
 * with an ELSE branch. Every input row is expected in the output, with rows matching no WHEN
 * condition labelled {@code 4n+3}.
 *
 * @throws Exception if a query cannot be registered or executed
 */
@Test
public void testConditionalWithElse() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data storage = resetData(server);
    storage.set("foo", tuple(1), tuple(2), tuple(3), tuple(4), tuple(5), tuple(6), tuple(7));

    server.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    // Conditional expression in when branch
    String caseQuery = "B = FOREACH A GENERATE i, ("
            + "  CASE "
            + "    WHEN i % 4 == 0 THEN '4n'"
            + "    WHEN i % 4 == 1 THEN '4n+1'"
            + "    WHEN i % 4 == 2 THEN '4n+2'"
            + "    ELSE                 '4n+3'"
            + "  END"
            + ");";
    server.registerQuery(caseQuery);
    server.registerQuery("STORE B INTO 'bar' USING mock.Storage();");

    List<Tuple> stored = storage.get("bar");
    Tuple[] expected = {
        tuple(1, "4n+1"), tuple(2, "4n+2"), tuple(3, "4n+3"), tuple(4, "4n"),
        tuple(5, "4n+1"), tuple(6, "4n+2"), tuple(7, "4n+3")
    };
    replacedertEquals(expected.length, stored.size());
    for (int idx = 0; idx < expected.length; idx++) {
        replacedertEquals(expected[idx], stored.get(idx));
    }
}

15 View Complete Implementation : TestPONegative.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Evaluates {@code SIZE(-1)} once per row of a three-row input and expects the value
 * {@code 1L} for every row.
 *
 * @throws Exception if the input file cannot be created or the query fails
 */
@Test
public void testPONegType() throws Exception {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    File f = Util.createInputFile("tmp", "", new String[] { "a", "b", "c" });
    String inputUri = Util.encodeEscape(Util.generateURI(f.toString(), pig.getPigContext()));
    pig.registerQuery("a = load '" + inputUri + "';");
    // -1 is modeled as POnegative with Constant(1)
    pig.registerQuery("b = foreach a generate SIZE(-1);");

    int rowCount = 0;
    for (Iterator<Tuple> rows = pig.openIterator("b"); rows.hasNext(); rowCount++) {
        replacedertEquals(1L, rows.next().get(0));
    }
    replacedertEquals(3, rowCount);
}

15 View Complete Implementation : TestNewPartitionFilterPushDown.java
Copyright Apache License 2.0
Author : sigmoidanalytics
// Negative case: the filter cannot be pushed down unless it meets certain conditions.
// Builds the logical plan for the query, takes the filter feeding the first sink, runs the
// extractor over its plan and asserts that canPushDown() reports false.
private void negativeTest(String query, List<String> parreplacedionCols) throws Exception {
    LogicalPlan plan = Util.buildLp(new PigServer(pc), query);
    Operator sink = plan.getSinks().get(0);
    // the operator directly ahead of the sink is taken to be the filter under test
    LOFilter filterUnderTest = (LOFilter) plan.getPredecessors(sink).get(0);
    ParreplacedionFilterExtractor extractor =
            new ParreplacedionFilterExtractor(filterUnderTest.getFilterPlan(), parreplacedionCols);
    extractor.visit();
    replacedert.replacedertFalse(extractor.canPushDown());
}

15 View Complete Implementation : TestIn.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Checks the IN operator applied to dereferenced group-key fields ({@code group.k1},
 * {@code group.k2}): only the groups {@code (a,x)} and {@code (b,x)} are expected to pass
 * the filter.
 *
 * @throws Exception if a query cannot be registered or executed
 */
@Test
public void testWithDereferenceOperator() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data storage = resetData(server);
    storage.set("foo", tuple("a", "x", 1), tuple("a", "y", 2), tuple("b", "x", 3), tuple("b", "y", 4), tuple("c", "x", 5), tuple("c", "y", 6));

    String[] statements = {
        "A = LOAD 'foo' USING mock.Storage() AS (k1:chararray, k2:chararray, i:int);",
        "B = GROUP A BY (k1, k2);",
        "C = FILTER B BY group.k1 IN ('a', 'b') AND group.k2 IN ('x');",
        "STORE C INTO 'bar' USING mock.Storage();"
    };
    for (String statement : statements) {
        server.registerQuery(statement);
    }

    List<Tuple> stored = storage.get("bar");
    replacedertEquals(2, stored.size());
    Tuple firstGroup = tuple(tuple("a", "x"), bag(tuple("a", "x", 1)));
    replacedertEquals(firstGroup, stored.get(0));
    Tuple secondGroup = tuple(tuple("b", "x"), bag(tuple("b", "x", 3)));
    replacedertEquals(secondGroup, stored.get(1));
}

15 View Complete Implementation : TestUnionOnSchema.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Test UNION ONSCHEMA on 3 inputs: two loads of the two-number file with different column
 * names and types, plus the file that also carries a chararray and a bag. The sorted output is
 * compared against six expected tuples.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchema3Inputs() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);

    String firstLoad = "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); ";
    String secondLoad = "l2 = load '" + INP_FILE_2NUMS + "' as (i : double, x : int); ";
    String thirdLoad = "l3 = load '" + INP_FILE_2NUM_1CHAR_1BAG + "' as "
            + "  (i : long, c : chararray, j : int "
            + ", b : bag { t : tuple (c1 : int, c2 : chararray)} ); ";
    String union = "u = union onschema l1, l2, l3;";
    Util.registerMultiLineQuery(pig, firstLoad + secondLoad + thirdLoad + union);

    pig.explain("u", System.out);
    Iterator<Tuple> actual = pig.openIterator("u");

    String[] expectedTuples = {
        "(1.0,2,null,null,null)",
        "(5.0,3,null,null,null)",
        "(1.0,null,2,null,null)",
        "(5.0,null,3,null,null)",
        "(1.0,2,null,'abc',{(1,'a'),(1,'b')})",
        "(5.0,3,null,'def',{(2,'a'),(2,'b')})"
    };
    List<Tuple> expected = Util.getTuplesFromConstantTupleStrings(expectedTuples);
    Util.checkQueryOutputsAfterSort(actual, expected);
}

15 View Complete Implementation : TestCase.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Checks that overlapping WHEN conditions of a CASE expression are applied in the order they
 * are written: values above 20 are labelled {@code > 20} even though they also satisfy
 * {@code i > 10}.
 *
 * @throws Exception if a query cannot be registered or executed
 */
@Test
public void testOrderOfConditions() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data storage = resetData(server);
    storage.set("foo", tuple(1), tuple(5), tuple(10), tuple(15), tuple(20), tuple(25), tuple(30));

    server.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    // Conditions are not mutually exclusive
    String caseQuery = "B = FOREACH A GENERATE i, ("
            + "  CASE "
            + "    WHEN i > 20 THEN '> 20'"
            + "    WHEN i > 10 THEN '> 10'"
            + "    ELSE             '> 0'"
            + "  END"
            + ");";
    server.registerQuery(caseQuery);
    server.registerQuery("STORE B INTO 'bar' USING mock.Storage();");

    List<Tuple> stored = storage.get("bar");
    Tuple[] expected = {
        tuple(1, "> 0"), tuple(5, "> 0"), tuple(10, "> 0"),
        tuple(15, "> 10"), tuple(20, "> 10"),
        tuple(25, "> 20"), tuple(30, "> 20")
    };
    replacedertEquals(expected.length, stored.size());
    for (int idx = 0; idx < expected.length; idx++) {
        replacedertEquals(expected[idx], stored.get(idx));
    }
}

15 View Complete Implementation : TestIn.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Checks the IN operator used as the condition of a {@code ? :} expression: the inputs 1..5
 * are expected to map to ODD, EVEN, ODD, EVEN, ODD.
 *
 * @throws Exception if a query cannot be registered or executed
 */
@Test
public void testWithBincond() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data storage = resetData(server);
    storage.set("foo", tuple(1), tuple(2), tuple(3), tuple(4), tuple(5));

    server.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    server.registerQuery("B = FOREACH A GENERATE (i IN (1, 3, 5) ? 'ODD' : 'EVEN');");
    server.registerQuery("STORE B INTO 'bar' USING mock.Storage();");

    List<Tuple> stored = storage.get("bar");
    Tuple[] expected = { tuple("ODD"), tuple("EVEN"), tuple("ODD"), tuple("EVEN"), tuple("ODD") };
    replacedertEquals(expected.length, stored.size());
    for (int idx = 0; idx < expected.length; idx++) {
        replacedertEquals(expected[idx], stored.get(idx));
    }
}

15 View Complete Implementation : TestIn.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Checks the IN operator used as a SPLIT condition: 1, 3 and 5 are expected in the
 * {@code odd} output and the remaining inputs 2 and 4 in the {@code even} output.
 *
 * @throws Exception if a query cannot be registered or executed
 */
@Test
public void testWithSplit() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data storage = resetData(server);
    storage.set("foo", tuple(1), tuple(2), tuple(3), tuple(4), tuple(5));

    server.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    server.registerQuery("SPLIT A INTO B IF i IN (1, 3, 5), C OTHERWISE;");
    server.registerQuery("STORE B INTO 'odd' USING mock.Storage();");
    server.registerQuery("STORE C INTO 'even' USING mock.Storage();");

    List<Tuple> odd = storage.get("odd");
    Tuple[] expectedOdd = { tuple(1), tuple(3), tuple(5) };
    replacedertEquals(expectedOdd.length, odd.size());
    for (int idx = 0; idx < expectedOdd.length; idx++) {
        replacedertEquals(expectedOdd[idx], odd.get(idx));
    }

    List<Tuple> even = storage.get("even");
    Tuple[] expectedEven = { tuple(2), tuple(4) };
    replacedertEquals(expectedEven.length, even.size());
    for (int idx = 0; idx < expectedEven.length; idx++) {
        replacedertEquals(expectedEven[idx], even.get(idx));
    }
}

15 View Complete Implementation : TestPigServer.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Checks the schema reported for a UNION of relations {@code a} and {@code b}; the expected
 * schema is {@code field1: int, field2: double, field3: chararray}.
 *
 * @throws Throwable if a query cannot be registered or the schemas differ
 */
@Test
public void testDescribeUnion() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    String[] statements = {
        "a = load 'a' as (field1: int, field2: float, field3: chararray );",
        "b = load 'b' as (field4, field5: double, field6: chararray );",
        "c = union a, b;"
    };
    for (String statement : statements) {
        server.registerQuery(statement);
    }
    Schema actual = server.dumpSchema("c");
    Schema expected = Utils.getSchemaFromString("field1: int,field2: double,field3: chararray");
    replacedertEquals(expected, actual);
}

15 View Complete Implementation : TestXMLLoader.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Prepends a {@code <namethisalso>} element to the shared test data and expects XMLLoader,
 * configured for the {@code name} tag, to yield exactly three non-empty tuples.
 *
 * @throws Exception if the temp file cannot be created or the query fails
 */
public void testXMLLoaderShouldNotConfusedWithTagsHavingSimilarPrefix() throws Exception {
    ArrayList<String[]> rowsWithDecoy = new ArrayList<String[]>();
    // a tag that merely starts with "name" goes first, followed by the regular data
    rowsWithDecoy.add(new String[] { "<namethisalso> foobar9 </namethisalso>" });
    rowsWithDecoy.addAll(data);
    String filename = TestHelper.createTempFile(rowsWithDecoy, "");
    PigServer pig = new PigServer(LOCAL);
    // double every backslash so the path survives inside the quoted Pig string
    filename = filename.replace("\\", "\\\\");
    pig.registerQuery("A = LOAD '" + filename + "' USING org.apache.pig.piggybank.storage.XMLLoader('name') as (doc:chararray);");

    int nonEmptyTuples = 0;
    for (Iterator<?> rows = pig.openIterator("A"); rows.hasNext();) {
        Tuple current = (Tuple) rows.next();
        if (current == null) {
            break;
        }
        if (current.size() > 0) {
            nonEmptyTuples++;
        }
    }
    replacedertEquals(3, nonEmptyTuples);
}

15 View Complete Implementation : TestCombiner.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Loads three fixed rows as relation {@code A} via {@code loadWithTestLoadFunc}, groups them
 * by the first two columns, counts each group, and asserts the first two result tuples are
 * {@code (a,b,2)} and {@code (a,c,1)}.
 *
 * @param pig server on which the queries are registered
 * @return the value returned by {@code loadWithTestLoadFunc} (named as the input file name)
 * @throws IOException if loading or query execution fails
 */
private String runTest(PigServer pig) throws IOException {
    List<String> rows = new ArrayList<String>();
    for (String row : new String[] { "a,b,1", "a,b,1", "a,c,1" }) {
        rows.add(row);
    }
    String inputFileName = loadWithTestLoadFunc("A", pig, rows);

    pig.registerQuery("B = group A by ($0, $1);");
    pig.registerQuery("C = foreach B generate flatten(group), COUNT($1);");

    Iterator<Tuple> results = pig.openIterator("C");
    replacedertEquals("(a,b,2)", results.next().toString());
    replacedertEquals("(a,c,1)", results.next().toString());
    return inputFileName;
}

15 View Complete Implementation : TestPigServer.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Checks the schema reported for a JOIN of relations {@code a} and {@code b}: all six fields
 * are expected, each prefixed with its source alias ({@code a::} or {@code b::}), with the
 * untyped {@code field4} shown as bytearray.
 *
 * @throws Throwable if a query cannot be registered or the schemas differ
 */
@Test
public void testDescribeJoin() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    String[] statements = {
        "a = load 'a' as (field1: int, field2: float, field3: chararray );",
        "b = load 'b' as (field4, field5: double, field6: chararray );",
        "c = join a by field1, b by field4;"
    };
    for (String statement : statements) {
        server.registerQuery(statement);
    }
    Schema actual = server.dumpSchema("c");
    Schema expected = Utils.getSchemaFromString("a::field1: int,a::field2: float,a::field3: chararray,b::field4: bytearray,b::field5: double,b::field6: chararray");
    replacedertEquals(expected, actual);
}

15 View Complete Implementation : TestBZip.java
Copyright Apache License 2.0
Author : sigmoidanalytics
/**
 * Runs a batch that stores the same relation to {@code output.bz2} and {@code output}, then
 * checks that the first file listed under each location is non-empty.
 *
 * @throws Exception if input creation, batch execution or the file listing fails
 */
@Test
public void testBzipStoreInMultiQuery() throws Exception {
    String inputFileName = "input.txt";
    Util.createInputFile(cluster, inputFileName, new String[] { "1\t2\r3\t4" });

    PigServer pig = new PigServer(cluster.getExecType(), properties);
    // both stores are queued and executed together as one batch
    pig.setBatchOn();
    pig.registerQuery("a = load '" + inputFileName + "';");
    pig.registerQuery("store a into 'output.bz2';");
    pig.registerQuery("store a into 'output';");
    pig.executeBatch();

    FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration(pig.getPigContext().getProperties()));
    for (String location : new String[] { "output", "output.bz2" }) {
        FileStatus[] listed = fs.listStatus(new Path(location), Util.getSuccessMarkerPathFilter());
        replacedertTrue(listed[0].getLen() > 0);
    }
}