org.apache.avro.file.DataFileReader - Java examples

Here are examples of the Java API org.apache.avro.file.DataFileReader, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.
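
Before diving into the examples, here is a minimal sketch of the read pattern most of them follow: construct a DatumReader, open the Avro container file with DataFileReader, iterate over the records, and close the reader. The file name users.avro is a hypothetical placeholder, not a file used by any example below.

import java.io.File;
import java.io.IOException;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;

public class DataFileReaderSketch {

    public static void main(String[] args) throws IOException {
        // No schema needs to be supplied: DataFileReader reads the writer
        // schema from the container file header.
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
        // try-with-resources closes the reader and its underlying input.
        try (DataFileReader<GenericRecord> reader =
                new DataFileReader<>(new File("users.avro"), datumReader)) {
            GenericRecord record = null;
            while (reader.hasNext()) {
                // Passing the previous record to next() reuses it, which cuts
                // allocation for files with many records.
                record = reader.next(record);
                System.out.println(record);
            }
        }
    }
}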

155 Examples

18 View Complete Implementation : TestExtractor.java
Copyright Apache License 2.0
Author : apache
/**
 * An implementation of {@link Extractor} for integration test.
 *
 * @author Yinan Li
 */
public class TestExtractor implements Extractor<String, String> {

    private static final Logger LOG = LoggerFactory.getLogger(TestExtractor.class);

    // Test Avro schema
    private static final String AVRO_SCHEMA = "{\"namespace\": \"example.avro\",\n" + " \"type\": \"record\",\n" + " \"name\": \"User\",\n" + " \"fields\": [\n" + "     {\"name\": \"name\", \"type\": \"string\"},\n" + "     {\"name\": \"favorite_number\",  \"type\": \"int\"},\n" + "     {\"name\": \"favorite_color\", \"type\": \"string\"}\n" + " ]\n" + "}";

    public static final int TOTAL_RECORDS = 1000;

    private DataFileReader<GenericRecord> dataFileReader;

    public TestExtractor(WorkUnitState workUnitState) {
        // super(workUnitState);
        Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);
        Path sourceFile = new Path(workUnitState.getWorkunit().getProp(TestSource.SOURCE_FILE_KEY));
        LOG.info("Reading from source file " + sourceFile);
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
        try {
            FileSystem fs = FileSystem.get(URI.create(workUnitState.getProp(ConfigurationKeys.FS_URI_KEY, ConfigurationKeys.LOCAL_FS_URI)), new Configuration());
            sourceFile = new Path(fs.makeQualified(sourceFile).toUri().getRawPath());
            this.dataFileReader = new DataFileReader<GenericRecord>(new FsInput(sourceFile, new Configuration()), datumReader);
        } catch (IOException ioe) {
            LOG.error("Failed to read the source file " + sourceFile, ioe);
        }
    }

    @Override
    public String getSchema() {
        return AVRO_SCHEMA;
    }

    @Override
    public String readRecord(@Deprecated String reuse) throws IOException {
        if (this.dataFileReader == null) {
            return null;
        }
        if (this.dataFileReader.hasNext()) {
            return this.dataFileReader.next().toString();
        }
        return null;
    }

    @Override
    public void close() {
        try {
            this.dataFileReader.close();
        } catch (IOException ioe) {
        // ignored
        }
    }

    @Override
    public long getExpectedRecordCount() {
        return TOTAL_RECORDS;
    }

    @Override
    public long getHighWatermark() {
        // TODO Auto-generated method stub
        return 0;
    }
}

18 View Complete Implementation : AvroFileReader.java
Copyright Apache License 2.0
Author : linkedin
public class AvroFileReader implements CachedFileReader {

    private DataFileReader<Object> dataFileReader;

    private BlockSchema schema;

    @Override
    public void open(JsonNode json, File file) throws IOException {
        DatumReader<Object> datumReader = new PigAvroDatumReader(AvroUtils.getSchema(new SeekableFileInput(file)));
        dataFileReader = new DataFileReader<Object>(file, datumReader);
        schema = AvroUtils.convertToBlockSchema(dataFileReader.getSchema());
    }

    @Override
    public Tuple next() {
        if (!dataFileReader.hasNext())
            return null;
        Tuple outputTuple = (Tuple) dataFileReader.next();
        return outputTuple;
    }

    @Override
    public void close() throws IOException {
        dataFileReader.close();
    }

    @Override
    public BlockSchema getSchema() {
        return schema;
    }
}

17 View Complete Implementation : StarTreeQueriesTest.java
Copyright Apache License 2.0
Author : Hanmourang
private static Map<List<String>, Long> computeAggregateGroupByFromRawData(File avroFile, Map<String, String> fixedValues, List<String> groupByColumns) throws Exception {
    Map<List<String>, Long> m0Aggregates = new HashMap<>();
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> fileReader = new DataFileReader<GenericRecord>(avroFile, reader);
    GenericRecord record = null;
    while (fileReader.hasNext()) {
        record = fileReader.next(record);
        boolean matches = true;
        for (Map.Entry<String, String> entry : fixedValues.entrySet()) {
            String value = record.get(entry.getKey()).toString();
            if (!value.equals(entry.getValue())) {
                matches = false;
            }
        }
        if (matches) {
            // Get group
            List<String> group = new ArrayList<>();
            for (String column : groupByColumns) {
                group.add(record.get(column).toString());
            }
            Long sum = m0Aggregates.get(group);
            if (sum == null) {
                sum = 0L;
            }
            m0Aggregates.put(group, sum + (Long) record.get("M0"));
        }
    }
    return m0Aggregates;
}

17 View Complete Implementation : StarTreeQueriesTest.java
Copyright Apache License 2.0
Author : Hanmourang
private static Map<String, Number> computeAggregateFromRawData(File avroFile, Map<String, String> fixedValues) throws Exception {
    long m0Aggregate = 0;
    double m1Aggregate = 0.0;
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> fileReader = new DataFileReader<GenericRecord>(avroFile, reader);
    GenericRecord record = null;
    while (fileReader.hasNext()) {
        record = fileReader.next(record);
        boolean matches = true;
        for (Map.Entry<String, String> entry : fixedValues.entrySet()) {
            String value = record.get(entry.getKey()).toString();
            if (!value.equals(entry.getValue())) {
                matches = false;
            }
        }
        if (matches) {
            m0Aggregate += (Long) record.get("M0");
            m1Aggregate += (Double) record.get("M1");
        }
    }
    return ImmutableMap.of("M0", m0Aggregate, "M1", m1Aggregate);
}

17 View Complete Implementation : AsyncDiskQueryInfoStore.java
Copyright Apache License 2.0
Author : snuspl
/**
 * Load the stored dag from File.
 * @param storedPlanFile file
 * @return chained dag
 * @throws IOException
 */
private AvroDag loadFromFile(final File storedPlanFile) throws IOException {
    final DataFileReader<AvroDag> dataFileReader = new DataFileReader<AvroDag>(storedPlanFile, datumReader);
    AvroDag dag = null;
    dag = dataFileReader.next(dag);
    return dag;
}

17 View Complete Implementation : AvroUtils.java
Copyright Apache License 2.0
Author : apache
/**
 * Get Avro schema from an Avro data file.
 */
public static Schema getSchemaFromDataFile(Path dataFile, FileSystem fs) throws IOException {
    try (SeekableInput sin = new FsInput(dataFile, fs.getConf());
        DataFileReader<GenericRecord> reader = new DataFileReader<>(sin, new GenericDatumReader<GenericRecord>())) {
        return reader.getSchema();
    }
}

17 View Complete Implementation : AvroStringFieldDecryptorConverterTest.java
Copyright Apache License 2.0
Author : apache
private GenericRecord getRecordFromFile(String path) throws IOException {
    DatumReader<GenericRecord> reader = new GenericDatumReader<>();
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(new File(path), reader);
    while (dataFileReader.hasNext()) {
        return dataFileReader.next();
    }
    return null;
}

17 View Complete Implementation : AvroExecutionVertexStore.java
Copyright Apache License 2.0
Author : snuspl
// TODO: [MIST-*] Implement policy for deleting from disk. Currently does not delete from disk.
private AvroPhysicalSourceOutgoingEdgesInfo loadAvroPhysicalSourceOutgoingEdgesInfoFromFile(final File storedSource) {
    try {
        final DataFileReader<AvroPhysicalSourceOutgoingEdgesInfo> dataFileReader = new DataFileReader<>(storedSource, sourceDatumReader);
        AvroPhysicalSourceOutgoingEdgesInfo avroPhysicalSourceOutgoingEdgesInfo = null;
        avroPhysicalSourceOutgoingEdgesInfo = dataFileReader.next(avroPhysicalSourceOutgoingEdgesInfo);
        return avroPhysicalSourceOutgoingEdgesInfo;
    } catch (IOException e) {
        throw new RuntimeException("Loading AvroPhysicalSourceOutgoingEdgesInfo has failed.");
    }
}

16 View Complete Implementation : AvroStringFieldEncryptorConverterTest.java
Copyright Apache License 2.0
Author : apache
private GenericRecord getRecordFromFile(String path) throws IOException {
    DatumReader<GenericRecord> reader = new GenericDatumReader<>();
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(new File(path), reader);
    if (dataFileReader.hasNext()) {
        return dataFileReader.next();
    }
    return null;
}

16 View Complete Implementation : TestAvro.java
Copyright Apache License 2.0
Author : lrtdc
public void deserUserCompile() {
    // Deserialize Users from disk
    DatumReader<User> userDatumReader = new SpecificDatumReader<User>(User.class);
    DataFileReader<User> dataFileReader = null;
    User user = null;
    try {
        dataFileReader = new DataFileReader<User>(new File("/Users/a/Desktop/tmp/users.avro"), userDatumReader);
        while (dataFileReader.hasNext()) {
            // Reuse user object by passing it to next(). This saves us from
            // allocating and garbage collecting many objects for files with
            // many items.
            user = dataFileReader.next(user);
            System.out.println(user);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

16 View Complete Implementation : Purge.java
Copyright Apache License 2.0
Author : linkedin
private DataFileReader<GenericRecord> createDataFileReader(String filename, boolean localFS) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> dataFileReader;
    if (localFS) {
        dataFileReader = new DataFileReader<GenericRecord>(new File(filename), datumReader);
    } else {
        Path path = new Path(filename);
        SeekableInput input = new FsInput(path, conf);
        dataFileReader = new DataFileReader<GenericRecord>(input, datumReader);
    }
    return dataFileReader;
}

16 View Complete Implementation : AvroDimensionRowParser.java
Copyright Apache License 2.0
Author : yahoo
/**
 * Parses the avro file and returns the dimension rows.
 *
 * @param dimension The dimension object used to configure the dimension
 * @param avroFilePath The path of the AVRO data file (.avro)
 *
 * @return A set of dimension rows
 *
 * @throws IllegalArgumentException thrown if JSON object `fields` is not present
 */
public Set<DimensionRow> parseAvroFileDimensionRows(Dimension dimension, String avroFilePath) throws IllegalArgumentException {
    GenericDatumReader datumReader = new GenericDatumReader();
    // Creates an AVRO DataFileReader object that reads the AVRO data file one record at a time
    try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(new File(avroFilePath), datumReader)) {
        return streamDimensionRows(dataFileReader, dimension).collect(Collectors.toSet());
    } catch (IOException e) {
        String msg = String.format("Unable to process the file, at the location %s", avroFilePath);
        LOG.error(msg, e);
        throw new IllegalArgumentException(msg, e);
    }
}

16 View Complete Implementation : AvroGenericRecordAccessorTest.java
Copyright Apache License 2.0
Author : apache
private void updateRecordFromTestResource(String resourceName, String avroFileName) throws IOException {
    if (avroFileName == null) {
        avroFileName = resourceName + ".avro";
    }
    recordSchema = new Schema.Parser().parse(getClass().getClassLoader().getResourceAsStream(resourceName + ".avsc"));
    DatumReader<GenericRecord> reader = new GenericDatumReader<>(recordSchema);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(new File(getClass().getClassLoader().getResource(avroFileName).getPath()), reader);
    Assert.assertTrue(dataFileReader.hasNext());
    record = dataFileReader.next(record);
    accessor = new AvroGenericRecordAccessor(record);
}

16 View Complete Implementation : AvroDataStoreReader.java
Copyright Apache License 2.0
Author : openaire
private DataFileReader<T> getNextNonemptyReader() throws IOException {
    while (fileIterator != null && fileIterator.hasNext()) {
        LocatedFileStatus currentFileStatus = fileIterator.next();
        if (isValidFile(currentFileStatus)) {
            FileSystemPath currPath = new FileSystemPath(path.getFileSystem(), currentFileStatus.getPath());
            DataFileReader<T> reader = getSingleFileReader(currPath, readerSchema);
            /**
             * Check if the file contains at least one record
             */
            if (reader.hasNext()) {
                return reader;
            } else {
                reader.close();
            }
        }
    }
    /**
     * fallback
     */
    return null;
}

16 View Complete Implementation : AvroUtils.java
Copyright Apache License 2.0
Author : Khalian
/**
 * Method to deserialize a Modulo7 song object; the assumption is that each file
 * has exactly one serialized song object.
 *
 * Returns a null object if no song objects are serialized in the file
 *
 * @param sourceFileName
 * @return
 */
public static Song deserialize(final String sourceFileName) throws Modulo7NoSuchFileOrDirectoryException {
    File sourceFile = new File(sourceFileName);
    DatumReader<Song> reader = new ReflectDatumReader<>(Song.class);
    DataFileReader<Song> dataFileReader;
    try {
        dataFileReader = new DataFileReader<>(sourceFile, reader);
    } catch (IOException e) {
        throw new Modulo7NoSuchFileOrDirectoryException("No such file " + sourceFileName);
    }
    /**
     * Since we have only one song object in the file, as assumed, we return that back
     */
    for (Song song : dataFileReader) {
        return song;
    }
    // Return null if nothing appears
    return null;
}

16 View Complete Implementation : AvroUtils.java
Copyright Apache License 2.0
Author : Khalian
/**
 * Deserializer for an independent lyrics object
 *
 * @param sourceFileName
 * @return
 * @throws com.modulo7.common.exceptions.Modulo7NoSuchFileOrDirectoryException
 */
public static Lyrics deserializeLyricsObject(final String sourceFileName) throws Modulo7NoSuchFileOrDirectoryException {
    File sourceFile = new File(sourceFileName);
    DatumReader<Lyrics> reader = new ReflectDatumReader<>(Lyrics.class);
    DataFileReader<Lyrics> dataFileReader;
    try {
        dataFileReader = new DataFileReader<>(sourceFile, reader);
    } catch (IOException e) {
        throw new Modulo7NoSuchFileOrDirectoryException("No such file " + sourceFileName);
    }
    /**
     * Since we have only one lyrics object in the file by design, as assumed, we return that back
     */
    for (Lyrics lyrics : dataFileReader) {
        return lyrics;
    }
    // Return null if nothing appears
    return null;
}

16 View Complete Implementation : ReadActivityFile.java
Copyright MIT License
Author : oracle
/**
 * Reads the avro file
 *  @throws IOException
 */
private void readFile() throws IOException {
    // Deserialize Activities from disk
    File file = new File(filename);
    DatumReader<Activity> activityDatumReader = new SpecificDatumReader<Activity>(Activity.class);
    DataFileReader<Activity> dataFileReader = new DataFileReader<Activity>(file, activityDatumReader);
    Activity activity = null;
    int i = 0;
    while (dataFileReader.hasNext() && i < numrecs) {
        i++;
        activity = dataFileReader.next(activity);
        System.out.println(activity);
    }
}

16 View Complete Implementation : AvroExecutionVertexStore.java
Copyright Apache License 2.0
Author : snuspl
/**
 * Loads the AvroPhysicalOperatorChain with the chainId.
 */
public AvroPhysicalOperatorChain loadAvroPhysicalOperatorChain(final String chainId) throws IOException {
    try {
        final File storedChain = getAvroPhysicalOperatorChainFile(chainId);
        final DataFileReader<AvroPhysicalOperatorChain> dataFileReader = new DataFileReader<>(storedChain, operatorChainDatumReader);
        AvroPhysicalOperatorChain avroPhysicalOperatorChain = null;
        avroPhysicalOperatorChain = dataFileReader.next(avroPhysicalOperatorChain);
        return avroPhysicalOperatorChain;
    } catch (final IOException e) {
        LOG.log(Level.SEVERE, "An exception occurred while loading the AvroPhysicalOperatorChain with ID {0}.", new Object[] { chainId });
        throw e;
    }
}

15 View Complete Implementation : LobAvroImportTestCase.java
Copyright Apache License 2.0
Author : aliyun
/**
 * Import multiple columns of blob data. Blob data should be saved as Avro
 * bytes.
 * @throws IOException
 * @throws SQLException
 */
public void testBlobAvroImportMultiCols() throws IOException, SQLException {
    String[] types = { getBlobType(), getBlobType(), getBlobType() };
    String expectedVal1 = "This is short BLOB data1";
    String expectedVal2 = "This is short BLOB data2";
    String expectedVal3 = "This is short BLOB data3";
    String[] vals = { getBlobInsertStr(expectedVal1), getBlobInsertStr(expectedVal2), getBlobInsertStr(expectedVal3) };
    createTableWithColTypes(types, vals);
    runImport(getArgv());
    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record = reader.next();
    // Verify that all columns are imported correctly.
    ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
    String returnVal = new String(buf.array());
    assertEquals(getColName(0), expectedVal1, returnVal);
    buf = (ByteBuffer) record.get(getColName(1));
    returnVal = new String(buf.array());
    assertEquals(getColName(1), expectedVal2, returnVal);
    buf = (ByteBuffer) record.get(getColName(2));
    returnVal = new String(buf.array());
    assertEquals(getColName(2), expectedVal3, returnVal);
}

15 View Complete Implementation : TestAvroImport.java
Copyright Apache License 2.0
Author : aliyun
public void testNullableAvroImport() throws IOException, SQLException {
    String[] types = { "INT" };
    String[] vals = { null };
    createTableWithColTypes(types, vals);
    runImport(getOutputArgv(true, null));
    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record1 = reader.next();
    assertNull(record1.get("DATA_COL0"));
}

15 View Complete Implementation : AvroProcessors.java
Copyright Apache License 2.0
Author : hazelcast
/**
 * Returns a supplier of processors for {@link AvroSources#filesBuilder}.
 */
@Nonnull
public static <D, T> ProcessorMetaSupplier readFilesP(@Nonnull String directory, @Nonnull String glob, boolean sharedFileSystem, @Nonnull SupplierEx<? extends DatumReader<D>> datumReaderSupplier, @Nonnull BiFunctionEx<String, ? super D, T> mapOutputFn) {
    return ReadFilesP.metaSupplier(directory, glob, sharedFileSystem, path -> {
        DataFileReader<D> reader = new DataFileReader<>(path.toFile(), datumReaderSupplier.get());
        return StreamSupport.stream(reader.spliterator(), false).onClose(() -> uncheckRun(reader::close));
    }, mapOutputFn);
}

15 View Complete Implementation : TestAvroImport.java
Copyright Apache License 2.0
Author : dkhadoop
public void testNullableAvroImport() throws IOException, SQLException {
    String[] types = { "INT" };
    String[] vals = { null };
    createTableWithColTypes(types, vals);
    runImport(getOutputArgv(true, null));
    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record1 = reader.next();
    assertNull(record1.get("DATA_COL0"));
}

15 View Complete Implementation : LobAvroImportTestCase.java
Copyright Apache License 2.0
Author : dkhadoop
/**
 * Import multiple columns of blob data. Blob data should be saved as Avro
 * bytes.
 * @throws IOException
 * @throws SQLException
 */
public void testBlobAvroImportMultiCols() throws IOException, SQLException {
    String[] types = { getBlobType(), getBlobType(), getBlobType() };
    String expectedVal1 = "This is short BLOB data1";
    String expectedVal2 = "This is short BLOB data2";
    String expectedVal3 = "This is short BLOB data3";
    String[] vals = { getBlobInsertStr(expectedVal1), getBlobInsertStr(expectedVal2), getBlobInsertStr(expectedVal3) };
    createTableWithColTypes(types, vals);
    runImport(getArgv());
    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record = reader.next();
    // Verify that all columns are imported correctly.
    ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
    String returnVal = new String(buf.array());
    assertEquals(getColName(0), expectedVal1, returnVal);
    buf = (ByteBuffer) record.get(getColName(1));
    returnVal = new String(buf.array());
    assertEquals(getColName(1), expectedVal2, returnVal);
    buf = (ByteBuffer) record.get(getColName(2));
    returnVal = new String(buf.array());
    assertEquals(getColName(2), expectedVal3, returnVal);
}

15 View Complete Implementation : FileManager.java
Copyright BSD 2-Clause "Simplified" License
Author : pyvandenbussche
public <V> List<V> getResults(Endpoint ep, Class<V> cls) {
    List<V> l = new ArrayList<V>();
    File f = getFile(ep, cls.getSimpleName());
    DatumReader<V> reader = new SpecificDatumReader<V>(cls);
    try {
        DataFileReader<V> dfr = new DataFileReader<V>(f, reader);
        while (dfr.hasNext()) {
            l.add(dfr.next());
        }
    } catch (IOException e) {
        Object[] t = { e.getClass().getSimpleName(), e.getMessage(), cls.getSimpleName(), ep.getUri().toString() };
        log.error("{}:{} during deserialisation of {} results for {}", t);
    }
    Object[] t = { l.size(), cls.getSimpleName(), ep.getUri().toString() };
    log.info("Deserialised {} {} results for {}", t);
    return l;
}

15 View Complete Implementation : Converter.java
Copyright Apache License 2.0
Author : elodina
public static void avroToXml(File avroFile, File xmlFile) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(protocol.getType("Element"));
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datumReader);
    GenericRecord record = dataFileReader.next();
    Document doc;
    try {
        doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    } catch (ParserConfigurationException e) {
        throw new RuntimeException(e);
    }
    Element el = unwrapElement(record, doc);
    doc.appendChild(el);
    saveDocument(doc, xmlFile);
}

15 View Complete Implementation : AvroSinkTest.java
Copyright Apache License 2.0
Author : hazelcast
private <R> void checkFileContent(DatumReader<R> datumReader) throws IOException {
    File[] files = directory.listFiles();
    assertNotNull(files);
    assertEquals(1, files.length);
    int[] count = { 0 };
    try (DataFileReader<R> reader = new DataFileReader<>(files[0], datumReader)) {
        reader.forEach(datum -> count[0]++);
    }
    assertEquals(TOTAL_RECORD_COUNT, count[0]);
}

15 View Complete Implementation : AvroExternalTable.java
Copyright Apache License 2.0
Author : apache
private Schema getSchemaFromAvroDataFile() throws IOException {
    String firstDataFilePath = HdfsReader.getFirstDataFilePathInDir(this.dataLocationInHdfs);
    LOG.info("Extracting schema for table " + this.name + " from avro data file " + firstDataFilePath);
    SeekableInput sin = new HdfsReader(firstDataFilePath).getFsInput();
    try (DataFileReader<Void> dfr = new DataFileReader<>(sin, new GenericDatumReader<Void>())) {
        Schema schema = dfr.getSchema();
        return schema;
    }
}

15 View Complete Implementation : LobAvroImportTestCase.java
Copyright Apache License 2.0
Author : dkhadoop
/**
 * Import blob data that is smaller than inline lob limit. Blob data
 * should be saved as Avro bytes.
 * @throws IOException
 * @throws SQLException
 */
public void testBlobAvroImportInline() throws IOException, SQLException {
    String[] types = { getBlobType() };
    String expectedVal = "This is short BLOB data";
    String[] vals = { getBlobInsertStr(expectedVal) };
    createTableWithColTypes(types, vals);
    runImport(getArgv());
    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record = reader.next();
    // Verify that blob data is imported as Avro bytes.
    ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
    String returnVal = new String(buf.array());
    assertEquals(getColName(0), expectedVal, returnVal);
}

15 View Complete Implementation : TestAvroImport.java
Copyright Apache License 2.0
Author : apache
private void checkRecordWithExtraArgs(String[] extraArgs, String tableName) throws IOException {
    String date = "2017-01-19";
    String timeStamp = "2017-01-19 14:47:57.112000";
    String[] names = { "INTFIELD1", "DATA_#_COL0", "DATA#COL1", "DATA___COL2" };
    String[] types = { "INT", "DATE", "TIMESTAMP", "DECIMAL(2,20)" };
    String[] vals = { "1", "{ts \'" + date + "\'}", "{ts \'" + timeStamp + "\'}", "2e20" };
    String[] checkNames = { "INTFIELD1", "DATA___COL0", "DATA_COL1", "DATA___COL2" };
    setCurTableName(tableName);
    createTableWithColTypesAndNames(names, types, vals);
    runImport(getOutputArgv(true, extraArgs));
    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record = reader.next();
    for (String columnName : checkNames) {
        assertNotNull(record.get(columnName));
    }
    removeTableDir();
}

15 View Complete Implementation : ProtoGetSchemaTool.java
Copyright Apache License 2.0
Author : spotify
@Override
public int run(InputStream in, PrintStream out, PrintStream err, List<String> args) throws Exception {
    if (args.size() != 1) {
        err.println("Expected 1 argument: input_file");
        return 1;
    }
    DataFileReader<Void> reader = new DataFileReader<>(Util.openSeekableFromFS(args.get(0)), new GenericDatumReader<Void>());
    out.println(reader.getMetaString("protobuf.generic.schema"));
    return 0;
}

15 View Complete Implementation : LobAvroImportTestCase.java
Copyright Apache License 2.0
Author : aliyun
/**
 * Import blob data that is smaller than inline lob limit. Blob data
 * should be saved as Avro bytes.
 * @throws IOException
 * @throws SQLException
 */
public void testBlobAvroImportInline() throws IOException, SQLException {
    String[] types = { getBlobType() };
    String expectedVal = "This is short BLOB data";
    String[] vals = { getBlobInsertStr(expectedVal) };
    createTableWithColTypes(types, vals);
    runImport(getArgv());
    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record = reader.next();
    // Verify that blob data is imported as Avro bytes.
    ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
    String returnVal = new String(buf.array());
    replacedertEquals(getColName(0), expectedVal, returnVal);
}

14 View Complete Implementation : TestAvroImport.java
Copyright Apache License 2.0
Author : apache
@Test
public void testNullableAvroImport() throws IOException, SQLException {
    String[] types = { "INT" };
    String[] vals = { null };
    createTableWithColTypes(types, vals);
    runImport(getOutputArgv(true, null));
    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record1 = reader.next();
    assertNull(record1.get("DATA_COL0"));
}

14 View Complete Implementation : LobAvroImportTestCase.java
Copyright Apache License 2.0
Author : apache
/**
 * Import multiple columns of blob data. Blob data should be saved as Avro
 * bytes.
 * @throws IOException
 * @throws SQLException
 */
@Test
public void testBlobAvroImportMultiCols() throws IOException, SQLException {
    String[] types = { getBlobType(), getBlobType(), getBlobType() };
    String expectedVal1 = "This is short BLOB data1";
    String expectedVal2 = "This is short BLOB data2";
    String expectedVal3 = "This is short BLOB data3";
    String[] vals = { getBlobInsertStr(expectedVal1), getBlobInsertStr(expectedVal2), getBlobInsertStr(expectedVal3) };
    createTableWithColTypes(types, vals);
    runImport(getArgv());
    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record = reader.next();
    // Verify that all columns are imported correctly.
    ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
    String returnVal = new String(buf.array());
    assertEquals(getColName(0), expectedVal1, returnVal);
    buf = (ByteBuffer) record.get(getColName(1));
    returnVal = new String(buf.array());
    assertEquals(getColName(1), expectedVal2, returnVal);
    buf = (ByteBuffer) record.get(getColName(2));
    returnVal = new String(buf.array());
    assertEquals(getColName(2), expectedVal3, returnVal);
}

14 View Complete Implementation : SinkAvroTest.java
Copyright Mozilla Public License 2.0
Author : mozilla
private GenericRecord readRecord(AvroSchemaStore store, String outputPath, String schemaPath) throws IOException, SchemaNotFoundException {
    Path path = getPath(outputPath, schemaPath);
    String[] p = schemaPath.split("\\.");
    String schemaStorePath = String.format("%s/%s/%s.%s.avro.json", p[0], p[1], p[1], p[2]);
    Schema schema = store.getSchema(schemaStorePath);
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
    try (DataFileReader<GenericRecord> fileReader = new DataFileReader<>(path.toFile(), datumReader)) {
        return (GenericRecord) fileReader.next();
    }
}

14 View Complete Implementation : AvroTestUtils.java
Copyright Apache License 2.0
Author : apache
private static void readAndVerify(String[] expectedResults, Configuration conf, Path outputFile) {
    try (DataFileReader<GenericRecord> reader = read(outputFile, conf)) {
        GenericRecord record;
        if (!reader.hasNext() && expectedResults != null && expectedResults.length > 0) {
            fail("Empty file was not expected");
        }
        int i = 0;
        while (reader.hasNext()) {
            record = reader.next();
            assertEquals(expectedResults[i++], record.toString());
        }
        if (expectedResults != null && expectedResults.length > i) {
            fail("More output data was expected");
        }
    } catch (IOException ioe) {
        LOG.error("Issue with verifying the output", ioe);
        throw new RuntimeException(ioe);
    }
}

14 View Complete Implementation : AvroPipelineTest.java
Copyright Apache License 2.0
Author : apache
private List<GenericRecord> readGenericFile() throws IOException {
    List<GenericRecord> records = Lists.newArrayList();
    GenericDatumReader<GenericRecord> genericDatumReader = new GenericDatumReader<>();
    try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(new File(outputFile + "-00000-of-00001"), genericDatumReader)) {
        for (GenericRecord record : dataFileReader) {
            records.add(record);
        }
    }
    return records;
}

14 View Complete Implementation : Purge.java
Copyright Apache License 2.0
Author : linkedin
private void loadMembersToPurge(String filename) throws IOException {
    // TODO: "memberId" column name should be configurable
    DataFileReader<GenericRecord> dataFileReader = createDataFileReader(filename, true);
    while (dataFileReader.hasNext()) {
        GenericRecord record = dataFileReader.next();
        Integer memberId = (Integer) record.get("memberId");
        if (memberId == null) {
            throw new NullPointerException("memberId is null");
        }
        membersToPurge.add(((Number) record.get("memberId")).intValue());
    }
    dataFileReader.close();
}

14 View Complete Implementation : AvroWordCountTest.java
Copyright Apache License 2.0
Author : tomslabs
private static void fill(File file, Map<String, Integer> results) throws Throwable {
    DataFileReader<GenericRecord> reader = new DataFileReader<GenericRecord>(file, new GenericDatumReader<GenericRecord>());
    while (reader.hasNext()) {
        GenericRecord record = reader.next();
        String word = record.get("key").toString();
        int count = (Integer) record.get("value");
        results.put(word, count);
    }
}

14 View Complete Implementation : AvroFsHelper.java
Copyright Apache License 2.0
Author : apache
public Schema getAvroSchema(String file) throws FileBasedHelperException {
    DataFileReader<GenericRecord> dfr = null;
    try {
        if (this.getState().getPropAsBoolean(ConfigurationKeys.SHOULD_FS_PROXY_AS_USER, ConfigurationKeys.DEFAULT_SHOULD_FS_PROXY_AS_USER)) {
            dfr = new DataFileReader<>(new ProxyFsInput(new Path(file), this.getFileSystem()), new GenericDatumReader<GenericRecord>());
        } else {
            dfr = new DataFileReader<>(new FsInput(new Path(file), this.getFileSystem().getConf()), new GenericDatumReader<GenericRecord>());
        }
        return dfr.getSchema();
    } catch (IOException e) {
        throw new FileBasedHelperException("Failed to open avro file " + file + " due to error " + e.getMessage(), e);
    } finally {
        if (dfr != null) {
            try {
                dfr.close();
            } catch (IOException e) {
                LOGGER.error("Failed to close avro file " + file, e);
            }
        }
    }
}

14 View Complete Implementation : DefaultGroupCheckpointStore.java
Copyright Apache License 2.0
Author : snuspl
@Override
public List<AvroDag> loadSavedQueries(final List<String> queryIdList) throws IOException {
    final List<AvroDag> savedQueries = new ArrayList<>();
    for (final String queryId : queryIdList) {
        final File storedFile = getQueryStoreFile(queryId);
        final DataFileReader<AvroDag> dataFileReader = new DataFileReader<>(storedFile, avroDagDatumReader);
        AvroDag avroDag = null;
        avroDag = dataFileReader.next(avroDag);
        savedQueries.add(avroDag);
    }
    return savedQueries;
}

14 View Complete Implementation : MemberServerConsumer.java
Copyright Apache License 2.0
Author : lrtdc
/**
 * Deserializes member records using the classes generated by the Avro Java tools.
 *
 * @throws IOException
 */
public void MemberInfoToolsDeser() throws IOException {
    // 1. Build the datum reader for deserialization
    DatumReader<Members> mDr = new SpecificDatumReader<Members>(Members.class);
    DataFileReader<Members> mDfr = new DataFileReader<Members>(new File("E:/avro/members.avro"), mDr);
    Members m = null;
    // 2. Read the records from the file in a loop
    while (mDfr.hasNext()) {
        m = mDfr.next();
        System.err.println("tools deser data :" + m);
    }
    // 3. Close the reader
    mDfr.close();
    System.out.println("Tools Builder Ser Start Complete.");
}

14 View Complete Implementation : LobAvroImportTestCase.java
Copyright Apache License 2.0
Author : apache
/**
 * Import blob data that is smaller than inline lob limit. Blob data
 * should be saved as Avro bytes.
 * @throws IOException
 * @throws SQLException
 */
@Test
public void testBlobAvroImportInline() throws IOException, SQLException {
    String[] types = { getBlobType() };
    String expectedVal = "This is short BLOB data";
    String[] vals = { getBlobInsertStr(expectedVal) };
    createTableWithColTypes(types, vals);
    runImport(getArgv());
    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record = reader.next();
    // Verify that blob data is imported as Avro bytes.
    ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
    String returnVal = new String(buf.array());
    assertEquals(getColName(0), expectedVal, returnVal);
}

14 View Complete Implementation : TestAvro.java
Copyright Apache License 2.0
Author : lrtdc
public void deserUserDynamic() {
    Schema schema = null;
    try {
        schema = new Schema.Parser().parse(new File("/Users/a/Desktop/tmp/user.avsc"));
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Deserialize users from disk
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
    File file = new File("/Users/a/Desktop/tmp/userDyn.avro");
    DataFileReader<GenericRecord> dataFileReader = null;
    GenericRecord user = null;
    try {
        dataFileReader = new DataFileReader<GenericRecord>(file, datumReader);
        while (dataFileReader.hasNext()) {
            // Reuse user object by passing it to next(). This saves us from
            // allocating and garbage collecting many objects for files with
            // many items.
            user = dataFileReader.next(user);
            System.out.println(user);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

13 View Complete Implementation : TestMergeContent.java
Copyright Apache License 2.0
Author : wangrenlei
private Map<String, GenericRecord> getGenericRecordMap(byte[] data, Schema schema, String key) throws IOException {
    // create a reader for the merged content
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
    SeekableByteArrayInput input = new SeekableByteArrayInput(data);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(input, datumReader);
    // read all the records into a map to verify all the records are there
    Map<String, GenericRecord> records = new HashMap<>();
    while (dataFileReader.hasNext()) {
        GenericRecord user = dataFileReader.next();
        records.put(user.get(key).toString(), user);
    }
    return records;
}

13 View Complete Implementation : DefaultGroupCheckpointStore.java
Copyright Apache License 2.0
Author : snuspl
@Override
public GroupCheckpoint loadSavedGroupState(final String groupId) throws IOException {
    // Load the file.
    final File storedFile = getGroupCheckpointFile(groupId);
    final DataFileReader<GroupCheckpoint> dataFileReader = new DataFileReader<>(storedFile, groupCheckpointDatumReader);
    GroupCheckpoint mgc = null;
    mgc = dataFileReader.next(mgc);
    if (mgc != null) {
        LOG.log(Level.INFO, "Checkpoint file found. groupId is " + groupId);
    } else {
        LOG.log(Level.WARNING, "Checkpoint file not found or error during loading. groupId is " + groupId);
    }
    return mgc;
}

13 View Complete Implementation : HDFSUtils.java
Copyright Apache License 2.0
Author : Khalian
/**
 * Utility method to read from HDFS Store a Modulo7 song object
 *
 * @param path
 * @throws IOException
 */
private Song m7HDFSRead(final String path) throws IOException {
    Path hdfsPath = new Path(path);
    Configuration config = new Configuration();
    SeekableInput input = new FsInput(hdfsPath, config);
    DatumReader<Song> reader = new ReflectDatumReader<>(Song.class);
    DataFileReader<Song> dataFileReader = new DataFileReader<>(input, reader);
    for (Song song : dataFileReader) {
        return song;
    }
    // also closes underlying FsInput
    dataFileReader.close();
    return null;
}

12 View Complete Implementation : Purge.java
Copyright Apache License 2.0
Author : linkedin
private void purge(String src, String dst) throws IOException {
    DataFileReader<GenericRecord> dataFileReader = createDataFileReader(src, false);
    DataFileWriter<GenericRecord> writer = createDataFileWriter(dataFileReader);
    numRecords = 0;
    recordsPurged = 0;
    remainingRecords = 0;
    // Copy
    while (dataFileReader.hasNext()) {
        numRecords++;
        GenericRecord record = dataFileReader.next();
        if (record == null) {
            continue;
        }
        Number column = (Number) record.get(columnName);
        if ((column == null) || (!membersToPurge.contains(column.intValue()))) {
            remainingRecords++;
            writer.append(record);
        }
    }
    recordsPurged = numRecords - remainingRecords;
    writer.close();
    dataFileReader.close();
}

12 View Complete Implementation : FsUtils.java
Copyright Apache License 2.0
Author : gbif
/**
 * If a file is too small (less than 3Kb), checks any records inside, if the file is empty, removes it
 */
@SneakyThrows
public static boolean deleteAvroFileIfEmpty(FileSystem fs, Path path) {
    if (!fs.exists(path)) {
        return true;
    }
    if (fs.getFileStatus(path).getLen() > FILE_LIMIT_SIZE) {
        return false;
    }
    SpecificDatumReader<ExtendedRecord> datumReader = new SpecificDatumReader<>(ExtendedRecord.class);
    try (AvroFSInput input = new AvroFSInput(fs.open(path), fs.getFileStatus(path).getLen());
        DataFileReader<ExtendedRecord> dataFileReader = new DataFileReader<>(input, datumReader)) {
        if (!dataFileReader.hasNext()) {
            log.warn("File is empty - {}", path);
            Path parent = path.getParent();
            fs.delete(parent, true);
            Path subParent = parent.getParent();
            if (!fs.listFiles(subParent, true).hasNext()) {
                fs.delete(subParent, true);
            }
            return true;
        }
        return false;
    }
}

12 View Complete Implementation : LobAvroImportTestCase.java
Copyright Apache License 2.0
Author : aliyun
/**
 * Import blob data that is smaller than inline lob limit and compress with
 * deflate codec. Blob data should be encoded and saved as Avro bytes.
 * @throws IOException
 * @throws SQLException
 */
public void testBlobCompressedAvroImportInline() throws IOException, SQLException {
    String[] types = { getBlobType() };
    String expectedVal = "This is short BLOB data";
    String[] vals = { getBlobInsertStr(expectedVal) };
    createTableWithColTypes(types, vals);
    runImport(getArgv("--compression-codec", CodecMap.DEFLATE));
    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record = reader.next();
    // Verify that the data block of the Avro file is compressed with deflate
    // codec.
    assertEquals(CodecMap.DEFLATE, reader.getMetaString(DataFileConstants.CODEC));
    // Verify that all columns are imported correctly.
    ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
    String returnVal = new String(buf.array());
    assertEquals(getColName(0), expectedVal, returnVal);
}

12 View Complete Implementation : LobAvroImportTestCase.java
Copyright Apache License 2.0
Author : dkhadoop
/**
 * Import blob data that is smaller than inline lob limit and compress with
 * deflate codec. Blob data should be encoded and saved as Avro bytes.
 * @throws IOException
 * @throws SQLException
 */
public void testBlobCompressedAvroImportInline() throws IOException, SQLException {
    String[] types = { getBlobType() };
    String expectedVal = "This is short BLOB data";
    String[] vals = { getBlobInsertStr(expectedVal) };
    createTableWithColTypes(types, vals);
    runImport(getArgv("--compression-codec", CodecMap.DEFLATE));
    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record = reader.next();
    // Verify that the data block of the Avro file is compressed with deflate
    // codec.
    assertEquals(CodecMap.DEFLATE, reader.getMetaString(DataFileConstants.CODEC));
    // Verify that all columns are imported correctly.
    ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
    String returnVal = new String(buf.array());
    assertEquals(getColName(0), expectedVal, returnVal);
}