org.apache.spark.SparkConf - Java examples

Here are examples of the Java API org.apache.spark.SparkConf, taken from open-source projects. By voting up, you can indicate which examples are most useful and appropriate.

155 Examples
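Before the individual examples, here is a minimal, self-contained sketch of how a SparkConf is typically constructed and passed to a JavaSparkContext. The master URL, app name, and serializer setting are illustrative placeholders, not taken from any of the listed projects.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class SparkConfDemo {

    public static void main(String[] args) {
        // Configure the application: run mode, the name shown in the Web UI,
        // and arbitrary key/value settings such as the serializer.
        SparkConf conf = new SparkConf()
                .setMaster("local[*]") // placeholder; use your cluster's master URL in production
                .setAppName("SparkConfDemo")
                .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        JavaSparkContext sc = new JavaSparkContext(conf);
        // Settings are plain key/value pairs and can be read back.
        System.out.println("spark.app.name = " + sc.getConf().get("spark.app.name"));
        sc.close();
    }
}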

19 votes: JavaNaiveBayesExample.java (MIT License, author huangyueranbbc)
public static void main(String[] args) {
    SparkConf sparkConf = new SparkConf().setAppName("JavaNaiveBayesExample");
    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
    // $example on$
    String path = "data/mllib/sample_libsvm_data.txt";
    JavaRDD<LabeledPoint> inputData = MLUtils.loadLibSVMFile(jsc.sc(), path).toJavaRDD();
    JavaRDD<LabeledPoint>[] tmp = inputData.randomSplit(new double[] { 0.6, 0.4 });
    // training set
    JavaRDD<LabeledPoint> training = tmp[0];
    // test set
    JavaRDD<LabeledPoint> test = tmp[1];
    final NaiveBayesModel model = NaiveBayes.train(training.rdd(), 1.0);
    JavaPairRDD<Double, Double> predictionAndLabel = test.mapToPair(new PairFunction<LabeledPoint, Double, Double>() {

        @Override
        public Tuple2<Double, Double> call(LabeledPoint p) {
            return new Tuple2<>(model.predict(p.features()), p.label());
        }
    });
    double accuracy = predictionAndLabel.filter(new Function<Tuple2<Double, Double>, Boolean>() {

        @Override
        public Boolean call(Tuple2<Double, Double> pl) {
            return pl._1().equals(pl._2());
        }
    }).count() / (double) test.count();
    System.out.println("Accuracy: " + accuracy);
    // Save and load model
    model.save(jsc.sc(), "target/tmp/myNaiveBayesModel");
    NaiveBayesModel sameModel = NaiveBayesModel.load(jsc.sc(), "target/tmp/myNaiveBayesModel");
    // $example off$
    jsc.stop();
}

19 votes: MmtfWriterTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(MmtfWriterTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
}

19 votes: MmtfImporterTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(MmtfImporterTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
}

19 votes: StructureToProteinDimersTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(StructureToProteinDimersTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
}

19 votes: PolymerInteractionFingerprintTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(PolymerInteractionFingerprintTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
}

19 votes: StructureToBioassemblyTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(StructureToBioassemblyTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
}

19 votes: ColumnarStructureXTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ColumnarStructureTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
}

19 votes: JavaGradientBoostingClassificationExample.java (MIT License, author huangyueranbbc)
public static void main(String[] args) {
    // $example on$
    SparkConf sparkConf = new SparkConf().setAppName("JavaGradientBoostedTreesClassificationExample");
    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
    // Load and parse the data file.
    String datapath = "data/mllib/sample_libsvm_data.txt";
    JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(jsc.sc(), datapath).toJavaRDD();
    // Split the data into training and test sets (30% held out for testing)
    JavaRDD<LabeledPoint>[] splits = data.randomSplit(new double[] { 0.7, 0.3 });
    JavaRDD<LabeledPoint> trainingData = splits[0];
    JavaRDD<LabeledPoint> testData = splits[1];
    // Train a GradientBoostedTrees model.
    // The defaultParams for Classification use LogLoss by default.
    BoostingStrategy boostingStrategy = BoostingStrategy.defaultParams("Classification");
    // Note: Use more iterations in practice.
    boostingStrategy.setNumIterations(3);
    boostingStrategy.getTreeStrategy().setNumClasses(2);
    boostingStrategy.getTreeStrategy().setMaxDepth(5);
    // Empty categoricalFeaturesInfo indicates all features are continuous.
    Map<Integer, Integer> categoricalFeaturesInfo = new HashMap<>();
    boostingStrategy.treeStrategy().setCategoricalFeaturesInfo(categoricalFeaturesInfo);
    final GradientBoostedTreesModel model = GradientBoostedTrees.train(trainingData, boostingStrategy);
    // Evaluate model on test instances and compute test error
    JavaPairRDD<Double, Double> predictionAndLabel = testData.mapToPair(new PairFunction<LabeledPoint, Double, Double>() {

        @Override
        public Tuple2<Double, Double> call(LabeledPoint p) {
            return new Tuple2<>(model.predict(p.features()), p.label());
        }
    });
    Double testErr = 1.0 * predictionAndLabel.filter(new Function<Tuple2<Double, Double>, Boolean>() {

        @Override
        public Boolean call(Tuple2<Double, Double> pl) {
            return !pl._1().equals(pl._2());
        }
    }).count() / testData.count();
    System.out.println("Test Error: " + testErr);
    System.out.println("Learned classification GBT model:\n" + model.toDebugString());
    // Save and load model
    model.save(jsc.sc(), "target/tmp/myGradientBoostingClassificationModel");
    GradientBoostedTreesModel sameModel = GradientBoostedTreesModel.load(jsc.sc(), "target/tmp/myGradientBoostingClassificationModel");
    // $example off$
    jsc.stop();
}

19 votes: CoordinationGeometryTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ColumnarStructureTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    List<String> pdbIds = Arrays.asList("5Y20");
    pdb = MmtfReader.downloadFullMmtfFiles(pdbIds, sc);
}

19 votes: StructureToBioJavaTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(StructureToBioJavaTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
}

19 votes: CustomReportServiceTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(AdvancedQueryTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
}

19 votes: SparkUtils.java (MIT License, author huangyueranbbc)
public static JavaSparkContext getRemoteSparkContext(Class clazz) {
    System.setProperty("HADOOP_USER_NAME", "root");
    /**
     * SparkConf: the first step is to create a SparkConf. On this object you can
     * set the run mode (local, standalone, YARN), the AppName (shown in the Web UI),
     * and the resource requirements of the Spark application.
     */
    SparkConf conf = getRemoteSparkConf(clazz);
    /**
     * From the SparkConf object a SparkContext (the Spark context) is created.
     * The SparkContext is the sole gateway to the cluster; when it is created,
     * it also creates the task scheduler.
     */
    return new JavaSparkContext(conf);
}

19 votes: SparkUtils.java (MIT License, author huangyueranbbc)
public static JavaSparkContext getLocalSparkContext(Class clazz) {
    System.setProperty("HADOOP_USER_NAME", "root");
    /**
     * SparkConf: the first step is to create a SparkConf. On this object you can
     * set the run mode (local, standalone, YARN), the AppName (shown in the Web UI),
     * and the resource requirements of the Spark application.
     */
    SparkConf conf = getLocalSparkConf(clazz);
    /**
     * From the SparkConf object a SparkContext (the Spark context) is created.
     * The SparkContext is the sole gateway to the cluster; when it is created,
     * it also creates the task scheduler.
     */
    return new JavaSparkContext(conf);
}
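The getLocalSparkConf and getRemoteSparkConf helpers called by the two methods above are not part of this listing. A minimal sketch of what they might look like; the master URL and executor memory are assumptions, not values from the original project.

import org.apache.spark.SparkConf;

public final class SparkConfFactory {

    // Local mode: "local[*]" uses all available cores on this machine.
    public static SparkConf getLocalSparkConf(Class clazz) {
        return new SparkConf().setMaster("local[*]").setAppName(clazz.getSimpleName());
    }

    // Cluster mode: the standalone master URL and memory setting are placeholders.
    public static SparkConf getRemoteSparkConf(Class clazz) {
        return new SparkConf().setMaster("spark://master:7077").setAppName(clazz.getSimpleName()).set("spark.executor.memory", "2g");
    }
}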

19 votes: MmtfReaderTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(MmtfReaderTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
}

19 votes: StructureToPolymerChainsTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(StructureToPolymerChainsTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
}

19 votes: StructureToCathDomainsTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(StructureToCathDomainsTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
}

19 votes: JavaBisectingKMeansExample.java (MIT License, author huangyueranbbc)
public static void main(String[] args) {
    SparkConf sparkConf = new SparkConf().setAppName("JavaBisectingKMeansExample");
    JavaSparkContext sc = new JavaSparkContext(sparkConf);
    // $example on$
    ArrayList<Vector> localData = Lists.newArrayList(Vectors.dense(0.1, 0.1), Vectors.dense(0.3, 0.3), Vectors.dense(10.1, 10.1), Vectors.dense(10.3, 10.3), Vectors.dense(20.1, 20.1), Vectors.dense(20.3, 20.3), Vectors.dense(30.1, 30.1), Vectors.dense(30.3, 30.3));
    JavaRDD<Vector> data = sc.parallelize(localData, 2);
    BisectingKMeans bkm = new BisectingKMeans().setK(4);
    BisectingKMeansModel model = bkm.run(data);
    System.out.println("Compute Cost: " + model.computeCost(data));
    Vector[] clusterCenters = model.clusterCenters();
    for (int i = 0; i < clusterCenters.length; i++) {
        Vector clusterCenter = clusterCenters[i];
        System.out.println("Cluster Center " + i + ": " + clusterCenter);
    }
    // $example off$
    sc.stop();
}

19 votes: JavaSqlNetworkWordCount.java (MIT License, author huangyueranbbc)
public static SparkSession getInstance(SparkConf sparkConf) {
    if (instance == null) {
        instance = SparkSession.builder().config(sparkConf).getOrCreate();
    }
    return instance;
}
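getInstance lazily creates a single SparkSession on first use and returns the cached instance afterwards; in the original streaming example this lets every micro-batch reuse one session instead of rebuilding it. A self-contained sketch of the pattern (class name and settings are placeholders):

import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;

public class SparkSessionSingletonDemo {

    private static transient SparkSession instance = null;

    public static SparkSession getInstance(SparkConf sparkConf) {
        if (instance == null) {
            instance = SparkSession.builder().config(sparkConf).getOrCreate();
        }
        return instance;
    }

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("SingletonDemo");
        // Both calls return the same lazily created session.
        SparkSession first = getInstance(conf);
        SparkSession second = getInstance(conf);
        System.out.println(first == second); // prints true
        first.stop();
    }
}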

19 votes: JavaGradientBoostingRegressionExample.java (MIT License, author huangyueranbbc)
public static void main(String[] args) {
    // $example on$
    SparkConf sparkConf = new SparkConf().setAppName("JavaGradientBoostedTreesRegressionExample");
    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
    // Load and parse the data file.
    String datapath = "data/mllib/sample_libsvm_data.txt";
    JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(jsc.sc(), datapath).toJavaRDD();
    // Split the data into training and test sets (30% held out for testing)
    JavaRDD<LabeledPoint>[] splits = data.randomSplit(new double[] { 0.7, 0.3 });
    JavaRDD<LabeledPoint> trainingData = splits[0];
    JavaRDD<LabeledPoint> testData = splits[1];
    // Train a GradientBoostedTrees model.
    // The defaultParams for Regression use SquaredError by default.
    BoostingStrategy boostingStrategy = BoostingStrategy.defaultParams("Regression");
    // Note: Use more iterations in practice.
    boostingStrategy.setNumIterations(3);
    boostingStrategy.getTreeStrategy().setMaxDepth(5);
    // Empty categoricalFeaturesInfo indicates all features are continuous.
    Map<Integer, Integer> categoricalFeaturesInfo = new HashMap<>();
    boostingStrategy.treeStrategy().setCategoricalFeaturesInfo(categoricalFeaturesInfo);
    final GradientBoostedTreesModel model = GradientBoostedTrees.train(trainingData, boostingStrategy);
    // Evaluate model on test instances and compute test error
    JavaPairRDD<Double, Double> predictionAndLabel = testData.mapToPair(new PairFunction<LabeledPoint, Double, Double>() {

        @Override
        public Tuple2<Double, Double> call(LabeledPoint p) {
            return new Tuple2<>(model.predict(p.features()), p.label());
        }
    });
    Double testMSE = predictionAndLabel.map(new Function<Tuple2<Double, Double>, Double>() {

        @Override
        public Double call(Tuple2<Double, Double> pl) {
            Double diff = pl._1() - pl._2();
            return diff * diff;
        }
    }).reduce(new Function2<Double, Double, Double>() {

        @Override
        public Double call(Double a, Double b) {
            return a + b;
        }
    }) / data.count();
    System.out.println("Test Mean Squared Error: " + testMSE);
    System.out.println("Learned regression GBT model:\n" + model.toDebugString());
    // Save and load model
    model.save(jsc.sc(), "target/tmp/myGradientBoostingRegressionModel");
    GradientBoostedTreesModel sameModel = GradientBoostedTreesModel.load(jsc.sc(), "target/tmp/myGradientBoostingRegressionModel");
    // $example off$
    jsc.stop();
}

18 votes: ReleaseDateFilterTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ReleaseDateFilterTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    // 1O6Y: released on 2003-01-30
    // 4MYA: released on 2014-01-01
    // 3VCO: released on 2013-03-06
    // 5N0Y: released on 2017-05-24
    List<String> pdbIds = Arrays.asList("1O6Y", "4MYA", "3VCO", "5N0Y");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}

18 votes: ContainsRnaChainTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ContainsRnaChainTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    // 2ONX: only L-protein chain
    // 1JLP: single L-protein chains with non-polymer capping group (NH2)
    // 5X6H: L-protein and DNA chain
    // 5L2G: DNA chain
    // 2MK1: D-saccharide
    // 5UX0: 2 L-protein, 2 RNA, 2 DNA chains
    // 2NCQ: 2 RNA chains
    List<String> pdbIds = Arrays.asList("2ONX", "1JLP", "5X6H", "5L2G", "2MK1", "5UX0", "2NCQ");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}

18 votes: RepartitionHadoopSequenceFile.java (Apache License 2.0, author sbl-sdsc)
/**
 * Repartitions an MMTF-Hadoop sequence file.
 *
 * @param args
 *            args[0] path to input Hadoop sequence file, args[1] path to
 *            output Hadoop sequence file, args[2] number of partitions
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(RepartitionHadoopSequenceFile.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    long start = System.nanoTime();
    if (args.length != 3) {
        System.out.println("Usage: RepartitionHadoopSequenceFile <input-path> <output-path> <number-of-partitions>");
        System.exit(1);
    }
    String inputPath = args[0];
    String outputPath = args[1];
    int numPartitions = Integer.parseInt(args[2]);
    JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.readSequenceFile(inputPath, sc);
    pdb = pdb.repartition(numPartitions);
    MmtfWriter.writeSequenceFile(outputPath, sc, pdb);
    long end = System.nanoTime();
    System.out.println("Time: " + TimeUnit.NANOSECONDS.toSeconds(end - start) + " sec.");
    sc.close();
}
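For reference, a hypothetical invocation of the class above, run with Spark and its dependencies on the classpath; all three argument values are placeholders:

RepartitionHadoopSequenceFile /path/to/input_sequence_file /path/to/output_sequence_file 24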

18 votes: RworkFilterTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(RworkFilterTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    // 2ONX: 0.172 rwork x-ray resolution
    // 2OLX: 0.183 rfree x-ray resolution
    // 3REC: n/a NMR structure
    // 1LU3: n/a EM structure
    List<String> pdbIds = Arrays.asList("2ONX", "2OLX", "3REC", "1LU3");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}

18 votes: DepositionDateFilterTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(DepositionDateFilterTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    // 4MYA: deposited on 2013-09-27
    // 1O6Y: deposited on 2002-10-21
    // 3VCO: deposited on 2012-01-04
    // 5N0Y: deposited on 2017-02-03
    List<String> pdbIds = Arrays.asList("4MYA", "1O6Y", "3VCO", "5N0Y");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}

18 votes: WildTypeTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(WildTypeTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    // 1PEN wildtype query 100 matches: 1PEN:1
    // 1OCZ two entities wildtype query 100 matches: 1OCZ:1, 1OCZ:2
    // 2ONX structure result for author query
    List<String> pdbIds = Arrays.asList("1PEN", "1OCZ", "2ONX");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}

18 votes: SequenceSimilarityDemo.java (Apache License 2.0, author sbl-sdsc)
/**
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(SequenceSimilarityDemo.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    String sequence = "NLVQFGVMIEKMTGKSALQYNDYGCYCGIGGSHWPVDQ";
    double eValueCutoff = 0.001;
    int sequenceIdentityCutoff = 40;
    boolean maskLowComplexity = true;
    // read PDB in MMTF format, split into polymer chains,
    // search by sequence similarity, and print sequences found
    MmtfReader.readReducedSequenceFile(sc)
            .flatMapToPair(new StructureToPolymerChains(false, true))
            .filter(new SequenceSimilarity(sequence, SequenceSimilarity.BLAST, eValueCutoff, sequenceIdentityCutoff, maskLowComplexity))
            .foreach(t -> System.out.println(t._1 + ": " + t._2.getEntitySequence(0)));
    sc.close();
}

18 votes: NotFilterExample.java (Apache License 2.0, author sbl-sdsc)
public static void main(String[] args) throws FileNotFoundException {
    String path = MmtfReader.getMmtfReducedPath();
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(NotFilterExample.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    long count = MmtfReader.readSequenceFile(path, sc) // read MMTF Hadoop sequence file
            .filter(new ContainsLProteinChain()) // retain PDB entries that exclusively contain L-peptide chains
            .filter(new NotFilter(new ContainsDnaChain())) // exclude entries that contain any DNA chain
            .count();
    System.out.println("# PDB entries with L-protein and without DNA chains: " + count);
    sc.close();
}

18 votes: PolymerCompositionTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(PolymerCompositionTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    // 2ONX: only L-protein chain
    // 1JLP: single L-protein chains with non-polymer capping group (NH2)
    // 5X6H: L-protein and DNA chain (with std. nucleotides)
    // 5L2G: DNA chain (with non-std. nucleotide)
    // 2MK1: D-saccharide
    // 5UZT: RNA chain (with std. nucleotides)
    // 1AA6: contains SEC, selenocysteine (21st amino acid)
    // 1NTH: contains PYL, pyrrolysine (22nd amino acid)
    List<String> pdbIds = Arrays.asList("2ONX", "1JLP", "5X6H", "5L2G", "2MK1", "5UZT", "1AA6", "1NTH");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}

18 votes: ContainsLProteinChainTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ContainsLProteinChainTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    // 2ONX: only L-protein chain
    // 1JLP: single L-protein chains with non-polymer capping group (NH2)
    // 5X6H: L-protein and DNA chain
    // 5L2G: DNA chain
    // 2MK1: D-saccharide
    List<String> pdbIds = Arrays.asList("2ONX", "1JLP", "5X6H", "5L2G", "2MK1");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}

18 votes: ContainsDProteinChainTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ContainsDProteinChainTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    // 2ONX: only L-protein chain
    // 1JLP: single L-protein chains with non-polymer capping group (NH2)
    // 5X6H: L-protein and DNA chain
    // 5L2G: DNA chain
    // 2MK1: D-saccharide
    // 2V5W: Chain C: GLY-GLY-GLY matches both D-protein and L-protein
    // 5XDP: L-protein and D-protein (modified)
    // 5GOD: 2 L-protein + 2 D-protein
    List<String> pdbIds = Arrays.asList("2ONX", "1JLP", "5X6H", "5L2G", "2MK1", "2V5W", "5XDP", "5GOD");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}

18 votes: FilterProteinDnaComplexes.java (Apache License 2.0, author sbl-sdsc)
public static void main(String[] args) throws FileNotFoundException {
    String path = MmtfReader.getMmtfReducedPath();
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(FilterProteinDnaComplexes.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    long count = MmtfReader.readSequenceFile(path, sc) // read MMTF Hadoop sequence file
            .filter(new ContainsLProteinChain()) // retain PDB entries that contain L-peptide chains
            .filter(new ContainsDnaChain()) // retain PDB entries that contain DNA chains
            .filter(new NotFilter(new ContainsRnaChain())) // filter out RNA-containing entries
            .count();
    System.out.println("# L-peptide/DNA complexes: " + count);
    sc.close();
}

18 votes: AuthorSearchDemo.java (Apache License 2.0, author sbl-sdsc)
public static void main(String[] args) throws IOException {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(AuthorSearchDemo.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    // query to find PDB structures for Doudna, J.A. as a deposition (audit) author
    // or as an author in the primary PDB citation
    String sqlQuery = "SELECT pdbid from audit_author " + "WHERE name LIKE 'Doudna%J.A.%' " + "UNION " + "SELECT pdbid from citation_author " + "WHERE citation_id = 'primary' AND name LIKE 'Doudna%J.A.%'";
    // read PDB and filter by author
    JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.readReducedSequenceFile(sc).filter(new PdbjMineSearch(sqlQuery));
    System.out.println("Number of entries matching query: " + pdb.count());
    sc.close();
}

18 votes: KinaseSearch.java (Apache License 2.0, author sbl-sdsc)
public static void main(String[] args) throws IOException {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(KinaseSearch.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    // query for human protein-serine/threonine kinases using SIFTS data
    String sql = "SELECT t.pdbid, t.chain FROM sifts.pdb_chain_taxonomy AS t  " + "JOIN sifts.pdb_chain_enzyme AS e ON (t.pdbid = e.pdbid AND t.chain = e.chain) " + "WHERE t.scientific_name = 'Homo sapiens' AND e.ec_number = '2.7.11.1'";
    // read PDB in MMTF format, split into polymer chains and search using
    // PdbJMineSearch
    JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.readReducedSequenceFile(sc).flatMapToPair(new StructureToPolymerChains()).filter(new PdbjMineSearch(sql));
    System.out.println("Number of entries matching query: " + pdb.count());
    sc.close();
}

18 votes: ContainsDSaccharideChainTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ContainsDSaccharideChainTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    // 2ONX: only L-protein chain
    // 1JLP: single L-protein chains with non-polymer capping group (NH2)
    // 5X6H: L-protein and L-DNA chain
    // 5L2G: L-DNA chain
    // 2MK1: As of V5 of PDBx/mmCIF, saccharides seem to be represented as monomers,
    // instead of polysaccharides, so none of these tests returns true anymore.
    List<String> pdbIds = Arrays.asList("2ONX", "1JLP", "5X6H", "5L2G", "2MK1");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}

18 votes: KeywordSearch.java (Apache License 2.0, author sbl-sdsc)
public static void main(String[] args) throws IOException {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(KeywordSearch.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    String sqlQuery = "SELECT pdbid from keyword_search('porin')";
    // read PDB and filter by keyword search
    JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.readReducedSequenceFile(sc).filter(new PdbjMineSearch(sqlQuery));
    pdb.keys().foreach(k -> System.out.println(k));
    System.out.println("Number of entries matching query: " + pdb.count());
    sc.close();
}

18 votes: WildTypeQuery.java (Apache License 2.0, author sbl-sdsc)
/**
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(WildTypeQuery.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    boolean includeExpressionTags = true;
    int sequenceCoverage = 95;
    long count = MmtfReader.readReducedSequenceFile(sc).filter(new WildType(includeExpressionTags, sequenceCoverage)).count();
    System.out.println(count);
    sc.close();
}

18 votes: OrFilterTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(OrFilterTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    // 2ONX: only L-protein chain
    // 1JLP: single L-protein chains with non-polymer capping group (NH2)
    // 5X6H: L-protein and non-std. DNA chain
    // 5L2G: DNA chain
    // 2MK1: D-saccharide
    // 5UZT: RNA chain (with std. nucleotides)
    // 1AA6: contains SEC, selenocysteine (21st amino acid)
    // 1NTH: contains PYL, pyrrolysine (22nd amino acid)
    List<String> pdbIds = Arrays.asList("2ONX", "1JLP", "5X6H", "5L2G", "2MK1", "5UZT", "1AA6", "1NTH");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}

18 votes: ContainsGroupTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ContainsGroupTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    // 1STP: only L-protein chain
    // 1JLP: single L-protein chains with non-polymer capping group (NH2)
    // 5X6H: L-protein and DNA chain
    // 5L2G: DNA chain
    // 2MK1: D-saccharide
    List<String> pdbIds = Arrays.asList("1STP", "1JLP", "5X6H", "5L2G", "2MK1");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}

18 votes: ContainsSequenceRegexTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ContainsSequenceRegexTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    // 5KE8: contains Zinc finger motif
    // 1JLP: does not contain Zinc finger motif
    // 5VAI: contains Walker P loop
    List<String> pdbIds = Arrays.asList("5KE8", "1JLP", "5VAI");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}

18 votes: ResolutionFilterTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ResolutionFilterTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    // 2ONX: 1.52 A x-ray resolution
    // 2OLX: 1.42 A x-ray resolution
    // 3REC: n/a NMR structure
    // 1LU3: 16.8 A EM resolution
    List<String> pdbIds = Arrays.asList("2ONX", "2OLX", "3REC", "1LU3");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}

18 votes: ContainsDnaChainTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ContainsDnaChainTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    // 2ONX: only L-protein chain
    // 1JLP: single L-protein chains with non-polymer capping group (NH2)
    // 5X6H: L-protein and non-std. DNA chain
    // 5L2G: DNA chain
    // 2MK1: D-saccharide
    List<String> pdbIds = Arrays.asList("2ONX", "1JLP", "5X6H", "5L2G", "2MK1");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}

18 votes: CustomReportQueryTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(CustomReportQueryTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    List<String> pdbIds = Arrays.asList("5JDE", "5CU4", "5L6W", "5UFU", "5IHB");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}

18 votes: SecondaryStructureTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(SecondaryStructureTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    // 1AIE: all alpha protein 20 alpha out of 31 = 0.645 helical
    // 1E0N: all beta protein, NMR structure with 10 models, 13 beta out of 27 = 0.481 sheet
    // 1EM7: alpha + beta, 14 alpha + 23 beta out of 56 = 0.25 helical and 0.411 sheet
    // 2C7M: 2 chains, alpha + beta (DSSP in MMTF doesn't match DSSP on RCSB PDB website)
    List<String> pdbIds = Arrays.asList("1AIE", "1E0N", "1EM7", "2C7M");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}

18 votes: PiscesTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(PolymerCompositionTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    // "4R4X.A" and "5X42.B" should pass the filter
    List<String> pdbIds = Arrays.asList("5X42", "4R4X", "2ONX", "1JLP");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}

18 votes: PerformHadoopUpdate.java (Apache License 2.0, author sbl-sdsc)
public static void main(String[] args) throws FileNotFoundException {
    // instantiate Spark. Each Spark application needs these two lines of code.
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ReadMmtfReduced.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    HadoopUpdate.performUpdate(sc);
    sc.close();
}

18 votes: FilterExclusivelyByLProteins.java (Apache License 2.0, author sbl-sdsc)
public static void main(String[] args) throws FileNotFoundException {
    String path = MmtfReader.getMmtfReducedPath();
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(FilterExclusivelyByLProteins.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    boolean exclusive = true;
    long count = MmtfReader.readSequenceFile(path, sc) // read MMTF Hadoop sequence file
            .filter(new ContainsLProteinChain(exclusive))
            .count();
    System.out.println("# L-proteins: " + count);
    sc.close();
}

18 votes: AdvancedQueryTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(AdvancedQueryTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    // 1PEN wildtype query 100 matches: 1PEN:1
    // 1OCZ two entities wildtype query 100 matches: 1OCZ:1, 1OCZ:2
    // 2ONX structure result for author query
    // 5L6W two chains: chain L is EC 2.7.11.1, chain C is not EC 2.7.11.1
    // 5KHU many chains, chain Q is EC 2.7.11.1
    // 1F3M entity 1: chains A,B, entity 2: chains B,C, all chains are EC 2.7.11.1
    List<String> pdbIds = Arrays.asList("1PEN", "1OCZ", "2ONX", "5L6W", "5KHU", "1F3M");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}

18 votes: TraverseStructureHierarchy.java (Apache License 2.0, author sbl-sdsc)
public static void main(String[] args) {
    // instantiate Spark. Each Spark application needs these two lines of code.
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ReadMmtfReduced.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    // List<String> pdbIds = Arrays.asList("5UTV"); // multiple models
    // List<String> pdbIds = Arrays.asList("1BZ1"); // multiple protein chains
    // List<String> pdbIds = Arrays.asList("1STP"); // single protein chain
    // structure with 2 bioassemblies
    List<String> pdbIds = Arrays.asList("1HV4");
    // List<String> pdbIds = Arrays.asList("2NBK"); // single protein chain
    JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.downloadFullMmtfFiles(pdbIds, sc).cache();
    pdb.foreach(t -> TraverseStructureHierarchy.printAll(t._2));
}

18 votes: RfreeFilterTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(RfreeFilterTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    // 2ONX: 0.202 rfree x-ray resolution
    // 2OLX: 0.235 rfree x-ray resolution
    // 3REC: n/a NMR structure
    // 1LU3: n/a EM structure
    List<String> pdbIds = Arrays.asList("2ONX", "2OLX", "3REC", "1LU3");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}

18 votes: PolymerSequenceExtractorTest.java (Apache License 2.0, author sbl-sdsc)
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(PolymerSequenceExtractorTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    List<String> pdbIds = Arrays.asList("1STP", "4HHB");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}