
Java (MapReduce)

Currently, users can run dynamic decryption queries on Text or Sequence files in Java. This section details the steps necessary to set up the decryption libraries for Java.

Note: This section assumes the library is run as a MapReduce Java program using the DgDecrypter API. Running as a non-MapReduce Java program is covered in the next section.

Configuration

Invocation:

>hadoop jar sample.jar /user/dguser/text/testData.txt /dgwout/

where:
sample.jar is the runnable jar for the example,
/user/dguser/text/testData.txt is the input HDFS file, and
/dgwout/ is the output HDFS directory.

The dgSecure.properties file accompanying the DgDecrypter library has the same content as in the Hive case above.
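For reference, below is a minimal sketch of what that file might contain, based only on the keys the sample program below reads (distro, fdn, mjt). The values shown are placeholders for your cluster; the dgSecure.properties shipped with your installation may contain additional settings.

# Hypothetical dgSecure.properties sketch -- keys taken from the sample code below,
# values are placeholders only.
distro=cdh4
fdn=hdfs://namenode-host:8020
mjt=jobtracker-host:8021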

Sample

The key point in the Java sample program is that the InputFormat needs to be overridden with the EncryptedTextInputFormat class:

conf.setInputFormat(EncryptedTextInputFormat.class);

The rest of the code is the same as for any other MapReduce program. EncryptedTextInputFormat transparently takes care of ACL-based decryption. The preJobHook call to the DgDecrypter library is needed at the beginning of the program to handle the secure retrieval and propagation of the keys.
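Taken together, the DgDecrypter-specific additions come down to those two calls. The minimal sketch below condenses them out of the full sample that follows; it uses the same classes as the sample (DgHadoopHook, EncryptedTextInputFormat), while the class name MinimalEncryptedJob, the job settings, and the mapper/reducer wiring are illustrative and would be replaced by your own job logic.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextOutputFormat;

import com.dataguise.decrypterlib.java.textinputformat.EncryptedTextInputFormat;
import com.dataguise.decrypterlib.java.util.DgHadoopHook;

public class MinimalEncryptedJob {
    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(MinimalEncryptedJob.class);

        // 1. Securely retrieve and propagate the decryption keys at the start of the program.
        DgHadoopHook.preJobHook(conf, args);

        // 2. Override the InputFormat so input splits are decrypted per the user's ACLs.
        conf.setInputFormat(EncryptedTextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        // Mapper, reducer, and key/value classes are configured here
        // exactly as in any other MapReduce job.

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}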

The complete sample program:
package com.dataguise.decrypterlib.java.examples.wordcount;

import java.io.IOException;
import java.util.Iterator;
import java.util.Random;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

import com.dataguise.decrypterlib.java.textinputformat.EncryptedTextInputFormat;
import com.dataguise.decrypterlib.java.util.DecrypterConfigParams;
import com.dataguise.decrypterlib.java.util.DgHadoopHook;

public class WordCount {

    public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            // System.out.println("Key: " + key + " Value: " + value);
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, one);
                System.out.println("data: " + word);
            }
        }
    }

    public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) {
        // Initialize parameter settings from the dgSecure.properties file.
        java.util.Map<String, String> configMap = null;
        try {
            configMap = DecrypterConfigParams.getHadoopIDPInfo();
        } catch (IOException e2) {
            e2.printStackTrace();
        }

        JobConf conf = new JobConf(WordCount.class);

        // Point the job at the right file system and job tracker for the distribution in use.
        if (configMap.get("distro").toLowerCase().equals("mapr")) {
            conf.set("fs.default.name", "maprfs:///");
            conf.set("mapred.job.tracker", "maprfs:///");
        } else if (configMap.get("distro").toLowerCase().equals("cdh4")) {
            conf.set("fs.default.name", configMap.get("fdn"));
            conf.set("mapred.job.tracker", configMap.get("mjt"));
        }

        System.out.println("fs default name: " + conf.get("fs.default.name"));
        System.out.println("mapred job tracker: " + conf.get("mapred.job.tracker"));

        // If you want to run this job from Eclipse, export the project as a jar file
        // and add the line below to upload the jar to the cluster.
        conf.set("mapred.jar", "D:/DgDecrypter.jar");

        // preJobHook handles the secure retrieval and propagation of the keys.
        try {
            DgHadoopHook.preJobHook(conf, args);
        } catch (IOException e1) {
            e1.printStackTrace();
            System.exit(1);
        }

        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(Map.class);
        conf.setCombinerClass(Reduce.class);
        conf.setReducerClass(Reduce.class);

        // Use EncryptedTextInputFormat to read the HDFS chunks and decrypt data,
        // subject to the user having the correct ACL permissions.
        conf.setInputFormat(EncryptedTextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        conf.setJarByClass(WordCount.class);

        FileOutputFormat.setOutputPath(conf, new Path(args[1] + "_" + new Random().nextInt(1000)));
        FileInputFormat.setInputPaths(conf, new Path(args[0]));

        System.out.println("submitting the job");
        try {
            JobClient.runJob(conf);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}