In this article we will see how to perform file operations on HDFS using the Java APIs. The Hadoop core jar must be added to the classpath. The class org.apache.hadoop.fs.FileSystem provides the APIs for performing operations on HDFS.
1. Read a file from HDFS
//Imports import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; //Code to read a file String hdfsUrl = "hdfs://192.168.1.1:54310" ; //url of namenode with port on which HDFS is accessible Configuration configuration = new Configuration(); FileSystem fs = FileSystem.get(new URI(hdfsUrl), configuration); StringBuilder str = new StringBuilder(); Path srcPath = new Path("/home/hadoop/example.txt"); //We will read a file example.txt from HDFS BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(srcPath))); String line; while ((line= br.readLine()) != null) { str.append(line); str.append("\n"); } br.close(); System.out.println(str.toString()); //Print the file content
2. Write content to a file on HDFS
//Imports import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.security.UserGroupInformation; //Code to write content to a file on hdfs String hdfsUrl = "hdfs://192.168.1.1:54310" ; //url of namenode with port on which HDFS is accessible Configuration configuration = new Configuration(); String username = "hadoopUser"; //name of the user having write permission on HDFS UserGroupInformation ugi = UserGroupInformation.createRemoteUser(username); ugi.doAs(new PrivilegedExceptionAction<Void>() { public Void run() throws Exception { String text = "Hello, this is sample text to be written to a HDFS file"; FileSystem fs= FileSystem.get(new URI(hdfsUrl), configuration); //We will write text to the file /home/hadoop/example2.txt on HDFS OutputStream outputStream = fs.create(new Path("/home/hadoop/example2.txt")); BufferedWriter br = new BufferedWriter(new OutputStreamWriter(outputStream)); br.write(text); br.close(); return null; } });
3. Copy a file from local system to HDFS
//Imports import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.security.UserGroupInformation; //Code to copy a file from local system to hdfs String hdfsUrl = "hdfs://192.168.1.1:54310" ; //url of namenode with port on which HDFS is accessible Configuration configuration = new Configuration(); String username = "hadoopUser"; //name of the user having write permission on HDFS UserGroupInformation ugi = UserGroupInformation.createRemoteUser(username); ugi.doAs(new PrivilegedExceptionAction<Void>() { public Void run() throws Exception { Path srcPath = new Path("C:/example3.txt"); // path of file on local system Path destPath = new Path("/home/hadoop/example3.txt"); //path of file on hdfs FileSystem fileSystem = FileSystem.get(new URI(hdfsUrl), configuration); fileSystem.copyFromLocalFile(srcPath, destPath); return null; } });