HDFS File operations using Java APIs

In this article we will see how to perform file operations on HDFS using the Java APIs. The Hadoop core jar must be added to the classpath. The class org.apache.hadoop.fs.FileSystem provides the APIs for performing operations on HDFS.

1. Read a file from HDFS

//Imports
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

//Code to read a file from HDFS
String hdfsUrl = "hdfs://192.168.1.1:54310"; //URL of the NameNode with the port on which HDFS is accessible
Configuration configuration = new Configuration();
FileSystem fs = FileSystem.get(new URI(hdfsUrl), configuration);

Path srcPath = new Path("/home/hadoop/example.txt"); //We will read the file example.txt from HDFS
StringBuilder str = new StringBuilder();

//try-with-resources guarantees the reader (and underlying HDFS stream) is
//closed even if readLine() throws; an explicit charset avoids depending on
//the platform default encoding.
try (BufferedReader br = new BufferedReader(
        new InputStreamReader(fs.open(srcPath), StandardCharsets.UTF_8))) {
    String line;
    while ((line = br.readLine()) != null) {
        str.append(line).append('\n');
    }
}
System.out.println(str.toString()); //Print the file content

2. Write content to a file on HDFS

//Imports
import java.io.BufferedWriter;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;

//Code to write content to a file on HDFS
String hdfsUrl = "hdfs://192.168.1.1:54310"; //URL of the NameNode with the port on which HDFS is accessible
Configuration configuration = new Configuration();

String username = "hadoopUser"; //name of the user having write permission on HDFS
//Run the write as the given HDFS user; hdfsUrl and configuration are
//effectively final, so the anonymous class may capture them (Java 8+).
UserGroupInformation ugi = UserGroupInformation.createRemoteUser(username);
ugi.doAs(new PrivilegedExceptionAction<Void>() {
    public Void run() throws Exception {

        String text = "Hello, this is sample text to be written to a HDFS file";
        FileSystem fs = FileSystem.get(new URI(hdfsUrl), configuration);

        //We will write text to the file /home/hadoop/example2.txt on HDFS.
        //try-with-resources flushes and closes the writer (and the HDFS
        //output stream) even if write() throws; an explicit charset avoids
        //depending on the platform default encoding.
        OutputStream outputStream = fs.create(new Path("/home/hadoop/example2.txt"));
        try (BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(outputStream, StandardCharsets.UTF_8))) {
            bw.write(text);
        }
        return null;
    }
});

3. Copy a file from local system to HDFS

//Imports
import java.net.URI;
import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;

//Code to copy a file from the local system to HDFS
String hdfsUrl = "hdfs://192.168.1.1:54310"; //URL of the NameNode with the port on which HDFS is accessible
Configuration configuration = new Configuration();

String username = "hadoopUser"; //name of the user having write permission on HDFS
//Run the copy as the given HDFS user; hdfsUrl and configuration are
//effectively final, so the anonymous class may capture them (Java 8+).
UserGroupInformation ugi = UserGroupInformation.createRemoteUser(username);
ugi.doAs(new PrivilegedExceptionAction<Void>() {
    public Void run() throws Exception {
        Path srcPath = new Path("C:/example3.txt"); //path of the file on the local system
        Path destPath = new Path("/home/hadoop/example3.txt"); //destination path on HDFS

        FileSystem fileSystem = FileSystem.get(new URI(hdfsUrl), configuration);
        //copyFromLocalFile keeps the local source file (delSrc defaults to false)
        fileSystem.copyFromLocalFile(srcPath, destPath);
        return null;
    }
});

Leave a Reply

Your email address will not be published. Required fields are marked *