Network Security Internet Technology Development Database Servers Mobile Phone Android Software Apple Software Computer Software News IT Information

In addition to Weibo, there is also WeChat

Please pay attention

WeChat public account

Shulou

Java code that reads gz, zip, and tar.gz files from HDFS, decompresses them, and saves the results back to HDFS

2025-01-16 Update From: SLTechnology News&Howtos shulou NAV: SLTechnology News&Howtos > Internet Technology >

Share

Shulou(Shulou.com)06/03 Report--

package main.java;

import java.io.*;
import java.io.IOException;
import java.net.URI;
import java.util.LinkedList;
import java.util.List;
import java.util.zip.*;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/ * *

* decompress the tar.gz zip gz package. The data source and output directory here are both HDFS.

*

, /

Public class GZipHdfs {

Private BufferedOutputStream bufferedOutputStream

String zipfileName = null

Public GZipHdfs (String fileName) {

This.zipfileName = fileName

}

/ *

* execution entry. RarFileName is the path of the file to be unzipped (specific to files), and the path of destDir is HDFS.

, /

Public List unTargzFile (String rarFileName, String destDir) throws IOException {

GZipHdfs GZipHdfs = new GZipHdfs (rarFileName)

Configuration conf = new Configuration ()

FileSystem fs = FileSystem.get (URI.create (destDir), conf)

Boolean result = fs.isDirectory (new Path (destDir))

If (! result) {

Fs.mkdirs (new Path (destDir))

}

String outputDirectory = destDir

List r = GZipHdfs.defUnTargzFile (outputDirectory, fs)

Fs.close ()

Return r

}

Public List defUnTargzFile (String outputDirectory, FileSystem fs) {

FileInputStream fis = null

ArchiveInputStream in = null

BufferedInputStream bufferedInputStream = null

List tarList = new LinkedList ()

Try {

FSDataInputStream hdfsInputStream = fs.open (new Path (zipfileName))

GZIPInputStream is = new GZIPInputStream (new BufferedInputStream (

HdfsInputStream))

In = new ArchiveStreamFactory () .createArchiveInputStream ("tar", is)

BufferedInputStream = new BufferedInputStream (in)

TarArchiveEntry entry = (TarArchiveEntry) in.getNextEntry ()

While (entry! = null) {

String name = entry.getName ()

String [] names = name.split ("/")

String fileName = outputDirectory

For (int I = 0; I < names.length; iTunes +) {

String str = names [I]

FileName = fileName + "/" + str

}

FSDataOutputStream hdfsOutStream = fs.create (new Path (fileName))

BufferedOutputStream = new BufferedOutputStream (

HdfsOutStream)

Int b

While ((b = bufferedInputStream.read ())! =-1) {

BufferedOutputStream.write (b)

}

BufferedOutputStream.flush ()

BufferedOutputStream.close ()

Entry = (TarArchiveEntry) in.getNextEntry ()

TarList.add (name)

}

} catch (FileNotFoundException e) {

E.printStackTrace ()

} catch (IOException e) {

E.printStackTrace ()

} catch (ArchiveException e) {

E.printStackTrace ()

} finally {

Try {

If (bufferedInputStream! = null) {

BufferedInputStream.close ()

}

} catch (IOException e) {

E.printStackTrace ()

}

}

Return tarList

}

/ *

* execution entry. RarFileName is the path of the file to be unzipped (specific to files), and the path of destDir is HDFS.

, /

Public List unZipFile (String rarFileName, String destDir) throws IOException {

GZipHdfs GZipHdfs = new GZipHdfs (rarFileName)

Configuration conf = new Configuration ()

FileSystem fs = FileSystem.get (URI.create (destDir), conf)

Boolean result = fs.isDirectory (new Path (destDir))

If (! result) {

Fs.mkdirs (new Path (destDir))

}

String outputDirectory = destDir

List r = GZipHdfs.defUnZipFile (outputDirectory, fs)

Fs.close ()

Return r

}

Public List defUnZipFile (String outputDirectory, FileSystem fs) {

FileInputStream fis = null

ArchiveInputStream in = null

BufferedInputStream bufferedInputStream = null

List zipList = new LinkedList ()

Try {

FSDataInputStream hdfsInputStream = fs.open (new Path (zipfileName))

ZipInputStream is = new ZipInputStream (new BufferedInputStream (

HdfsInputStream))

BufferedInputStream = new BufferedInputStream (is)

ZipEntry entry = is.getNextEntry ()

While (entry! = null) {

String name = entry.getName ()

String [] names = name.split ("/")

String fileName = outputDirectory

For (int I = 0; I < names.length; iTunes +) {

String str = names [I]

FileName = fileName + "/" + str

}

FSDataOutputStream hdfsOutStream = fs.create (new Path (fileName))

BufferedOutputStream = new BufferedOutputStream (

HdfsOutStream)

Int b

While ((b = bufferedInputStream.read ())! =-1) {

BufferedOutputStream.write (b)

}

BufferedOutputStream.flush ()

BufferedOutputStream.close ()

Entry = (ZipEntry) is.getNextEntry ()

ZipList.add (name)

}

} catch (FileNotFoundException e) {

E.printStackTrace ()

} catch (IOException e) {

E.printStackTrace ()

} finally {

Try {

If (bufferedInputStream! = null) {

BufferedInputStream.close ()

}

} catch (IOException e) {

E.printStackTrace ()

}

}

Return zipList

}

/ *

* execution entry. RarFileName is the path of the file to be unzipped (specific to files), and the path of destDir is HDFS.

, /

Public List unGZipFile (String rarFileName, String destDir) throws IOException {

GZipHdfs GZipHdfs = new GZipHdfs (rarFileName)

Configuration conf = new Configuration ()

FileSystem fs = FileSystem.get (URI.create (destDir), conf)

Boolean result = fs.isDirectory (new Path (destDir))

If (! result) {

Fs.mkdirs (new Path (destDir))

}

String outputDirectory = destDir

List r = GZipHdfs.defUnGZipFile (outputDirectory, fs)

Fs.close ()

Return r

}

Public List defUnGZipFile (String outputDirectory, FileSystem fs) {

FileInputStream fis = null

ArchiveInputStream in = null

BufferedInputStream bufferedInputStream = null

List tarList = new LinkedList ()

Try {

FSDataInputStream hdfsInputStream = fs.open (new Path (zipfileName))

GzipCompressorInputStream is = new GzipCompressorInputStream (new BufferedInputStream (

HdfsInputStream))

BufferedInputStream = new BufferedInputStream (is)

String [] nameList = zipfileName.split ("/")

String name= nameList [nameList.length-1] .replace (".gz", "")

String fileName = outputDirectory+ "/" + name

FSDataOutputStream hdfsOutStream = fs.create (new Path (fileName))

BufferedOutputStream = new BufferedOutputStream (

HdfsOutStream)

Int b

While ((b = bufferedInputStream.read ())! =-1) {

BufferedOutputStream.write (b)

}

BufferedOutputStream.flush ()

BufferedOutputStream.close ()

TarList.add (name)

} catch (FileNotFoundException e) {

E.printStackTrace ()

} catch (IOException e) {

E.printStackTrace ()

} finally {

Try {

If (bufferedInputStream! = null) {

BufferedInputStream.close ()

}

} catch (IOException e) {

E.printStackTrace ()

}

}

Return tarList

}

}

Subscribe to "Shulou Technology Information" for the latest news, interesting stories, and hot topics in the IT industry, and stay on top of the latest Internet news, technology news, and IT industry trends.

Views: 0

*The comments in the above article only represent the author's personal views and do not represent the views and positions of this website. If you have more insights, please feel free to contribute and share.

Share To

Internet Technology

Wechat

© 2024 shulou.com SLNews company. All rights reserved.

12
Report