| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| package org.apache.commons.compress.archivers.dump; |
| |
| import org.apache.commons.compress.archivers.ArchiveException; |
| import org.apache.commons.compress.archivers.ArchiveInputStream; |
| |
| import java.io.EOFException; |
| import java.io.IOException; |
| import java.io.InputStream; |
| |
| import java.util.Arrays; |
| import java.util.Comparator; |
| import java.util.HashMap; |
| import java.util.Map; |
| import java.util.PriorityQueue; |
| import java.util.Queue; |
| import java.util.Stack; |
| |
| /** |
| * The DumpArchiveInputStream reads a UNIX dump archive as an InputStream. |
| * Methods are provided to position at each successive entry in |
| * the archive, and then read each entry as a normal input stream |
| * using read(). |
| * |
| * @NotThreadSafe |
| */ |
| public class DumpArchiveInputStream extends ArchiveInputStream { |
| // summary built from the first record of the archive (see constructor). |
| private DumpArchiveSummary summary; |
| // most recently parsed segment header; also drives sparse-record checks. |
| private DumpArchiveEntry active; |
| // set once the underlying stream has been closed. |
| private boolean isClosed; |
| // set once an END segment has been seen. |
| private boolean hasHitEOF; |
| // size of the current entry's data as exposed through read(). |
| private long entrySize; |
| // number of bytes of the current entry already returned by read(). |
| private long entryOffset; |
| // index of the next data record within the active segment header. |
| private int readIdx; |
| // holds one record (TP_SIZE bytes) of entry data for read(). |
| private byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE]; |
| // scratch buffer for directory data; grown on demand in readDirectoryEntry(). |
| private byte[] blockBuffer; |
| // position within readBuf of the next byte to hand out. |
| private int recordOffset; |
| // raw byte count captured just before the current entry's header was read. |
| private long filepos; |
| protected TapeInputStream raw; |
| |
| // map of ino -> dirent entry. We can use this to reconstruct full paths. |
| private Map<Integer, Dirent> names = new HashMap<Integer, Dirent>(); |
| |
| // map of ino -> (directory) entry when we're missing one or more elements in the path. |
| private Map<Integer, DumpArchiveEntry> pending = new HashMap<Integer, DumpArchiveEntry>(); |
| |
| // queue of (directory) entries where we now have the full path. |
| private Queue<DumpArchiveEntry> queue; |
| |
| /** |
| * Constructor. |
| * |
| * @param is |
| * @throws ArchiveException |
| */ |
| public DumpArchiveInputStream(InputStream is) throws ArchiveException { |
| this.raw = new TapeInputStream(is); |
| this.hasHitEOF = false; |
| |
| try { |
| // read header, verify it's a dump archive. |
| byte[] headerBytes = raw.readRecord(); |
| |
| if (!DumpArchiveUtil.verify(headerBytes)) { |
| throw new UnrecognizedFormatException(); |
| } |
| |
| // get summary information |
| summary = new DumpArchiveSummary(headerBytes); |
| |
| // reset buffer with actual block size. |
| raw.resetBlockSize(summary.getNTRec(), summary.isCompressed()); |
| |
| // allocate our read buffer. |
| blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE]; |
| |
| // skip past CLRI and BITS segments since we don't handle them yet. |
| readCLRI(); |
| readBITS(); |
| } catch (IOException ex) { |
| throw new ArchiveException(ex.getMessage(), ex); |
| } |
| |
| // put in a dummy record for the root node. |
| Dirent root = new Dirent(2, 2, 4, "."); |
| names.put(Integer.valueOf(2), root); |
| |
| // use priority based on queue to ensure parent directories are |
| // released first. |
| queue = new PriorityQueue<DumpArchiveEntry>(10, |
| new Comparator<DumpArchiveEntry>() { |
| public int compare(DumpArchiveEntry p, DumpArchiveEntry q) { |
| if ((p.getOriginalName() == null) || (q.getOriginalName() == null)) { |
| return Integer.MAX_VALUE; |
| } |
| |
| return p.getOriginalName().compareTo(q.getOriginalName()); |
| } |
| }); |
| } |
| |
| /** |
| * Returns the number of bytes read, narrowed to an int. |
| * |
| * @return the value of {@link #getBytesRead()} cast to int |
| * @deprecated use {@link #getBytesRead()}, which is not narrowed |
| */ |
| @Deprecated |
| @Override |
| public int getCount() { |
| final long total = getBytesRead(); |
| return (int) total; |
| } |
| |
| /** |
| * Returns the number of bytes read, as reported by the underlying tape stream. |
| * |
| * @return the byte count reported by the tape stream |
| */ |
| @Override |
| public long getBytesRead() { |
| final long consumed = raw.getBytesRead(); |
| return consumed; |
| } |
| |
| /** |
| * Gives access to the summary built from the archive's first record. |
| * |
| * @return the archive summary information |
| */ |
| public DumpArchiveSummary getSummary() { |
| return this.summary; |
| } |
| |
| /** |
| * Read CLRI (deleted inode) segment. |
| */ |
| private void readCLRI() throws IOException { |
| byte[] readBuf = raw.readRecord(); |
| |
| if (!DumpArchiveUtil.verify(readBuf)) { |
| throw new InvalidFormatException(); |
| } |
| |
| active = DumpArchiveEntry.parse(readBuf); |
| |
| if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) { |
| throw new InvalidFormatException(); |
| } |
| |
| // we don't do anything with this yet. |
| if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount()) |
| == -1) { |
| throw new EOFException(); |
| } |
| readIdx = active.getHeaderCount(); |
| } |
| |
| /** |
| * Read BITS segment. |
| */ |
| private void readBITS() throws IOException { |
| byte[] readBuf = raw.readRecord(); |
| |
| if (!DumpArchiveUtil.verify(readBuf)) { |
| throw new InvalidFormatException(); |
| } |
| |
| active = DumpArchiveEntry.parse(readBuf); |
| |
| if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) { |
| throw new InvalidFormatException(); |
| } |
| |
| // we don't do anything with this yet. |
| if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount()) |
| == -1) { |
| throw new EOFException(); |
| } |
| readIdx = active.getHeaderCount(); |
| } |
| |
| /** |
| * Reads the next entry; simply forwards to {@link #getNextEntry()}. |
| * |
| * @return whatever {@link #getNextEntry()} returns |
| * @throws IOException if {@link #getNextEntry()} fails |
| */ |
| public DumpArchiveEntry getNextDumpEntry() throws IOException { |
| final DumpArchiveEntry next = getNextEntry(); |
| return next; |
| } |
| |
| /** |
| * Read the next entry. |
| * |
| * Entries already waiting in the queue are returned first. Otherwise |
| * segment headers are read until one yields an entry whose full path |
| * can be built; entries whose path cannot be built yet are deferred by |
| * getPath() and the loop moves on to the next header. |
| * |
| * @return the next entry, or null once an END segment has been seen |
| * @throws IOException if a header fails verification or the end of the |
| * stream is hit while skipping data |
| */ |
| @Override |
| public DumpArchiveEntry getNextEntry() throws IOException { |
| DumpArchiveEntry entry = null; |
| String path = null; |
| |
| // is there anything in the queue? |
| if (!queue.isEmpty()) { |
| return queue.remove(); |
| } |
| |
| // keep reading headers until we have an entry with a resolvable path. |
| while (entry == null) { |
| if (hasHitEOF) { |
| return null; |
| } |
| |
| // skip any remaining records in this segment for prior file. |
| // we might still have holes... easiest to do it |
| // block by block. We may want to revisit this if |
| // the unnecessary decompression time adds up. |
| while (readIdx < active.getHeaderCount()) { |
| if (!active.isSparseRecord(readIdx++) |
| && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) { |
| throw new EOFException(); |
| } |
| } |
| |
| readIdx = 0; |
| // remember where this header starts; stored on the entry below. |
| filepos = raw.getBytesRead(); |
| |
| byte[] headerBytes = raw.readRecord(); |
| |
| if (!DumpArchiveUtil.verify(headerBytes)) { |
| throw new InvalidFormatException(); |
| } |
| |
| active = DumpArchiveEntry.parse(headerBytes); |
| |
| // skip any remaining segments for prior file. |
| while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) { |
| // only the non-hole records are skipped in the stream. |
| if (raw.skip(DumpArchiveConstants.TP_SIZE |
| * (active.getHeaderCount() |
| - active.getHeaderHoles())) == -1) { |
| throw new EOFException(); |
| } |
| |
| filepos = raw.getBytesRead(); |
| headerBytes = raw.readRecord(); |
| |
| if (!DumpArchiveUtil.verify(headerBytes)) { |
| throw new InvalidFormatException(); |
| } |
| |
| active = DumpArchiveEntry.parse(headerBytes); |
| } |
| |
| // check if this is an end-of-volume marker. |
| if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) { |
| hasHitEOF = true; |
| isClosed = true; |
| raw.close(); |
| |
| return null; |
| } |
| |
| entry = active; |
| |
| if (entry.isDirectory()) { |
| // consumes the directory's data and records its children in names. |
| readDirectoryEntry(active); |
| |
| // now we create an empty InputStream. |
| entryOffset = 0; |
| entrySize = 0; |
| // marks every record of the segment as already consumed. |
| readIdx = active.getHeaderCount(); |
| } else { |
| entryOffset = 0; |
| entrySize = active.getEntrySize(); |
| readIdx = 0; |
| } |
| |
| // forces read() to load a fresh record before copying any data. |
| recordOffset = readBuf.length; |
| |
| path = getPath(entry); |
| |
| // no path yet: getPath() has parked the entry in pending, so try the next header. |
| if (path == null) { |
| entry = null; |
| } |
| } |
| |
| entry.setName(path); |
| entry.setSimpleName(names.get(Integer.valueOf(entry.getIno())).getName()); |
| entry.setOffset(filepos); |
| |
| return entry; |
| } |
| |
| /** |
| * Read directory entry. |
| * |
| * Reads the data of the given directory segment and of any ADDR |
| * segments that follow it, and records every child found in the name |
| * table. Each record is laid out as a 32-bit inode number, a 16-bit |
| * record length, a type byte, a name-length byte and the name bytes. |
| * Pending entries whose path becomes resolvable are moved to the queue. |
| * |
| * @param entry the directory entry whose header has just been read |
| * @throws IOException if the data cannot be read completely, a following |
| * header fails verification, or a record reports a |
| * non-positive length |
| */ |
| private void readDirectoryEntry(DumpArchiveEntry entry) |
| throws IOException { |
| long size = entry.getEntrySize(); |
| boolean first = true; |
| |
| while (first || |
| (DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType())) { |
| // read the header that we just peeked at. |
| if (!first) { |
| raw.readRecord(); |
| } |
| |
| if (!names.containsKey(Integer.valueOf(entry.getIno())) && |
| (DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType())) { |
| pending.put(Integer.valueOf(entry.getIno()), entry); |
| } |
| |
| int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount(); |
| |
| if (blockBuffer.length < datalen) { |
| blockBuffer = new byte[datalen]; |
| } |
| |
| if (raw.read(blockBuffer, 0, datalen) != datalen) { |
| throw new EOFException(); |
| } |
| |
| int reclen = 0; |
| |
| for (int i = 0; (i < (datalen - 8)) && (i < (size - 8)); |
| i += reclen) { |
| int ino = DumpArchiveUtil.convert32(blockBuffer, i); |
| reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4); |
| |
| // a record that does not advance the scan would loop forever. |
| if (reclen <= 0) { |
| throw new InvalidFormatException(); |
| } |
| |
| byte type = blockBuffer[i + 6]; |
| |
| // the length byte is unsigned; mask it so names of 128..255 |
| // bytes do not turn into a negative length. |
| int nameLength = blockBuffer[i + 7] & 0xFF; |
| |
| String name = new String(blockBuffer, i + 8, nameLength); // TODO default charset? |
| |
| if (".".equals(name) || "..".equals(name)) { |
| // do nothing... |
| continue; |
| } |
| |
| Dirent d = new Dirent(ino, entry.getIno(), type, name); |
| |
| names.put(Integer.valueOf(ino), d); |
| |
| // check whether this allows us to fill anything in the pending list. |
| for (Map.Entry<Integer, DumpArchiveEntry> e : pending.entrySet()) { |
| String path = getPath(e.getValue()); |
| |
| if (path != null) { |
| e.getValue().setName(path); |
| e.getValue() |
| .setSimpleName(names.get(e.getKey()).getName()); |
| queue.add(e.getValue()); |
| } |
| } |
| |
| // remove anything that we found. (We can't do it earlier |
| // because of concurrent modification exceptions.) |
| for (DumpArchiveEntry e : queue) { |
| pending.remove(Integer.valueOf(e.getIno())); |
| } |
| } |
| |
| // look at the next header without consuming it; the loop condition |
| // decides whether it continues this directory. |
| byte[] peekBytes = raw.peek(); |
| |
| if (!DumpArchiveUtil.verify(peekBytes)) { |
| throw new InvalidFormatException(); |
| } |
| |
| entry = DumpArchiveEntry.parse(peekBytes); |
| first = false; |
| size -= DumpArchiveConstants.TP_SIZE; |
| } |
| } |
| |
| /** |
| * Get full path for specified archive entry, or null if there's a gap. |
| * |
| * Walks from the entry's inode up through the parent links recorded in |
| * the name table until it reaches a node that is its own parent. If a |
| * link is missing, the entry is stored in the pending map and null is |
| * returned. |
| * |
| * @param entry the entry whose path should be built |
| * @return full path for specified archive entry, or null if there's a gap. |
| * @throws IOException if the recorded parent links form a cycle |
| */ |
| private String getPath(DumpArchiveEntry entry) throws IOException { |
| // build the stack of elements. It's possible that we're |
| // still missing an intermediate value and if so we |
| // clear the stack and defer the entry below. |
| Stack<String> elements = new Stack<String>(); |
| Dirent dirent = null; |
| |
| for (int i = entry.getIno();; i = dirent.getParentIno()) { |
| if (!names.containsKey(Integer.valueOf(i))) { |
| elements.clear(); |
| break; |
| } |
| |
| dirent = names.get(Integer.valueOf(i)); |
| elements.push(dirent.getName()); |
| |
| if (dirent.getIno() == dirent.getParentIno()) { |
| break; |
| } |
| |
| // every step uses one key of the name table, so taking more |
| // steps than there are keys means the parent links loop. |
| if (elements.size() > names.size()) { |
| throw new InvalidFormatException(); |
| } |
| } |
| |
| // if an element is missing defer the work and read next entry. |
| if (elements.isEmpty()) { |
| pending.put(Integer.valueOf(entry.getIno()), entry); |
| |
| return null; |
| } |
| |
| // generate full path from stack of elements. |
| StringBuilder sb = new StringBuilder(elements.pop()); |
| |
| while (!elements.isEmpty()) { |
| sb.append('/'); |
| sb.append(elements.pop()); |
| } |
| |
| return sb.toString(); |
| } |
| |
| /** |
| * Reads bytes from the current dump archive entry. |
| * |
| * The boundaries of the current entry are treated as the start and |
| * end of the stream: a request is trimmed to what is left of the |
| * entry, and -1 is returned once the entry is exhausted or the stream |
| * is closed. Records flagged as sparse are delivered as zero bytes. |
| * |
| * @param buf The buffer into which to place bytes read. |
| * @param off The offset at which to place bytes read. |
| * @param len The number of bytes to read. |
| * @return The number of bytes read, or -1 at EOF. |
| * @throws IOException on error |
| */ |
| @Override |
| public int read(byte[] buf, int off, int len) throws IOException { |
| if (isClosed || (entryOffset >= entrySize)) { |
| return -1; |
| } |
| |
| // never hand out more than what remains of the entry. |
| if ((len + entryOffset) > entrySize) { |
| len = (int) (entrySize - entryOffset); |
| } |
| |
| int copied = 0; |
| |
| while (len > 0) { |
| // drain whatever is still buffered from the current record. |
| final int chunk = Math.min(len, readBuf.length - recordOffset); |
| |
| System.arraycopy(readBuf, recordOffset, buf, off, chunk); |
| copied += chunk; |
| recordOffset += chunk; |
| len -= chunk; |
| off += chunk; |
| |
| if (len <= 0) { |
| break; |
| } |
| |
| // more was requested: fetch the next header once this one is used up. |
| if (readIdx >= 512) { |
| final byte[] nextHeader = raw.readRecord(); |
| |
| if (!DumpArchiveUtil.verify(nextHeader)) { |
| throw new InvalidFormatException(); |
| } |
| |
| active = DumpArchiveEntry.parse(nextHeader); |
| readIdx = 0; |
| } |
| |
| final boolean hole = active.isSparseRecord(readIdx++); |
| |
| if (hole) { |
| Arrays.fill(readBuf, (byte) 0); |
| } else { |
| final int got = raw.read(readBuf, 0, readBuf.length); |
| |
| if (got != readBuf.length) { |
| throw new EOFException(); |
| } |
| } |
| |
| recordOffset = 0; |
| } |
| |
| entryOffset += copied; |
| |
| return copied; |
| } |
| |
| /** |
| * Closes the underlying tape stream; later calls do nothing. |
| * |
| * @throws IOException if closing the underlying stream fails |
| */ |
| @Override |
| public void close() throws IOException { |
| if (isClosed) { |
| return; |
| } |
| |
| isClosed = true; |
| raw.close(); |
| } |
| |
| /** |
| * Decides from the leading bytes of a file whether it is a dump |
| * archive. Fewer than 32 bytes is never enough. With at least |
| * TP_SIZE bytes the buffer is passed to the full verification; |
| * otherwise only the magic value at offset 24 is compared. |
| * |
| * @param buffer the leading bytes of the candidate file |
| * @param length the number of valid bytes in buffer |
| * @return true if the bytes look like the start of a dump archive |
| */ |
| public static boolean matches(byte[] buffer, int length) { |
| final boolean tooShort = length < 32; |
| |
| if (tooShort) { |
| return false; |
| } |
| |
| final boolean haveFullRecord = length >= DumpArchiveConstants.TP_SIZE; |
| |
| if (!haveFullRecord) { |
| // only the magic value can be checked with a partial header. |
| return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer, |
| 24); |
| } |
| |
| // a whole record is available, so run the stronger verification. |
| return DumpArchiveUtil.verify(buffer); |
| } |
| |
| } |