Squashed 'external/unarr/' content from commit f243d72fb3
git-subtree-dir: external/unarr git-subtree-split: f243d72fb3fe418c26a19514609ac7167d089df4
This commit is contained in:
280
tar/parse-tar.c
Normal file
280
tar/parse-tar.c
Normal file
@@ -0,0 +1,280 @@
|
||||
/* Copyright 2015 the unarr project authors (see AUTHORS file).
|
||||
License: LGPLv3 */
|
||||
|
||||
#include "tar.h"
|
||||
|
||||
/* Tells whether a fixed-width header field could hold an octal number:
   every one of its size bytes must be an octal digit ('0'..'7'), a space
   or a NUL byte. A field of size 0 is accepted. */
static bool tar_is_number(const char *data, size_t size)
{
    while (size-- > 0) {
        char c = *data++;
        bool is_octal_digit = '0' <= c && c <= '7';

        if (!is_octal_digit && c != ' ' && c != '\0')
            return false;
    }

    return true;
}
|
||||
|
||||
/* Decodes an octal header field of size bytes. Spaces and NUL bytes are
   skipped wherever they occur; decoding stops at the first other byte that
   isn't an octal digit. Returns the value accumulated so far (0 if there
   were no digits). */
static uint64_t tar_parse_number(const char *data, size_t size)
{
    uint64_t result = 0;
    const char *p;

    for (p = data; size > 0; size--, p++) {
        char c = *p;

        if (c == ' ' || c == '\0')
            continue;
        if (c < '0' || c > '7')
            break;
        /* shift in one octal digit (three bits) */
        result = (result << 3) + (c - '0');
    }

    return result;
}
|
||||
|
||||
static bool tar_is_zeroed_block(const char *data)
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; i < TAR_BLOCK_SIZE; i++) {
|
||||
if (data[i] != 0)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Tells whether a NUL-terminated string consists of well-formed UTF-8 only.
   Besides stray continuation bytes, truncated sequences and lead bytes above
   0xF4, this also rejects overlong encodings (lead bytes 0xC0/0xC1, 0xE0
   followed by less than 0xA0, 0xF0 followed by less than 0x90), UTF-16
   surrogates (0xED followed by more than 0x9F) and code points beyond
   U+10FFFF (0xF4 followed by more than 0x8F). The empty string is valid. */
static bool ar_is_valid_utf8(const char *string)
{
    const unsigned char *s = (const unsigned char *)string;

    while (*s) {
        unsigned char lead = *s++;
        /* allowed range for the first continuation byte */
        unsigned char lo = 0x80, hi = 0xBF;
        int trailing;

        if (lead < 0x80)
            continue;
        /* 0x80..0xBF are continuation bytes, 0xC0/0xC1 only start overlong forms */
        if (lead < 0xC2)
            return false;
        if (lead < 0xE0) {
            trailing = 1;
        }
        else if (lead < 0xF0) {
            trailing = 2;
            if (lead == 0xE0)
                lo = 0xA0;
            else if (lead == 0xED)
                hi = 0x9F;
        }
        else if (lead < 0xF5) {
            trailing = 3;
            if (lead == 0xF0)
                lo = 0x90;
            else if (lead == 0xF4)
                hi = 0x8F;
        }
        else {
            return false;
        }

        /* a terminating NUL fails the range checks, so s never runs past the end */
        if (*s < lo || hi < *s)
            return false;
        s++;
        while (--trailing > 0) {
            if ((*s & 0xC0) != 0x80)
                return false;
            s++;
        }
    }

    return true;
}
|
||||
|
||||
/* Reads one header block from the archive's stream at its current position
   and fills tar->entry from it (any previously cached name is released).
   Returns false on a short read, on malformed numeric fields, on a file size
   that would overflow later offset calculations and on a checksum mismatch.
   An all-zero block is accepted and leaves tar->entry cleared (checksum 0). */
bool tar_parse_header(ar_archive_tar *tar)
{
    char block[TAR_BLOCK_SIZE];
    uint32_t sum_unsigned = 0;
    int32_t sum_signed = 0;
    size_t pos;

    if (ar_read(tar->super.stream, block, sizeof(block)) != sizeof(block))
        return false;

    if (tar_is_zeroed_block(block)) {
        free(tar->entry.name);
        memset(&tar->entry, 0, sizeof(tar->entry));
        return true;
    }

    /* size (offset 124), mtime (offset 136) and checksum (offset 148) must look like octal numbers */
    if (!tar_is_number(block + 124, 12))
        return false;
    if (!tar_is_number(block + 136, 12))
        return false;
    if (!tar_is_number(block + 148, 8))
        return false;

    tar->entry.filesize = (size_t)tar_parse_number(block + 124, 12);
    /* seconds are offset by 11644473600 and scaled by 10^7, i.e. the Unix
       timestamp is rebased to 1601 and expressed in 100 ns ticks */
    tar->entry.mtime = (tar_parse_number(block + 136, 12) + 11644473600) * 10000000;
    tar->entry.checksum = (uint32_t)tar_parse_number(block + 148, 8);
    tar->entry.filetype = block[156];
    free(tar->entry.name);
    tar->entry.name = NULL;

    if (tar->entry.filetype == TYPE_FILE_OLD) {
        /* old-style entries whose name ends in '/' are directories:
           look at the last non-NUL byte of the 100-byte name field */
        for (pos = 99; pos > 0 && block[pos] == '\0'; pos--)
            ;
        if (block[pos] == '/')
            tar->entry.filetype = TYPE_DIRECTORY;
    }

    tar->entry.is_ustar = memcmp(block + 257, "ustar\x00""00", 8) == 0 && memcmp(block + 508, "tar\0", 4) != 0;

    /* refuse sizes for which offset + size + two blocks can no longer be represented */
    if (tar->entry.filesize > (size_t)-1 - tar->super.entry_offset - 2 * TAR_BLOCK_SIZE)
        return false;

    /* the checksum is calculated with the checksum field itself filled with spaces */
    memset(block + 148, ' ', 8);
    for (pos = 0; pos < sizeof(block); pos++) {
        sum_unsigned += (unsigned char)block[pos];
        sum_signed += (signed char)block[pos];
    }

    /* tolerate checksums that were summed up over signed instead of unsigned bytes */
    if (sum_unsigned != (uint32_t)sum_signed && tar->entry.checksum == (uint32_t)sum_signed) {
        log("Checksum was calculated using signed data");
        tar->entry.checksum = sum_unsigned;
    }

    return tar->entry.checksum == sum_unsigned;
}
|
||||
|
||||
/* Handles a PAX extended header entry: reads its payload, advances to the
   entry it belongs to (through ar_parse_entry) and then applies the payload's
   records to that entry. Each record has the form "<length> <key>=<value>\n",
   where the decimal <length> counts the whole record; the keys "path",
   "mtime" and "size" are applied, all others are skipped. Parsing stops at
   the first malformed record, keeping what was applied before.
   If the payload can't be allocated, the extended header is ignored and the
   following entry is parsed as is. Returns false if reading the payload or
   parsing the following entry fails. */
bool tar_handle_pax_extended(ar_archive *ar)
{
    ar_archive_tar *tar = (ar_archive_tar *)ar;
    off64_t offset = ar->entry_offset;
    size_t size = tar->entry.filesize;
    char *data, *line;

    data = malloc(size);
    if (!data) {
        log("Ignoring PAX extended header on OOM");
        return ar_parse_entry(ar);
    }
    if (!ar_entry_uncompress(ar, data, size) || !ar_parse_entry(ar)) {
        free(data);
        return false;
    }
    if (tar->last_seen_dir > offset) {
        free(data);
        return true;
    }

    line = data;
    while (line < data + size) {
        char *key, *value, *ptr;
        /* number of bytes left in the buffer, starting at the current record;
           this must shrink as line advances or the checks below would allow
           reads and the terminating write to go past the end of data */
        size_t length, max_size = size - (size_t)(line - data);

        ptr = memchr(line, '=', max_size);
        if (!ptr || *line < '1' || '9' < *line) {
            warn("Invalid PAX extended header record @%" PRIi64, offset);
            break;
        }
        value = ptr + 1;
        /* terminate at '=' so that strtoul and strcmp below stay within the record */
        *ptr = '\0';
        length = (size_t)strtoul(line, &ptr, 10);
        /* the record must fit into the buffer, extend beyond the '=',
           end in a newline and have a space between length and key */
        if (max_size < length || length <= (size_t)(value - line) || line[length - 1] != '\n' || *ptr != ' ') {
            warn("Invalid PAX extended header record @%" PRIi64, offset);
            break;
        }
        key = ptr + 1;
        line += length;
        /* replace the record's trailing newline to terminate the value */
        line[-1] = '\0';

        if (strcmp(key, "path") == 0) {
            ptr = malloc(strlen(value) + 1);
            if (ptr) {
                strcpy(ptr, value);
                free(tar->entry.name);
                tar->entry.name = ptr;
            }
        }
        else if (strcmp(key, "mtime") == 0)
            tar->entry.mtime = (time64_t)((strtod(value, &ptr) + 11644473600) * 10000000);
        else if (strcmp(key, "size") == 0)
            tar->entry.filesize = (size_t)strtoul(value, &ptr, 10);
        else
            log("Skipping value for %s", key);
    }
    free(data);

    /* make sure the name is cached before entry_offset is pointed back at the extended header */
    tar_get_name(ar, false);
    ar->entry_offset = offset;
    ar->entry_size_uncompressed = tar->entry.filesize;
    ar->entry_filetime = tar->entry.mtime;

    return true;
}
|
||||
|
||||
/* Handles a GNU long name entry: its payload is the name of the entry that
   follows. The payload is read, the following entry is parsed (through
   ar_parse_entry) and the long name is then attached to it, unless that entry
   already got a name from a PAX header. If the name buffer can't be
   allocated, the following entry is parsed with its short name instead.
   Returns false if reading the payload or parsing the following entry fails. */
bool tar_handle_gnu_longname(ar_archive *ar)
{
    ar_archive_tar *tar = (ar_archive_tar *)ar;
    const off64_t header_offset = ar->entry_offset;
    const size_t name_len = tar->entry.filesize;
    char *buffer = malloc(name_len + 1);

    /* name_len + 1 wraps around to 0 for the largest size_t value */
    if (!buffer || name_len == (size_t)-1) {
        log("Falling back to the short filename on OOM");
        free(buffer);
        return ar_parse_entry(ar);
    }

    if (!ar_entry_uncompress(ar, buffer, name_len) || !ar_parse_entry(ar)) {
        free(buffer);
        return false;
    }

    if (tar->last_seen_dir > header_offset) {
        free(buffer);
        return true;
    }

    if (tar->entry.name) {
        log("Skipping GNU long filename in favor of PAX name");
        free(buffer);
        return true;
    }

    buffer[name_len] = '\0';
    ar->entry_offset = header_offset;

    /* name could be in any encoding, assume UTF-8 or whatever (DOS) */
    if (!ar_is_valid_utf8(buffer)) {
        tar->entry.name = ar_conv_dos_to_utf8(buffer);
        free(buffer);
    }
    else {
        /* ownership of the buffer passes to the entry */
        tar->entry.name = buffer;
    }

    return true;
}
|
||||
|
||||
/* Returns the name of the current entry, or NULL if raw names are requested
   (not supported) or if seeking, reading or allocating fails.
   If no name has been cached in tar->entry.name yet, it is read from the
   header block at ar->entry_offset: the 100-byte name field, preceded by the
   prefix field and a '/' for ustar headers with a non-empty prefix. Names
   that aren't valid UTF-8 are converted with ar_conv_dos_to_utf8 and
   backslashes are replaced with '/'. A name that is already cached is
   returned unchanged. The string remains owned by the archive. */
const char *tar_get_name(ar_archive *ar, bool raw)
{
    if (raw)
        return NULL;

    ar_archive_tar *tar = (ar_archive_tar *)ar;
    if (!tar->entry.name) {
        char *name;

        if (!ar_seek(ar->stream, ar->entry_offset, SEEK_SET))
            return NULL;

        /* the name field isn't necessarily NUL-terminated, so reserve a byte for that */
        name = malloc(100 + 1);
        if (!name || ar_read(ar->stream, name, 100) != 100) {
            free(name);
            ar_seek(ar->stream, ar->entry_offset + TAR_BLOCK_SIZE, SEEK_SET);
            return NULL;
        }
        name[100] = '\0';

        if (tar->entry.is_ustar) {
            /* room for prefix (155 bytes), '/', name (100 bytes) and the terminator */
            char *prefixed = malloc(256 + 1);
            /* skip ahead to the prefix field at offset 345 and read through to the end of the block */
            if (!prefixed || !ar_skip(ar->stream, 245) || ar_read(ar->stream, prefixed, 167) != 167) {
                free(name);
                free(prefixed);
                ar_seek(ar->stream, ar->entry_offset + TAR_BLOCK_SIZE, SEEK_SET);
                return NULL;
            }
            if (prefixed[0] != '\0') {
                /* the prefix field is 155 bytes wide; terminating right behind it
                   keeps the header's trailing bytes out of the name and guarantees
                   that prefix + '/' + name fits into the 257 bytes allocated above */
                prefixed[155] = '\0';
                strcat(prefixed, "/");
                strcat(prefixed, name);
                free(name);
                name = prefixed;
                prefixed = NULL;
            }
            free(prefixed);
        }
        else
            ar_skip(ar->stream, TAR_BLOCK_SIZE - 100);

        /* name could be in any encoding, assume UTF-8 or whatever (DOS) */
        if (ar_is_valid_utf8(name)) {
            tar->entry.name = name;
        }
        else {
            tar->entry.name = ar_conv_dos_to_utf8(name);
            free(name);
        }
        /* normalize path separators */
        if (tar->entry.name) {
            char *p = tar->entry.name;
            while ((p = strchr(p, '\\')) != NULL) {
                *p = '/';
            }
        }
    }
    return tar->entry.name;
}
|
||||
93
tar/tar.c
Normal file
93
tar/tar.c
Normal file
@@ -0,0 +1,93 @@
|
||||
/* Copyright 2015 the unarr project authors (see AUTHORS file).
|
||||
License: LGPLv3 */
|
||||
|
||||
#include "tar.h"
|
||||
|
||||
/* Close callback for tar archives: releases the cached name of the current entry. */
static void tar_close(ar_archive *ar)
{
    free(((ar_archive_tar *)ar)->entry.name);
}
|
||||
|
||||
/* Parses the entry whose header starts at offset and updates the archive's
   entry state (offsets, size, time) from it. Directory entries and PAX global
   headers are skipped by moving on to the next header; PAX extended headers
   and GNU long names are handed to their handlers, which parse the entry that
   follows them. Returns false on seek failure, on invalid header data and at
   the end of the archive (in which case ar->at_eof is set). */
static bool tar_parse_entry(ar_archive *ar, off64_t offset)
{
    ar_archive_tar *tar = (ar_archive_tar *)ar;

    /* each iteration parses one header; only skipped entry types loop around */
    for (;;) {
        if (!ar_seek(ar->stream, offset, SEEK_SET)) {
            warn("Couldn't seek to offset %" PRIi64, offset);
            return false;
        }
        if (!tar_parse_header(tar)) {
            warn("Invalid tar header data @%" PRIi64, offset);
            return false;
        }
        /* a valid header without checksum is the zeroed block that ends the archive */
        if (!tar->entry.checksum) {
            ar->at_eof = true;
            return false;
        }

        ar->entry_offset = offset;
        /* the next header follows this one and its data, rounded up to whole blocks */
        ar->entry_offset_next = offset + TAR_BLOCK_SIZE + (tar->entry.filesize + TAR_BLOCK_SIZE - 1) / TAR_BLOCK_SIZE * TAR_BLOCK_SIZE;
        ar->entry_size_uncompressed = tar->entry.filesize;
        ar->entry_filetime = tar->entry.mtime;
        tar->bytes_done = 0;

        if (tar->last_seen_dir > offset)
            tar->last_seen_dir = 0;

        switch (tar->entry.filetype) {
        case TYPE_DIRECTORY:
            log("Skipping directory entry \"%s\"", tar_get_name(ar, false));
            tar->last_seen_dir = ar->entry_offset;
            break;
        case TYPE_PAX_GLOBAL:
            log("Skipping PAX global extended header record");
            break;
        case TYPE_PAX_EXTENDED:
            return tar_handle_pax_extended(ar);
        case TYPE_GNU_LONGNAME:
            return tar_handle_gnu_longname(ar);
        case TYPE_FILE:
        case TYPE_FILE_OLD:
            return true;
        default:
            warn("Unknown entry type '%c'", tar->entry.filetype);
            return true;
        }

        /* skipped entry: continue with the header that follows it */
        offset = ar->entry_offset_next;
    }
}
|
||||
|
||||
/* Copies the next count bytes of the current entry's (stored) data from the
   stream into buffer. Fails if more data is requested than the entry has
   left or if the stream ends early; bytes_done only advances on success. */
static bool tar_uncompress(ar_archive *ar, void *buffer, size_t count)
{
    ar_archive_tar *tar = (ar_archive_tar *)ar;

    if (ar->entry_size_uncompressed - tar->bytes_done < count) {
        warn("Requesting too much data (%" PRIuPTR " < %" PRIuPTR ")", ar->entry_size_uncompressed - tar->bytes_done, count);
        return false;
    }

    if (ar_read(ar->stream, buffer, count) == count) {
        tar->bytes_done += count;
        return true;
    }

    warn("Unexpected EOF in stored data");
    return false;
}
|
||||
|
||||
/* Opens stream as a tar archive. The stream is rewound and its first block
   must parse as a tar header with a non-zero checksum; otherwise (or if
   seeking or allocating fails) NULL is returned. */
ar_archive *ar_open_tar_archive(ar_stream *stream)
{
    ar_archive *ar;

    if (!ar_seek(stream, 0, SEEK_SET))
        return NULL;

    ar = ar_open_archive(stream, sizeof(ar_archive_tar), tar_close, tar_parse_entry, tar_get_name, tar_uncompress, NULL, 0);
    if (!ar)
        return NULL;

    /* probe the first header to make sure that this is tar data at all */
    if (tar_parse_header((ar_archive_tar *)ar) && ((ar_archive_tar *)ar)->entry.checksum)
        return ar;

    free(ar);
    return NULL;
}
|
||||
46
tar/tar.h
Normal file
46
tar/tar.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/* Copyright 2015 the unarr project authors (see AUTHORS file).
|
||||
License: LGPLv3 */
|
||||
|
||||
#ifndef tar_tar_h
|
||||
#define tar_tar_h
|
||||
|
||||
#include "../common/unarr-imp.h"
|
||||
|
||||
typedef struct ar_archive_tar_s ar_archive_tar;
|
||||
|
||||
/***** parse-tar *****/
|
||||
|
||||
#define TAR_BLOCK_SIZE 512
|
||||
|
||||
/* Values of the type flag byte of a tar header (stored in tar_entry.filetype). */
enum tar_filetype {
    TYPE_FILE_OLD = '\0',       /* regular file, old-style NUL type flag */
    TYPE_FILE = '0',            /* regular file */
    TYPE_HARD_LINK = '1',       /* hard link */
    TYPE_SOFT_LINK = '2',       /* soft link */
    TYPE_DIRECTORY = '5',       /* directory */
    TYPE_GNU_LONGNAME = 'L',    /* GNU long name for the following entry */
    TYPE_PAX_GLOBAL = 'g',      /* PAX global extended header */
    TYPE_PAX_EXTENDED = 'x',    /* PAX extended header for the following entry */
};
|
||||
|
||||
struct tar_entry {
|
||||
char *name;
|
||||
size_t filesize;
|
||||
time64_t mtime;
|
||||
uint32_t checksum;
|
||||
char filetype;
|
||||
bool is_ustar;
|
||||
};
|
||||
|
||||
bool tar_parse_header(ar_archive_tar *tar);
|
||||
bool tar_handle_pax_extended(ar_archive *ar);
|
||||
bool tar_handle_gnu_longname(ar_archive *ar);
|
||||
const char *tar_get_name(ar_archive *ar, bool raw);
|
||||
|
||||
/***** tar *****/
|
||||
|
||||
struct ar_archive_tar_s {
|
||||
ar_archive super;
|
||||
struct tar_entry entry;
|
||||
size_t bytes_done;
|
||||
off64_t last_seen_dir;
|
||||
};
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user