Squashed 'external/unarr/' content from commit f243d72fb3

git-subtree-dir: external/unarr
git-subtree-split: f243d72fb3fe418c26a19514609ac7167d089df4
This commit is contained in:
SimoneN64
2024-09-14 16:23:23 +02:00
commit ee861bc6cf
99 changed files with 20941 additions and 0 deletions

280
tar/parse-tar.c Normal file
View File

@@ -0,0 +1,280 @@
/* Copyright 2015 the unarr project authors (see AUTHORS file).
License: LGPLv3 */
#include "tar.h"
/* Checks that a fixed-width header field contains nothing but octal digits
   ('0'..'7'), spaces and NUL bytes; an empty field is accepted. */
static bool tar_is_number(const char *data, size_t size)
{
    const char *end = data + size;

    for (; data != end; data++) {
        const char c = *data;
        const bool is_octal_digit = '0' <= c && c <= '7';
        const bool is_padding = c == ' ' || c == '\0';

        if (!is_octal_digit && !is_padding)
            return false;
    }
    return true;
}
/* Decodes an octal header field of the given width. Spaces and NUL bytes are
   skipped wherever they occur; decoding stops at the first other character
   that is not an octal digit. Returns 0 if no digit was seen. */
static uint64_t tar_parse_number(const char *data, size_t size)
{
    uint64_t result = 0;
    const char *cursor = data;
    const char *const end = data + size;

    while (cursor < end) {
        const char c = *cursor++;

        if (c == ' ' || c == '\0')
            continue;
        if (c < '0' || c > '7')
            break;
        result = 8 * result + (c - '0');
    }
    return result;
}
/* Returns true if all TAR_BLOCK_SIZE bytes starting at data are zero. */
static bool tar_is_zeroed_block(const char *data)
{
    const char *const end = data + TAR_BLOCK_SIZE;

    while (data < end) {
        if (*data++ != 0)
            return false;
    }
    return true;
}
/* Structural UTF-8 check on a NUL-terminated string: every lead byte must be
   followed by the number of continuation bytes (10xxxxxx) it announces.
   Lead bytes 0x80..0xBF and 0xF5..0xFF are rejected. A sequence cut short by
   the terminating NUL is rejected as well, because NUL is not a continuation
   byte, so the scan never runs past the terminator. */
static bool ar_is_valid_utf8(const char *string)
{
    const unsigned char *p = (const unsigned char *)string;

    while (*p != 0) {
        const unsigned char lead = *p;
        int trailing;

        if (lead < 0x80)
            trailing = 0;
        else if (lead < 0xC0)
            return false; /* stray continuation byte */
        else if (lead < 0xE0)
            trailing = 1;
        else if (lead < 0xF0)
            trailing = 2;
        else if (lead < 0xF5)
            trailing = 3;
        else
            return false;

        for (; trailing > 0; trailing--) {
            p++;
            if ((*p & 0xC0) != 0x80)
                return false;
        }
        p++;
    }
    return true;
}
/* Reads one TAR_BLOCK_SIZE header block from the archive stream and fills
   tar->entry from it.

   Returns false on a short read, when the size/mtime/checksum fields hold
   characters other than octal digits, spaces or NULs, when the size is too
   large to be addressed, or when the stored checksum matches neither the
   unsigned nor the signed byte sum of the block.

   An all-zero block resets tar->entry (so entry.checksum becomes 0) and
   returns true; callers use the zero checksum to detect the end of archive.

   Any previously cached entry name is released. */
bool tar_parse_header(ar_archive_tar *tar)
{
    char block[TAR_BLOCK_SIZE];
    uint32_t sum_unsigned = 0;
    int32_t sum_signed = 0;
    size_t pos;

    if (ar_read(tar->super.stream, block, sizeof(block)) != sizeof(block))
        return false;

    if (tar_is_zeroed_block(block)) {
        free(tar->entry.name);
        memset(&tar->entry, 0, sizeof(tar->entry));
        return true;
    }

    /* size (offset 124), mtime (offset 136) and checksum (offset 148) */
    if (!tar_is_number(block + 124, 12))
        return false;
    if (!tar_is_number(block + 136, 12))
        return false;
    if (!tar_is_number(block + 148, 8))
        return false;

    tar->entry.filesize = (size_t)tar_parse_number(block + 124, 12);
    tar->entry.mtime = (tar_parse_number(block + 136, 12) + 11644473600) * 10000000;
    tar->entry.checksum = (uint32_t)tar_parse_number(block + 148, 8);
    tar->entry.filetype = block[156];

    free(tar->entry.name);
    tar->entry.name = NULL;

    if (tar->entry.filetype == TYPE_FILE_OLD) {
        /* old-style archives mark directories with a trailing slash in the
           100-byte name field: locate its last non-NUL character */
        for (pos = 99; pos > 0 && block[pos] == '\0'; pos--)
            ;
        if (block[pos] == '/')
            tar->entry.filetype = TYPE_DIRECTORY;
    }

    tar->entry.is_ustar = false;
    if (memcmp(block + 257, "ustar\x00""00", 8) == 0)
        tar->entry.is_ustar = memcmp(block + 508, "tar\0", 4) != 0;

    if (tar->entry.filesize > (size_t)-1 - tar->super.entry_offset - 2 * TAR_BLOCK_SIZE)
        return false;

    /* the checksum is computed with the checksum field itself set to spaces */
    memset(block + 148, ' ', 8);
    for (pos = 0; pos < sizeof(block); pos++) {
        sum_unsigned += (unsigned char)block[pos];
        sum_signed += (signed char)block[pos];
    }

    if (sum_unsigned != (uint32_t)sum_signed && tar->entry.checksum == (uint32_t)sum_signed) {
        log("Checksum was calculated using signed data");
        tar->entry.checksum = sum_unsigned;
    }
    return tar->entry.checksum == sum_unsigned;
}
/* Handles a PAX extended header entry (type 'x').

   The payload of the current entry is read into memory, the archive is then
   advanced to the following entry with ar_parse_entry(), and the "path",
   "mtime" and "size" records found in the payload are applied to that entry.
   Other keys are logged and ignored.

   Each record has the form "<length> <key>=<value>\n", where <length> is the
   decimal size of the whole record including the length digits and the
   trailing newline.

   Returns false if the payload cannot be read or the following entry cannot
   be parsed. If the payload buffer cannot be allocated, the header is
   ignored and the result of ar_parse_entry() is returned. A malformed record
   stops record processing but is not treated as an error. */
bool tar_handle_pax_extended(ar_archive *ar)
{
    ar_archive_tar *tar = (ar_archive_tar *)ar;
    off64_t offset = ar->entry_offset;
    size_t size = tar->entry.filesize;
    char *data, *line;

    data = malloc(size);
    if (!data) {
        log("Ignoring PAX extended header on OOM");
        return ar_parse_entry(ar);
    }
    if (!ar_entry_uncompress(ar, data, size) || !ar_parse_entry(ar)) {
        free(data);
        return false;
    }
    /* tar_parse_entry() stores the offset of a skipped directory entry in
       last_seen_dir; if one was skipped after this header, the records
       belonged to that directory and must not be applied to the entry that
       is current now */
    if (tar->last_seen_dir > offset) {
        free(data);
        return true;
    }

    line = data;
    while (line < data + size) {
        char *key, *value, *ptr;
        /* number of bytes left in the buffer starting at line; every search
           and length check below must stay within this bound */
        size_t length, max_size = size - (size_t)(line - data);

        ptr = memchr(line, '=', max_size);
        if (!ptr || *line < '1' || '9' < *line) {
            warn("Invalid PAX extended header record @%" PRIi64, offset);
            break;
        }
        value = ptr + 1;
        /* terminate "<length> <key>" so that strtoul() and strcmp() cannot
           run past the '=' */
        *ptr = '\0';
        length = (size_t)strtoul(line, &ptr, 10);
        if (max_size < length || length <= (size_t)(value - line) || line[length - 1] != '\n' || *ptr != ' ') {
            warn("Invalid PAX extended header record @%" PRIi64, offset);
            break;
        }
        key = ptr + 1;
        line += length;
        /* replace the record's trailing newline to terminate the value */
        line[-1] = '\0';

        if (strcmp(key, "path") == 0) {
            ptr = malloc(strlen(value) + 1);
            if (ptr) {
                strcpy(ptr, value);
                free(tar->entry.name);
                tar->entry.name = ptr;
            }
        }
        else if (strcmp(key, "mtime") == 0)
            tar->entry.mtime = (time64_t)((strtod(value, &ptr) + 11644473600) * 10000000);
        else if (strcmp(key, "size") == 0)
            tar->entry.filesize = (size_t)strtoul(value, &ptr, 10);
        else
            log("Skipping value for %s", key);
    }
    free(data);

    /* make sure a name is cached (read from the following header unless a
       "path" record supplied one) before entry_offset is pointed back at the
       extended header */
    tar_get_name(ar, false);
    ar->entry_offset = offset;
    ar->entry_size_uncompressed = tar->entry.filesize;
    ar->entry_filetime = tar->entry.mtime;

    return true;
}
/* Handles a GNU long-name entry (type 'L'): the payload of the current entry
   is the file name of the entry that follows.

   The payload is read, the archive is advanced to the following entry with
   ar_parse_entry(), and the payload becomes that entry's name unless a name
   is already set (e.g. from a PAX header). The name is converted with
   ar_conv_dos_to_utf8() when it is not valid UTF-8.

   Returns false if the payload cannot be read or the following entry cannot
   be parsed. If the name buffer cannot be allocated, the long name is
   dropped and the result of ar_parse_entry() is returned. */
bool tar_handle_gnu_longname(ar_archive *ar)
{
    ar_archive_tar *tar = (ar_archive_tar *)ar;
    const off64_t header_offset = ar->entry_offset;
    const size_t name_len = tar->entry.filesize;
    /* name_len + 1 wraps to 0 for the maximum size_t; that case is rejected
       together with allocation failure below */
    char *buffer = malloc(name_len + 1);

    if (buffer == NULL || name_len == (size_t)-1) {
        log("Falling back to the short filename on OOM");
        free(buffer);
        return ar_parse_entry(ar);
    }

    if (!ar_entry_uncompress(ar, buffer, name_len) || !ar_parse_entry(ar)) {
        free(buffer);
        return false;
    }

    /* a directory entry was skipped after this header, so the long name
       belonged to that directory */
    if (tar->last_seen_dir > header_offset) {
        free(buffer);
        return true;
    }

    if (tar->entry.name != NULL) {
        log("Skipping GNU long filename in favor of PAX name");
        free(buffer);
        return true;
    }

    buffer[name_len] = '\0';
    ar->entry_offset = header_offset;

    /* name could be in any encoding, assume UTF-8 or whatever (DOS) */
    if (!ar_is_valid_utf8(buffer)) {
        tar->entry.name = ar_conv_dos_to_utf8(buffer);
        free(buffer);
        return true;
    }

    tar->entry.name = buffer;
    return true;
}
/* Returns the name of the current entry, or NULL if raw is set or the name
   cannot be read or allocated.

   The name is cached in tar->entry.name. When nothing is cached yet, it is
   read from the header block at ar->entry_offset: the 100-byte name field
   and, for ustar headers, the 155-byte prefix field at offset 345, joined as
   "<prefix>/<name>". Names that are not valid UTF-8 are converted with
   ar_conv_dos_to_utf8(), and backslashes are replaced by forward slashes.

   The returned string is owned by the archive; callers must not free it. */
const char *tar_get_name(ar_archive *ar, bool raw)
{
    ar_archive_tar *tar = (ar_archive_tar *)ar;

    if (raw)
        return NULL;

    if (!tar->entry.name) {
        char *name;

        if (!ar_seek(ar->stream, ar->entry_offset, SEEK_SET))
            return NULL;

        name = malloc(100 + 1);
        if (!name || ar_read(ar->stream, name, 100) != 100) {
            free(name);
            ar_seek(ar->stream, ar->entry_offset + TAR_BLOCK_SIZE, SEEK_SET);
            return NULL;
        }
        name[100] = '\0';

        if (tar->entry.is_ustar) {
            /* room for 155 (prefix) + 1 ('/') + 100 (name) + 1 (NUL) bytes */
            char *prefixed = malloc(256 + 1);
            /* skip from offset 100 to the prefix field at offset 345, then
               read the remaining 167 bytes of the 512-byte header block */
            if (!prefixed || !ar_skip(ar->stream, 245) || ar_read(ar->stream, prefixed, 167) != 167) {
                free(name);
                free(prefixed);
                ar_seek(ar->stream, ar->entry_offset + TAR_BLOCK_SIZE, SEEK_SET);
                return NULL;
            }
            if (prefixed[0] != '\0') {
                /* the prefix field occupies bytes 0..154 of the buffer, so
                   the terminator belongs at index 155; terminating one byte
                   later would let a header padding byte join the prefix and
                   could overflow the buffer by one byte */
                prefixed[155] = '\0';
                strcat(prefixed, "/");
                strcat(prefixed, name);
                free(name);
                name = prefixed;
                prefixed = NULL;
            }
            free(prefixed);
        }
        else
            ar_skip(ar->stream, TAR_BLOCK_SIZE - 100);

        /* name could be in any encoding, assume UTF-8 or whatever (DOS) */
        if (ar_is_valid_utf8(name)) {
            tar->entry.name = name;
        }
        else {
            tar->entry.name = ar_conv_dos_to_utf8(name);
            free(name);
        }

        /* normalize path separators */
        if (tar->entry.name) {
            char *p = tar->entry.name;
            while ((p = strchr(p, '\\')) != NULL) {
                *p = '/';
            }
        }
    }

    return tar->entry.name;
}

93
tar/tar.c Normal file
View File

@@ -0,0 +1,93 @@
/* Copyright 2015 the unarr project authors (see AUTHORS file).
License: LGPLv3 */
#include "tar.h"
/* Close callback for tar archives: releases the cached entry name. */
static void tar_close(ar_archive *ar)
{
    free(((ar_archive_tar *)ar)->entry.name);
}
/* Parses the entry whose header starts at offset and makes it the current
   entry of the archive.

   Directory entries and PAX global headers are skipped: parsing continues
   with the entry that follows them. PAX extended headers and GNU long-name
   entries are delegated to their handlers, which also advance to the entry
   they describe. Entries of unknown type are reported and then accepted.

   Returns false if seeking fails, if the header is invalid, or at the end of
   the archive (zero checksum), in which case ar->at_eof is set. */
static bool tar_parse_entry(ar_archive *ar, off64_t offset)
{
    ar_archive_tar *tar = (ar_archive_tar *)ar;

    for (;;) {
        size_t padded_size;

        if (!ar_seek(ar->stream, offset, SEEK_SET)) {
            warn("Couldn't seek to offset %" PRIi64, offset);
            return false;
        }
        if (!tar_parse_header(tar)) {
            warn("Invalid tar header data @%" PRIi64, offset);
            return false;
        }
        if (tar->entry.checksum == 0) {
            ar->at_eof = true;
            return false;
        }

        /* entry data is stored in whole blocks after the header block */
        padded_size = (tar->entry.filesize + TAR_BLOCK_SIZE - 1) / TAR_BLOCK_SIZE * TAR_BLOCK_SIZE;

        ar->entry_offset = offset;
        ar->entry_offset_next = offset + TAR_BLOCK_SIZE + padded_size;
        ar->entry_size_uncompressed = tar->entry.filesize;
        ar->entry_filetime = tar->entry.mtime;
        tar->bytes_done = 0;

        /* forget a recorded directory that lies beyond the current position */
        if (tar->last_seen_dir > offset)
            tar->last_seen_dir = 0;

        switch (tar->entry.filetype) {
        case TYPE_FILE:
        case TYPE_FILE_OLD:
            return true;
        case TYPE_DIRECTORY:
            log("Skipping directory entry \"%s\"", tar_get_name(ar, false));
            tar->last_seen_dir = ar->entry_offset;
            offset = ar->entry_offset_next;
            continue;
        case TYPE_PAX_GLOBAL:
            log("Skipping PAX global extended header record");
            offset = ar->entry_offset_next;
            continue;
        case TYPE_PAX_EXTENDED:
            return tar_handle_pax_extended(ar);
        case TYPE_GNU_LONGNAME:
            return tar_handle_gnu_longname(ar);
        default:
            warn("Unknown entry type '%c'", tar->entry.filetype);
            return true;
        }
    }
}
/* Copies the next count bytes of the current entry's stored data into
   buffer. Fails if more bytes are requested than remain in the entry or if
   the stream ends early; bytes_done is advanced only on success. */
static bool tar_uncompress(ar_archive *ar, void *buffer, size_t count)
{
    ar_archive_tar *tar = (ar_archive_tar *)ar;

    if (count <= ar->entry_size_uncompressed - tar->bytes_done) {
        if (ar_read(ar->stream, buffer, count) == count) {
            tar->bytes_done += count;
            return true;
        }
        warn("Unexpected EOF in stored data");
        return false;
    }

    warn("Requesting too much data (%" PRIuPTR " < %" PRIuPTR ")", ar->entry_size_uncompressed - tar->bytes_done, count);
    return false;
}
/* Opens stream as a tar archive. The stream is rewound and its first block
   must parse as a tar header with a non-zero checksum; otherwise NULL is
   returned. */
ar_archive *ar_open_tar_archive(ar_stream *stream)
{
    ar_archive *archive;

    if (!ar_seek(stream, 0, SEEK_SET))
        return NULL;

    archive = ar_open_archive(stream, sizeof(ar_archive_tar), tar_close, tar_parse_entry, tar_get_name, tar_uncompress, NULL, 0);
    if (archive == NULL)
        return NULL;

    if (tar_parse_header((ar_archive_tar *)archive) && ((ar_archive_tar *)archive)->entry.checksum != 0)
        return archive;

    free(archive);
    return NULL;
}

46
tar/tar.h Normal file
View File

@@ -0,0 +1,46 @@
/* Copyright 2015 the unarr project authors (see AUTHORS file).
License: LGPLv3 */
#ifndef tar_tar_h
#define tar_tar_h
#include "../common/unarr-imp.h"
/* tar-specific archive state; the struct is defined at the end of this header */
typedef struct ar_archive_tar_s ar_archive_tar;
/***** parse-tar *****/
/* size in bytes of a tar header block; entry data is padded to a multiple of it */
#define TAR_BLOCK_SIZE 512
/* Values of the type flag byte of a tar header (read from header offset 156
   by tar_parse_header). */
enum tar_filetype {
/* regular file; TYPE_FILE_OLD is re-classified as a directory by
   tar_parse_header when the name field ends in '/' */
TYPE_FILE = '0', TYPE_FILE_OLD = '\0',
TYPE_HARD_LINK = '1', TYPE_SOFT_LINK = '2',
TYPE_DIRECTORY = '5',
/* payload is the name of the following entry (see tar_handle_gnu_longname) */
TYPE_GNU_LONGNAME = 'L',
/* PAX headers; the extended one is applied to the following entry
   (see tar_handle_pax_extended) */
TYPE_PAX_GLOBAL = 'g', TYPE_PAX_EXTENDED = 'x',
};
/* Header information of the current tar entry, filled by tar_parse_header
   and optionally overridden by PAX / GNU long-name headers. */
struct tar_entry {
/* heap-allocated entry name, or NULL if not determined yet; released by
   tar_parse_header and tar_close */
char *name;
/* size of the entry data in bytes (header size field or PAX "size") */
size_t filesize;
/* modification time, computed as (seconds + 11644473600) * 10000000 from
   the header mtime field or the PAX "mtime" record */
time64_t mtime;
/* verified header checksum; 0 after an all-zero block, which callers
   treat as the end of the archive */
uint32_t checksum;
/* type flag, see enum tar_filetype */
char filetype;
/* true if the header carries the "ustar\0" "00" magic at offset 257 and
   does not end in "tar\0" */
bool is_ustar;
};
/* Reads one header block from the archive stream into tar->entry; returns
   false on a short read, malformed numeric fields or a checksum mismatch.
   An all-zero block clears the entry and returns true. */
bool tar_parse_header(ar_archive_tar *tar);
/* Reads a PAX extended header, advances to the following entry and applies
   the "path", "mtime" and "size" records to it. */
bool tar_handle_pax_extended(ar_archive *ar);
/* Reads a GNU long-name entry, advances to the following entry and uses the
   payload as its name unless a name is already set. */
bool tar_handle_gnu_longname(ar_archive *ar);
/* Returns the cached or freshly read name of the current entry; returns NULL
   when raw is true or the name cannot be read. */
const char *tar_get_name(ar_archive *ar, bool raw);
/***** tar *****/
/* State of an opened tar archive. */
struct ar_archive_tar_s {
/* generic archive state; kept as the first member because the code casts
   between ar_archive * and ar_archive_tar * */
ar_archive super;
/* header data of the current entry */
struct tar_entry entry;
/* number of bytes of the current entry already returned by tar_uncompress;
   reset to 0 by tar_parse_entry */
size_t bytes_done;
/* offset of the most recently skipped directory entry; reset to 0 by
   tar_parse_entry when it lies beyond the offset being parsed */
off64_t last_seen_dir;
};
#endif