summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUlrich Müller <ulm@gentoo.org>2021-06-29 10:09:10 +0200
committerLars Wendler <polynomial-c@gentoo.org>2021-06-29 10:29:33 +0200
commit68e24256a0449b2a27e44879f95171780f21526b (patch)
tree9ca16e8c59f84a003b228d75bc685c9507bdf1e6 /sys-apps/file
parentdev-go/gopls: Initial import (diff)
downloadgentoo-68e24256a0449b2a27e44879f95171780f21526b.tar.gz
gentoo-68e24256a0449b2a27e44879f95171780f21526b.tar.bz2
gentoo-68e24256a0449b2a27e44879f95171780f21526b.zip
sys-apps/file: Fix character count heuristic
This restores behaviour of file-5.39, i.e. correctly identifies small text files as text/plain rather than application/octet-stream again. Patch taken from upstream and (trivially) backported to 5.40. Closes: https://bugs.gentoo.org/799188 Bug: https://bugs.astron.com/view.php?id=261 Package-Manager: Portage-3.0.20, Repoman-3.0.3 Signed-off-by: Ulrich Müller <ulm@gentoo.org> Signed-off-by: Lars Wendler <polynomial-c@gentoo.org>
Diffstat (limited to 'sys-apps/file')
-rw-r--r--sys-apps/file/file-5.40-r3.ebuild147
-rw-r--r--sys-apps/file/files/file-5.40-revert-char-count.patch49
2 files changed, 196 insertions, 0 deletions
diff --git a/sys-apps/file/file-5.40-r3.ebuild b/sys-apps/file/file-5.40-r3.ebuild
new file mode 100644
index 000000000000..7fd5ac3b71b9
--- /dev/null
+++ b/sys-apps/file/file-5.40-r3.ebuild
@@ -0,0 +1,147 @@
+# Copyright 1999-2021 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=7
+
+PYTHON_COMPAT=( python3_{8..10} )
+DISTUTILS_OPTIONAL=1
+
+inherit distutils-r1 libtool toolchain-funcs multilib-minimal
+
+if [[ ${PV} == "9999" ]] ; then
+ EGIT_REPO_URI="https://github.com/glensc/file.git"
+ inherit autotools git-r3
+else
+ SRC_URI="ftp://ftp.astron.com/pub/file/${P}.tar.gz"
+ KEYWORDS="~alpha ~amd64 ~arm ~arm64 ~hppa ~ia64 ~m68k ~mips ~ppc ~ppc64 ~riscv ~s390 ~sparc ~x86 ~x64-cygwin ~amd64-linux ~x86-linux ~ppc-macos ~x64-macos ~sparc-solaris ~sparc64-solaris ~x64-solaris ~x86-solaris"
+fi
+
+DESCRIPTION="identify a file's format by scanning binary data for patterns"
+HOMEPAGE="https://www.darwinsys.com/file/"
+
+LICENSE="BSD-2"
+SLOT="0"
+IUSE="bzip2 lzma python seccomp static-libs zlib"
+REQUIRED_USE="python? ( ${PYTHON_REQUIRED_USE} )"
+
+DEPEND="
+ bzip2? ( app-arch/bzip2[${MULTILIB_USEDEP}] )
+ lzma? ( app-arch/xz-utils[${MULTILIB_USEDEP}] )
+ python? (
+ ${PYTHON_DEPS}
+ dev-python/setuptools[${PYTHON_USEDEP}]
+ )
+ zlib? ( >=sys-libs/zlib-1.2.8-r1[${MULTILIB_USEDEP}] )"
+RDEPEND="${DEPEND}
+ python? ( !dev-python/python-magic )
+ seccomp? ( sys-libs/libseccomp[${MULTILIB_USEDEP}] )"
+
+PATCHES=(
+ "${FILESDIR}/file-5.39-portage-sandbox.patch" #713710 #728978
+ "${FILESDIR}/file-5.40-xz_magic.patch" #784773
+ "${FILESDIR}/file-5.40-seccomp-faccessat.patch"
+ "${FILESDIR}/file-5.40-seccomp-fstatat64.patch" #784857
+ "${FILESDIR}/file-5.40-revert-char-count.patch" #799188
+)
+
+src_prepare() {
+ default
+
+ if [[ ${PV} == 9999 ]] ; then
+ eautoreconf
+ fi
+
+ elibtoolize
+
+ # don't let python README kill main README #60043
+ mv python/README.md python/README.python.md || die
+ sed 's@README.md@README.python.md@' -i python/setup.py || die #662090
+}
+
+multilib_src_configure() {
+ local myeconfargs=(
+ --enable-fsect-man5
+ $(use_enable bzip2 bzlib)
+ $(use_enable lzma xzlib)
+ $(use_enable seccomp libseccomp)
+ $(use_enable static-libs static)
+ $(use_enable zlib)
+ )
+ econf "${myeconfargs[@]}"
+}
+
+build_src_configure() {
+ local myeconfargs=(
+ --disable-shared
+ --disable-libseccomp
+ --disable-bzlib
+ --disable-xzlib
+ --disable-zlib
+ )
+ tc-env_build econf "${myeconfargs[@]}"
+}
+
+need_build_file() {
+ # when cross-compiling, we need to build up our own file
+ # because people often don't keep matching host/target
+ # file versions #362941
+ tc-is-cross-compiler && ! has_version -b "~${CATEGORY}/${P}"
+}
+
+src_configure() {
+ local ECONF_SOURCE="${S}"
+
+ if need_build_file ; then
+ mkdir -p "${WORKDIR}"/build || die
+ cd "${WORKDIR}"/build || die
+ build_src_configure
+ fi
+
+ multilib-minimal_src_configure
+}
+
+multilib_src_compile() {
+ if multilib_is_native_abi ; then
+ emake
+ else
+ cd src || die
+ emake magic.h #586444
+ emake libmagic.la
+ fi
+}
+
+src_compile() {
+ if need_build_file ; then
+ emake -C "${WORKDIR}"/build/src magic.h #586444
+ emake -C "${WORKDIR}"/build/src file
+ local -x PATH="${WORKDIR}/build/src:${PATH}"
+ fi
+ multilib-minimal_src_compile
+
+ if use python ; then
+ cd python || die
+ distutils-r1_src_compile
+ fi
+}
+
+multilib_src_install() {
+ if multilib_is_native_abi ; then
+ default
+ else
+ emake -C src install-{nodist_includeHEADERS,libLTLIBRARIES} DESTDIR="${D}"
+ fi
+}
+
+multilib_src_install_all() {
+ dodoc ChangeLog MAINT README
+
+ # Required for `file -C`
+ insinto /usr/share/misc/magic
+ doins -r magic/Magdir/*
+
+ if use python ; then
+ cd python || die
+ distutils-r1_src_install
+ fi
+ find "${ED}" -type f -name "*.la" -delete || die
+}
diff --git a/sys-apps/file/files/file-5.40-revert-char-count.patch b/sys-apps/file/files/file-5.40-revert-char-count.patch
new file mode 100644
index 000000000000..9d6f5be60fac
--- /dev/null
+++ b/sys-apps/file/files/file-5.40-revert-char-count.patch
@@ -0,0 +1,49 @@
+From c07e242e766242a44ff720c149b1bdd4924ec247 Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <christos@zoulas.com>
+Date: Tue, 27 Apr 2021 19:37:14 +0000
+Subject: [PATCH] Revert the fix for PR/180. It lead to PR/261. Using character
+ count heuristics ends up with confusing behavior, the following should not be
+ producing different results: echo -n xx | ./file - echo -n xy |
+ ./file -
+
+---
+[patch backported to 5.40 release -- ulm]
+
+ src/encoding.c | 15 ++-------------
+ 1 file changed, 2 insertions(+), 13 deletions(-)
+
+diff --git a/src/encoding.c b/src/encoding.c
+index 31d4d125..3647a481 100644
+--- a/src/encoding.c
++++ b/src/encoding.c
+@@ -265,9 +265,7 @@ private int \
+ looks_ ## NAME(const unsigned char *buf, size_t nbytes, file_unichar_t *ubuf, \
+ size_t *ulen) \
+ { \
+- size_t i, u; \
+- unsigned char dist[256]; \
+- memset(dist, 0, sizeof(dist)); \
++ size_t i; \
+ \
+ *ulen = 0; \
+ \
+@@ -278,16 +276,7 @@ looks_ ## NAME(const unsigned char *buf, size_t nbytes, file_unichar_t *ubuf, \
+ return 0; \
+ \
+ ubuf[(*ulen)++] = buf[i]; \
+- dist[buf[i]]++; \
+ } \
+- u = 0; \
+- for (i = 0; i < __arraycount(dist); i++) { \
+- if (dist[i]) \
+- u++; \
+- } \
+- if (u < 3) \
+- return 0; \
+-\
+ return 1; \
+ }
+
+--
+2.32.0
+