tesseract-ocr: update to 5.3.3

This commit is contained in:
chrysos349 2023-09-19 04:07:50 +03:00 committed by Piraty
parent c2cfa9fdd5
commit 08f32cfe3a
7 changed files with 44 additions and 55 deletions

View file

@ -2273,7 +2273,7 @@ libhttp_parser.so.2.9 http-parser-2.9.0_1
libmaa.so.4 libmaa-1.4.2_1
libcodeblocks.so.0 codeblocks-13.12_1
libleptonica.so.6 leptonica-1.84.0_1
libtesseract.so.4 tesseract-ocr-4.0.0_1
libtesseract.so.5 tesseract-ocr-5.3.3_1
libffmpegthumbnailer.so.4 ffmpegthumbnailer-2.0.10_1
libopenraw.so.7 libopenraw-0.1.0_1
libopenrawgnome.so.7 libopenraw-0.1.0_1

View file

@ -1 +0,0 @@
tesseract-ocr

View file

@ -1,14 +0,0 @@
This repository contains language data for Tesseract Open Source
OCR Engine. All data in the repository are licensed under the Apache
License:
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.

View file

@ -0,0 +1,14 @@
--- a/configure.ac
+++ b/configure.ac
@@ -177,6 +177,11 @@
AC_DEFINE([HAVE_NEON], [1], [Enable NEON instructions])
;;
+ arm|armv7l)
+
+ AC_MSG_WARN([No compiler options for $host_cpu])
+ ;;
+
arm*)
AX_CHECK_COMPILE_FLAG([-mfpu=neon], [neon=true], [neon=false], [$WERROR])

View file

@ -1,12 +1,13 @@
--- a/src/ccutil/ocrclass.h 2019-07-07 14:34:08.000000000 +0200
+++ b/src/ccutil/ocrclass.h 2019-07-08 10:47:15.347415888 +0200
@@ -31,6 +31,9 @@
#ifdef _WIN32
#include <winsock2.h> // for timeval
#endif
--- a/include/tesseract/ocrclass.h
+++ b/include/tesseract/ocrclass.h
@@ -29,6 +29,10 @@
#include <chrono>
#include <ctime>
+#ifndef __GLIBC__
+#include <sys/time.h>
+#endif
+
namespace tesseract {
/**********************************************************************
* EANYCODE_CHAR

View file

@ -1,14 +1,15 @@
# Template file for 'tesseract-ocr'
pkgname=tesseract-ocr
version=4.1.1
revision=9
_tessdataver=4.0.0
version=5.3.3
revision=1
_tessdataver=4.1.0
create_wrksrc=yes
build_style=gnu-configure
configure_args="LIBLEPT_HEADERSDIR=${XBPS_CROSS_BASE}/usr/include $(vopt_enable openmp)"
make_build_args="all training"
hostmakedepends="automake libtool pkg-config leptonica libxslt asciidoc"
makedepends="cairo-devel pango-devel leptonica-devel $(vopt_if openmp libgomp-devel) icu-devel"
makedepends="cairo-devel pango-devel leptonica-devel $(vopt_if openmp libgomp-devel) icu-devel
libarchive-devel libcurl-devel"
short_desc="Tesseract Open Source OCR engine"
maintainer="Orphaned <orphan@voidlinux.org>"
license="Apache-2.0"
@ -16,13 +17,15 @@ homepage="https://github.com/tesseract-ocr/tesseract"
distfiles="
https://github.com/tesseract-ocr/tesseract/archive/${version}.tar.gz>${pkgname}-${version}.tar.gz
https://github.com/tesseract-ocr/tessdata/archive/${_tessdataver}.tar.gz>tessdata-${_tessdataver}.tar.gz"
checksum="2a66ff0d8595bff8f04032165e6c936389b1e5727c3ce5a27b3e059d218db1cb
38c637d3a1763f6c3d32e8f1d979f045668676ec5feb8ee1869ee77cedd31b08"
checksum="dc4329f85f41191b2d813b71b528ba6047745813474e583ccce8795ff2ff5681
990fffb9b7a9b52dc9a2d053a9ef6852ca2b72bd8dfb22988b0b990a700fd3c7"
build_options="openmp"
build_options_default="openmp"
desc_option_openmp="Enable Open MP (gomp)"
disable_parallel_build=yes # fails to build otherwise
# Create a package for one specific language $1
pkg_lang() {
local f script lang=$1
@ -46,8 +49,8 @@ pkg_lang() {
# Merge the two extracted distfiles (tesseract sources and tessdata) into
# one tree: sources go to the current directory, language data goes to
# ${wrksrc}/tessdata.
post_extract() {
# Hoist the contents of the versioned source directory into the current
# directory (presumably ${wrksrc}, given create_wrksrc=yes -- confirm).
mv tesseract-${version}/* .
# Drop tessconfigs, configs and pdf.ttf from the tessdata archive before
# merging. NOTE(review): presumably to avoid clashing with the copies that
# ship in the tesseract sources -- confirm.
rm -rf tessdata-${_tessdataver}/{tessconfigs,configs,pdf.ttf}
mv tessdata-${_tessdataver}/* ${wrksrc}/tessdata
# rmdir only succeeds on an empty directory, so this also checks that
# everything was moved out above.
rmdir tessdata-${_tessdataver}
}
pre_configure() {
NOCONFIGURE=1 ./autogen.sh
@ -62,7 +65,6 @@ post_install() {
mv ${DESTDIR}/usr/share/man/man1/tesseract{,-ocr}.1
vdoc ChangeLog
vdoc README.md
vlicense ${FILESDIR}/COPYING LICENSE-tessdata
# Move the pseudo languages "equ" (math / equation detection) and
# "osd" (orientation and script detection) to the main package
for lang in equ osd; do
@ -79,13 +81,6 @@ tesseract-ocr-tools_package() {
vmkdir usr/share/tesseract
vmkdir usr/share/man/man1
vmkdir usr/share/man/man5
# Copy shell scripts
for f in language-specific.sh tesstrain.sh tesstrain_utils.sh; do
if [ -e ${wrksrc}/training/${f} ]; then
cp -a ${wrksrc}/training/${f} \
${PKGDESTDIR}/usr/share/tesseract
fi
done
# Move tool manual pages
for f in ambiguous_words cntraining combine_tessdata \
dawg2wordlist mftraining shapeclustering unicharambigs \
@ -99,7 +94,8 @@ tesseract-ocr-tools_package() {
}
}
tesseract-ocr-devel_package() {
depends="${sourcepkg}>=${version}_${revision}"
depends="${sourcepkg}>=${version}_${revision} leptonica-devel
libarchive-devel libcurl-devel"
short_desc+=" - development files"
pkg_install() {
vmove usr/include/tesseract
@ -129,7 +125,7 @@ tesseract-ocr-all_package() {
for lang in afr amh ara asm aze aze_cyrl bel ben bod bos bre bul cat ceb \
ces chi_sim chi_tra chr cos cym dan deu div dzo ell eng enm epo est eus fao \
fas fil fin fra frk frm fry gla gle glg grc guj hat heb hin hrv hun hye iku ind isl ita \
ita_old jav jpn kan kat kat_old kaz khm kir kor kur kur_ara lao lat lav lit ltz mal mar \
ita_old jav jpn kan kat kat_old kaz khm kir kmr kor lao lat lav lit ltz mal mar \
mkd mlt mon mri msa mya nep nld nor oci ori pan pol por que pus ron rus san sin slk slv \
snd spa spa_old sqi srp srp_latn sun swa swe syr tam tat tel tgk tgl tha tir ton tur \
uig ukr urd uzb uzb_cyrl vie yid yor \
@ -576,6 +572,13 @@ tesseract-ocr-kir_package() {
$(pkg_lang ${pkgname#tesseract-ocr-})
}
}
# Subpackage for the Kurmanji (Kurdish - Latin Script) language data.
tesseract-ocr-kmr_package() {
# Require the main package at this version_revision or newer.
depends="${sourcepkg}>=${version}_${revision}"
short_desc+=" - Kurmanji (Kurdish - Latin Script) language data"
pkg_install() {
# Strip the "tesseract-ocr-" prefix from pkgname to get the language
# name (presumably "kmr" here -- confirm), pass it to pkg_lang, and
# execute whatever pkg_lang prints as a command.
$(pkg_lang ${pkgname#tesseract-ocr-})
}
}
tesseract-ocr-kor_package() {
depends="${sourcepkg}>=${version}_${revision}"
short_desc+=" - Korean language data"
@ -583,20 +586,6 @@ tesseract-ocr-kor_package() {
$(pkg_lang ${pkgname#tesseract-ocr-})
}
}
# Subpackage for the Kurdish language data.
# NOTE(review): the surrounding hunk header suggests this definition is
# removed by the commit -- confirm before relying on it.
tesseract-ocr-kur_package() {
# Require the main package at this version_revision or newer.
depends="${sourcepkg}>=${version}_${revision}"
short_desc+=" - Kurdish language data"
pkg_install() {
# Strip the "tesseract-ocr-" prefix from pkgname, pass the remainder to
# pkg_lang, and execute whatever pkg_lang prints as a command.
$(pkg_lang ${pkgname#tesseract-ocr-})
}
}
# Subpackage for the Kurdish (Arabic) language data.
# NOTE(review): the surrounding hunk header suggests this definition is
# removed by the commit -- confirm before relying on it.
tesseract-ocr-kur_ara_package() {
# Require the main package at this version_revision or newer.
depends="${sourcepkg}>=${version}_${revision}"
short_desc+=" - Kurdish (Arabic) language data"
pkg_install() {
# Strip the "tesseract-ocr-" prefix from pkgname, pass the remainder to
# pkg_lang, and execute whatever pkg_lang prints as a command.
$(pkg_lang ${pkgname#tesseract-ocr-})
}
}
tesseract-ocr-lao_package() {
depends="${sourcepkg}>=${version}_${revision}"
short_desc+=" - Lao language data"