mirror of
https://github.com/void-linux/void-packages.git
synced 2025-04-16 06:07:00 +02:00
ccextractor: revbump for tesseract-5.3.3
This commit is contained in:
parent
22791d7127
commit
ef1371a93f
2 changed files with 114 additions and 2 deletions
106
srcpkgs/ccextractor/patches/fix-ocr.patch
Normal file
106
srcpkgs/ccextractor/patches/fix-ocr.patch
Normal file
|
@@ -0,0 +1,106 @@
|
|||
--- a/src/lib_ccx/hardsubx.c
|
||||
+++ b/src/lib_ccx/hardsubx.c
|
||||
@@ -221,7 +221,7 @@
|
||||
char *pars_values = strdup("/dev/null");
|
||||
char *tessdata_path = NULL;
|
||||
|
||||
- char *lang = options->ocrlang;
|
||||
+ char *lang = (char *)options->ocrlang;
|
||||
if (!lang)
|
||||
lang = "eng"; // English is default language
|
||||
|
||||
@@ -245,7 +245,7 @@
|
||||
|
||||
int ret = -1;
|
||||
|
||||
- if (!strncmp("4.", TessVersion(), 2))
|
||||
+ if (!strncmp("4.", TessVersion(), 2) || !strncmp("5.", TessVersion(), 2))
|
||||
{
|
||||
char tess_path[1024];
|
||||
if (ccx_options.ocr_oem < 0)
|
||||
--- a/src/lib_ccx/ocr.c
|
||||
+++ b/src/lib_ccx/ocr.c
|
||||
@@ -97,36 +97,22 @@
|
||||
char *probe_tessdata_location(const char *lang)
|
||||
{
|
||||
int ret = 0;
|
||||
- char *tessdata_dir_path = getenv("TESSDATA_PREFIX");
|
||||
|
||||
- ret = search_language_pack(tessdata_dir_path, lang);
|
||||
- if (!ret)
|
||||
- return tessdata_dir_path;
|
||||
-
|
||||
- tessdata_dir_path = "./";
|
||||
- ret = search_language_pack(tessdata_dir_path, lang);
|
||||
- if (!ret)
|
||||
- return tessdata_dir_path;
|
||||
-
|
||||
- tessdata_dir_path = "/usr/share/";
|
||||
- ret = search_language_pack(tessdata_dir_path, lang);
|
||||
- if (!ret)
|
||||
- return tessdata_dir_path;
|
||||
-
|
||||
- tessdata_dir_path = "/usr/local/share/";
|
||||
- ret = search_language_pack(tessdata_dir_path, lang);
|
||||
- if (!ret)
|
||||
- return tessdata_dir_path;
|
||||
-
|
||||
- tessdata_dir_path = "/usr/share/tesseract-ocr/";
|
||||
- ret = search_language_pack(tessdata_dir_path, lang);
|
||||
- if (!ret)
|
||||
- return tessdata_dir_path;
|
||||
-
|
||||
- tessdata_dir_path = "/usr/share/tesseract-ocr/4.00/";
|
||||
- ret = search_language_pack(tessdata_dir_path, lang);
|
||||
- if (!ret)
|
||||
- return tessdata_dir_path;
|
||||
+ const char *paths[] = {
|
||||
+ getenv("TESSDATA_PREFIX"),
|
||||
+ "./",
|
||||
+ "/usr/share/",
|
||||
+ "/usr/local/share/",
|
||||
+ "/usr/share/tesseract-ocr/",
|
||||
+ "/usr/share/tesseract-ocr/4.00/",
|
||||
+ "/usr/share/tesseract-ocr/5/",
|
||||
+ "/usr/share/tesseract/"};
|
||||
+
|
||||
+ for (int i = 0; i < sizeof(paths) / sizeof(paths[0]); i++)
|
||||
+ {
|
||||
+ if (!search_language_pack(paths[i], lang))
|
||||
+ return (char *)paths[i];
|
||||
+ }
|
||||
|
||||
return NULL;
|
||||
}
|
||||
@@ -174,7 +160,7 @@
|
||||
char *pars_values = strdup("tess.log");
|
||||
|
||||
ctx->api = TessBaseAPICreate();
|
||||
- if (!strncmp("4.", TessVersion(), 2))
|
||||
+ if (!strncmp("4.", TessVersion(), 2) || !strncmp("5.", TessVersion(), 2))
|
||||
{
|
||||
char tess_path[1024];
|
||||
snprintf(tess_path, 1024, "%s%s%s", tessdata_path, "/", "tessdata");
|
||||
@@ -331,6 +317,11 @@
|
||||
}
|
||||
|
||||
BOX *crop_points = ignore_alpha_at_edge(copy->alpha, copy->data, w, h, color_pix, &color_pix_out);
|
||||
+
|
||||
+ l_int32 x, y, _w, _h;
|
||||
+
|
||||
+ boxGetGeometry(crop_points, &x, &y, &_w, &_h);
|
||||
+
|
||||
// Converting image to grayscale for OCR to avoid issues with transparency
|
||||
cpix_gs = pixConvertRGBToGray(cpix, 0.0, 0.0, 0.0);
|
||||
|
||||
@@ -426,8 +417,8 @@
|
||||
{
|
||||
for (int j = x1; j <= x2; j++)
|
||||
{
|
||||
- if (copy->data[(crop_points->y + i) * w + (crop_points->x + j)] != firstpixel)
|
||||
- histogram[copy->data[(crop_points->y + i) * w + (crop_points->x + j)]]++;
|
||||
+ if (copy->data[(y + i) * w + (x + j)] != firstpixel)
|
||||
+ histogram[copy->data[(y + i) * w + (x + j)]]++;
|
||||
}
|
||||
}
|
||||
/* sorted in increasing order of intensity */
|
|
@@ -1,7 +1,7 @@
|
|||
# Template file for 'ccextractor'
|
||||
pkgname=ccextractor
|
||||
version=0.93
|
||||
-revision=1
|
||||
+revision=2
|
||||
build_wrksrc="linux"
|
||||
build_style=gnu-configure
|
||||
configure_args="--enable-ocr --enable-hardsubx"
|
||||
|
@@ -16,8 +16,14 @@ distfiles="https://github.com/CCExtractor/${pkgname}/archive/v${version}.tar.gz"
|
|||
checksum=0e66d3e360db1b02a88271af11313ca4c9bbda1b03728e264a44c4c9f77192e3
|
||||
CFLAGS="-I${XBPS_CROSS_BASE}/usr/include/tesseract -DPNG_POWERPC_VSX_OPT=0 -fcommon"
|
||||
|
||||
+if [ "$CROSS_BUILD" ]; then
|
||||
+hostmakedepends+=" tesseract-ocr-devel"
|
||||
+fi
|
||||
|
||||
pre_configure() {
|
||||
-sed -i -e "s/tesseract --version/tesseract-ocr --version/g" configure.ac
|
||||
+vsed -i configure.ac \
|
||||
+-e "s/tesseract --version/tesseract-ocr --version/g" \
|
||||
+-e "s/\[lept\]/[leptonica]/"
|
||||
./autogen.sh
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue