From ef1371a93fcd7bfb678e33fec58677f718578748 Mon Sep 17 00:00:00 2001 From: chrysos349 <chrysostom349@gmail.com> Date: Tue, 19 Sep 2023 04:11:27 +0300 Subject: [PATCH] ccextractor: revbump for tesseract-5.3.3 --- srcpkgs/ccextractor/patches/fix-ocr.patch | 106 ++++++++++++++++++++++ srcpkgs/ccextractor/template | 10 +- 2 files changed, 114 insertions(+), 2 deletions(-) create mode 100644 srcpkgs/ccextractor/patches/fix-ocr.patch diff --git a/srcpkgs/ccextractor/patches/fix-ocr.patch b/srcpkgs/ccextractor/patches/fix-ocr.patch new file mode 100644 index 00000000000..2681c60aa41 --- /dev/null +++ b/srcpkgs/ccextractor/patches/fix-ocr.patch @@ -0,0 +1,106 @@ +--- a/src/lib_ccx/hardsubx.c ++++ b/src/lib_ccx/hardsubx.c +@@ -221,7 +221,7 @@ + char *pars_values = strdup("/dev/null"); + char *tessdata_path = NULL; + +- char *lang = options->ocrlang; ++ char *lang = (char *)options->ocrlang; + if (!lang) + lang = "eng"; // English is default language + +@@ -245,7 +245,7 @@ + + int ret = -1; + +- if (!strncmp("4.", TessVersion(), 2)) ++ if (!strncmp("4.", TessVersion(), 2) || !strncmp("5.", TessVersion(), 2)) + { + char tess_path[1024]; + if (ccx_options.ocr_oem < 0) +--- a/src/lib_ccx/ocr.c ++++ b/src/lib_ccx/ocr.c +@@ -97,36 +97,22 @@ + char *probe_tessdata_location(const char *lang) + { + int ret = 0; +- char *tessdata_dir_path = getenv("TESSDATA_PREFIX"); + +- ret = search_language_pack(tessdata_dir_path, lang); +- if (!ret) +- return tessdata_dir_path; +- +- tessdata_dir_path = "./"; +- ret = search_language_pack(tessdata_dir_path, lang); +- if (!ret) +- return tessdata_dir_path; +- +- tessdata_dir_path = "/usr/share/"; +- ret = search_language_pack(tessdata_dir_path, lang); +- if (!ret) +- return tessdata_dir_path; +- +- tessdata_dir_path = "/usr/local/share/"; +- ret = search_language_pack(tessdata_dir_path, lang); +- if (!ret) +- return tessdata_dir_path; +- +- tessdata_dir_path = "/usr/share/tesseract-ocr/"; +- ret = search_language_pack(tessdata_dir_path, lang); +- if (!ret) +- return tessdata_dir_path; +- +- tessdata_dir_path = "/usr/share/tesseract-ocr/4.00/"; +- ret = search_language_pack(tessdata_dir_path, lang); +- if (!ret) +- return tessdata_dir_path; ++ const char *paths[] = { ++ getenv("TESSDATA_PREFIX"), ++ "./", ++ "/usr/share/", ++ "/usr/local/share/", ++ "/usr/share/tesseract-ocr/", ++ "/usr/share/tesseract-ocr/4.00/", ++ "/usr/share/tesseract-ocr/5/", ++ "/usr/share/tesseract/"}; ++ ++ for (int i = 0; i < sizeof(paths) / sizeof(paths[0]); i++) ++ { ++ if (!search_language_pack(paths[i], lang)) ++ return (char *)paths[i]; ++ } + + return NULL; + } +@@ -174,7 +160,7 @@ + char *pars_values = strdup("tess.log"); + + ctx->api = TessBaseAPICreate(); +- if (!strncmp("4.", TessVersion(), 2)) ++ if (!strncmp("4.", TessVersion(), 2) || !strncmp("5.", TessVersion(), 2)) + { + char tess_path[1024]; + snprintf(tess_path, 1024, "%s%s%s", tessdata_path, "/", "tessdata"); +@@ -331,6 +317,11 @@ + } + + BOX *crop_points = ignore_alpha_at_edge(copy->alpha, copy->data, w, h, color_pix, &color_pix_out); ++ ++ l_int32 x, y, _w, _h; ++ ++ boxGetGeometry(crop_points, &x, &y, &_w, &_h); ++ + // Converting image to grayscale for OCR to avoid issues with transparency + cpix_gs = pixConvertRGBToGray(cpix, 0.0, 0.0, 0.0); + +@@ -426,8 +417,8 @@ + { + for (int j = x1; j <= x2; j++) + { +- if (copy->data[(crop_points->y + i) * w + (crop_points->x + j)] != firstpixel) +- histogram[copy->data[(crop_points->y + i) * w + (crop_points->x + j)]]++; ++ if (copy->data[(y + i) * w + (x + j)] != firstpixel) ++ histogram[copy->data[(y + i) * w + (x + j)]]++; + } + } + /* sorted in increasing order of intensity */ diff --git a/srcpkgs/ccextractor/template b/srcpkgs/ccextractor/template index 9abcd82852b..84059ffd023 100644 --- a/srcpkgs/ccextractor/template +++ b/srcpkgs/ccextractor/template @@ -1,7 +1,7 @@ # Template file for 'ccextractor' pkgname=ccextractor version=0.93 -revision=1 +revision=2 build_wrksrc="linux" build_style=gnu-configure configure_args="--enable-ocr --enable-hardsubx" @@ -16,8 +16,14 @@ distfiles="https://github.com/CCExtractor/${pkgname}/archive/v${version}.tar.gz" checksum=0e66d3e360db1b02a88271af11313ca4c9bbda1b03728e264a44c4c9f77192e3 CFLAGS="-I${XBPS_CROSS_BASE}/usr/include/tesseract -DPNG_POWERPC_VSX_OPT=0 -fcommon" +if [ "$CROSS_BUILD" ]; then + hostmakedepends+=" tesseract-ocr-devel" +fi + pre_configure() { - sed -i -e "s/tesseract --version/tesseract-ocr --version/g" configure.ac + vsed -i configure.ac \ + -e "s/tesseract --version/tesseract-ocr --version/g" \ + -e "s/\[lept\]/[leptonica]/" ./autogen.sh }