# 2025.01.14 - [gagneux / up_gagneux] Add fichier.gagneux.info as upload / download host

# 2025.01.14 - [uwabaki] Add uwabaki.party as download host
# 2025.01.14 - [fileblade] Additional retries and handling for blocked Tor IPs (until an alternative is found)
# 2025.01.13 - [ocr_captcha] Create imagemagick OCR function for testing without tesseract
# 2025.01.13 - [anonfile, dailyuploads] Update ocr call to use tesseract function
# 2025.01.13 - [up_anonfile] Modify to use new upload url
# 2025.01.12 - [ateasystems] Update 404 Not found response
# 2025.01.11 - [mad] Update direct head response handling
# 2025.01.11 - [ranoz] Add 404 Not found handling on head
# 2025.01.09 - [ranoz] Add handling of "NEXT_NOT_FOUND" response
# 2025.01.09 - [fileblade] Fix cdn url parsing
# 2025.01.08 - [up_pixeldrain] Fix success response from pixeldrain
# 2025.01.08 - [ramsgaard / up_ramsgaard] Add data.ramsgaard.me as upload / download host
# 2025.01.08 - [euromussels / up_euromussels] Add uploads.euromussels.eu as upload / download host
# 2025.01.07 - [up_fileland] Add fileland.io as upload host
# 2025.01.07 - [up_fireget] Add fireget.com as upload host
# 2025.01.06 - [uploadhive] Update the removed / gone response detection
# 2025.01.06 - [fileblade] Add "user does not allow free downloads over 100MB" response (and warnings)
# 2025.01.06 - [desiupload] Add desiupload as download host
# 2025.01.05 - [isupload] Fix filename detection
This commit is contained in:
kittykat 2025-01-16 07:54:05 +00:00
parent 30eedaf567
commit eeb8054960
Signed by: kittykat
GPG key ID: E3F1556620F70C3C
29 changed files with 1951 additions and 634 deletions

View file

@ -4,4 +4,6 @@ How to setup tesseract-ocr traineddata:
https://github.com/tesseract-ocr/tessdata_best/raw/main/eng.traineddata
(SHA256: 8280AED0782FE27257A68EA10FE7EF324CA0F8D85BD2FD145D1C2B560BCB66BA)
* And then extracted to ./plugins/ocr/tessdata/ folder (15,400,601 bytes)
* And then extracted to ./plugins/ocr/tessdata/ folder (15,400,601 bytes)
!! Rename "eng.traineddata" to "eng_best.traineddata"

69
plugins/ocr_captcha.sh Executable file → Normal file
View file

@ -1,6 +1,6 @@
#! Name: ocr_captcha.sh
#! Author: kittykat
#! Version: 2024.10.13
#! Version: 2025.01.14
#! Desc: Script to extract captcha from image using tesseract-ocr and imagemagick
#! Usage: Edit LoadPlugin="" line in mad.sh or mad.config
#! LoadPlugin="ocr_captcha.sh"
@ -31,14 +31,71 @@
#! * PostSuccessfulUpload(): occurs after an upload success (after upload completed ticket is created in ./downloads/).
#! * PostFailedUpload(): occurs after an upload fails definitively -- #FAIL# in the temp_upload_handler.txt
#! * PostFailRetryUpload(): occurs after an upload fails with a retry (network drop, unexpected result)
#! * DoneProcessingAllUploads: occurs after alll the files have finished processing
#! * DoneProcessingAllUploads: occurs after all the files have finished processing
#!
#!
#! CaptchaOcrImage: Uses imagemagick only to alter 4 digit horizontal captchas (WIP)
CaptchaOcrImage() {
  # Reads a 4-position horizontal captcha using ImageMagick's `compare` only.
  #
  # Arguments:
  #   $1 - path to the captcha image file
  #   $2 - data type hint; matched case-insensitively for "NUMBERONLY" or
  #        "ALPHAONLY", anything else takes the alphanumeric branch
  #   $3 - extra imagemagick parameters (stored, not used by this function)
  # Globals read:    DebugPluginsEnabled, PINK, BLUE, NC, WorkDir
  # Globals written: captcha_image_filepath, data_type,
  #                  imagemagick_extra_params, captcha
  # Outputs: "[CAPTCHA_CODE:<code>]" on stdout; the code is also written to
  #          "$WorkDir/.temp/ocr_final.txt"
  # Returns: 1 when imagemagick is missing or the image file does not exist
  local plugName='ocr_captcha'
  local plugFunc='CaptchaOcrImage'
  if [ "${DebugPluginsEnabled}" == "true" ]; then
    echo -e "[${PINK}DEBUG${NC}]: Running ${PINK}$plugFunc${NC} in ${BLUE}$plugName${NC} ...${NC}"
  fi

  # `command -v` is the shell builtin for locating a command; it avoids
  # depending on an external `which` being installed.
  local dependency
  local -a dependencies=(convert)
  for dependency in "${dependencies[@]}"; do
    if ! command -v "$dependency" > /dev/null 2>&1; then
      if [ "$dependency" == "convert" ]; then
        echo "imagemagick not installed. Aborting"
      else
        echo "$dependency not installed. Aborting"
      fi
      return 1
    fi
  done

  # Left as globals on purpose: other code may read them after the call.
  captcha_image_filepath="$1"
  data_type="$2"
  imagemagick_extra_params="$3"

  if [ ! -f "$captcha_image_filepath" ]; then
    echo -e "Image not found."
    return 1
  fi

  # Inline data: URI handed to `compare` as the image to search in. It must
  # not carry literal quote characters, so the quoting is done by the shell.
  # NOTE(review): the URI is built from the captcha file itself and the mime
  # type is hard-coded to webp -- confirm this is the intended reference
  # image (the function is marked WIP).
  local digitschars
  digitschars="data:image/webp;base64,$(base64 -w 0 "$captcha_image_filepath")"

  # Pattern used to pull the token that follows "@ " out of compare's output.
  # NOTE(review): the ALPHAONLY and default patterns accept letters, but the
  # captured token is fed to integer arithmetic below -- verify what these
  # two branches are meant to produce.
  local offset_re
  if grep -Eqi "NUMBERONLY" <<< "$data_type"; then
    offset_re='@ ([0-9]+)'
  elif grep -Eqi "ALPHAONLY" <<< "$data_type"; then
    offset_re='@ ([a-zA-Z]+)'
  else
    offset_re='@ ([0-9a-zA-Z]+)'
  fi

  local i e r
  for i in {0..3}; do
    # Crop one 8x10 cell per position (x = 22 + 9*i, y = 8) and search for it;
    # stderr is merged in so compare's report is captured in $e.
    e=$(compare -metric NCC -subimage-search "$digitschars" \
      \( "$captcha_image_filepath" -crop "8x10+$((22 + 9 * i))+8" \) null: 2>&1)
    [[ $e =~ $offset_re ]] && r+=$((1+BASH_REMATCH[1]/8))
  done

  echo "$r" > "$WorkDir/.temp/ocr_final.txt"
  captcha="$r"
  echo -e "[CAPTCHA_CODE:${captcha}]"
}
#!
#! CaptchaOcrImageTesseract: Uses imagemagick to alter, and Tesseract OCR to process captchas
CaptchaOcrImageTesseract() {
local plugName='ocr_captcha'
local plugFunc='CaptchaOcrImageTesseract'
if [ "${DebugPluginsEnabled}" == "true" ]; then
echo -e "[${PINK}DEBUG${NC}]: Running ${PINK}$plugFunc${NC} in ${BLUE}$plugName${NC} ...${NC}"
fi
DEPENDENCIES=(tesseract convert)
for DEPENDENCY in ${DEPENDENCIES[@]} ; do
if [ -z $(which $DEPENDENCY) ] ; then
@ -53,7 +110,7 @@ CaptchaOcrImage() {
TESSERACT_CMD=$(which tesseract)
export TESSDATA_PREFIX="${ScriptDir}/plugins/ocr/tessdata"
captcha_image_filepath="$1"
tessdata_type="$2"
data_type="$2"
imagemagick_extra_params="$3"
local captcha_image_filename="${captcha_image_filepath##*/}"
if [ ! -f "$captcha_image_filepath" ]; then
@ -110,13 +167,13 @@ CaptchaOcrImage() {
elif grep -Eqi "Brightness_160" <<< "$imagemagick_extra_params" ; then
convert "$IMGtemp" -modulate 160 "$IMGtemp"
fi
if grep -Eqi "NUMBERONLY" <<< "$tessdata_type" ; then
if grep -Eqi "NUMBERONLY" <<< "$data_type" ; then
captcha=$($TESSERACT_CMD --psm 8 --oem 1 -l eng_best --dpi 70 -c tessedit_char_whitelist=0123456789 "$IMGtemp" stdout | tr -d " " | xargs)
captcha=${captcha//[!0-9]/}
elif grep -Eqi "ALPHAONLY" <<< "$tessdata_type" ; then
elif grep -Eqi "ALPHAONLY" <<< "$data_type" ; then
captcha=$($TESSERACT_CMD --psm 8 --oem 1 -l eng_best --dpi 70 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ "$IMGtemp" stdout | tr -d " " | xargs)
captcha=${captcha//[!a-zA-Z]/}
elif grep -Eqi "ALPHANUMERIC" <<< "$tessdata_type" ; then
elif grep -Eqi "ALPHANUMERIC" <<< "$data_type" ; then
captcha=$($TESSERACT_CMD --psm 8 --oem 1 -l eng_best --dpi 70 -c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ "$IMGtemp" stdout | tr -d " " | xargs)
captcha=${captcha//[!0-9a-zA-Z]/}
else