# 2025.01.14 - [gagneux / up_gagneux] Add fichier.gagneux.info as upload / download host
# 2025.01.14 - [uwabaki] Add uwabaki.party as download host # 2025.01.14 - [fileblade] Additional retries and handling for blocked Tor ips (until alternative) # 2025.01.13 - [ocr_captcha] Create imagemagick OCR function for testing without tesseract # 2025.01.13 - [anonfile, dailyuploads] Update ocr call to use tesseract function # 2025.01.13 - [up_anonfile] Modify to use new upload url # 2025.01.12 - [ateasystems] Update 404 Not found response # 2025.01.11 - [mad] Update direct head response handling # 2025.01.11 - [ranoz] Add 404 Not found handling on head # 2025.01.09 - [ranoz] Add handling of "NEXT_NOT_FOUND" response # 2025.01.09 - [fileblade] Fix cdn url parsing # 2025.01.08 - [up_pixeldrain] Fix success response from pixeldrain # 2025.01.08 - [ramsgaard / up_ramsgaard] Add data.ramsgaard.me as upload / download host # 2025.01.08 - [euromussels / up_euromussels] Add uploads.euromussels.eu as upload / download host # 2025.01.07 - [up_fileland] Add fileland.io as upload host # 2025.01.07 - [up_fireget] Add fireget.com as upload host # 2025.01.06 - [uploadhive] Update the removed / gone response detection # 2025.01.06 - [fileblade] Add "user does not allow free downloads over 100MB" response (and warnings) # 2025.01.06 - [desiupload] Add desiupload as download host # 2025.01.05 - [isupload] Fix filename detection
This commit is contained in:
parent
30eedaf567
commit
eeb8054960
29 changed files with 1951 additions and 634 deletions
|
|
@ -4,4 +4,6 @@ How to setup tesseract-ocr traineddata:
|
|||
https://github.com/tesseract-ocr/tessdata_best/raw/main/eng.traineddata
|
||||
(SHA256: 8280AED0782FE27257A68EA10FE7EF324CA0F8D85BD2FD145D1C2B560BCB66BA)
|
||||
|
||||
* And then extracted to ./plugins/ocr/tessdata/ folder (15,400,601 bytes)
|
||||
* And then extracted to ./plugins/ocr/tessdata/ folder (15,400,601 bytes)
|
||||
|
||||
!! Rename "eng.traineddata" to "eng_best.traineddata"
|
||||
69
plugins/ocr_captcha.sh
Executable file → Normal file
69
plugins/ocr_captcha.sh
Executable file → Normal file
|
|
@ -1,6 +1,6 @@
|
|||
#! Name: ocr_captcha.sh
|
||||
#! Author: kittykat
|
||||
#! Version: 2024.10.13
|
||||
#! Version: 2025.01.14
|
||||
#! Desc: Script to extract captcha from image using tesseract-ocr and imagemagick
|
||||
#! Usage: Edit LoadPlugin="" line in mad.sh or mad.config
|
||||
#! LoadPlugin="ocr_captcha.sh"
|
||||
|
|
@ -31,14 +31,71 @@
|
|||
#! * PostSuccessfulUpload(): occurs after an upload success (after upload completed ticket is created in ./downloads/).
|
||||
#! * PostFailedUpload(): occurs after an upload fails definitively -- #FAIL# in the temp_upload_handler.txt
|
||||
#! * PostFailRetryUpload(): occurs after an upload fails with a retry (network drop, unexpected result)
|
||||
#! * DoneProcessingAllUploads: occurs after alll the files have finished processing
|
||||
#! * DoneProcessingAllUploads: occurs after all the files have finished processing
|
||||
#!
|
||||
#!
|
||||
#! CaptchaOcrImage: Uses imagemagick only to alter 4 digit horizontal captchas (WIP)
|
||||
CaptchaOcrImage() {
  # Reads a 4-position captcha using imagemagick's `compare` only (WIP).
  #
  # Arguments:
  #   $1 - path to the captcha image file
  #   $2 - data type hint, matched case-insensitively as a substring:
  #        "NUMBERONLY", "ALPHAONLY", anything else = alphanumeric pattern
  #   $3 - extra imagemagick parameters (stored, not used by this function)
  # Globals read:
  #   DebugPluginsEnabled, PINK, BLUE, NC, WorkDir
  # Globals written (left global, as the sibling CaptchaOcrImageTesseract
  # assigns the same names without `local`):
  #   DEPENDENCIES, DEPENDENCY, captcha_image_filepath, data_type,
  #   imagemagick_extra_params, captcha
  # Outputs:
  #   "[CAPTCHA_CODE:<code>]" on stdout; the code is also written to
  #   "$WorkDir/.temp/ocr_final.txt"
  # Returns:
  #   1 when imagemagick or the image file is missing, otherwise the status
  #   of the final echo
  local plugName='ocr_captcha'
  local plugFunc='CaptchaOcrImage'
  if [ "${DebugPluginsEnabled}" == "true" ]; then
    echo -e "[${PINK}DEBUG${NC}]: Running ${PINK}$plugFunc${NC} in ${BLUE}$plugName${NC} ...${NC}"
  fi

  # `compare` is the tool actually invoked below, so it is checked alongside
  # `convert`. `command -v` is a builtin and does not depend on `which`.
  DEPENDENCIES=(convert compare)
  for DEPENDENCY in "${DEPENDENCIES[@]}"; do
    if ! command -v "$DEPENDENCY" > /dev/null 2>&1; then
      case "$DEPENDENCY" in
        convert | compare)
          echo "imagemagick not installed. Aborting"
          ;;
        *)
          echo "$DEPENDENCY not installed. Aborting"
          ;;
      esac
      return 1
    fi
  done

  captcha_image_filepath="$1"
  data_type="$2"
  imagemagick_extra_params="$3"
  if [ ! -f "$captcha_image_filepath" ]; then
    echo -e "Image not found."
    return 1
  fi

  # The path is quoted so that file names containing whitespace reach base64
  # as a single argument.
  local encoded_image
  encoded_image=$(base64 -w 0 "$captcha_image_filepath")
  # NOTE(review): the double quotes are part of the value and are handed to
  # `compare` literally -- confirm this is intended for the WIP reference image.
  local digitschars='"data:image/webp;base64,'"${encoded_image}"'"'

  # Only the capture pattern differs between the three data types.
  # NOTE(review): the ALPHAONLY pattern captures letters, yet the capture is
  # used in integer arithmetic below -- looks unfinished, confirm intent.
  local offset_pattern
  if grep -Eqi "NUMBERONLY" <<< "$data_type"; then
    offset_pattern='@ ([0-9]+)'
  elif grep -Eqi "ALPHAONLY" <<< "$data_type"; then
    offset_pattern='@ ([a-zA-Z]+)'
  else
    offset_pattern='@ ([0-9a-zA-Z]+)'
  fi

  local i e r
  for i in {0..3}; do
    # Crop an 8x10 cell (cells are 9px apart, starting at x=22, y=8) and
    # search for it inside the reference image; stderr is captured because
    # the text following "@ " is parsed from it.
    e=$(compare -metric NCC -subimage-search "$digitschars" \( "$1" -crop 8x10+$((22 + 9 * i))+8 \) null: 2>&1)
    # Presumably the captured number is the x offset of the best match;
    # every 8 units map to the next character value, starting at 1.
    [[ $e =~ $offset_pattern ]] && r+=$((1 + BASH_REMATCH[1] / 8))
  done

  echo "$r" > "$WorkDir/.temp/ocr_final.txt"
  captcha="$r"
  echo -e "[CAPTCHA_CODE:${captcha}]"
}
|
||||
#!
|
||||
#! CaptchaOcrImageTesseract: Uses imagemagick to alter, and Tesseract OCR to process captchas
|
||||
CaptchaOcrImageTesseract() {
|
||||
local plugName='ocr_captcha'
|
||||
local plugFunc='CaptchaOcrImageTesseract'
|
||||
if [ "${DebugPluginsEnabled}" == "true" ]; then
|
||||
echo -e "[${PINK}DEBUG${NC}]: Running ${PINK}$plugFunc${NC} in ${BLUE}$plugName${NC} ...${NC}"
|
||||
fi
|
||||
DEPENDENCIES=(tesseract convert)
|
||||
for DEPENDENCY in ${DEPENDENCIES[@]} ; do
|
||||
if [ -z $(which $DEPENDENCY) ] ; then
|
||||
|
|
@ -53,7 +110,7 @@ CaptchaOcrImage() {
|
|||
TESSERACT_CMD=$(which tesseract)
|
||||
export TESSDATA_PREFIX="${ScriptDir}/plugins/ocr/tessdata"
|
||||
captcha_image_filepath="$1"
|
||||
tessdata_type="$2"
|
||||
data_type="$2"
|
||||
imagemagick_extra_params="$3"
|
||||
local captcha_image_filename="${captcha_image_filepath##*/}"
|
||||
if [ ! -f "$captcha_image_filepath" ]; then
|
||||
|
|
@ -110,13 +167,13 @@ CaptchaOcrImage() {
|
|||
elif grep -Eqi "Brightness_160" <<< "$imagemagick_extra_params" ; then
|
||||
convert "$IMGtemp" -modulate 160 "$IMGtemp"
|
||||
fi
|
||||
if grep -Eqi "NUMBERONLY" <<< "$tessdata_type" ; then
|
||||
if grep -Eqi "NUMBERONLY" <<< "$data_type" ; then
|
||||
captcha=$($TESSERACT_CMD --psm 8 --oem 1 -l eng_best --dpi 70 -c tessedit_char_whitelist=0123456789 "$IMGtemp" stdout | tr -d " " | xargs)
|
||||
captcha=${captcha//[!0-9]/}
|
||||
elif grep -Eqi "ALPHAONLY" <<< "$tessdata_type" ; then
|
||||
elif grep -Eqi "ALPHAONLY" <<< "$data_type" ; then
|
||||
captcha=$($TESSERACT_CMD --psm 8 --oem 1 -l eng_best --dpi 70 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ "$IMGtemp" stdout | tr -d " " | xargs)
|
||||
captcha=${captcha//[!a-zA-Z]/}
|
||||
elif grep -Eqi "ALPHANUMERIC" <<< "$tessdata_type" ; then
|
||||
elif grep -Eqi "ALPHANUMERIC" <<< "$data_type" ; then
|
||||
captcha=$($TESSERACT_CMD --psm 8 --oem 1 -l eng_best --dpi 70 -c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ "$IMGtemp" stdout | tr -d " " | xargs)
|
||||
captcha=${captcha//[!0-9a-zA-Z]/}
|
||||
else
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue