mad/hosts/ateasystems.sh
kittykat 83d17967d6
# 2025.02.25 - [mad + allhosts] Re-engineer BadHtml scan to only scan the first 10kb of downloaded partials
# 2025.02.24 - [pixeldrain] Update "The file is IP limited" response handling retry
# 2025.02.22 - [blackcloud_onion] Add bcloud.onion download handling (url fixing)
# 2025.02.21 - [anonfile] Update cdn link parsing to handle new subdomains
# 2025.02.21 - [anonfile] Add download limit reached response handling
# 2025.02.21 - [anonfile] Update file info retrieval (head no longer responds)
# 2025.02.21 - [sendspace] Add sendspace.com as download host
# 2025.02.21 - [oshi / up_oshi] Revert /nossl/ changes for oshi.at (clearnet)
2025-02-26 12:00:57 +00:00

265 lines
12 KiB
Bash

#! Name: ateasystems.sh
#! Author: kittykat
#! Version: 2025.01.11
#! Desc: Add support for downloading and processing of urls for a new host
#! Usage: Copy this file into the ./${ScriptDir}/hosts/ folder
#!
#!
#! ------------ REQUIRED SECTION ---------------
#! @[UPDATE] HostAndDomainRegexes: This string is loaded into mad.sh and allows dynamic handling of new url data
#! Format: '/HostCode/HostNick/HostFuncPrefix/HostUrls:HostDomainRegex@'
#! HostCode: <aUniqueCodeForHost> (ie. 'fh' for filehaus -- cannot be used by other hosts)
#! HostNick: What is displayed throughout MAD output (ie. 'filehaus' -- "urls.txt has 10 filehaus.." will be displayed)
#! HostFuncPrefix: <aUniqueStringThatMustPrefixHostFunctions> (ie. 'fh' -- fh_DownloadFile(), fh_FetchFileInfo() .. )
#! * Note: Must begin with a letter a-z (functions beginning with numbers are no bueno)
#! HostUrls: The host's domain written as plain text (ie. 'share.ateasystems.com')
#! HostDomainRegex: The regex used to verify matching urls
HostCode="atea"
HostNick="atea"
HostFuncPrefix="atea"
HostUrls="share.ateasystems.com"
HostDomainRegex="^(http|https)://(.*\.)?share\.ateasystems\.com/share/"
#!
#! !! DO NOT UPDATE OR REMOVE !!
#! Appends this host's record to the list that is merged into mad.sh.
#! Record layout: /HostCode/HostNick/HostFuncPrefix/HostUrls:HostDomainRegex@
ListHostAndDomainRegexes="${ListHostAndDomainRegexes}/${HostCode}/${HostNick}/${HostFuncPrefix}/${HostUrls}:${HostDomainRegex}@"
#!
#!
#! ------------ (1) Host Main Download Function --------------- #
#!
#! @REQUIRED: Host Main Download function
#! Must be named specifically as such:
#! <HostFuncPrefix>_DownloadFile()
#! Runs the fetch-info + download cycle for one url, retrying on failure.
#!   Arguments: $1 - remote url
#!              $2 - file counter (forwarded to atea_GetFile)
#!   Globals read:    WorkDir, MaxUrlRetries, DebugAllEnabled, YELLOW, NC
#!   Globals written: warnAndRetryUnknownError, exitDownloadError,
#!                    exitDownloadNotAvailable, fileAlreadyDone,
#!                    download_inflight_path, completed_location,
#!                    tor_identity, finalAttempt, CLEANSTRING, z
#!   Returns: 0 as soon as atea_FetchFileInfo and atea_GetFile both succeed.
#!            Otherwise the loop ends (retries exhausted, file already done, or
#!            unrecoverable error) and the status is that of the final lock
#!            file removal.
atea_DownloadFile() {
  local remote_url=${1}
  # Not referenced in this file; kept because callees run in this dynamic scope.
  local file_url=${1}
  local filecnt=${2}
  local lock_path_q
  warnAndRetryUnknownError=false
  exitDownloadError=false
  exitDownloadNotAvailable=false
  fileAlreadyDone=false
  download_inflight_path="${WorkDir}/.inflight/"
  mkdir -p "$download_inflight_path"
  completed_location="${WorkDir}/downloads/"
  tor_identity="${RANDOM}"
  finalAttempt="false"
  # Attempts run from z=0 to z=MaxUrlRetries inclusive; the last one is flagged
  # so the callees can record a permanent failure.
  for ((z=0; z<=$MaxUrlRetries; z++)); do
    if [[ $z -eq $MaxUrlRetries ]] ; then
      finalAttempt="true"
    fi
    CLEANSTRING=${remote_url//[^a-zA-Z0-9]/}
    # The trap body is expanded now and re-parsed when it fires, so the lock
    # path is embedded shell-escaped; otherwise whitespace in WorkDir would
    # split it and the lock file would never be removed.
    printf -v lock_path_q '%q' "${WorkDir}/.flocks/${CLEANSTRING}"
    trap "rm -f ${lock_path_q}; echo \"\"; tput cnorm; exit" 0 1 2 3 6 15
    if atea_FetchFileInfo $finalAttempt && atea_GetFile "${filecnt}" $((z+1)) $finalAttempt ; then
      return 0
    elif [[ $z -lt $MaxUrlRetries ]]; then
      if [[ "${fileAlreadyDone}" == "true" ]] ; then
        break
      fi
      if [[ "${warnAndRetryUnknownError}" == "true" ]] ; then
        if [[ "${DebugAllEnabled}" == "true" ]] ; then
          debugHtml "${remote_url##*/}" "error" "Retry due to an unknown issue: attempt #$((z+1)) of ${MaxUrlRetries}"
        fi
      fi
      if [[ "${exitDownloadError}" == "true" || "${exitDownloadNotAvailable}" == "true" ]] ; then
        if [[ "${DebugAllEnabled}" == "true" ]] ; then
          debugHtml "${remote_url##*/}" "error" "Exit due to unrecoverable issue"
        fi
        rm -f "${WorkDir}/.flocks/${remote_url//[^a-zA-Z0-9]/}"
        break
      fi
      echo -e "\n${YELLOW}A recoverable error occurred, retry attempt $((z+1))/${MaxUrlRetries}${NC}"
      sleep 3
    fi
  done
  rm -f "${WorkDir}/.flocks/${remote_url//[^a-zA-Z0-9]/}"
}
#!
#! ------------- (2) Fetch File Info Function ----------------- #
#!
#! Fetches the share page, solves its positional captcha, and prepares the POST
#! form data used by atea_GetFile.
#!   Arguments: $1 - "true" when this is the final attempt (failures are then
#!                   reported through failedRetryDownload)
#!   Globals read:    remote_url, WorkDir, DebugAllEnabled, filename_override,
#!                    download_inflight_path, MoveToFolder, filecnt,
#!                    completed_location, colour variables
#!   Globals written: finalAttempt, maxfetchretries, i, tor_identity,
#!                    CLEANSTRING, response, post_* fields, captcha_code,
#!                    form_data, filename, file_path, flockDownload,
#!                    warnAndRetryUnknownError, exitDownloadError
#!   Returns: 0 when the form data is ready and the download lock was written;
#!            1 when the page could not be parsed after maxfetchretries tries,
#!            the file is reported missing, or CheckDownloadExists succeeds.
atea_FetchFileInfo() {
  finalAttempt=$1
  maxfetchretries=6
  local lock_path_q
  echo -e "${GREEN}# Fetching post info…${NC}"
  for ((i=1; i<=$maxfetchretries; i++)); do
    mkdir -p "${WorkDir}/.temp"
    printf " ."
    tor_identity="${RANDOM}"
    CLEANSTRING=${remote_url//[^a-zA-Z0-9]/}
    # The trap body is re-parsed when it fires, so embed the lock path
    # shell-escaped; an unescaped path breaks when WorkDir contains whitespace.
    printf -v lock_path_q '%q' "${WorkDir}/.flocks/${CLEANSTRING}"
    trap "rm -f ${lock_path_q}; echo \"\"; tput cnorm; exit" 0 1 2 3 6 15
    response=$(tor_curl_request --insecure -L -s "$remote_url")
    if [[ "${DebugAllEnabled}" == "true" ]] ; then
      debugHtml "${remote_url##*/}" "atea_fetch$i" "${response}"
    fi
    # Empty response: retry, and give up on the last fetch attempt.
    if [[ -z $response ]] ; then
      if [[ $i == $maxfetchretries ]] ; then
        printf "\\n"
        echo -e "${RED}| Failed to extract download link [1]${NC}"
        warnAndRetryUnknownError=true
        if [[ "${finalAttempt}" == "true" ]] ; then
          failedRetryDownload "${remote_url}" "Failed to extract download link [1]" ""
        fi
        return 1
      else
        continue
      fi
    fi
    # Page says the file is gone: unrecoverable, no retry.
    if grep -Eqi 'File Not Found|404 Not Found|was not found on this server|No such file with this filename|File was deleted' <<< "$response"; then
      printf "\\n"
      echo -e "${RED}| The file was not found. It could be deleted or expired.${NC}"
      exitDownloadError=true
      removedDownload "${remote_url}"
      return 1
    fi
    # NOTE(review): post_sc and captcha_code are only assigned inside this
    # branch; when a response has no captcha they keep whatever value an
    # earlier iteration or call left behind -- confirm that is intended.
    if grep -Eqi 'name="code" class="captcha_code"' <<< "$response"; then
      printf "\\n"
      echo -e "${GREEN}| Captcha found.${NC}"
      post_sc=$(grep -oP '(?<=input type="hidden" name="sc" value=").*(?=">.*$)' <<< "$response")
      codeline=$(grep -oP -m 1 '(?<=<tr><td><div style='"'"'width:80px;height:26px;).*(?=</div></td>)' <<< "$response")
      # Each captcha character sits in an absolutely positioned span; they are
      # picked in order by their padding-left bucket: 0-9px (falling back to
      # 10-19px), then 20-29px, 40-49px and 60-69px.
      pval1=$(grep -oP -m 1 '<span style='"'"'position:absolute;padding-left:[0-9]px;padding-top:[0-9]+px;'"'"'>\K.*?(?=</span>)' <<< "$codeline" )
      if ((pval1 <= 0)); then
        pval1=$(grep -oP -m 1 '<span style='"'"'position:absolute;padding-left:1[0-9]px;padding-top:[0-9]+px;'"'"'>\K.*?(?=</span>)' <<< "$codeline" )
      fi
      pval2=$(grep -oP -m 1 '<span style='"'"'position:absolute;padding-left:2[0-9]px;padding-top:[0-9]+px;'"'"'>\K.*?(?=</span>)' <<< "$codeline" )
      pval3=$(grep -oP -m 1 '<span style='"'"'position:absolute;padding-left:4[0-9]px;padding-top:[0-9]+px;'"'"'>\K.*?(?=</span>)' <<< "$codeline" )
      pval4=$(grep -oP -m 1 '<span style='"'"'position:absolute;padding-left:6[0-9]px;padding-top:[0-9]+px;'"'"'>\K.*?(?=</span>)' <<< "$codeline" )
      # Arithmetic expansion turns an empty capture into 0.
      val1=$((pval1-0)); val2=$((pval2-0)); val3=$((pval3-0)); val4=$((pval4-0))
      captcha_code="${val1}${val2}${val3}${val4}"
      # A '-' in the code means one of the values came out negative.
      if [[ "${captcha_code}" == *-* ]]; then
        if [[ $i == $maxfetchretries ]] ; then
          # NOTE(review): atea_cookie_jar is never assigned in this file.
          rm -f "${atea_cookie_jar}";
          printf "\\n"
          echo -e "${RED}| Bad captcha code [2]${NC}"
          warnAndRetryUnknownError=true
          if [[ "${finalAttempt}" == "true" ]] ; then
            failedRetryDownload "${remote_url}" "Bad captcha code [2]" ""
          fi
          return 1
        else
          tor_identity="${RANDOM}"
          continue
        fi
      fi
    fi
    if grep -Eqi 'input type="hidden" name="id" value="' <<< "$response"; then
      echo -e "${GREEN}| Post link found.${NC}"
      post_action=$(grep -oP '(?<=Form name="F1" method="POST" action=").*(?=" onSubmit.*$)' <<< "$response")
      post_act=$(grep -oP '(?<=input type="hidden" name="act" value=").*(?=">.*$)' <<< "$response")
      post_id=$(grep -oP '(?<=input type="hidden" name="id" value=").*(?=">.*$)' <<< "$response")
      post_fname=$(grep -oP '(?<=input type="hidden" name="fname" value=").*(?=">.*$)' <<< "$response")
      post_rand=$(grep -oP '(?<=input type="hidden" name="rand" value=").*(?=">.*$)' <<< "$response")
      post_fname=$(urlencode_literal_grouped_case "${post_fname}")
      # Strip tabs, carriage returns and newlines from the action url.
      post_action="${post_action//[$'\t\r\n']}"
      if [[ "$filename_override" == "" ]]; then
        filename=$(urlencode_literal_grouped_case "${post_fname}")
      fi
    else
      if [[ $i == $maxfetchretries ]] ; then
        printf "\\n"
        echo -e "${RED}| Failed to extract download link [2]${NC}"
        warnAndRetryUnknownError=true
        if [[ "${finalAttempt}" == "true" ]] ; then
          failedRetryDownload "${remote_url}" "Failed to extract download link [2]" ""
        fi
        return 1
      else
        continue
      fi
    fi
    # Every form field must be present before the POST can be attempted.
    if [[ -z "$post_action" ]] || [[ -z "$post_act" ]] || [[ -z "$post_id" ]] || [[ -z "$post_sc" ]] || \
       [[ -z "$post_fname" ]] || [[ -z "$post_rand" ]] || [[ -z "$captcha_code" ]] ; then
      if [[ $i == $maxfetchretries ]] ; then
        printf "\\n"
        echo -e "${RED}| Failed to extract download link [3]${NC}"
        warnAndRetryUnknownError=true
        if [[ "${finalAttempt}" == "true" ]] ; then
          failedRetryDownload "${remote_url}" "Failed to extract download link [3]" ""
        fi
        return 1
      else
        continue
      fi
    else
      break
    fi
  done
  form_data="act=${post_act}&id=${post_id}&fname=${post_fname}&rand=${post_rand}&sc=${post_sc}&code=${captcha_code}&btn=Download+File"
  echo -e "| Captcha countdown (3s)…"
  sleep 3s
  touch "${WorkDir}/.flocks/${remote_url//[^a-zA-Z0-9]/}"
  if [[ ! "$filename_override" == "" ]] ; then
    filename="$filename_override"
  fi
  filename=$(sanitize_file_or_folder_name "${filename}")
  printf "\\n"
  echo -e "${YELLOW}| File name:${NC}\t\"${filename}\""
  echo -e "${YELLOW}| File size:${NC}\tUnknown${NC}"
  file_path="${download_inflight_path}${filename}"
  flockDownload="${WorkDir}/.flocks/${filename//[^a-zA-Z0-9\.\_\-]/}.flock"
  if CheckDownloadExists "$remote_url" "$MoveToFolder" "$filecnt" "$filename" "$file_path" "$completed_location" ; then
    return 1
  fi
  # Quoted target: an unquoted path with whitespace is an "ambiguous redirect".
  echo "${remote_url//[^a-zA-Z0-9]/}" > "$flockDownload"
}
#!
#! ----------- (3) Fetch File / Download File Function --------------- #
#!
#! Downloads the file by POSTing the prepared form data, then validates and
#! hands off the result. Resume is not supported: each call starts from scratch.
#!   Arguments: $1 - file counter, $2 - retry counter,
#!              $3 - "true" when this is the final attempt
#!   Globals read:    filename, file_path, remote_url, form_data, post_action,
#!                    WorkDir, UseTorCurlImpersonate, RateMonitorEnabled,
#!                    DownloadSpeedMin, DownloadTimeoutInterval, RandomUA,
#!                    MoveToFolder, filecnt, completed_location,
#!                    received_file_size, file_size_bytes, colour variables
#!   Globals written: fileCnt, retryCnt, finalAttempt, flockDownload,
#!                    CLEANSTRING, containsHtml
#!   Returns: 0 after ProcessCompletedDownload has been called;
#!            1 when CheckNoHtml rejects the downloaded file.
atea_GetFile() {
  echo -e "${GREEN}# Downloading…\t${BLUE}(No Resume)${NC}"
  echo -e "${YELLOW}| File path:${NC}\t./.inflight/${filename}\n"
  fileCnt=$1
  retryCnt=$2
  finalAttempt=$3
  flockDownload="${WorkDir}/.flocks/${filename//[^a-zA-Z0-9\.\_\-]/}.flock"
  GetRandomUA
  # Remove any leftover partial so a failed transfer cannot be mistaken for a
  # completed download.
  if [[ -f "$file_path" ]]; then
    rm -f "$file_path"
  fi
  CLEANSTRING=${remote_url//[^a-zA-Z0-9]/}
  # The trap body is re-parsed when it fires, so both lock paths are embedded
  # shell-escaped to survive whitespace in WorkDir.
  local lock_path_q flock_path_q
  printf -v lock_path_q '%q' "${WorkDir}/.flocks/${CLEANSTRING}"
  printf -v flock_path_q '%q' "${flockDownload}"
  trap "rm -f ${lock_path_q}; rm -f ${flock_path_q}; echo \"\"; tput cnorm; exit" 0 1 2 3 6 15
  # Build the request once; the optional parts depend on the two settings.
  local -a curl_args=(--insecure)
  if [[ "${RateMonitorEnabled}" == "true" ]]; then
    curl_args+=(--speed-limit "$DownloadSpeedMin" --speed-time "$DownloadTimeoutInterval")
  fi
  # The User-Agent header is only sent when curl-impersonate is not in use.
  if [[ "${UseTorCurlImpersonate}" != "true" ]]; then
    curl_args+=(-H "User-Agent: $RandomUA")
  fi
  curl_args+=(--data "$form_data" "$post_action" --output "$file_path")
  tor_curl_request "${curl_args[@]}"
  # NOTE(review): received_file_size is not assigned anywhere in this function;
  # it carries whatever value the surrounding environment left -- confirm.
  if CheckNoHtml "$remote_url" "$filename" "$file_path" "$((received_file_size - 0))" ; then
    containsHtml=false
  else
    containsHtml=true
  fi
  if [[ "$containsHtml" == "true" ]]; then
    if [[ -f "$file_path" ]] ; then
      rm -rf "$file_path"
    fi
    echo -e "\n${YELLOW}Bad node / HTML found:${NC} tainted partial removed..."
    rm -f "$flockDownload"
    if [[ "${finalAttempt}" == "true" ]] ; then
      droppedSizeBadDownload "${remote_url}" "${filename}" "${received_file_size}"
    fi
    return 1
  fi
  rm -f "$flockDownload"
  # NOTE(review): filecnt comes from the caller's scope (not $1/fileCnt) and
  # file_size_bytes is not assigned in this file -- confirm both.
  ProcessCompletedDownload "$remote_url" "$MoveToFolder" "$filecnt" "$filename" "$file_size_bytes" "$completed_location" "$file_path"
  return 0
}
#!
#! --------------- Host Extra Functions ------------------- #
#!