Title: [Bash] archive anon's beauty
Author: Anonymous
Pastebin link: http://pastebin.com/9gB0jcFS
First Edit: Thursday 28th of June 2012 08:46:23 AM CDT
Last Edit: Thursday 28th of June 2012 08:46:23 AM CDT
#!/bin/sh
# 4chan thread archiver -- argument handling and output set-up.
#
# Usage: <script> <4chan thread url>
#
# Variables defined here and used by the rest of the script:
#   LOC   thread number taken from the URL; names the download directory
#         and the saved page "$LOC.html"
#   ST    host name of the static-asset server
#   N, R  the escape sequences "\n" and "\r"; they are expanded at print
#         time because "echo" is aliased to an "echo -ne" variant below
#
# Exit status: 1 when the argument is missing or is not a thread URL.
#
# "echo" is kept (rather than printf) on purpose: the Android branch below
# routes it through busybox.

# Accept only http(s)://boards.4chan.org/<board>/res/<digits>[...].
if [ -z "$1" ] ||
   ! echo "$1" | grep -Eq '^https?://boards\.4chan\.org/[a-z0-9]+/res/[0-9]+'; then
  echo "Usage: ${0##*/} <4chan thread url>" >&2
  exit 1
fi

echo "4chan downloader"

# Thread number = the digits right after the last "/res/". Whatever follows
# them (#anchor, ?query, trailing slash) is dropped so it cannot end up in
# file names or inside the sed expressions that interpolate $LOC.
LOC=${1##*/res/}
LOC=${LOC%%[!0-9]*}
if [ -z "$LOC" ]; then
  echo "Can't determine the thread's number" >&2
  exit 1
fi

ST="static.4chan.org"

if [ -s /system/build.prop ]; then
  # NOTE(review): a non-empty /system/build.prop presumably identifies an
  # Android system -- confirm. There, echo is routed through the first
  # busybox binary found on PATH.
  IFS=:
  for dir in $PATH; do
    if [ -n "$dir" ] && [ -s "$dir/busybox" ]; then
      alias echo="${dir}/busybox echo -ne"
      # NOTE(review): nothing in the visible script calls "read"; this alias
      # is preserved unchanged.
      alias read="${dir}/busybox sh read -n 1 Q"
      N="\n"
      R="\r"
      break
    fi
  done
  # Back to default field splitting (an unset IFS behaves like the default).
  unset IFS
else
  alias echo="echo -ne"
  # N is the newline and R the carriage return, same as in the branch above.
  # They used to be swapped here, which made every status line overwrite
  # the previous one.
  N="\n"
  R="\r"
fi
# thejob -- make the saved thread page browsable offline and fetch its assets.
#
# Globals read:
#   LOC  thread number; "$LOC.html" in the current directory is the page to
#        process and "$LOC/" receives the downloads
#   ST   host name of the static-asset server
#
# Files written:
#   $LOC.html   rewritten in place: thumbnail, image, logo, spoiler, favicon
#               and stylesheet references are pointed into "$LOC/"
#   $LOC/       full-size images
#   $LOC/misc/  thumbnails, spoiler/favicon/fade images, the stylesheet, the
#               board logo (saved as logo.<ext>) and an empty ".nomedia"
#
# Returns non-zero when the directories cannot be created or the page cannot
# be rewritten; in the latter case "$LOC.html" is left untouched. Individual
# download failures are not reported (wget runs with -q). The caller's
# working directory is never changed: all "cd" happens in a subshell.
thejob () {
  mkdir -p "$LOC/misc" || return 1

  # Build the URL lists for wget from the still-unmodified page. The page
  # uses protocol-relative links ("//host/..."), so "http:" is prepended.
  grep -Eo "//.\.thumbs\.4chan\.org/[a-z0-9]+/thumb/[0-9]*s\.jpg" "$LOC.html" \
    | sed 's_^//_http://_' > "$LOC/misc/misc"
  grep -Eo "//${ST}/image/spoiler-?[a-z0-9]*\.png" "$LOC.html" \
    | sed 's_^//_http://_' | head -n1 >> "$LOC/misc/misc"
  grep -Eo "//${ST}/image/favicon-?[a-z]*\.ico" "$LOC.html" \
    | sed 's_^//_http://_' >> "$LOC/misc/misc"
  grep -Eo "//${ST}/css/[a-z]+\.[0-9]+\.css" "$LOC.html" \
    | sed 's_^//_http://_' | head -n1 >> "$LOC/misc/misc"
  grep -Eo "//${ST}/image/title/[a-z]+/[0-9a-z]+\.(jpg|png|gif)" "$LOC.html" \
    | sed 's_^//_http://_' > "$LOC/misc/logo"
  grep -Eo "//images\.4chan\.org/[a-z0-9]+/src/[0-9]*\.(jpg|png|gif)" "$LOC.html" \
    | sed 's_^//_http://_' > "$LOC/images"

  # Point every reference at its local copy and tag ">>" quote links.
  # The final expression used to reference "\2" although it has a single
  # capture group; GNU sed rejects that, which left an empty output file
  # that then replaced the page. The result is now written to a temporary
  # file and only moved into place when sed succeeded.
  # NOTE(review): the final expression also matches the ">>$LOC" links that
  # were just tagged "(OP)" -- confirm that is intended.
  sed \
    -e "s_//.\.thumbs\.4chan\.org/[a-z0-9]\+/thumb/\([0-9]\+\)s\.jpg_${LOC}/misc/\1s.jpg_g" \
    -e "s_//images\.4chan\.org/[a-z0-9]\+/src/\([0-9]\+\)\.\(jpg\|gif\|png\)_${LOC}/\1.\2_g" \
    -e "s_//${ST}/image/title/[a-z]\+/[0-9a-z]\+\.\(jpg\|gif\|png\)_${LOC}/misc/logo.\1_g" \
    -e "s_//${ST}/image/spoiler\(-\?[0-9a-z]*\)\.png_${LOC}/misc/spoiler\1.png_g" \
    -e "s_//${ST}/image/favicon\(-\?[a-z]*\)\.ico_${LOC}/misc/favicon\1.ico_g" \
    -e "s_//${ST}/css/\([a-z0-9\.]\+\)\.css_${LOC}/misc/\1.css_g" \
    -e "s_\(>>${LOC}\)_\1 (OP)_g" \
    -e 's_\(>>[0-9]\+\)_\1.html (Cross-thread)_g' \
    "$LOC.html" > "$LOC.html.tmp" || { rm -f "$LOC.html.tmp"; return 1; }
  mv "$LOC.html.tmp" "$LOC.html" || return 1

  (
    cd "$LOC" || exit 1
    # -nc: files fetched on an earlier pass are not downloaded again.
    wget -nc -q -i images
    rm -f images

    cd misc || exit 1
    # Drop any stylesheet from an earlier pass so -nc fetches a fresh copy
    # (the local copy gets edited below).
    rm -f ./*.css
    wget -nc -q -i misc

    # File name of the stylesheet, if the page referenced one.
    css=$(sed -n 's_.*/css/\([a-z]\+\.[0-9]\+\.css\)$_\1_p' misc | tail -n1)
    if [ -n "$css" ] && [ -f "$css" ]; then
      # Fetch the fade images the stylesheet refers to, then make those
      # references relative so they resolve next to the stylesheet.
      grep -Eo 'fade-?[a-z]*\.png' "$css" \
        | sed "s_^_http://${ST}/image/_" > misc
      wget -nc -q -i misc
      sed 's_/image/fade\(-\?[a-z]*\)\.png_fade\1.png_g' "$css" > "$css.tmp" \
        && mv "$css.tmp" "$css"
    fi

    # Board logo: always re-fetched and stored as logo.<ext>, where <ext> is
    # the extension of the last URL in the list. Skipped when the page had
    # no logo, so no stray "logo." file is created.
    rm -f ./logo.*
    if [ -s logo ]; then
      ext=$(tail -n1 logo)
      wget -q -i logo -O "logo.${ext##*.}"
    fi

    rm -f misc logo
    # NOTE(review): presumably keeps Android's media scanner out of this
    # directory -- confirm.
    touch .nomedia
  )
}
# Main loop: announce the target, then re-fetch the thread every 10 seconds
# and run thejob whenever the page has grown.
#
# Uses names defined earlier in the script: $1 (thread URL), LOC (thread
# number), N (line-ending escape chosen at start-up, expanded by the "echo"
# alias) and the function thejob.
#
# Exit status: 0 when the thread disappears after at least one successful
# fetch, or when a signal arrived during a pass; 1 when the very first fetch
# yields nothing.
echo "${N}Downloading to $LOC${N}"
echo "${N}"
echo "------------------------------${N}"
while :; do
  # While a pass is running, signals 1 2 3 15 (HUP INT QUIT TERM) only set a
  # flag, so the download and rewrite are not abandoned half-way.
  trap 'EXIT=1' 1 2 3 15
  if [ -s "$LOC.html" ]; then
    # A copy already exists: fetch to a scratch file and keep it only if it
    # is larger than what is on disk.
    wget -np -nd -nH -q -erobots=off "$1" -O "$LOC.html.new"
    if [ ! -s "$LOC.html.new" ]; then
      echo "Thread has 404'd or 4chan is down. Stopping script${N}"
      rm -f "$LOC.html.new"
      exit 0
    fi
    # "wc -c < file" prints only the number; $(( )) strips any padding that
    # some wc implementations add.
    new_size=$(($(wc -c < "$LOC.html.new")))
    old_size=$(($(wc -c < "$LOC.html")))
    if [ "$new_size" -gt "$old_size" ]; then
      mv "$LOC.html.new" "$LOC.html"
      thejob
    else
      rm -f "$LOC.html.new"
    fi
  else
    # First pass: download straight to the final name.
    wget -np -nd -nH -q -erobots=off "$1" -O "$LOC.html"
    if [ ! -s "$LOC.html" ]; then
      echo "Thread doesn't exist or 4chan is down. Stopping script${N}" >&2
      rm -f "$LOC.html"
      exit 1
    fi
    thejob
  fi
  # Default signal handling again: during the sleep below a signal ends the
  # script immediately.
  trap - 1 2 3 15
  # NOTE(review): SLP is never assigned in the visible script; it only has
  # an effect if it comes from the environment -- confirm its purpose.
  if [ "$EXIT" = "1" ] || [ "$SLP" = "1" ]; then
    echo "Session completed. Exiting ${N}"
    exit 0
  fi
  # Show "OK" during the pause, then wipe it with backspaces.
  echo "OK"
  sleep 10
  echo "\b\b \b\b"
done