Pastebin launched a little side project called HostCabi.net, check it out ;-) Don't like ads? PRO users don't see any ads ;-)
Guest

4chan thread downloader

By: a guest on May 2nd, 2012  |  syntax: Bash  |  size: 3.06 KB  |  hits: 96  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  #!/bin/bash
  #
  # 4chan thread downloader.
  #
  # Usage: <script> <4chan thread url>
  #
  # Polls the given thread every POLL_SECONDS seconds and keeps a local copy
  # in the current directory:
  #   <thread>.html     thread page, with resource URLs rewritten to local paths
  #   <thread>/         full-size images
  #   <thread>/misc/    thumbnails, spoiler/title images, favicon, stylesheet
  #
  # The loop ends with status 0 as soon as a page download comes back empty.
  #
  # `set -e` is deliberately not used: wget's exit status is ignored on purpose
  # and an empty download is what signals "thread gone".
  set -u

  readonly STATIC_HOST="static.4chan.org"
  readonly POLL_SECONDS=30
  # Capture group 1 is the thread number; anything after the digits is ignored.
  readonly THREAD_URL_RE='^https?://boards\.4chan\.org/[a-z0-9]+/res/([0-9]+)'

  # Private scratch directory for URL lists and intermediate files.
  WORK_DIR=""

  # Print a message to stderr and abort with status 1.
  die() {
    printf '%s\n' "$*" >&2
    exit 1
  }

  # Remove the scratch directory; installed as the EXIT trap.
  cleanup() {
    if [[ -n "${WORK_DIR}" ]]; then
      rm -rf -- "${WORK_DIR}"
    fi
  }

  # Print the size of file $1 in bytes. Reading from stdin keeps the file name
  # out of wc's output; tr drops the padding some wc implementations add.
  file_size() {
    wc -c < "$1" | tr -d '[:space:]'
  }

  # Filter: turn protocol-relative URLs (//host/...) into http:// URLs.
  absolutize() {
    sed 's_^//_http://_g'
  }

  # Download thread page $1 into $2, using $3 as the scratch download target.
  # An existing non-empty copy is only replaced by a strictly larger download.
  # Exits the script with status 0 when the download is empty.
  refresh_html() {
    local url=$1 html=$2 fresh=$3

    if [[ -s "${html}" ]]; then
      wget -np -nd -nH -q -erobots=off -O "${fresh}" "${url}"
      if [[ ! -s "${fresh}" ]]; then
        echo "Thread has 404'd. Stopping script."
        rm -f -- "${fresh}"
        exit 0
      fi
      if (( $(file_size "${fresh}") > $(file_size "${html}") )); then
        mv -- "${fresh}" "${html}" || die "Can't replace ${html}"
      else
        rm -f -- "${fresh}"
      fi
    else
      wget -np -nd -nH -q -erobots=off -O "${html}" "${url}"
      if [[ ! -s "${html}" ]]; then
        echo "Thread doesn't exist. Stopping script."
        rm -f -- "${html}"
        exit 0
      fi
    fi
  }

  # Print the first stylesheet URL found in page $1 (nothing when none).
  # The greedy match may span several URLs on one line, so the sed step splits
  # them onto separate lines before the first one is picked.
  find_stylesheet_url() {
    grep -E -o "//${STATIC_HOST}/css/(.+)\.css" "$1" \
      | sed -e 's_\.css_\.css\n_g' -e 's_//stat_\nhttp://stat_g' \
      | grep /css/ \
      | head -n1
  }

  # Extract resource URLs from page $1: full images go to list file $2,
  # thumbnails, spoiler image, title images and favicons to list file $3.
  collect_urls() {
    local html=$1 image_list=$2 misc_list=$3

    grep -E -o "//images.4chan.org/[a-z0-9]+/src/([0-9]*).(jpg|png|gif)" "${html}" \
      | absolutize > "${image_list}"

    {
      grep -E -o "//[0-9].thumbs.4chan.org/[a-z0-9]+/thumb/([0-9]*)s.jpg" "${html}" | absolutize
      grep -E -o "//${STATIC_HOST}/image/spoiler.png" "${html}" | absolutize | head -n1
      grep -E -o "//${STATIC_HOST}/image/title/[a-z]+/([0-9]*).(jpg|png|gif)" "${html}" | absolutize
      grep -E -o "//${STATIC_HOST}/image/favicon(.*).ico" "${html}" | absolutize
    } > "${misc_list}"
  }

  # Rewrite the resource URLs in page $1 so they point into directory $2.
  # The page is only replaced when sed succeeded.
  localize_html() {
    local html=$1 id=$2
    local rewritten="${WORK_DIR}/localized.html"

    if sed \
      -e "s_//.\.thumbs\.4chan\.org/[a-z0-9]\+/thumb/\([0-9]\+\)s\.jpg_${id}/misc/\1s.jpg_g" \
      -e "s_//images\.4chan\.org/[a-z0-9]\+/src/\([0-9]\+\)\.\(jpg\|gif\|png\)_${id}/\1.\2_g" \
      -e "s_//${STATIC_HOST}/image/title/[a-z]\+/\([0-9]\+\)\.\(jpg\|gif\|png\)_${id}/misc/\1.\2_g" \
      -e "s_//${STATIC_HOST}/image/spoiler\.png_${id}/misc/spoiler.png_g" \
      -e "s_//${STATIC_HOST}/image/favicon\(.*\)\.ico_${id}/misc/favicon\1.ico_g" \
      -e "s_//${STATIC_HOST}/css/\(.\+\)\.css_${id}/misc/\1.css_g" \
      "${html}" > "${rewritten}"; then
      mv -- "${rewritten}" "${html}" || die "Can't replace ${html}"
    else
      die "Can't rewrite ${html}"
    fi
  }

  # Fetch the fade images referenced by stylesheet $1 into directory $2, then
  # point the stylesheet at the local copies.
  localize_stylesheet() {
    local css_file=$1 misc_dir=$2
    local fade_list="${WORK_DIR}/fade.lst"
    local rewritten="${WORK_DIR}/localized.css"

    # -n ... p: only lines that actually reference a fade image become URLs.
    sed -n "s_.*fade\(.*\)\.png.*_http://${STATIC_HOST}/image/fade\1.png_p" \
      "${css_file}" > "${fade_list}"
    wget -nc -q -P "${misc_dir}" -i "${fade_list}"

    if sed 's_/image/fade\(.*\)\.png_fade\1.png_g' "${css_file}" > "${rewritten}"; then
      mv -- "${rewritten}" "${css_file}" || die "Can't replace ${css_file}"
    fi
  }

  # Run one complete download pass for thread URL $1 with thread number $2.
  sync_once() {
    local url=$1 id=$2
    local html="${id}.html"
    local misc_dir="${id}/misc"
    local image_list="${WORK_DIR}/images.lst"
    local misc_list="${WORK_DIR}/misc.lst"
    local css_url css_file

    echo "Downloading thread's HTML"
    refresh_html "${url}" "${html}" "${WORK_DIR}/fresh.html"

    mkdir -p -- "${misc_dir}" || die "Can't create ${misc_dir}"

    echo "Parsing HTML"
    collect_urls "${html}" "${image_list}" "${misc_list}"
    css_url=$(find_stylesheet_url "${html}")
    if [[ -n "${css_url}" ]]; then
      printf '%s\n' "${css_url}" >> "${misc_list}"
    fi
    echo "Resources' lists created and filled with URLs"
    echo ""

    localize_html "${html}" "${id}"

    # -P saves into the target directory, so the working directory never
    # changes and a failed cd cannot misdirect later file operations.
    echo "Downloading full images"
    wget -nc -q -P "${id}" -i "${image_list}"

    echo "Downloading miscellaneous files"
    wget -nc -q -P "${misc_dir}" -i "${misc_list}"

    # Only post-process a stylesheet that was found and is actually on disk;
    # without a file operand sed would sit waiting on stdin.
    css_file="${misc_dir}/${css_url##*/}"
    if [[ -n "${css_url}" && -f "${css_file}" ]]; then
      localize_stylesheet "${css_file}" "${misc_dir}"
    fi

    touch -- "${misc_dir}/.nomedia"
  }

  # Entry point: validate the URL, then poll the thread until it disappears.
  main() {
    local url=${1:-}
    local thread_id

    if [[ ! "${url}" =~ ${THREAD_URL_RE} ]]; then
      printf 'Usage: %s <4chan thread url>\n' "${0##*/}" >&2
      exit 1
    fi
    thread_id=${BASH_REMATCH[1]}

    command -v wget > /dev/null || die "wget is required but was not found in PATH"

    echo "4chan downloader"
    echo ""
    echo "Downloading to ${thread_id}"
    echo ""

    WORK_DIR=$(mktemp -d) || die "Can't create a temporary directory"
    trap cleanup EXIT

    while true; do
      echo "------------------------------"
      echo ""

      sync_once "${url}" "${thread_id}"

      echo "Session completed and temporary files've been cleaned up"
      echo "Waiting 30 seconds before next run"
      sleep "${POLL_SECONDS}"
      echo ""
    done
  }

  main "$@"