def copy_over_if_duplicate(settings, submission_id, output_folder):
    """Check if there is already a copy of the submission downloaded in the download path.
    If there is, copy the existing version to the supplied output location then return True.
    If no copy can be found (or none is usable), return False.

    settings: config object providing .output_folder, the download root.
    submission_id: string ID of the submission to look for.
    output_folder: destination folder to copy an existing version into.
    """
    assert_is_string(submission_id)
    # Generate expected filename pattern; downloaded files end in "<id>.<ext>"
    expected_submission_filename = "*"+submission_id+".*"
    # Search every tag folder directly under the download root
    glob_string = os.path.join(settings.output_folder, "*", expected_submission_filename)
    # Use glob to check for existing files matching the expected pattern
    glob_matches = glob.glob(glob_string)
    # No matches anywhere: nothing to reuse
    if len(glob_matches) == 0:
        return False
    # Try each match in turn. (Previously every branch of the loop body
    # returned on the first iteration, so only the first match was ever
    # considered and a usable copy in a later folder could never be found.)
    for glob_match in glob_matches:
        # If there is an existing version in the output path, nothing needs to be copied
        if output_folder in glob_match:
            return False
        # Copy over submission file and metadata JSON
        logging.info("Trying to copy from previous download: "+glob_match)
        # Build expected paths for the source JSON and both output locations
        match_dir, match_filename = os.path.split(glob_match)
        expected_json_input_filename = submission_id+".json"
        expected_json_input_folder = os.path.join(match_dir, "json")
        expected_json_input_location = os.path.join(expected_json_input_folder, expected_json_input_filename)
        json_output_folder = os.path.join(output_folder, "json")
        json_output_filename = submission_id+".json"
        json_output_path = os.path.join(json_output_folder, json_output_filename)
        submission_output_path = os.path.join(output_folder, match_filename)
        # Skip this match if either of its files is missing; a later match
        # may still be complete.
        if not os.path.exists(glob_match):
            logging.debug("Submission file to copy is missing.")
            continue
        if not os.path.exists(expected_json_input_location):
            logging.debug("JSON file to copy is missing.")
            continue
        # Ensure output paths exist
        if not os.path.exists(json_output_folder):
            os.makedirs(json_output_folder)
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)
        logging.info("Copying files for submission: "+submission_id+" from "+match_dir+" to "+output_folder)
        # Copy over files
        try:
            # Copy submission file
            shutil.copy2(glob_match, submission_output_path)
            # Copy JSON
            shutil.copy2(expected_json_input_location, json_output_path)
            return True
        except IOError as err:
            # NOTE: "except IOError, err:" was Python-2-only syntax and a
            # SyntaxError on Python 3; "as" works on both 2.6+ and 3.
            logging.error("Error copying files!")
            logging.exception(err)
            # Copying this match failed; fall through to any remaining matches.
            continue
    # Every match was unusable; caller should redownload.
    return False
def download_submission(settings, search_tag, submission_id):
    """Download a submission from Derpibooru.

    Fetches the submission's metadata JSON and image, saving both under the
    search tag's output folder. Skips all work when the metadata JSON already
    exists on disk, or when a previously downloaded copy can be reused.
    """
    assert_is_string(search_tag)
    assert_is_string(submission_id)
    setup_browser()
    # Where this submission's metadata JSON will be stored
    metadata_filename = submission_id+".json"
    metadata_path = os.path.join(settings.output_folder, search_tag, "json", metadata_filename)
    # A JSON file on disk marks a completed download; nothing to do.
    if os.path.exists(metadata_path):
        logging.debug("JSON for this submission already exists, skipping.")
        return
    # Reuse a copy already downloaded under another tag, if one exists
    tag_folder = os.path.join(settings.output_folder, search_tag)
    if copy_over_if_duplicate(settings, submission_id, tag_folder):
        return
    # Fetch the submission metadata from the API
    metadata_url = "https://derpibooru.org/"+submission_id+".json?key="+settings.api_key
    metadata_page = get(metadata_url)
    if not metadata_page:
        return
    submission_info = decode_json(metadata_page)
    # Deleted submissions have nothing to download
    if check_if_deleted_submission(submission_info):
        logging.debug(metadata_page)
        return
    # Fields needed to locate and name the image file
    image_url = submission_info["image"]
    image_filename = submission_info["file_name"]
    image_file_ext = submission_info["original_format"]
    # Long filenames keep the original upload name; short ones use the ID
    stem = image_filename if settings.output_long_filenames else submission_id
    image_output_path = os.path.join(tag_folder, settings.filename_prefix+stem+"."+image_file_ext)
    # Download the image itself
    authenticated_image_url = image_url+"?key="+settings.api_key
    logging.debug("Loading submission image: "+authenticated_image_url)
    image_data = get(authenticated_image_url)
    if not image_data:
        return
    # Image should always be bigger than this, if it isn't we got a bad file
    if len(image_data) < 100:
        logging.error("Image data was too small! "+str(image_data))
        return
    # Save the image first and the JSON last: the JSON's presence is what
    # marks this submission as fully downloaded on the next run.
    save_file(image_output_path, image_data, True)
    save_file(metadata_path, metadata_page, True)
    logging.debug("Download successful")