#!/usr/bin/env python
#-------------------------------------------------------------------------------
# Name: module1
# Purpose:
#
# Author: new
#
# Created: 22/04/2013
# Copyright: (c) new 2013
# Licence: <your licence>
#-------------------------------------------------------------------------------
import logging
import re

from bs4 import BeautifulSoup, SoupStrainer
import lxml.html
from selenium import webdriver
# Setup logging (Before running any other code)
# http://inventwithpython.com/blog/2012/04/06/stop-using-print-for-debugging-a-5-minute-quickstart-guide-to-pythons-logging-module/
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
fh = logging.FileHandler('Log.txt')
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)
logger.addHandler(fh)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
ch.setFormatter(formatter)
logger.addHandler(ch)
logger.debug('Logging started.')
# End logging setup
def login(username, password):
    # Log in to writing.com using the module-level selenium driver.
    logger.debug("Logging in")
    # Load login page
    driver.get("http://writing.com")
    # Select username box
    usernamebox = driver.find_element_by_name("login_username")
    # Enter username
    usernamebox.send_keys(username)
    # Select password box
    passwordbox = driver.find_element_by_name("login_password")
    # Enter password
    passwordbox.send_keys(password)
    # Submit form
    passwordbox.submit()
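
# A minimal sketch of an explicit wait helper (an addition, not called anywhere
# in this script): instead of assuming the next page has loaded, block until an
# element with the given name attribute is present. Uses Selenium's standard
# WebDriverWait/expected_conditions helpers; the timeout value is an arbitrary
# choice and the helper name is made up for illustration.
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def wait_for_element_named(name, timeout=10):
    # Poll until an element with this name attribute appears, or raise
    # TimeoutException after `timeout` seconds.
    # Example use: wait_for_element_named("login_password") after driver.get(...)
    return WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.NAME, name)))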
def findlinks(html):
    # Copied from:
    # http://stackoverflow.com/questions/520031/whats-the-cleanest-way-to-extract-urls-from-a-string-using-python
    url_regex = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    links = re.findall(url_regex, html, re.DOTALL)
    return links
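
# Alternative sketch for link extraction (an addition, not used below): let
# BeautifulSoup pull href attributes out of anchor tags instead of regex-matching
# the raw HTML. Assumes the links of interest are absolute URLs inside
# <a href="..."> tags, as they appear on the search result pages.
def findlinks_bs4(html):
    # Parse only the anchor tags to keep the soup small.
    anchors = BeautifulSoup(html, "lxml", parse_only=SoupStrainer("a"))
    return [a["href"] for a in anchors.find_all("a", href=True)]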
def main():
    login(username, password)
if __name__ == '__main__':
    # Init selenium browser
    driver = webdriver.Firefox()
    # Set username and password to use
    username = "drymarker"
    password = "notrouble"
    main()
    #def searchInteractives(searchstring, itemtype, sortmode, maxsearchpages):
    # Run search for items
    searchstring = "vore"
    itemtype = "Interactives"
    sortmode = "Newest Last"
    maxsearchpages = 100
    foundlinks = []
    # Load search page
    driver.get("http://www.writing.com/main/search?")
    # Set view to detailed
    viewdetailed = driver.find_element_by_name("lp2")
    viewdetailed.click()
    # Fill in search form
    searchbox = driver.find_element_by_name("search_for")
    searchbox.clear()
    searchbox.send_keys(searchstring)
    itemtypemenu = driver.find_element_by_name("ps_type")
    for typeoption in itemtypemenu.find_elements_by_tag_name("option"):
        if itemtype in typeoption.text:
            typeoption.click()
    # Run search page one
    searchbox.submit()
    # Change sort mode
    logger.debug("Changing sort mode to: " + sortmode)
    sortmenu = driver.find_element_by_name("sort_by")
    for sortoption in sortmenu.find_elements_by_tag_name("option"):
        if sortmode in sortoption.text:
            sortoption.click()
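    # Alternative sketch: Selenium's Select helper can pick drop-down options
    # directly. Left commented out because it requires an exact visible-text
    # match, which is unverified against the live site:
    # from selenium.webdriver.support.ui import Select
    # Select(sortmenu).select_by_visible_text(sortmode)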
    nextpagebutton = driver.find_element_by_xpath('//*[@id="Content_Column_Inner"]/font/table[1]/tbody/tr/td/form/table/tbody/tr[2]/td/input[5]')
    nextpagebutton.click()
    # Iterate over search result pages
    for searchloopcounter in xrange(maxsearchpages):
        # Get page HTML
        pagehtml = driver.page_source
        # TODO: Run search for more pages, collecting html for all
        # Find all item links inside center pane
        # Extract center pane (assumes both markers appear in the page source)
        centerpanesearch = re.search(r"Content_Column_Inner.+Footer_Wrapper", pagehtml, re.IGNORECASE|re.DOTALL)
        centerpanehtml = centerpanesearch.group()
        # Grab links to items
        pagelinks = findlinks(centerpanehtml)
        # Remove anything not an item link
        for link in pagelinks:
            if "view_item" in link:
                foundlinks.append(link)
        # Grab page number
        # "Viewing page <b>54</b> of <b>53</b>"
        pagenumbersearch = re.search(r"Viewing page <b>(\d+)</b> of <b>(\d+)</b>", pagehtml, re.IGNORECASE|re.DOTALL)
        if pagenumbersearch:
            currentpage = int(pagenumbersearch.group(1))
            totalpages = int(pagenumbersearch.group(2))
            logger.debug(pagenumbersearch.group(0))
            # Check if there is a next page
            if currentpage < totalpages:
                # Load next page of results
                nextpagebutton = driver.find_element_by_xpath('//*[@id="Content_Column_Inner"]/font/table[1]/tbody/tr/td/form/table/tbody/tr[2]/td/input[5]')
                nextpagebutton.click()
            else:
                break
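    # Note: the absolute XPath used for the next-page button above is brittle;
    # if writing.com changes its search page layout it will stop matching. A
    # looser locator could be substituted, e.g. (assumed button labelling,
    # unverified against the live site):
    # nextpagebutton = driver.find_element_by_xpath('//input[contains(@value, "Next")]')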
    #def favitems(favurllist):
    logger.debug("Starting to add items to favs.")
    favurllist = foundlinks[:]
    logger.debug(str(favurllist))
    # Add items to favs
    for favpageurl in favurllist:
        logger.debug("Loading url: " + favpageurl)
        # Open item page
        driver.get(favpageurl)
        # Get page HTML
        favpagehtml = driver.page_source
        # Check if already a fav
        favchecksearch = re.search(r"Item #\d+ is a favorite of yours", favpagehtml, re.IGNORECASE|re.DOTALL)
        isfav = favchecksearch is not None
        # Add to favs if needed
        if not isfav:
            logger.debug("Item is not faved, adding to favs.")
            # Click add to favs link
            favxpath = '//*[@id="Content_Column_Inner"]/font/div[3]/table/tbody/tr/td[2]/div[1]/div[1]/table/tbody/tr/td[2]/a[2]'
            favbutton = driver.find_element_by_xpath(favxpath)
            favbutton.click()
            # Confirm fav add worked
            favconfirmhtml = driver.page_source
            favconfirmsearch = re.search(r"Item #\d+ is a favorite of yours", favconfirmhtml, re.IGNORECASE|re.DOTALL)
            assert favconfirmsearch
            if favconfirmsearch:
                logger.debug("Added to favs")
        else:
            logger.debug("Item is already a fav")