Don't like ads? PRO users don't see any ads ;-)
Guest

Untitled

By: waterapple on Apr 23rd, 2013  |  syntax: Python  |  size: 5.74 KB  |  hits: 5  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. #-------------------------------------------------------------------------------
  2. # Name:        module1
  3. # Purpose:
  4. #
  5. # Author:      new
  6. #
  7. # Created:     22/04/2013
  8. # Copyright:   (c) new 2013
  9. # Licence:     <your licence>
  10. #-------------------------------------------------------------------------------
  11. #!/usr/bin/env python
  12.  
  13. from selenium import webdriver
  14. import logging
  15. from bs4 import BeautifulSoup, SoupStrainer
  16. import re
  17. import lxml.html
  18.  
  19. # Setup logging (Before running any other code)
  20. # http://inventwithpython.com/blog/2012/04/06/stop-using-print-for-debugging-a-5-minute-quickstart-guide-to-pythons-logging-module/
  21. logger = logging.getLogger()
  22. logger.setLevel(logging.DEBUG)
  23. formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
  24. fh = logging.FileHandler('Log.txt')
  25. fh.setLevel(logging.DEBUG)
  26. fh.setFormatter(formatter)
  27. logger.addHandler(fh)
  28. ch = logging.StreamHandler()
  29. ch.setLevel(logging.DEBUG)
  30. ch.setFormatter(formatter)
  31. logger.addHandler(ch)
  32. logger.debug('Logging started.')
  33. # End logging setup
  34.  
  35. def login(username,password):
  36.     logger.debug("Logging in")
  37.     # Load login page
  38.     driver.get("http://writing.com")
  39.     # select username box
  40.     usernamebox = driver.find_element_by_name("login_username")
  41.     # enter username
  42.     usernamebox.send_keys(username)
  43.     # Select password box
  44.     passwordbox = driver.find_element_by_name("login_password")
  45.     # enter password
  46.     passwordbox.send_keys(password)
  47.     # submit form
  48.     passwordbox.submit()
  49.  
  50. def findlinks(html):
  51.     # Copied from:
  52.     # http://stackoverflow.com/questions/520031/whats-the-cleanest-way-to-extract-urls-from-a-string-using-python
  53.     url_regex = 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
  54.     links = re.findall(url_regex,html, re.DOTALL)
  55.     return links
  56.  
  57. def main():
  58.     login(username,password)
  59.  
  60. if __name__ == '__main__':
  61.     # Init selenium browser
  62.     driver = webdriver.Firefox()
  63.     # Set username and password to use
  64.     username = "drymarker"
  65.     password = "notrouble"
  66.     main()
  67.  
  68.  
  69.  
  70.  
  71.  
  72.  
  73.  
  74. #def searchInteractives(searchstring,itemtype,sortmode,maxsearchpages):
  75. # Run search for items
  76. searchstring = "vore"
  77. itemtype = "Interactives"
  78. sortmode = "Newest Last"
  79. maxsearchpages = 100
  80. foundlinks = []
  81. # Load search page
  82. driver.get("http://www.writing.com/main/search?")
  83. # Set view to detailed
  84. viewdetailed = driver.find_element_by_name("lp2")
  85. viewdetailed.click()
  86. # Fill in search form
  87. searchbox = driver.find_element_by_name("search_for")
  88. searchbox.clear()
  89. searchbox.send_keys(searchstring)
  90. itemtypemenu = driver.find_element_by_name("ps_type")
  91. for typeoption in itemtypemenu.find_elements_by_tag_name("option"):
  92.     if itemtype in typeoption.text:
  93.         typeoption.click()
  94. # Run search page one
  95. searchbox.submit()
  96. # Change sort mode
  97. logger.debug("Changing sort mode to:" + sortmode)
  98. sortmenu = driver.find_element_by_name("sort_by")
  99. for sortoption in sortmenu.find_elements_by_tag_name("option"):
  100.     if sortmode in sortoption.text:
  101.         sortoption.click()
  102. nextpagebutton = driver.find_element_by_xpath('//*[@id="Content_Column_Inner"]/font/table[1]/tbody/tr/td/form/table/tbody/tr[2]/td/input[5]')
  103. nextpagebutton.click()
  104. # Iterate over searvh
  105. for searchloopcounter in xrange(maxsearchpages):
  106.     # Get page HTML
  107.     pagehtml = driver.page_source
  108.     # TODO: Run search for more pages, collecting thml for all
  109.     # Find all item links inside ceter pane
  110.     # Extract center pane
  111.     centerpanesearch = re.search(r"Content_Column_Inner.+Footer_Wrapper", pagehtml, re.IGNORECASE|re.DOTALL)
  112.     centerpanehtml = centerpanesearch.group()
  113.     # Grab links to items
  114.     pagelinks = findlinks(centerpanehtml)
  115.     # Remove anythin not an item link
  116.     for link in pagelinks:
  117.         if "view_item" in link:
  118.             foundlinks.append(link)
  119.     # Grab page number
  120.     # "Viewing page <b>54</b> of <b>53</b>"
  121.     pagenumbersearch = re.search(r"Viewing page <b>(\d+)</b> of <b>(\d+)</b>", pagehtml, re.IGNORECASE|re.DOTALL)
  122.     if pagenumbersearch:
  123.         currentpage = int(pagenumbersearch.group(1))
  124.         totalpages = int(pagenumbersearch.group(2))
  125.         logger.debug(pagenumbersearch.group(0))
  126.     # Check if there is a next page
  127.     if currentpage < totalpages:
  128.         # Load next page of results
  129.         nextpagebutton = driver.find_element_by_xpath('//*[@id="Content_Column_Inner"]/font/table[1]/tbody/tr/td/form/table/tbody/tr[2]/td/input[5]')
  130.         nextpagebutton.click()
  131.     else:
  132.         break
  133.  
  134. #def favitems(favurllist):
  135. logger.debug("Starting to add items to favs.")
  136. favurllist = foundlinks[:]
  137. logger.debug(str(favurllist))
  138. # Add items to favs
  139. for favpageurl in favurllist:
  140.     logger.debug("Loading url: "+favpageurl)
  141.     # Open item page
  142.     driver.get(favpageurl)
  143.     # Get page HTML
  144.     favpagehtml = driver.page_source
  145.     # Check if already a fav
  146.     favchecksearch = re.search(r"Item #\d+ is a favorite of yours", favpagehtml, re.IGNORECASE|re.DOTALL)
  147.     if favchecksearch:
  148.         isfav = True
  149.     else:
  150.         isfav = False
  151.     # Add to favs if needed
  152.     if not isfav:
  153.         logger.debug("Item is not faved, adding to favs.")
  154.         # Click add to favs link
  155.         favxpath = '//*[@id="Content_Column_Inner"]/font/div[3]/table/tbody/tr/td[2]/div[1]/div[1]/table/tbody/tr/td[2]/a[2]'
  156.         favbutton = driver.find_element_by_xpath(favxpath)
  157.         favbutton.click()
  158.         # Confirm fav add worked
  159.         favconfirmhtml = driver.page_source
  160.         favconfirmsearch = re.search(r"Item #\d+ is a favorite of yours", favconfirmhtml, re.IGNORECASE|re.DOTALL)
  161.         assert(favconfirmsearch)
  162.         if favconfirmsearch:
  163.             console.log("Added to favs")
  164.     else:
  165.         logger.debug("Item is already a fav")