
# Untitled
# By: gingeredhorse on Mar 24th, 2014
# syntax: Python | size: 1.56 KB | hits: 43 | expires: Never
from urllib import urlopen
import re
import sys
# Scrape the follower pages of a Vimeo channel and, for every follower link
# found, query the Vimeo v2 API for that user's channels.json.  Each
# (user, name) pair is written to <channel>.csv as "user<id>, <name>", one
# pair per line.
#
# This is a Python 2 script (raw_input, urllib.urlopen).  Progress and error
# messages go to stderr.

# Both patterns are loop-invariant, so they are compiled once up front.
# Matches the part of a profile link that follows "/user" on a followers page.
USER_LINK_RE = re.compile('a href="/user(.+?)"')
# Matches every ,"name":"..." value in the raw channels.json text.
# NOTE(review): the JSON is scanned as plain text, so escape sequences such as
# \" or \uXXXX are not decoded and an escaped quote ends the match early.
# Switching to the json module would change the rows written - confirm before
# doing so.
CHANNEL_NAME_RE = re.compile(',"name":"(.+?)"')


def _fetch(address):
    """Return the response body for *address*, always closing the connection."""
    response = urlopen(address)
    try:
        return response.read()
    finally:
        response.close()


def _clean(name):
    """Return *name* with spaces turned into underscores and commas removed.

    Commas are dropped because the comma is the delimiter of the output file.
    """
    return name.replace(' ', '_').replace(',', '')


channel = raw_input('Enter vimeo channel: ')
pageNum = int(raw_input('Enter number of follower pages to scrape:'))

# The with-block guarantees the CSV is flushed and closed even when a request
# fails part-way through the scrape.
with open(channel + '.csv', 'w') as adjList:
    for page in range(1, pageNum + 1):
        sys.stderr.write('Scraping page ' + str(page) + '...\n')
        htmltext = _fetch('http://vimeo.com/channels/' + channel
                          + '/followers/page:' + str(page) + '/sort:datefollow')
        for userId in USER_LINK_RE.findall(htmltext):
            # standard request - http://vimeo.com/api/v2/username/request.output
            userJsonData = _fetch('http://vimeo.com/api/v2/user' + userId
                                  + '/channels.json')
            for channelName in CHANNEL_NAME_RE.findall(userJsonData):
                # Write userId and channel to csv using a comma as delimiter.
                try:
                    adjList.write('user' + userId + ', '
                                  + _clean(channelName) + '\n')
                except Exception as e:
                    # Best effort: report the failed row and carry on.
                    # Exception (not BaseException) so Ctrl-C still aborts.
                    sys.stderr.write('Error: ' + str(e) + '\n')

sys.stderr.write('Done writing. ' + channel + '.csv saved to script folder.\n')