這篇文章主要介紹了Python實現自動登錄人人網並採集信息的方法,涉及Python模擬登錄及正則匹配的相關技巧,需要的朋友可以參考下
本文實例講述了Python實現自動登錄人人網並採集信息的方法。分享給大家供大家參考。具體實現方法如下:
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- import sys
- import re
- import urllib2
- import urllib
- import cookielib
class Renren(object):
    """Log in to renren.com through a cookie-aware opener and print feed entries.

    Typical use: create an instance, call ``setinfo`` with the account
    details, then call ``login``.  ``login`` posts the credentials, stores the
    response body in ``self.file`` (also written to ``news.html``), saves the
    cookie jar and prints every ``<article>`` entry found in the response.

    The syntax used here is valid on Python 2.6+ (the module relies on the
    Python 2 ``urllib2`` / ``cookielib`` modules).
    """

    # Cookie jar file; the spelling is kept as-is so previously saved jars
    # are still found.
    COOKIE_FILE = './renren.coockie'
    LOGIN_URL = 'http://www.renren.com/PLogin.do'

    # Compiled once at class creation instead of on every scrape.
    _ARTICLE_RE = re.compile('<article>(.*?)</article>')  # one feed entry
    _AUTHOR_RE = re.compile('<h3.+>(.+)</h3>:')  # author inside an entry
    # NOTE(review): the trailing '/s' is matched literally; it may have been
    # meant as the whitespace class '\s' -- confirm against real page markup.
    _CONTENT_RE = re.compile('</a>(.+)/s')  # text following the author link

    def __init__(self):
        """Create the cookie jar and install a cookie-handling URL opener."""
        self.name = self.pwd = self.content = self.domain = self.origURL = ''
        self.operate = ''  # response object returned by the login request
        self.file = ''  # body of the login response
        self.cj = cookielib.LWPCookieJar()
        try:
            # Reuse cookies from an earlier run; best effort only (the file
            # does not exist on the first run), so the error is just shown.
            self.cj.revert(self.COOKIE_FILE)
        except Exception as e:
            print(e)
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(self.opener)

    def setinfo(self, username, password, domain, origURL):
        """Store the login details that ``login`` sends.

        username -- value sent as the ``email`` form field
        password -- value sent as the ``password`` form field
        domain   -- value sent as the ``domain`` form field
        origURL  -- value sent as the ``origURL`` form field
        """
        self.name = username
        self.pwd = password
        self.domain = domain
        self.origURL = origURL

    def login(self):
        """Post the credentials, save the response and print the feed entries."""
        params = {
            'domain': self.domain,
            'origURL': self.origURL,
            'email': self.name,
            'password': self.pwd,
        }
        print('login.......')
        req = urllib2.Request(self.LOGIN_URL, urllib.urlencode(params))

        # Send the login request once and keep both the response object and
        # its body (the credentials used to be posted twice).
        self.operate = self.opener.open(req)
        self.file = self.operate.read()

        # Keeping a copy of the page is best effort; the ``with`` block
        # guarantees the handle is closed whether or not the write succeeds.
        try:
            with open('news.html', 'w') as newsfeed:
                newsfeed.write(self.file)
        except IOError as e:
            print(e)

        print(type(self.operate))
        print(self.operate.geturl())
        # NOTE(review): geturl() returns a non-empty URL for any completed
        # request, so this does not really distinguish a failed login --
        # confirm what the site returns on bad credentials.
        if self.operate.geturl():
            print('Logged on successfully!')
            self.cj.save(self.COOKIE_FILE)
            self.__viewnewinfo()
        else:
            print('Logged on error')

    def __viewnewinfo(self):
        """Show the friends' latest updates."""
        self.__caiinfo()

    @classmethod
    def _parse_updates(cls, lines):
        """Return ``(author, content)`` tuples for the entries found in *lines*.

        Each line is searched separately for ``<article>`` blocks.  A block
        without an author match is skipped; a block with an author but no
        content match yields an empty content string instead of raising
        ``IndexError``.
        """
        updates = []
        for line in lines:
            for article in cls._ARTICLE_RE.findall(line):
                authors = cls._AUTHOR_RE.findall(article)
                if not authors:
                    continue
                bodies = cls._CONTENT_RE.findall(article)
                updates.append((authors[0], bodies[0] if bodies else ''))
        return updates

    def __caiinfo(self):
        """Scrape the stored login response and print each friend's update."""
        print(len(self._ARTICLE_RE.findall(self.file)))
        # The response stream was already consumed by login(), so the lines
        # are taken from the stored body.
        infocontent = self.file.split('\n')
        print(type(infocontent))
        print('friend newinfo:')
        for author, text in self._parse_updates(infocontent):
            print('%s 說: %s' % (author, text))
            print('----------------------------------------------')
# Account settings -- replace the placeholders with real values.
username = 'username'  # your renren.com account
password = 'password'  # your renren.com password
domain = 'www.renren.com'  # renren.com host
origURL = 'http://www.renren.com/home'  # address shown after logging in

# Build the client, hand it the settings and start the login + scrape run.
ren = Renren()
ren.setinfo(
    username=username,
    password=password,
    domain=domain,
    origURL=origURL,
)
ren.login()
希望本文所述對大家的Python程序設計有所幫助。
新聞熱點
疑難解答