微博的模拟登录是比较坑的,看了网上很多大神的帖子,自己又看了微博登录时的json数据:1、发现登录时在输入账号后用chrome可以看到会请求一个prelogin之类的网址,网址后面会有一大串随机数。我测试了下,发现在没有随机数的情况下,这个网址也能得到所需要的servertime、nonce等几个数据。2、通过chrome查看json数据就可以看到用户名和密码的加密方式,再参考网上大神的资料就可以得到用户名su和密码sp。再把数据post上去就可以得到一个重定向的微博登录网址。3、将这个网址用正则表达式提取出来,再带上cookie数据就可以登录了。然后你想干什么就干什
# -*- coding: utf-8 -*-
"""Simulated login to Sina Weibo through the login.sina.com.cn SSO endpoints.

Targets Python 2: the module depends on ``urllib2`` and ``cookielib``.
"""
import base64
import cookielib
import re
import urllib
import urllib2

import rsa


class Weibo(object):
    """Log one account into Weibo and fetch that account's home page."""

    _USER_AGENT = ('Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 '
                   '(KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36')
    # Seconds to wait on each HTTP request before giving up.
    _TIMEOUT = 10
    # Fields extracted from the prelogin response.
    _PRELOGIN_KEYS = ('servertime', 'pcid', 'nonce', 'pubkey', 'rsakv')
    # Value returned by login_weibo() when any step of the login fails.
    _LOGIN_FAILED = '登陆失败'

    def __init__(self, username, password):
        """Store the base64-encoded username (``su``) and the raw password.

        NOTE(review): the web client reportedly URL-encodes the username
        before base64-encoding it -- confirm for e-mail style accounts.
        """
        self.user = base64.b64encode(username)
        self.pwd = password

    @property
    def get_pre_url_values(self):
        """Call the prelogin endpoint and return its login parameters.

        Every access performs a new HTTP request, so read this property once
        and reuse the result for both get_password() and login_weibo().

        Returns:
            dict with the keys ``servertime``, ``pcid``, ``nonce``,
            ``pubkey`` and ``rsakv``; values are strings with surrounding
            double quotes removed.

        Raises:
            ValueError: if one of the expected fields is not in the response.
            urllib2.URLError: if the request itself fails.
        """
        # Percent-encode the whole base64 string instead of assuming it ends
        # in exactly one '=' padding character.
        pre_url = (
            'https://login.sina.com.cn/sso/prelogin.php?entry=weibo'
            '&callback=sinaSSOController.preloginCallBack&su='
            + urllib.quote(self.user)
            + '&rsakt=mod&checkpin=1&client=ssologin.js(v1.4.18)'
        )
        request = urllib2.Request(
            pre_url, headers={'User-Agent': self._USER_AGENT})
        html = urllib2.urlopen(
            request, timeout=self._TIMEOUT).read().decode('utf-8')

        values_dict = {}
        for key in self._PRELOGIN_KEYS:
            # A field may be followed by ',' or, when it is last, by '}'.
            found = re.search(r'"%s":(.*?)[,}]' % key, html)
            if found is None:
                raise ValueError(
                    'prelogin response has no "%s" field' % key)
            values_dict[key] = found.group(1).strip('"')
        return values_dict

    def get_password(self, blog_values):
        """Return the RSA-encrypted, hex-encoded password (``sp``).

        The scheme was taken from Weibo's login JSON data and from write-ups
        found online.

        Args:
            blog_values: dict from get_pre_url_values; ``pubkey`` (hex
                string), ``servertime`` and ``nonce`` are read.

        Returns:
            Hex string of the encrypted
            ``servertime + '\\t' + nonce + '\\n' + password`` message.
        """
        modulus = int(blog_values['pubkey'], 16)
        key = rsa.PublicKey(modulus, 65537)
        message = (str(blog_values['servertime']) + '\t'
                   + str(blog_values['nonce']) + '\n' + str(self.pwd))
        return rsa.encrypt(message, key).encode('hex')

    def login_weibo(self, blog_values, sp):
        """Post the login form, follow the redirect, and fetch the home page.

        Args:
            blog_values: dict from get_pre_url_values; must be the same one
                that was passed to get_password() to produce ``sp``.
            sp: encrypted password from get_password().

        Returns:
            The decoded HTML of ``http://weibo.com/<userdomain>`` on success,
            or the string '登陆失败' if any step fails.
        """
        values = {
            'entry': "weibo",
            'gateway': '1',
            'from': '',
            'savestate': '7',
            'userticket': '1',
            'pagerefer': "",
            'cfrom': '1',
            'vsnf': '1',
            'su': self.user,
            'service': 'miniblog',
            'servertime': blog_values['servertime'],
            'nonce': blog_values['nonce'],
            'pwencode': 'rsa2',
            'rsakv': blog_values['rsakv'],
            'sp': sp,
            'sr': "1440*900",
            'encoding': 'UTF-8',
            'prelt': '503',
            'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
            'returntype': 'META',
        }
        header = {'User-Agent': self._USER_AGENT}
        # NOTE(review): the form is posted over plain http while prelogin
        # uses https -- confirm whether the https endpoint can be used here.
        url = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)'

        # One cookie-aware opener is shared by all requests so that cookies
        # set by the login response are sent with the follow-up requests.
        cj = cookielib.CookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        data = urllib.urlencode(values)

        try:
            login_request = urllib2.Request(url, headers=header, data=data)
            html = opener.open(
                login_request, timeout=self._TIMEOUT).read().decode('gbk')
        except Exception as e:
            # Best-effort boundary: report the error and signal failure
            # instead of continuing with an undefined response body.
            print(e)
            return self._LOGIN_FAILED

        redirect = re.search(r'location\.replace\(\'(.*?)\'\)', html)
        if redirect is None:
            return self._LOGIN_FAILED

        try:
            url_request = urllib2.Request(redirect.group(1))
            page = opener.open(
                url_request, timeout=self._TIMEOUT).read().decode('utf-8')
            domain = re.search(r'"userdomain":"(.*?)"', page)
            if domain is None:
                return self._LOGIN_FAILED
            login_url = 'http://weibo.com/' + domain.group(1)
            response_login_url = opener.open(
                urllib2.Request(login_url), timeout=self._TIMEOUT)
            per_html = response_login_url.read().decode('utf-8')
        except Exception:
            # Any network or decoding problem counts as a failed login.
            per_html = self._LOGIN_FAILED
        return per_html


if __name__ == '__main__':
    wbobj = Weibo('用户名', '密码')
    # Fetch the prelogin values once: the encrypted password embeds
    # servertime and nonce, so the form must post those same values.
    blog_values = wbobj.get_pre_url_values
    sp = wbobj.get_password(blog_values)
    html = wbobj.login_weibo(blog_values=blog_values, sp=sp)
    print(html)
么,比如:把女神的照片全要了、自动查看女神的微博并用邮件发给你,下次再来弄这个。最近失眠得厉害,快点找到工作吧!!