In addition to Weibo, there is also WeChat
Please pay attention
WeChat public account
Shulou
2025-02-23 Update From: SLTechnology News&Howtos shulou NAV: SLTechnology News&Howtos > Development >
Share
Shulou(Shulou.com)06/03 Report--
This article explains how to log in to a website with a Selenium-driven crawler and generate cookies from the session. The method is simple, fast and practical, so interested readers may want to follow along. Let's walk through it step by step.
Selenium crawler login to generate Cookie
Logging in to a website essentially means that, once the login completes, the server hands the client a credential, which may be stored in a cookie or somewhere else. On every later request the client sends this credential back, and the server treats the client as logged in. For crawlers, therefore, generating cookies that can be reused with proxied requests has become a must.
Example
Install chrome and corresponding driver
Download the ChromeDriver release that matches your installed version of Chrome
Download the corresponding version of driver
https://chromedriver.chromium.org/downloads
For example:
Generate cookies
We use chrome driver for login and cookie generation
import json
import os
import random
import time
import zipfile

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
def load_user_agents(path):
    """Return the non-empty, whitespace-stripped lines of the file at *path*.

    The file is expected to hold one user-agent string per line. A missing
    file yields an empty list, which callers treat as "use the browser's
    default user agent" instead of failing at import time.
    """
    try:
        with open(path, encoding='utf-8') as handle:
            return [line.strip() for line in handle if line.strip()]
    except FileNotFoundError:
        return []


class GenCookies(object):
    """Log in to Weibo's mobile sign-in page with Chrome and collect cookies.

    Chrome is started through Selenium, optionally behind an authenticated
    HTTP proxy (configured by a small extension generated on the fly) and
    with a user agent picked at random from ``useragents.txt``.
    """

    # Candidate user-agent strings, one per line in useragents.txt.
    USER_AGENT = load_user_agents('useragents.txt')

    # 16yun proxy configuration
    PROXY_HOST = 't.16yun.cn'  # proxy host (16yun proxy service, www.16yun.cn)
    PROXY_PORT = 31111  # port
    PROXY_USER = 'USERNAME'  # username
    PROXY_PASS = 'PASSWORD'  # password

    # Text looked for in the #errorMsg element after a failed login.
    # NOTE(review): the live page may show this message in another language;
    # confirm the exact text against the site and override if needed.
    LOGIN_ERROR_TEXT = 'incorrect username or password'

    # Manifest of the generated proxy-auth extension (manifest version 2).
    MANIFEST_JSON = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Chrome Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version": "22.0.0"
    }
    """

    # Background script of the extension. The four placeholders are filled
    # with host, port, username and password by _render_background_js().
    BACKGROUND_JS_TEMPLATE = """
    var config = {
        mode: "fixed_servers",
        rules: {
            singleProxy: {
                scheme: "http",
                host: %s,
                port: %d
            },
            bypassList: ["localhost"]
        }
    };

    chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

    function callbackFn(details) {
        return {
            authCredentials: {
                username: %s,
                password: %s
            }
        };
    }

    chrome.webRequest.onAuthRequired.addListener(
        callbackFn,
        {urls: ["<all_urls>"]},
        ['blocking']
    );
    """

    @classmethod
    def _render_background_js(cls):
        """Return the extension's background script with proxy settings filled in.

        String values are JSON-encoded so that quotes or backslashes in the
        host or credentials cannot break out of the JavaScript literals.
        """
        return cls.BACKGROUND_JS_TEMPLATE % (
            json.dumps(cls.PROXY_HOST),
            int(cls.PROXY_PORT),
            json.dumps(cls.PROXY_USER),
            json.dumps(cls.PROXY_PASS),
        )

    @classmethod
    def get_chromedriver(cls, use_proxy=False, user_agent=None):
        """Create a Chrome WebDriver.

        :param use_proxy: when true, write ``proxy_auth_plugin.zip`` to the
            current directory and load it as an extension that routes traffic
            through the configured proxy and answers its auth challenge.
        :param user_agent: user-agent string to pass to Chrome; a falsy value
            keeps Chrome's default.
        :return: the started ``webdriver.Chrome`` instance.
        """
        chrome_options = webdriver.ChromeOptions()
        if use_proxy:
            pluginfile = 'proxy_auth_plugin.zip'
            with zipfile.ZipFile(pluginfile, 'w') as zp:
                zp.writestr("manifest.json", cls.MANIFEST_JSON)
                zp.writestr("background.js", cls._render_background_js())
            chrome_options.add_extension(pluginfile)
        if user_agent:
            chrome_options.add_argument('--user-agent=%s' % user_agent)
        # The chromedriver binary is expected next to this script.
        path = os.path.dirname(os.path.abspath(__file__))
        # NOTE(review): positional driver path plus `chrome_options=` is the
        # Selenium 3 call style; newer Selenium releases expect a Service
        # object and `options=`. Confirm against the installed version.
        driver = webdriver.Chrome(
            os.path.join(path, 'chromedriver'),
            chrome_options=chrome_options)
        return driver

    def __init__(self, username, password):
        """Start a proxied browser and remember the credentials to log in with.

        :param username: account name typed into the login form.
        :param password: password typed into the login form.
        """
        self.url = 'https://passport.weibo.cn/signin/login?entry=mweibo&r=https://m.weibo.cn/'
        # Pass ONE user agent, not the whole list: Chrome takes the value of
        # --user-agent verbatim.
        user_agent = random.choice(self.USER_AGENT) if self.USER_AGENT else None
        self.browser = self.get_chromedriver(use_proxy=True, user_agent=user_agent)
        self.wait = WebDriverWait(self.browser, 20)
        self.username = username
        self.password = password

    def open(self):
        """Open the login page, fill in username and password, and submit.

        :return: None
        """
        self.browser.delete_all_cookies()
        self.browser.get(self.url)
        username = self.wait.until(
            EC.presence_of_element_located((By.ID, 'loginName')))
        password = self.wait.until(
            EC.presence_of_element_located((By.ID, 'loginPassword')))
        submit = self.wait.until(
            EC.element_to_be_clickable((By.ID, 'loginAction')))
        username.send_keys(self.username)
        password.send_keys(self.password)
        time.sleep(1)
        submit.click()

    def password_error(self):
        """Report whether the page shows the wrong-credentials message.

        Waits up to 5 seconds for ``LOGIN_ERROR_TEXT`` to appear in the
        ``errorMsg`` element.

        :return: the truthy result of the wait when the message appears,
            ``False`` when the wait times out.
        """
        try:
            return WebDriverWait(self.browser, 5).until(
                EC.text_to_be_present_in_element(
                    (By.ID, 'errorMsg'), self.LOGIN_ERROR_TEXT))
        except TimeoutException:
            return False

    def get_cookies(self):
        """Return the cookies currently held by the browser session.

        :return: whatever ``self.browser.get_cookies()`` returns.
        """
        return self.browser.get_cookies()

    def close(self):
        """Quit the browser so that no Chrome process is left running."""
        self.browser.quit()

    def main(self):
        """Run the login flow and report the outcome.

        :return: ``{'status': 2, 'content': <message>}`` when the credentials
            are rejected, otherwise ``{'status': 1, 'content': <cookies>}``.
        """
        self.open()
        if self.password_error():
            return {
                'status': 2,
                'content': 'wrong username or password',
            }
        # Reached when the login went through without a CAPTCHA.
        cookies = self.get_cookies()
        return {
            'status': 1,
            'content': cookies,
        }
if __name__ == '__main__':
    # Placeholder credentials; replace with a real account before running.
    generator = GenCookies(
        username='180000000',
        password='16yun',
    )
    try:
        result = generator.main()
    finally:
        # Always shut Chrome down, even when the login flow raises, so no
        # browser process is left behind.
        generator.browser.quit()
    print(result)
Import osimport timeimport zipfilefrom selenium import webdriverfrom selenium.common.exceptions import TimeoutExceptionfrom selenium.webdriver.common.by import Byfrom selenium.webdriver.support import expected_conditions as ECfrom selenium.webdriver.support.ui import WebDriverWaitclass GenCookies (object): USER_AGENT = open ('useragents.txt'). Readlines () # 16yun proxy configuration PROXY_HOST =' t.16yun.cn' # proxy or host Eniuyun Agent www.16yun.cn PROXY_PORT = 31111 # port PROXY_USER = 'USERNAME' # username PROXY_PASS = 'PASSWORD' # password @ classmethod def get_chromedriver (cls Use_proxy=False, user_agent=None): manifest_json = "" {"version": "1.0.0", "manifest_version": 2, "name": "Chrome Proxy", "permissions": ["proxy", "tabs", "unlimitedStorage" "storage", "", "webRequest", "webRequestBlocking"], "background": {"scripts": ["background.js"]} "minimum_chrome_version": "22.0.0"} "" background_js = "var config = {mode:" fixed_servers ", rules: {singleProxy: {scheme:" http ", host:"% s " Port: parseInt (% s)}, bypassList: ["localhost"]}} Chrome.proxy.settings.set ({value: config, scope: "regular"}, function () {}); function callbackFn (details) {return {authCredentials: {username: "% s", password: "% s"}} } chrome.webRequest.onAuthRequired.addListener (callbackFn, {urls: [""]}, ['blocking']) ""% (cls.PROXY_HOST, cls.PROXY_PORT, cls.PROXY_USER, cls.PROXY_PASS) path = os.path.dirname (os.path.abspath (_ file__)) chrome_options = webdriver.ChromeOptions () if use_proxy: pluginfile = 'proxy_auth_plugin.zip' with zipfile.ZipFile (pluginfile) 'w') as zp: zp.writestr ("manifest.json", manifest_json) zp.writestr ("background.js", background_js) chrome_options.add_extension (pluginfile) if user_agent: chrome_options.add_argument ('-user-agent=%s'% user_agent) driver = webdriver.Chrome (os.path.join (path) 'chromedriver'), chrome_options=chrome_options) return driver def _ _ init__ (self, username, password): self.url =' 
https://passport.weibo.cn/signin/login?entry=mweibo&r=https://m.weibo.cn/' self.browser = self.get_chromedriver (use_proxy=True, user_agent=self.USER_AGENT) self.wait = WebDriverWait (self.browser 20) self.username = username self.password = password def open (self): "Open the web page to enter the username and password and click: return: None" self.browser.delete_all_cookies () self.browser.get (self.url) username = self.wait.until ((By.ID) " 'loginName') password = self.wait.until (EC.presence_of_element_located ((By.ID,' loginPassword')) submit = self.wait.until (EC.element_to_be_clickable ((By.ID)) 'loginAction')) username.send_keys (self.username) password.send_keys (self.password) time.sleep (1) submit.click () def password_error (self): "determine whether the password is incorrect: return:" try: return WebDriverWait (self.browser) 5. Until (EC.text_to_be_present_in_element ((By.ID, 'errorMsg')) ) except TimeoutException: return False def get_cookies (self): "get Cookies: return:" return self.browser.get_cookies () def main (self): "" entry: return: "self. Open () if self.password_error (): return {'status': 2 Wrong 'content':' username or password'} # if you login successfully cookies = self.get_cookies () return {'status': 1,' content': cookies} if _ _ name__ ='_ _ main__': result = GenCookies (username='180000000') Password='16yun',) .main () print (result)
The useragents.txt file contains user-agent strings, one per line, for example:
Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.16) Gecko/20110319 Firefox/40
Mozilla/5.0 (Windows NT 6.2; rv:39.0) Gecko/20100101 Firefox/39.0
Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; QQBrowser/8.3.4769.400)
Mozilla/5.0 (Windows NT 6.1; rv:39.0) Gecko/20100101 Firefox/39.0
By now you should have a deeper understanding of how a Selenium crawler logs in and generates cookies, so the best next step is to try it yourself. For more related content, browse the relevant channels on this site and follow us to keep learning.
Subscribe to "Shulou Technology Information" to get the latest news, interesting stories and hot topics in the IT industry, and to keep up with the latest Internet news, technology news and IT industry trends.
Views: 0
*The comments in the above article only represent the author's personal views and do not represent the views and positions of this website. If you have more insights, please feel free to contribute and share.
Continue with the installation of the previous hadoop.First, install zookooper1. Decompress zookoope
"Every 5-10 years, there's a rare product, a really special, very unusual product that's the most un
© 2024 shulou.com SLNews company. All rights reserved.