Selenium是一个自动化测试工具,可以驱动浏览器执行特定的动作,如点击、下拉等。同时还可以获取浏览器当前呈现页面的源代码,可见即可爬。
Nuget
1 2 3 4
| Selenium.Chrome.WebDriver
Selenium.RC
Selenium.Support
Selenium.WebDriver
|
爬页面
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
| // Drives a headless Chrome session: signs in on the login page, opens the list
// page, and loads the inner HTML of the selected element into an HtmlDocument.
ChromeOptions options = new ChromeOptions();
options.AddArgument("--headless");
options.AddArgument("--disable-gpu");
// Sets the credentials_enable_service profile preference to false
// (presumably to suppress Chrome's save-password prompt; confirm).
options.AddUserProfilePreference("credentials_enable_service", false);
// Chrome executable to launch, taken from webClientUrl.
options.BinaryLocation = webClientUrl;

// Declared outside the try block so the parsed document stays usable afterwards.
HtmlDocument htmlDocument = new HtmlDocument();
IWebDriver driver = new ChromeDriver(options);
try
{
    // Configure the page-load timeout before the first navigation so that it
    // also applies to the login page.
    // NOTE(review): 2000 seconds is roughly 33 minutes; confirm that
    // milliseconds were not intended.
    driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(2000);

    // Sign in.
    driver.Navigate().GoToUrl(LoingUrl);
    driver.FindElement(By.Id("txtUsername")).SendKeys(userName);
    driver.FindElement(By.Id("txtPassword")).SendKeys(userPassword);
    driver.FindElement(By.Id("imgBtnSignIn")).Click();

    // Open the page that holds the data to scrape.
    driver.Navigate().GoToUrl(listUrl);

    // FindElement (singular) returns a single IWebElement. The collection
    // returned by FindElements has no GetAttribute method.
    var _trselector = driver.FindElement(By.CssSelector("这里是你需要获取数据的Clss或者ID对应的名称"));
    htmlDocument.LoadHtml(_trselector.GetAttribute("innerHTML"));
}
finally
{
    // Always shut the browser down, even when a lookup or navigation throws,
    // so no Chrome/chromedriver process is left behind.
    driver.Quit();
}
|
爬WebApi
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
| /// <summary>
/// Logs in and, when the login succeeds, requests the CIS list using the
/// cookies obtained from the login response.
/// </summary>
public void GetData()
{
    // DoLogin takes the login URL as its third argument; the original call
    // omitted it and did not compile.
    // NOTE(review): assumes a LoginURL member exists alongside
    // UserName / Password / CISURL; confirm against the enclosing class.
    CookieContainer httpCookie = DoLogin(UserName, Password, LoginURL);
    if (httpCookie == null)
    {
        return;
    }

    // The response body is not processed further in this snippet.
    string result_CIS = GetCISList(CISURL, httpCookie);
}

/// <summary>
/// Posts the credentials as a form-urlencoded body to the login URL.
/// </summary>
/// <param name="username">User name sent in the <c>UserName</c> form field.</param>
/// <param name="password">Password sent in the <c>Password</c> form field.</param>
/// <param name="LoginURL">Address the login request is posted to.</param>
/// <returns>
/// The cookie container attached to the client, or <c>null</c> when the
/// response is not successful.
/// </returns>
private CookieContainer DoLogin(string username, string password, string LoginURL)
{
    var client = new RestClient(LoginURL);
    var request = new RestRequest(Method.POST);

    // Escape the values so characters such as '&', '=', '+' or '%' in the
    // credentials cannot break or alter the form body.
    string formBody =
        $"UserName={System.Uri.EscapeDataString(username)}" +
        $"&Password={System.Uri.EscapeDataString(password)}" +
        "&OfficeCode=";
    request.AddParameter(
        "application/x-www-form-urlencoded",
        formBody,
        ParameterType.RequestBody);

    // Attach a fresh container so cookies set by the response are captured.
    client.CookieContainer = new System.Net.CookieContainer();
    IRestResponse response = client.Execute(request);
    if (!response.IsSuccessful)
    {
        return null;
    }

    return client.CookieContainer;
}

/// <summary>
/// Posts to the CIS list endpoint with the supplied cookies.
/// </summary>
/// <param name="cISURL">Base address of the endpoint; the query placeholder is appended to it.</param>
/// <param name="httpCookie">Cookie container sent with the request.</param>
/// <returns>The response content, or <c>null</c> when the response is not successful.</returns>
private string GetCISList(string cISURL, CookieContainer httpCookie)
{
    var client = new RestClient(cISURL + "参数");
    var request = new RestRequest(Method.POST);
    client.CookieContainer = httpCookie;

    IRestResponse response = client.Execute(request);
    if (!response.IsSuccessful)
    {
        return null;
    }

    return response.Content;
}
|