File tree Expand file tree Collapse file tree 1 file changed +49
-0
lines changed Expand file tree Collapse file tree 1 file changed +49
-0
lines changed Original file line number Diff line number Diff line change
1
import random
import time
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent

# Rotating desktop-browser User-Agents makes the scraper look less like a bot.
ua = UserAgent(platforms='pc')

# 104.com.tw job-search endpoint and its query parameters.
base_url = "https://www.104.com.tw/jobs/search/"
params = {
    'keyword': 'python',
    'page': 1,
}

# Crawl limits: number of result pages and the polite-delay window (seconds).
MAX_PAGES = 150
MIN_DELAY, MAX_DELAY = 5, 10

# Collected job-posting URLs (insertion order preserved, duplicates skipped).
job_urls = []
_seen_urls = set()

# Reuse one TCP connection across all requests instead of reconnecting per page.
session = requests.Session()

for page in range(1, MAX_PAGES + 1):
    print(f"正在抓取第 {page} 頁...")
    params['page'] = page

    # Fresh random User-Agent on every request.
    headers = {'User-Agent': ua.random}

    try:
        # Fix: the original call had no timeout (could hang forever) and
        # silently ignored HTTP error statuses (403/429/5xx).
        response = session.get(base_url, headers=headers,
                               params=params, timeout=10)
        response.raise_for_status()
    except requests.RequestException as exc:
        # One transient failure should not abort the whole crawl.
        print(f"第 {page} 頁抓取失敗: {exc}")
        continue

    soup = BeautifulSoup(response.text, 'lxml')

    # Each search hit is rendered as an <article class="js-job-item">.
    job_items = soup.find_all('article', class_='js-job-item')
    if not job_items:
        # An empty page means we ran past the last page of results
        # (or got blocked) — either way, keep going is pointless.
        print(f"第 {page} 頁沒有職缺,提前結束")
        break

    # Extract the posting URL from every job item on this page.
    for job in job_items:
        job_link = job.find('a', class_='js-job-link')
        # Fix: guard .get('href') — indexing a missing attribute raised KeyError.
        href = job_link.get('href') if job_link else None
        if not href:
            continue
        # 104 hrefs are protocol-relative ("//www.104.com.tw/job/...");
        # prepend the scheme to obtain a usable absolute URL.
        full_job_url = "https:" + href
        if full_job_url not in _seen_urls:
            _seen_urls.add(full_job_url)
            job_urls.append(full_job_url)

    # Random pause between pages to stay under the site's rate limits.
    sleep_time = random.uniform(MIN_DELAY, MAX_DELAY)
    print(f"等待 {sleep_time:.2f} 秒...")
    time.sleep(sleep_time)
You can’t perform that action at this time.
0 commit comments