Skip to content

Commit b9a230f

Browse files
authored
Create code1.py
1 parent da1264b commit b9a230f

File tree

1 file changed

+49
-0
lines changed

1 file changed

+49
-0
lines changed

dev/104/code1.py

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,49 @@
1+
import random
2+
import time
3+
import requests
4+
from bs4 import BeautifulSoup
5+
from fake_useragent import UserAgent
6+
# Collect job-posting URLs from the 104.com.tw search results for a keyword.
#
# The script walks the paginated search results, sends every request with a
# freshly randomized User-Agent, extracts the link of each listed job, and
# pauses a random 5-10 seconds between pages.

# Initialize fake_useragent (constructed with platforms='pc').
ua = UserAgent(platforms='pc')

# Search endpoint and query parameters; 'page' is overwritten on each iteration.
base_url = "https://www.104.com.tw/jobs/search/"
params = {
    'keyword': 'python',
    'page': 1
}

# Accumulates the URL of every job posting found.
job_urls = []

# Crawl the first 150 result pages.
last_page = 150
for page in range(1, last_page + 1):
    print(f"正在抓取第 {page} 頁...")
    params['page'] = page

    # Build a random User-Agent for this request.
    headers = {
        'User-Agent': ua.random
    }

    # Send the GET request. The timeout keeps one stalled connection from
    # hanging the whole crawl; a failed page is reported and skipped so the
    # URLs collected so far are not lost.
    try:
        response = requests.get(
            base_url, headers=headers, params=params, timeout=30
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"第 {page} 頁抓取失敗: {exc}")
    else:
        soup = BeautifulSoup(response.text, 'lxml')

        # Find every job list item on the page.
        job_items = soup.find_all('article', class_='js-job-item')

        # Extract the job URL from each list item.
        for job in job_items:
            job_link = job.find('a', class_='js-job-link')
            if not job_link:
                continue

            # .get() avoids a KeyError when the anchor carries no href.
            job_url = job_link.get('href')
            if not job_url:
                continue

            # Protocol-relative hrefs ("//host/path") need a scheme; anything
            # else is kept as-is instead of being blindly prefixed.
            if job_url.startswith('//'):
                full_job_url = "https:" + job_url
            else:
                full_job_url = job_url
            job_urls.append(full_job_url)

    # Wait a random 5 to 10 seconds between pages; no wait is needed once
    # the final page has been processed.
    if page < last_page:
        sleep_time = random.uniform(5, 10)
        print(f"等待 {sleep_time:.2f} 秒...")
        time.sleep(sleep_time)

0 commit comments

Comments
 (0)