|
| 1 | +import requests |
| 2 | +import pandas as pd |
| 3 | +import random |
| 4 | +import time |
| 5 | +from fake_useragent import UserAgent |
| 6 | + |
def convert_job_data(original_dict):
    """Flatten a job-detail JSON payload into a dict of labelled fields.

    Args:
        original_dict: Decoded JSON response. Its ``'data'`` entry must
            contain the ``'header'``, ``'jobDetail'``, ``'condition'`` and
            ``'industry'`` entries read below.

    Returns:
        dict: One entry per extracted field, keyed by its Chinese label.

    Raises:
        KeyError: If a required entry is missing from the payload.
    """
    data = original_dict['data']
    header = data['header']
    job_detail = data['jobDetail']
    condition = data['condition']

    # Numeric jobType code -> display text.
    job_type_mapping = {
        0: '全部',
        1: '全職',
        2: '兼職',
        3: '高薪',
        4: '派遣'
    }

    # Numeric remoteWork type code -> display text.
    remote_work_mapping = {
        1: '完全遠端',
        2: '部分遠端'
    }

    def descriptions(items):
        # A JSON null in place of a list is treated as "no entries".
        return [item['description'] for item in items or []]

    # remoteWork may be absent or null; both fall back to type 0, which is
    # not in the mapping and therefore yields '無'.
    remote_type = (job_detail.get('remoteWork') or {}).get('type', 0)

    # employees may be absent, null, or the "not provided" placeholder;
    # all three become an empty string. Otherwise strip the '人' suffix.
    employees = data.get('employees')
    if employees is None or employees == '暫不提供':
        headcount = ''
    else:
        headcount = str(employees).replace('人', '')

    # Build the dictionary of job information.
    job_info = {
        '職缺名稱': header['jobName'],
        '公司名稱': header['custName'],
        '公司網址': header['custUrl'],
        '發佈日期': header['appearDate'],
        # 'https:' is prepended because the payload value is used as a
        # scheme-less URL here.
        '職缺分析網址': 'https:' + header['analysisUrl'],
        '上班地區': job_detail['addressRegion'],
        '上班地點': job_detail['addressDetail'],
        '工作待遇': job_detail['salary'],
        '最低薪資': job_detail['salaryMin'],
        '最高薪資': job_detail['salaryMax'],
        '工作性質': job_type_mapping.get(job_detail['jobType'], '未知'),
        '上班時段': job_detail['workPeriod'],
        '假期政策': job_detail['vacationPolicy'],
        '工作經歷': condition['workExp'],
        '學歷要求': condition['edu'],
        '擅長工具': descriptions(condition['specialty']),
        '工作技能': descriptions(condition['skill']),
        '產業類別': data['industry'],
        '職務類別': descriptions(job_detail.get('jobCategory')),
        '出差外派': job_detail['businessTrip'],
        '遠端工作': remote_work_mapping.get(remote_type, '無'),
        '公司人數': headcount,
        '管理責任': job_detail['manageResp']
    }
    return job_info
| 53 | + |
def fetch_job_detail(job_id, timeout=10):
    """Fetch and flatten the detail record of a single job posting.

    Requests the job's AJAX content endpoint with a random User-Agent
    (from ``fake_useragent``, ``platforms='pc'``) and a Referer pointing at
    the job page, then converts the JSON body with ``convert_job_data``.

    Args:
        job_id: Identifier interpolated into the job and endpoint URLs.
        timeout: Seconds passed to ``requests.get`` so that a stalled
            connection cannot block the caller indefinitely.

    Returns:
        dict | None: The converted job fields plus a ``'連結'`` entry holding
        the job page URL, or ``None`` if any step failed.
    """
    try:
        ua = UserAgent(platforms='pc')

        job_page = f'https://www.104.com.tw/job/{job_id}'
        url = f'https://www.104.com.tw/job/ajax/content/{job_id}'
        headers = {
            'User-Agent': ua.random,
            'Referer': job_page
        }

        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Turn HTTP error statuses into exceptions.

        job_info = convert_job_data(response.json())
        job_info['連結'] = job_page

        return job_info

    except Exception as e:
        # Deliberate best-effort boundary: any network, HTTP, JSON or
        # payload-shape failure is reported and signalled with None.
        print(f"處理職缺 {job_id} 時出錯: {e}")
        return None
0 commit comments