1. To use Selenium, install Anaconda and then install Selenium from the Anaconda Prompt (for example with pip install selenium).
2. Install a webdriver: download the Chrome webdriver and place it in C:\PythonHome (a quick smoke test is sketched right after this list).
3. Run the program below; a .txt file with the same name as the script is created.
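Before running the full crawler, a minimal sketch like the following (assuming a Selenium 3.x install and chromedriver at C:/PythonHome/chromedriver.exe as in step 2) can confirm that Selenium is able to drive Chrome at all:

from selenium import webdriver

# Open the sillok branch-list page, print its title, then close the browser.
driver = webdriver.Chrome("c:/PythonHome/chromedriver.exe")
driver.get("http://sillok.history.go.kr/manInfo/branchList.do")
print(driver.title)  # if setup is correct, the page title is printed
driver.quit()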
from selenium import webdriver
import time

driver = webdriver.Chrome("c:/PythonHome/chromedriver.exe")
driver.implicitly_wait(3)
# Wait for the browser window to come up <- 3 s (or even 1 s) may be enough depending on
# internet speed and machine spec; needs testing.

name = __file__.split("\\")[-1][:-3]  # script file name without the .py extension
search_address = "http://sillok.history.go.kr/manInfo/branchList.do"
timesleep = 0.5

for page in range(1, 51):
    try:
        print("*" * 20 + "page" + "*" * 20)
        print(page)
        print("*" * 20 + "page" + "*" * 20)
        url = search_address
        xpath = '//*[@id="cont_area"]/div/table[3]/tbody/tr[6]/th/a'
        # In Chrome open the developer tools (F12), click the element-picker icon next to
        # the Elements tab, click the 식생활 category, then right-click -> Copy XPath.
        driver.get(url)
        time.sleep(timesleep)  # pause to keep the crawling speed polite
        search_res = driver.find_element_by_xpath(xpath)
        page_url = driver.find_element_by_xpath(xpath).get_attribute('href')
        search_res.click()
        ########################################################
        xpath_2 = f'//*[@id="cont_area"]/div[1]/ul[2]/li[{page}]/dl/dt/a'
        # //*[@id="cont_area"]/div[1]/ul[2]/li[50]/dl/dt/a
        # //*[@id="cont_area"]/div[1]/ul[2]/li[1]/dl/dt/a
        # Clicks into the {page}-th entry of the 식생활 list (the two XPaths above show
        # items 50 and 1 for reference).
        search_res = driver.find_element_by_xpath(xpath_2)
        page_url = driver.find_element_by_xpath(xpath_2).get_attribute('href')
        search_res.click()
        title1 = driver.find_element_by_css_selector('#cont_area > div.cont_in_left.cont_full > div.page_tit.clear2.pl_20 > h3').text
        text1 = driver.find_element_by_css_selector('#cont_area > div.cont_in_left.cont_full > div.ins_view_wrap.clear2 > div.ins_view.ins_view_left.w100_w0 > div > div > p').text
        print(title1)
        with open(f"{name}.txt", 'a', encoding='utf-8') as file:
            file.write(title1)
            file.write(",\n")
            file.write(text1)
            file.write(",\n")
    except Exception as e:
        print(f"error:{page} {e}")
        break
driver.quit()
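Note: the script above uses the Selenium 3 API. In Selenium 4 and later, find_element_by_xpath and find_element_by_css_selector were removed and the driver path is passed through a Service object, so the equivalent calls look roughly like this (a sketch of the API change only, not tested against the site):

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Selenium 4 style: driver path goes through Service, locators go through By.
driver = webdriver.Chrome(service=Service("c:/PythonHome/chromedriver.exe"))
driver.get("http://sillok.history.go.kr/manInfo/branchList.do")

# By.XPATH / By.CSS_SELECTOR replace the removed find_element_by_* helpers.
link = driver.find_element(By.XPATH, '//*[@id="cont_area"]/div/table[3]/tbody/tr[6]/th/a')
link.click()
title1 = driver.find_element(
    By.CSS_SELECTOR,
    '#cont_area > div.cont_in_left.cont_full > div.page_tit.clear2.pl_20 > h3'
).text
print(title1)
driver.quit()

With Selenium 4.6 or newer the Service argument can usually be omitted, since Selenium Manager downloads a matching chromedriver automatically.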