1. To use Selenium, install Anaconda and then install the selenium package from the Anaconda Prompt (e.g. pip install selenium).

2. Install a WebDriver: download the Chrome WebDriver (chromedriver) and place it in C:\PythonHome (a quick check that this setup works follows this list).

3. Run the program below; a .txt file with the same name as the script is created.
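A minimal check of steps 1–2, assuming Selenium 3.x and the driver at C:\PythonHome (adjust the path if yours differs):

from selenium import webdriver

driver = webdriver.Chrome("c:/PythonHome/chromedriver.exe")
driver.get("http://sillok.history.go.kr/manInfo/branchList.do")
print(driver.title)  # if the page title prints, the install and driver path are working
driver.quit()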


from selenium import webdriver
import time

driver = webdriver.Chrome("c:/PythonHome/chromedriver.exe")
driver.implicitly_wait(3)
# Implicit wait: element lookups retry for up to 3 seconds.
# Depending on connection speed and machine spec, 1 second may be enough; test it.

# Output file is named after this script (e.g. crawl.py -> crawl.txt).
name = __file__.split("\\")[-1][:-3]
search_address = "http://sillok.history.go.kr/manInfo/branchList.do"

timesleep = 0.5

for page in range(1, 51):
    try:
        print("*" * 20 + "page" + "*" * 20)
        print(page)
        print("*" * 20 + "page" + "*" * 20)

        url = search_address
        xpath = '//*[@id="cont_area"]/div/table[3]/tbody/tr[6]/th/a'
        # How this XPath was obtained: in Chrome press F12 (DevTools), click the
        # element-picker icon, click the 식생활 (dietary life) category under 주부식,
        # then right-click the highlighted node and choose Copy XPath.

        driver.get(url)
        time.sleep(timesleep)  # pause so the crawl keeps pace with page loading

        search_res = driver.find_element_by_xpath(xpath)
        page_url = driver.find_element_by_xpath(xpath).get_attribute('href')  # href of the category link (not used below)

        search_res.click()

        ########################################################

        xpath_2 = f'//*[@id="cont_area"]/div[1]/ul[2]/li[{page}]/dl/dt/a'
        # //*[@id="cont_area"]/div[1]/ul[2]/li[50]/dl/dt/a
        # //*[@id="cont_area"]/div[1]/ul[2]/li[1]/dl/dt/a
        # Opens the {page}-th entry inside 식생활; the two XPaths above show
        # what the 50th and 1st entries look like.

        search_res = driver.find_element_by_xpath(xpath_2)
        page_url = driver.find_element_by_xpath(xpath_2).get_attribute('href')

        search_res.click()

        title1 = driver.find_element_by_css_selector('#cont_area > div.cont_in_left.cont_full > div.page_tit.clear2.pl_20 > h3').text
        text1 = driver.find_element_by_css_selector('#cont_area > div.cont_in_left.cont_full > div.ins_view_wrap.clear2 > div.ins_view.ins_view_left.w100_w0 > div > div > p').text

        print(title1)

        with open(f"{name}.txt", 'a', encoding='utf-8') as file:
            file.write(title1)
            file.write(",\n")
            file.write(text1)
            file.write(",\n")
    except Exception:
        print(f"error:{page}")
        break

driver.quit()
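Note: the calls above (webdriver.Chrome with a path argument, find_element_by_xpath, find_element_by_css_selector) belong to Selenium 3 and were deprecated and later removed in Selenium 4. If you are on a newer Selenium, the equivalent calls look roughly like this (a sketch only, reusing the same driver path and selectors as above):

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

driver = webdriver.Chrome(service=Service("c:/PythonHome/chromedriver.exe"))
link = driver.find_element(By.XPATH, '//*[@id="cont_area"]/div/table[3]/tbody/tr[6]/th/a')
title1 = driver.find_element(By.CSS_SELECTOR, '#cont_area > div.cont_in_left.cont_full > div.page_tit.clear2.pl_20 > h3').text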
