Server/Python

λͺ¨κ°μ½” 파이썬 크둀링 13일차

thals0 2022. 4. 8. 16:46
728x90

#13. 동적 크롤링 ③

📌 구현할 소스코드

2개의 소스코드를 구현함

✅ 첫 번째 소스코드

  • 영단어 번역 여러 번 실행
  • 번역 결과를 모두 'my_papago.csv' 파일에 저장

 

✅ 두 번째 소스코드

  • 영단어 번역 여러 번 실행
  • 'my_papago.csv' 파일에 있는 영단어일 경우, 저장하지 않음
  • 'my_papago.csv' 파일에 없는 영단어는 번역 결과 저장

 

📌 첫번째 파이썬 코드 작성

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import time
import csv

# First script: translate English words on Papago repeatedly and store every
# (word, translation) pair in a freshly created 'my_papago.csv'.

# Start Chrome using the driver binary resolved by webdriver_manager.
chrome_driver = ChromeDriverManager().install()
service = Service(chrome_driver)
driver = webdriver.Chrome(service=service)

papago_url = "https://papago.naver.com/"

try:
    driver.get(papago_url)

    # Fixed pause so the page can finish loading before elements are queried.
    time.sleep(3)

    # Mode "w" truncates an existing file, so each run starts a new CSV.
    # The context manager closes the file even if the loop raises.
    with open("./my_papago.csv", "w", newline="", encoding="utf-8-sig") as f:
        wtr = csv.writer(f)
        wtr.writerow(["영단어", "번역결과"])

        while True:
            keyword = input("번역할 영단어 입력 (0 입력하면 종료) : ")
            if keyword == "0":
                print("번역 종료")
                break
            if not keyword.strip():
                # Nothing to translate; ask again instead of submitting blanks.
                continue

            form = driver.find_element(By.CSS_SELECTOR, "textarea#txtSource")
            # Clear before typing so leftover text is never concatenated
            # with the new word.
            form.clear()
            form.send_keys(keyword)

            button = driver.find_element(By.CSS_SELECTOR, "button#btnTranslate")
            button.click()
            # Fixed pause for the translation to appear in the target box.
            time.sleep(1)

            output = driver.find_element(By.CSS_SELECTOR, "div#txtTarget").text

            wtr.writerow([keyword, output])
            # Flush per row so already-translated words survive a later crash.
            f.flush()
finally:
    # quit() ends the whole WebDriver session (all windows and the driver
    # process); close() would only close the current window.
    driver.quit()

 

👉 실행 결과

 

 

📌 두번째 파이썬 코드 작성

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import time
import csv

# Second script: translate English words on Papago repeatedly, but skip words
# already stored in 'my_papago.csv' and append only new translations.

# Start Chrome using the driver binary resolved by webdriver_manager.
chrome_driver = ChromeDriverManager().install()
service = Service(chrome_driver)
driver = webdriver.Chrome(service=service)

papago_url = "https://papago.naver.com/"

try:
    driver.get(papago_url)

    # Fixed pause so the page can finish loading before elements are queried.
    time.sleep(3)

    # Load previously saved translations into a dict: word -> translation.
    my_dict = {}
    has_header = False
    try:
        with open("./my_papago.csv", "r", newline="", encoding="utf-8-sig") as f:
            rdr = csv.reader(f)
            # Skip the header row; the default avoids StopIteration on an
            # empty file.
            has_header = next(rdr, None) is not None
            for row in rdr:
                if len(row) < 2:
                    # Ignore blank or malformed rows instead of crashing.
                    continue
                my_dict[row[0]] = row[1]
    except FileNotFoundError:
        # No file yet (first script never ran): start with an empty dict.
        pass

    # Reopen in append mode so existing rows are kept.
    with open("./my_papago.csv", "a", newline="", encoding="utf-8-sig") as f:
        wtr = csv.writer(f)
        if not has_header:
            # New or empty file: write the column titles first.
            wtr.writerow(["영단어", "번역결과"])

        while True:
            keyword = input("번역할 영단어 입력 (0 입력하면 종료) : ")
            if keyword == "0":
                print("번역 종료")
                break
            if not keyword.strip():
                # Nothing to translate; ask again instead of submitting blanks.
                continue

            # Word already known: report the stored translation and do not
            # touch the CSV.
            if keyword in my_dict:
                print("이미 번역한 영단어입니다! 뜻은", my_dict[keyword], "입니다.")
                continue

            # New word: translate it, then record it in both the CSV and the dict.
            form = driver.find_element(By.CSS_SELECTOR, "textarea#txtSource")
            # Clear before typing so leftover text is never concatenated
            # with the new word.
            form.clear()
            form.send_keys(keyword)
            driver.find_element(By.CSS_SELECTOR, "button#btnTranslate").click()
            # Fixed pause for the translation to appear in the target box.
            time.sleep(1)

            output = driver.find_element(By.CSS_SELECTOR, "div#txtTarget").text

            # Append the row to the CSV file.
            wtr.writerow([keyword, output])
            # Flush per row so already-translated words survive a later crash.
            f.flush()

            # Remember it so the same word is not translated twice in this run.
            my_dict[keyword] = output
finally:
    # quit() ends the whole WebDriver session (all windows and the driver
    # process); close() would only close the current window.
    driver.quit()

 

👉 실행 결과

 

 

 

 

 

 

β€‹β­μ •λ¦¬ν•˜κΈ°β­

β€‹βœ” 라이브러리 μ€€λΉ„

  • 동적크둀링을 μœ„ν•΄ selenium, time
  • csv νŒŒμΌμ„ 닀루기 μœ„ν•΄ csv

✔ 첫번째 소스코드

  • csv 파일생성
  • writer 객체 사용
  • csv 파일의 열 제목 작성
  • 반복문으로 번역 여러번 실행 및 csv 파일에 저장

βœ” λ‘λ²ˆμ§Έ μ†ŒμŠ€μ½”λ“œ

  • csv νŒŒμΌμ— μ €μž₯된 κ°’ 뢈러였기
  • 뢈러온 데이터λ₯Ό λ”•μ…”λ„ˆλ¦¬λ‘œ μ €μž₯
  • νŒŒμΌλ‹«κ³  λ‹€μ‹œ μ—΄κΈ°
  • csv 파일의 μ—΄ 제λͺ© μž‘μ„±
  • 반볡문으둜 λ²ˆμ—­ μ—¬λŸ¬λ²ˆ μ‹€ν–‰ 및 csv νŒŒμΌμ— μΆ”κ°€
728x90