파이썬 코딩

2024.12.07. [파이썬] 네이버 지도 크롤링

37song 2025. 1. 15. 19:42

https://cafe.naver.com/startcodingofficial/1458

 

끝이라 생각하고 답글 쓴지.. 2시간 반만에 다시 올립니다!!🫡

끝났다 생각하고 10시 반쯤 ???? ㅋㅋㅋㅋㅋㅋ 6시간만에 겨우 만들었습니다 !!!!!!! 하고 답글을 달았는데요, ㅋㅋㅋㅋㅋㅋㅋ 후.. 별점 없을때의 except 코드가 ...

cafe.naver.com

 

GUI를 제외하고,
오류도 수두룩하게 많아
가장 많이 생각하고, 고민해야 했던 네이버 지도 크롤링

작업 시간 : 약 8시간 🤪

# NOTE(review): this script uses `webdriver`, `By`, `time` and `pd` without
# importing them in the visible part of the file -- presumably
# `from selenium import webdriver`, `from selenium.webdriver.common.by import By`,
# `import time` and `import pandas as pd` live elsewhere; confirm.
from urllib.parse import quote

# Ask for the search keyword and for how many places to extract.
search = input('검색어')
# `count` is used later as the exclusive upper bound of range(1, count),
# hence the +1: entering N makes the loops visit list positions 1..N.
count = int(input('몇 개 추출?')) + 1

# Percent-encode the keyword so characters such as '#', '?', '/', '%' or
# spaces cannot change the meaning of the URL path.
url = f'https://map.naver.com/p/search/{quote(search, safe="")}'

# Open the search page in a maximized Chrome window and give it a moment to load.
driver = webdriver.Chrome()
driver.maximize_window()
driver.get(url)
time.sleep(1)

# Collected rows: [name, category, address, phone, homepage].
result = []

def _enter_frame(frame_name, settle_seconds):
    """Switch into the iframe *frame_name*, starting from the top-level document.

    Returns to the default content first, sleeps 1 second, switches into
    the frame, then sleeps *settle_seconds* so the frame can render.
    """
    driver.switch_to.default_content()
    time.sleep(1)
    driver.switch_to.frame(frame_name)
    time.sleep(settle_seconds)


def _first_text(*selectors):
    """Return the text of the first selector that can be read, or ''.

    Selectors are tried in order inside the currently selected frame.
    """
    for css in selectors:
        try:
            return driver.find_element(By.CSS_SELECTOR, css).text
        except Exception:
            continue
    return ''


def _href(css):
    """Return the `href` attribute of the element matching *css*, or ''."""
    try:
        return driver.find_element(By.CSS_SELECTOR, css).get_attribute('href')
    except Exception:
        return ''


def _road_address(popup_text):
    """Extract the text between the '도로명' and '복사' markers, or ''.

    Returns '' instead of raising IndexError when the '도로명' marker is
    missing from *popup_text*.
    """
    pieces = popup_text.split('도로명')
    if len(pieces) < 2:
        return ''
    return pieces[1].split('복사')[0]


def _scrape_rated_results():
    """Scrape result lists whose entries show a star rating.

    Each non-ad entry is clicked in the search frame and its details are
    read from the detail frame ('entryIframe').  Entries that cannot be
    located or clicked -- ad entries never match the `:not(.cZnHG)`
    selector -- are skipped instead of appending leftover values from a
    previous iteration.
    """
    # Ad entries occupy list positions too, so extend the range by their count.
    ad_count = len(driver.find_elements(
        By.CSS_SELECTOR,
        '#_pcmap_list_scroll_container > ul > li.UEzoS.rTjJo.cZnHG'))

    for i in range(1, ad_count + count):
        _enter_frame('searchIframe', 2)

        try:
            # Click the i-th entry, scroll it into view and read its name.
            element = driver.find_element(By.CSS_SELECTOR, f'#_pcmap_list_scroll_container > ul > li.UEzoS.rTjJo:not(.cZnHG):nth-of-type({i}) > div:nth-of-type(1) > a > div > div')
            element.click()
            driver.execute_script("arguments[0].scrollIntoView();", element)
            com = driver.find_element(By.CSS_SELECTOR, f'#_pcmap_list_scroll_container > ul > li:nth-of-type({i}) div.place_bluelink > span:nth-of-type(1)').text
        except Exception:
            # Ad entry or missing/unclickable item: nothing to record.
            continue
        time.sleep(1)

        try:
            _enter_frame('entryIframe', 2)
        except Exception:
            # Detail frame did not open: keep the name, leave details blank.
            result.append([com, '', '', '', ''])
            continue

        sector = _first_text('#_title > div > span.lnJFt')
        addr = _first_text('div.place_section_content .LDgIH')
        phone = _first_text('div.place_section_content .xlx7Q')
        homepage = _href('div.place_section_content div.jO09N > a')

        result.append([com, sector, addr, phone, homepage])
        time.sleep(1)


def _scrape_unrated_results():
    """Scrape result lists whose entries show no star rating.

    Name, category and road address are read from the list itself (the
    address via the entry's address popup); phone and homepage are read
    from the detail frame.  Missing pieces become '' and entries that
    cannot be located are skipped, so one bad entry no longer aborts the
    whole run.
    """
    for i in range(1, count):
        _enter_frame('searchIframe', 2)
        item = f'#_pcmap_list_scroll_container > ul > li:nth-of-type({i})'

        try:
            # Click the i-th entry, scroll it into view and read its name.
            element = driver.find_element(By.CSS_SELECTOR, f'{item} .place_bluelink')
            element.click()
            driver.execute_script("arguments[0].scrollIntoView();", element)
            com = driver.find_element(By.CSS_SELECTOR, f'{item} .place_bluelink > span').text
        except Exception:
            # Fewer entries than requested, or the item is not clickable.
            continue
        sector = _first_text(f'{item} div.place_bluelink > span.YzBgS')

        # Open the address popup, read the road address, then close it again.
        addr = ''
        toggle_css = f'{item} span._44_8'
        try:
            driver.find_element(By.CSS_SELECTOR, toggle_css).click()
            addr = _road_address(driver.find_element(By.CSS_SELECTOR, '.zZfO1').text)
            driver.find_element(By.CSS_SELECTOR, toggle_css).click()
        except Exception:
            # Address popup unavailable for this entry; keep what was read.
            pass
        time.sleep(2)

        try:
            _enter_frame('entryIframe', 1)
        except Exception:
            # Detail frame did not open: keep list data, leave the rest blank.
            result.append([com, sector, addr, '', ''])
            continue

        # The phone number appears under different markup depending on the place.
        phone = _first_text(
            '.O8qbU.nbXkr > div > span.xlx7Q',
            'span.txt_tel',
            'div.FEtx4c2oAJQ4yTWSktTy > span:nth-of-type(2)',
        )
        homepage = _href('div.O8qbU.yIPfO > div > div.jO09N > a')

        result.append([com, sector, addr, phone, homepage])
        time.sleep(1)


# Decide the page layout once, up front, instead of using an exception as the
# branch: a failure halfway through one layout must not re-run the other
# layout on top of rows that were already collected.
try:
    _enter_frame('searchIframe', 2)
    if driver.find_elements(By.CSS_SELECTOR, 'span.h69bs.orXYY'):
        _scrape_rated_results()
    else:
        _scrape_unrated_results()
finally:
    # Always close the browser session, even when scraping fails.
    driver.quit()

# Build the result table from the collected rows.
df = pd.DataFrame(result, columns=['업체명','업종','주소','전화번호','홈페이지'])

# Name the workbook after the search keyword.  Characters that are not
# allowed in file names (path separators and the Windows-reserved set) are
# replaced with '_' so a keyword like "a/b" or "what?" cannot make the
# export fail; ordinary keywords produce the same file name as before.
safe_name = search.translate(str.maketrans({ch: '_' for ch in '\\/:*?"<>|'}))
df.to_excel(f'{safe_name}.xlsx')