"""Scrape the hashtags of Instagram posts listed under one tag and store them.

The script opens the "explore/tags/<tagName>" page in a headless Chrome,
scrolls it with PAGE_DOWN, visits every post link it finds in a second
browser, reads the ``instapp:hashtags`` meta tags of that post and inserts one
row per post URL into the ``TagData`` table (skipping URLs already stored).
"""

import sys
import time
from collections.abc import MutableMapping as MappingMixin  # noqa
from collections.abc import Sequence  # noqa

import pymssql
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

tagName = "태그이름"
url = "https://www.instagram.com/explore/tags/" + tagName + "/"

# Location of the Chrome driver executable.
DRIVER_DIR = 'C:/Users/User/AppData/Local/Programs/Python/Python37-32/Lib/site-packages/chromedriver.exe'

# Stop after this many consecutive PAGE_DOWNs that reveal no unvisited post.
# Without a limit the scroll loop has no exit once the feed is exhausted.
MAX_IDLE_SCROLLS = 10

# Inserts a row only when the URL is not stored yet; the URL is bound twice
# (once for the match, once for the inserted value).
MERGE_SQL = (
    " MERGE TagData AS A"
    " USING (SELECT %s AS URL) AS B"
    " ON A.URL = B.URL"
    " WHEN NOT MATCHED THEN"
    " INSERT (URL,Tags,BaseTag) VALUES (%s,%s,%s);"
)

# Translation table that replaces every code point above U+FFFF with U+FFFD.
# NOTE(review): presumably because emoji cannot be printed/stored downstream
# -- confirm against the console encoding and the Tags column type.
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)


def _release(closer):
    """Run one cleanup callable; report a failure instead of raising it.

    Keeps a failing ``close``/``quit`` from preventing the remaining
    resources from being released.
    """
    try:
        closer()
    except Exception as exc:
        print(exc)


cOptions = webdriver.ChromeOptions()
cOptions.add_argument('headless')
cOptions.add_argument('window-size=1920x1080')
cOptions.add_argument("disable-images")

url_list = []
tag_list = []
pagedowns = 1  # not used by the script; kept so the module-level name survives
total = 0

# Everything that must be released is created inside the ``try`` so that a
# failure half-way through start-up still reaches the ``finally`` clean-up.
driver = None
driver2 = None
conn = None
try:
    driver = webdriver.Chrome(DRIVER_DIR, options=cOptions)
    driver.implicitly_wait(5)
    driver.get(url)
    # Second browser: loads the individual posts so the listing page in
    # ``driver`` keeps its scroll position.
    driver2 = webdriver.Chrome(DRIVER_DIR, options=cOptions)
    elem = driver.find_element_by_tag_name("body")

    conn = pymssql.connect(server='ip address', user='db id', password='db password', database='db name')
    cur = conn.cursor()

    # To cap the run by stored posts instead (1 per photo, e.g. 30 for 30
    # photos), add ``and total < <limit>`` to the loop condition.
    # Debugging tip: drop the 'headless' option to watch whether the listing
    # really advances after each PAGE_DOWN.
    visited = set()
    idle_scrolls = 0
    while idle_scrolls < MAX_IDLE_SCROLLS:
        elem.send_keys(Keys.PAGE_DOWN)
        time.sleep(1)

        anchors = driver.find_elements_by_css_selector('div._bz0w > a')
        hrefs = [anchor.get_attribute('href') for anchor in anchors]
        # De-duplicate in page order and drop empty links as well as posts
        # that an earlier scroll already handled.
        url_list = [
            href for href in dict.fromkeys(hrefs)
            if href and href not in visited
        ]
        if not url_list:
            idle_scrolls += 1
            continue
        idle_scrolls = 0

        for post_url in url_list:
            visited.add(post_url)
            driver2.get(post_url)
            metas = driver2.find_elements_by_xpath("//meta[@property='instapp:hashtags']")
            # Built fresh for every post so tags never leak from one post
            # into the next one's row.
            tag_list = [
                (meta.get_attribute("content") or "").translate(non_bmp_map)
                for meta in metas
            ]
            if not ''.join(tag_list):
                # No hashtags on this post: nothing to store.
                time.sleep(0.2)
                continue

            # The row is keyed by the URL of the post that was just loaded.
            print(post_url, len(tag_list), sep=' ------- ')
            cur.execute(
                MERGE_SQL,
                (post_url, post_url, ' | '.join(tag_list), tagName),
            )
            conn.commit()
            total = total + 1
            time.sleep(0.3)
except Exception as e:
    print(e)
finally:
    # ``quit`` ends the WebDriver session and closes all of its windows, so a
    # separate ``close`` call is not needed.
    for resource, closer_name in ((conn, "close"), (driver2, "quit"), (driver, "quit")):
        if resource is not None:
            _release(getattr(resource, closer_name))
'Python' 카테고리의 다른 글
Python 특정 사이트에서 이미지 다운로드 받기 (0) | 2021.02.23 |
---|