Collectives™ on Stack Overflow
Find centralized, trusted content and collaborate around the technologies you use most.
Learn more about Collectives
Teams
Q&A for work
Connect and share knowledge within a single location that is structured and easy to search.
Learn more about Teams
Ask Question
chrome_driver = Service(ChromeDriverManager().install())
options = Options()
options.add_experimental_option('detach',True)
options.add_experimental_option('excludeSwitches',['enable-logging'])
# options.add_argument('--headless')
# options.add_argument('--window-size = x, y')
# options.add_argument('--start-maximazed')
# options.add_argument('--start-fullscreen')
# options.add_argument('--mute-audio')
self.driver = webdriver.Chrome(options=options,service=chrome_driver)
self.now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S (%a)')
self.hour = datetime.datetime.now().strftime('%H시_%M_분')
self.today = date.today()
self.folder = None
self.today_file = None
self.kakao_talk = kakao()
def connect(self):
url = 'http://minishop.gmarket.co.kr/meritblog'
# url = 'http://minishop.gmarket.co.kr/hanvitis'
self.driver.get(url)
return url
def shopping_mall(self):
mall_name = self.driver.find_element(By.CSS_SELECTOR,'a.shop_title_ui_txt').text
self.folder = f'./메리트몰_데이터베이스/지마켓'
self.today_file = f'{self.today}_{mall_name}_지마켓.json'
return mall_name
def soup(self,url_param):
response = requests.get(url_param)
if response.status_code == 200:
sp = BeautifulSoup(response.text, 'html.parser')
return sp
except requests.packages.urllib3.exceptions.MaxRetryError as e:
print(str(e))
def total_product(self):
total_items = 0
products = self.driver.find_element(By.ID,'ulCategory').find_elements(By.CSS_SELECTOR,'span.data_num')
for product in products:
items = int(product.text.replace('(',"").replace(')',""))
total_items += items
# 391개
return total_items
def paging(self,total_items,url):
page_list = []
# 전체상품보기 클릭
self.driver.execute_script('arguments[0].click();',self.driver.find_element(By.CSS_SELECTOR,'.allList_view > a'))
time.sleep(2)
# 한 페이지의 상품 수
view_limit = int(self.driver.find_element(By.CSS_SELECTOR,'div.limit').text.replace("개씩",""))
# 페이지 수 구하기
if total_items % view_limit == 0:
page = total_items // view_limit
else:
page = total_items // view_limit + 1
# 페이지 리스트
for cnt in range(page):
page_url = f'{url}/List?CategoryType=General&SortType=FocusRank&DisplayType=SmallImage&Page={cnt+1}&PageSize=60'
page_list.append(page_url)
# self.driver.quit()
return page_list
def data_one(self,page_list):
"""상품 url 리스트
정상가/할인가/할인율 딕셔너리"""
url_list = []
price_dic = {}
for page in page_list:
html = self.soup(page)
for items in html.find('ul',class_='type2').find_all('li'):
# url
item_url = items.find('a')['href']
# 상품코드
item_code = item_url[-10:]
# 가격 및 할인율
if items.find('p',class_='prd_price').find('span',class_='del_important'):
original_price = items.find('p',class_='prd_price').find('span',class_='del_important').text.replace("원","")
discount_price = items.find('p',class_='prd_price').find('strong').text.replace("원","")
sale_rate = items.find('p',class_='prd_price').find('span',class_='splt_ico usr_clr').text
else:
original_price = items.find('p',class_='prd_price').find('strong').text.replace("원","")
discount_price = "없음"
sale_rate = "없음"
url_list.append(item_url)
price_dic[item_code]={"정상가":original_price,"할인가":discount_price,"할인율":sale_rate}
time.sleep(randint(1,10))
self.driver.quit()
return url_list , price_dic
def check_start(self):
url = self.connect()
mall_name = self.shopping_mall()
total_items = self.total_product()
page_list = self.paging(total_items,url)
url_list,price_dic = self.data_one(page_list)
if __name__ == "__main__":
g_market = gmarket_sales()
# g_market.check_start()
schedule.every().hour.at(":20").do(g_market.check_start)
while True:
schedule.run_pending()
time.sleep(1)
Hello, I am a student practicing web page crawling.
I'm making a code that scrapes data by parsing a website with selenium.
I wrote the program so that it runs at regular intervals using the final schedule module.
However, if chrome_driver is initialized in the init of the class and the driver.quit() command is put in the execution process,
the MaxRetryError(_pool, url, error or ResponseError(cause)) urllib3.exceptions.MaxRetryError
code is displayed when the second code is executed..
Below is the code I wrote.
I would really appreciate it if you could point out any problems.
Thanks for contributing an answer to Stack Overflow!
-
Please be sure to
answer the question
. Provide details and share your research!
But
avoid
…
-
Asking for help, clarification, or responding to other answers.
-
Making statements based on opinion; back them up with references or personal experience.
To learn more, see our
tips on writing great answers
.