"""Scrape car-listing links from auto.ria.com, then harvest their review links.

NOTE(review): this paste lost its indentation and several import lines; the
third-party packages below (requests, beautifulsoup4, selenium, lxml) must be
installed for the script to run.
"""
from time import sleep  # original line was a bare `sleep` token — NameError as pasted
from concurrent.futures import ProcessPoolExecutor
from concurrent.futures import as_completed  # imported but unused in this chunk

import requests
from bs4 import BeautifulSoup
from selenium import webdriver

# Run Chrome headless so the scrape works without a display.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)

base_url = 'https://auto.ria.com/search/?indexName=auto,order_auto,newauto_search&price.currency=&abroad.not=0&custom.not=1&page=0&size=100'
driver.get(base_url)
main_page = driver.page_source
soup = BeautifulSoup(main_page, 'lxml')

# Scroll to the bottom so lazily-rendered pagination appears, then give it time.
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
sleep(3)

# Total page count from the pagination widget, with thousands-spaces stripped
# (e.g. "1 234" -> "1234").  Currently unused below — parse_1 hard-codes 10 pages.
# NOTE(review): find_element_by_xpath was removed in Selenium 4; on a modern
# selenium use driver.find_element(By.XPATH, ...) instead.
pages = driver.find_element_by_xpath('//*[@id="searchPagination"]/div/nav/span[9]').text.replace(' ', '')

links = []       # listing URLs collected by parse_1()
liks_otzv = []   # review-page URLs collected by square() (original name kept, sic)
def parse_1():
    """Collect listing links from search pages 1-9 into the global `links`.

    The result grid is server-rendered, so plain `requests` suffices here
    (no selenium needed).  Placeholder anchors are skipped.

    NOTE(review): page 0 was deliberately skipped by the original
    (`for i in range(10): if i == 0: continue`); `range(1, 10)` keeps that.
    The total page count in the global `pages` is never consulted.
    """
    for page in range(1, 10):
        url = ('https://auto.ria.com/search/?indexName=auto,order_auto,newauto_search'
               '&price.currency=&abroad.not=0&custom.not=1&page=' + str(page) + '&size=100')
        response = requests.get(url)
        page_soup = BeautifulSoup(response.content, 'lxml')
        tickets = page_soup.findAll('section', class_='ticket-item')
        for ticket in tickets:
            link = ticket.find('a').get('href')
            # Placeholder anchors carry no real URL; skip them.
            if link == 'javascript:void(0)':
                continue
            links.append(link)
        # Progress output: number of tickets found on this page, then the page index.
        print(len(tickets))
        print(str(page))


parse_1()
def square(link):
    """Fetch one listing page and record its review-page URL ('' if none found).

    Tries two page layouts in turn: an `allotted i-block` div with an absolute
    href, then a `testimonials_full_wrap` div with a site-relative href.

    Appends to the global `liks_otzv` AND returns the value: appends made
    inside ProcessPoolExecutor worker processes do NOT propagate back to the
    parent, so callers must use the return values from `executor.map`.
    """
    response = requests.get(link)
    page_soup = BeautifulSoup(response.content, 'lxml')
    print(link)  # progress marker
    try:
        link_otzv = page_soup.find('div', class_='allotted i-block').find('a').get('href')
    except AttributeError:  # .find() returned None — this layout is absent
        try:
            link_otzv = 'https://auto.ria.com' + page_soup.find('div', class_='testimonials_full_wrap').find('a').get('href')
        except AttributeError:  # no review link on the page at all
            link_otzv = ''
    liks_otzv.append(link_otzv)
    return link_otzv
# Entry-point guard — required on platforms where multiprocessing re-imports
# the module in each worker.  The original `if name == 'main':` could never be
# true (and `name` itself was undefined).
if __name__ == '__main__':
    with ProcessPoolExecutor(max_workers=3) as executor:
        # Worker processes do not share the parent's globals, so collect the
        # review links from the workers' return values, not from `liks_otzv`.
        # (The original printed `links_otzv`, a name that does not exist.)
        results = list(executor.map(square, links))
    print(results)
# (removed: pastebin page chrome accidentally captured in the paste — not part of the script)