Check out Kite (free AI Coding Assistant) → Link



Buy Me a Coffee? Your support is much appreciated!

PayPal Me: https://www.paypal.me/jiejenn/5
Venmo: @Jie-Jenn

Source Code: 

import time
import datetime
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Number of listings per results page; the ``s`` query parameter is advanced
# by this amount for each page (presumably a result offset -- confirm against
# the site if the layout changes).
_RESULTS_PER_PAGE = 120

# Seconds to wait for the server before giving up on a request.
_REQUEST_TIMEOUT = 30

# Browser-like headers sent with every request.
_REQUEST_HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:87.0) Gecko/20100101 Firefox/87.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'Cache-Control': 'max-age=0',
}


def _fetch_soup(url, params=None):
    """Download *url* and return the response body parsed as HTML."""
    response = requests.get(
        url,
        headers=_REQUEST_HEADERS,
        params=params,
        timeout=_REQUEST_TIMEOUT,  # without this a stalled server hangs forever
    )
    # Fail with a clear HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')


def _span_text(result_row, css_class):
    """Return the text of the first matching <span>, or '' when absent."""
    span = result_row.find('span', css_class)
    return span.text if span is not None else ''


def _parse_result_row(result_row):
    """Extract one listing as a list in the CSV column order."""
    link = result_row.h3.a
    return [
        result_row.time['datetime'],
        link['data-id'],
        link['href'],
        # Not every listing carries a price or a neighbourhood.
        _span_text(result_row, 'result-price'),
        _span_text(result_row, 'result-hood'),
        link.text,
    ]


def main(query_url):
    """Scrape every results page of a search URL and export the rows to CSV.

    The total result count is read from the first page, each results page is
    then fetched in turn (pausing one second between requests), and the
    collected rows are written to ``Craigslist Results (<timestamp>).csv``
    in the current working directory.

    Args:
        query_url: Full search URL, including its query string.

    Returns:
        None. Progress and any error are printed; on error no file is
        written.
    """
    try:
        first_page = _fetch_soup(query_url)
        total_tag = first_page.find('span', 'totalcount')
        if total_tag is None:
            raise ValueError('Could not find the total result count on the page')
        total_results = int(total_tag.text)
        # Ceiling division: an exact multiple of the page size must not
        # request a trailing empty page, and zero results means zero pages.
        total_pages = -(-total_results // _RESULTS_PER_PAGE)

        search_results = []

        for page_index in range(total_pages):
            print('Processing Page {0}'.format(page_index + 1))

            soup = _fetch_soup(query_url, params={'s': page_index * _RESULTS_PER_PAGE})
            results = soup.find('ul', {'id': 'search-results'})
            if results is None:
                raise ValueError(
                    'Could not find the search results list on page {0}'.format(page_index + 1)
                )

            search_results.extend(
                _parse_result_row(result_row)
                for result_row in results.find_all('li', 'result-row')
            )
            # Pause between page requests to avoid hammering the server.
            time.sleep(1)

        columns = ('Post Date', 'Post Id', 'Post Url', 'Price', 'Location', 'Post title')
        df = pd.DataFrame(search_results, columns=columns)
        timestamp = datetime.datetime.now().strftime('%m_%d_%y %H%M%S')
        df.to_csv(f'Craigslist Results ({timestamp}).csv', index=False)
        print('File Exported')
    except Exception as e:
        # Top-level boundary of the script: report the failure rather than
        # crash with a traceback, matching the original best-effort behavior.
        print(e)

if __name__ == '__main__':
    # Run the sample search only when executed as a script, so importing this
    # module does not trigger network requests or write a CSV file.
    main('https://sfbay.craigslist.org/d/for-sale/search/sfc/sss?query=playstation%205')