query.py

import os
import httpx
import pandas as pd
from dotenv import load_dotenv

load_dotenv()

consumer_key = os.getenv('CONSUMER_KEY')
consumer_secret = os.getenv('CONSUMER_SECRET')
username = os.getenv('SFDC_USERNAME')
password = os.getenv('SFDC_PASSWORD')
# domain = 'login' # test
domain = "<your org's custom domain>" # if you are using a custom domain

auth_url = f'https://{domain}/services/oauth2/token'
auth_data = {
    'grant_type': 'password',
    'client_id': consumer_key,
    'client_secret': consumer_secret,
    'username': username,
    'password': password
}
auth_response = httpx.post(auth_url, data=auth_data)
print(auth_response)
access_token = auth_response.json().get('access_token')
instance_url = auth_response.json().get('instance_url')

if not access_token:
    raise Exception("Authentication Failed")

headers = {
    'Authorization': f'Bearer {access_token}',
    'Content-Type': 'application/json'
}

soql_query = 'SELECT id, name, stagename, account.id, account.name, LastModifiedBy.name, LastModifiedBy.Profile.name, LastModifiedBy.Profile.UserType FROM opportunity'
query_endpoint = f'{instance_url}/services/data/v60.0/query/'  # Replace 'vXX.0' with your Salesforce API version
records = []
while query_endpoint:
    response = httpx.get(query_endpoint, headers=headers, params={'q': soql_query})
    if response.status_code != 200:
        raise Exception(f"Request failed with status code {response.status_code}")
    
    response_json = response.json().get('records', [])
    records.extend(response_json)
    
    # Check for nextRecordsUrl for pagination
    query_endpoint = response.json().get('nextRecordsUrl')
    if query_endpoint:
        query_endpoint = f'{instance_url}{query_endpoint}'

print(len(records))

df = pd.json_normalize(records)
df2 = df[df.columns.drop(list(df.filter(regex='attributes')))]
df.to_excel('Salesforce report.xlsx', index=False)