Do not use without written authorization. Data exposure testing involves capturing and analyzing potentially sensitive personal data.
requests and json librariesCompare documented API responses with actual responses:
import requests
import json
BASE_URL = "https://target-api.example.com/api/v1"
headers = {"Authorization": "Bearer <user_token>", "Content-Type": "application/json"}
# Fetch a resource and analyze all returned fields
endpoints_to_test = [
("GET", "/users/me", None),
("GET", "/users/me/orders", None),
("GET", "/products", None),
("GET", "/users/me/settings", None),
("GET", "/transactions", None),
]
for method, path, body in endpoints_to_test:
resp = requests.request(method, f"{BASE_URL}{path}", headers=headers, json=body)
if resp.status_code == 200:
data = resp.json()
# Recursively extract all field names
def extract_fields(obj, prefix=""):
fields = []
if isinstance(obj, dict):
for k, v in obj.items():
full_key = f"{prefix}.{k}" if prefix else k
fields.append(full_key)
fields.extend(extract_fields(v, full_key))
elif isinstance(obj, list) and obj:
fields.extend(extract_fields(obj[0], f"{prefix}[]"))
return fields
all_fields = extract_fields(data)
print(f"\n{method} {path} - {len(all_fields)} fields returned:")
for f in sorted(all_fields):
print(f" {f}")
Scan API responses for sensitive data patterns:
import re
SENSITIVE_PATTERNS = {
"email": r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}',
"phone": r'(\+?1?\s?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4})',
"ssn": r'\b\d{3}-\d{2}-\d{4}\b',
"credit_card": r'\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13})\b',
"password_hash": r'\$2[aby]?\$\d{2}\$[./A-Za-z0-9]{53}',
"api_key": r'(?:api[_-]?key|apikey)["\s:=]+["\']?([a-zA-Z0-9_\-]{20,})',
"internal_ip": r'\b(?:10\.\d{1,3}|172\.(?:1[6-9]|2\d|3[01])|192\.168)\.\d{1,3}\.\d{1,3}\b',
"aws_key": r'AKIA[0-9A-Z]{16}',
"jwt_token": r'eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+',
"uuid": r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}',
}
SENSITIVE_FIELD_NAMES = [
"password", "password_hash", "secret", "token", "ssn", "social_security",
"credit_card", "card_number", "cvv", "pin", "private_key", "api_key",
"internal_id", "debug", "trace", "stack_trace", "created_by_ip",
"last_login_ip", "salt", "session_id", "refresh_token", "mfa_secret",
"date_of_birth", "bank_account", "routing_number", "tax_id"
]
def scan_response(endpoint, response_text):
findings = []
# Check for sensitive data patterns in values
for pattern_name, pattern in SENSITIVE_PATTERNS.items():
matches = re.findall(pattern, response_text)
if matches:
findings.append({
"endpoint": endpoint,
"type": "sensitive_value",
"pattern": pattern_name,
"count": len(matches),
"sample": matches[0][:20] + "..." if len(matches[0]) > 20 else matches[0]
})
# Check for sensitive field names
response_lower = response_text.lower()
for field in SENSITIVE_FIELD_NAMES:
if f'"{field}"' in response_lower or f"'{field}'" in response_lower:
findings.append({
"endpoint": endpoint,
"type": "sensitive_field",
"field_name": field
})
return findings
# Scan all endpoint responses
for method, path, body in endpoints_to_test:
resp = requests.request(method, f"{BASE_URL}{path}", headers=headers, json=body)
if resp.status_code == 200:
findings = scan_response(f"{method} {path}", resp.text)
for f in findings:
print(f"[FINDING] {f['endpoint']}: {f['type']} - {f.get('pattern', f.get('field_name'))}")
# Fields the UI shows (observed from the frontend application)
ui_displayed_fields = {
"/users/me": {"name", "email", "avatar_url", "role"},
"/users/me/orders": {"order_id", "date", "status", "total"},
"/products": {"id", "name", "price", "image_url", "description"},
}
# Fields the API actually returns
for method, path, body in endpoints_to_test:
resp = requests.request(method, f"{BASE_URL}{path}", headers=headers, json=body)
if resp.status_code == 200:
data = resp.json()
if isinstance(data, list):
actual_fields = set(data[0].keys()) if data else set()
elif isinstance(data, dict):
# Handle paginated responses
items_key = next((k for k in data if isinstance(data[k], list)), None)
if items_key and data[items_key]:
actual_fields = set(data[items_key][0].keys())
else:
actual_fields = set(data.keys())
else:
continue
expected = ui_displayed_fields.get(path, set())
excess = actual_fields - expected
if excess:
print(f"\n{method} {path} - EXCESS FIELDS (not shown in UI):")
for field in sorted(excess):
print(f" - {field}")
# Many APIs embed full user objects in responses for orders, comments, etc.
endpoints_with_user_objects = [
"/orders", # Each order may include full seller/buyer profile
"/comments", # Comments may include full author profile
"/reviews", # Reviews may expose reviewer details
"/transactions", # Transactions may include counterparty info
"/team/members", # Team listing may expose excessive member data
]
for path in endpoints_with_user_objects:
resp = requests.get(f"{BASE_URL}{path}", headers=headers)
if resp.status_code == 200:
text = resp.text
# Check for user data leakage in nested objects
user_fields_found = []
for field in ["password_hash", "last_login_ip", "mfa_enabled", "phone_number",
"date_of_birth", "ssn", "internal_notes", "salary", "address"]:
if f'"{field}"' in text:
user_fields_found.append(field)
if user_fields_found:
print(f"[EXCESSIVE] {path} exposes user fields: {user_fields_found}")
# GraphQL allows clients to request any available field
GRAPHQL_URL = f"{BASE_URL}/graphql"
# Introspection query to discover all fields on User type
introspection = {
"query": """
{
__type(name: "User") {
fields {
name
type {
name
kind
}
}
}
}
"""
}
resp = requests.post(GRAPHQL_URL, headers=headers, json=introspection)
if resp.status_code == 200:
fields = resp.json().get("data", {}).get("__type", {}).get("fields", [])
print("Available User fields via GraphQL:")
for f in fields:
sensitivity = "SENSITIVE" if f["name"] in SENSITIVE_FIELD_NAMES else "normal"
print(f" {f['name']} ({f['type']['name']}) [{sensitivity}]")
# Try to query sensitive fields
sensitive_query = {
"query": """
query {
users {
id
email
passwordHash
socialSecurityNumber
internalNotes
lastLoginIp
mfaSecret
apiKey
}
}
"""
}
resp = requests.post(GRAPHQL_URL, headers=headers, json=sensitive_query)
if resp.status_code == 200 and "errors" not in resp.json():
print("[CRITICAL] GraphQL exposes sensitive user fields without restriction")
# Test for debug information in responses
debug_headers_to_check = [
"X-Debug-Token", "X-Debug-Info", "Server", "X-Powered-By",
"X-Request-Id", "X-Correlation-Id", "X-Backend-Server",
"X-Runtime", "X-Version", "X-Build-Version"
]
resp = requests.get(f"{BASE_URL}/users/me", headers=headers)
for h in debug_headers_to_check:
if h.lower() in {k.lower(): v for k, v in resp.headers.items()}:
print(f"[INFO LEAK] Header {h}: {resp.headers.get(h)}")
# Test error responses for stack traces
error_payloads = [
("GET", "/users/invalid-id-format", None),
("POST", "/orders", {"invalid": "payload"}),
("GET", "/users/-1", None),
("GET", "/users/0", None),
]
for method, path, body in error_payloads:
resp = requests.request(method, f"{BASE_URL}{path}", headers=headers, json=body)
if resp.status_code >= 400:
text = resp.text.lower()
if any(kw in text for kw in ["stack trace", "traceback", "at com.", "at org.",
"file \"", "line ", "exception", "sql", "query"]):
print(f"[DEBUG LEAK] {method} {path} -> {resp.status_code}: Contains stack trace or query info")
| Term | Definition |
|---|---|
| Excessive Data Exposure | API returns more data fields than the client needs, relying on frontend filtering to hide sensitive information from users |
| Over-Fetching | Requesting or receiving more data than needed for a specific operation, common in REST APIs that return fixed response schemas |
| Response Filtering | Client-side filtering of API response data to display only relevant fields, which provides zero security since the full response is interceptable |
| Object Property Level Authorization | OWASP API3:2023 - ensuring that users can only read/write object properties they are authorized to access |
| PII Leakage | Unintended exposure of Personally Identifiable Information in API responses including names, emails, addresses, SSNs, or financial data |
| Schema Validation | Enforcing that API responses conform to a defined schema, stripping unauthorized fields before transmission |
Context: A mobile banking application's API returns full account objects to the mobile client, which only displays account nickname and balance. The API is accessed by both iOS and Android apps and a web portal.
Approach:
GET /api/v1/accounts response: UI shows 4 fields but API returns 23 fieldsrouting_number, account_holder_ssn_last4, internal_risk_score, kyc_verification_status, and linked_external_accounts - none shown in UIGET /api/v1/transactions response: API returns merchant_id, terminal_id, authorization_code, processor_response fields not needed by the clientGET /api/v1/users/me: API returns last_login_ip, mfa_backup_codes_remaining, account_officer_name, and credit_score_band
POST /api/v1/transfers with invalid payload returns SQL table name in error messagePitfalls:
## Finding: Excessive Data Exposure in Account and Transaction APIs
**ID**: API-DATA-001
**Severity**: High (CVSS 7.1)
**OWASP API**: API3:2023 - Broken Object Property Level Authorization
**Affected Endpoints**:
- GET /api/v1/accounts
- GET /api/v1/transactions
- GET /api/v1/users/me
**Description**:
The API returns full database objects to the client, including sensitive fields
that are not displayed in the mobile application UI. The mobile app filters
these fields client-side, but they are fully accessible by intercepting the
API response. This exposes SSN fragments, internal risk scores, and KYC
verification data for any authenticated user.
**Excess Fields Discovered**:
- /accounts: routing_number, account_holder_ssn_last4, internal_risk_score,
kyc_verification_status, linked_external_accounts (18 excess fields total)
- /transactions: merchant_id, terminal_id, authorization_code,
processor_response (12 excess fields total)
- /users/me: last_login_ip, mfa_backup_codes_remaining, credit_score_band
**Impact**:
An authenticated user can extract sensitive financial data, internal risk
assessments, and PII for their own account that the application is not
intended to reveal. Combined with BOLA vulnerabilities, this data could
be extracted for all users.
**Remediation**:
1. Implement server-side response filtering using DTOs/view models that only include fields needed by the client
2. Use GraphQL field-level authorization or REST response schemas per endpoint per role
3. Remove sensitive fields from API responses at the serialization layer
4. Implement response schema validation in the API gateway to strip undocumented fields
5. Add automated tests that verify response schemas match documentation