Introduction

Data Masking and Redaction

Data masking protects sensitive information by replacing it with realistic but fictional data. Unlike encryption, masked data is permanently de-identified — it cannot be reversed to recover the original value. Organizations use masking for development, testing, analytics, and compliance with privacy regulations like GDPR and CCPA.

Static Data Masking

Static masking creates a sanitized copy of a production database for non-production use.

import hashlib

import random

import string

class StaticDataMasker:
    """Masking helpers for building a sanitized copy of production data.

    E-mail masking is derived from a SHA-256 digest of the input, so the same
    address always maps to the same fake address. Phone and name masking draw
    from a ``random.Random`` seeded at construction, so a fresh instance with
    the same seed replays the same sequence of replacements.

    NOTE: an unsalted digest of a guessable value (such as an e-mail address)
    can be confirmed by hashing candidate inputs; treat ``mask_email`` output
    as a pseudonym rather than as irreversible anonymization.
    """

    # Replacement pools for mask_name; built once instead of on every call.
    _FIRST_NAMES = ('John', 'Jane', 'Alex', 'Sarah', 'Michael')
    _LAST_NAMES = ('Smith', 'Johnson', 'Williams', 'Brown', 'Jones')

    def __init__(self, seed=42):
        """Create a masker whose random replacements are driven by *seed*."""
        self.seed = seed
        self.rng = random.Random(seed)

    def mask_email(self, email):
        """Generate a consistent fake email from the real one.

        The whole input string is hashed, so malformed addresses (no ``@`` or
        several of them) are masked as well instead of raising.
        """
        digest = hashlib.sha256(email.encode()).hexdigest()
        return f"{digest[:12]}@masked-domain.com"

    def mask_phone(self, phone):
        """Mask phone number, keeping format but replacing digits.

        Every digit is replaced by a digit drawn from ``self.rng``; all other
        characters (spaces, dashes, parentheses, ``+``) are kept in place.
        """
        return ''.join(
            str(self.rng.randint(0, 9)) if char.isdigit() else char
            for char in phone
        )

    def mask_credit_card(self, cc_number):
        """Mask all but last 4 digits.

        Spaces and dashes are removed first. Inputs shorter than four
        characters (after stripping) are returned as-is.
        """
        clean = cc_number.replace(' ', '').replace('-', '')
        if len(clean) < 4:
            return clean
        return '*' * (len(clean) - 4) + clean[-4:]

    def mask_name(self, name):
        """Replace name with a fake name.

        Inputs with two or more whitespace-separated parts yield a fake
        "First Last" pair; anything else yields a fake first name only.
        """
        if len(name.split()) >= 2:
            first = self.rng.choice(self._FIRST_NAMES)
            last = self.rng.choice(self._LAST_NAMES)
            return f"{first} {last}"
        return self.rng.choice(self._FIRST_NAMES)

SQL-Based Static Masking

-- PostgreSQL static masking

-- Build a sanitized copy of users_production for non-production use.
-- NOTE: md5/sha256 without a secret key are pseudonyms, not anonymization;
-- low-entropy values such as SSNs can be recovered by hashing every
-- candidate. Prefer a keyed hash (e.g. pgcrypto's hmac) when that matters.
CREATE TABLE users_masked AS
SELECT
    id,
    -- Deterministic: the same address always maps to the same fake address.
    md5(email) || '@masked.com' AS email,
    -- Keep only the last four characters of the phone number.
    '***-***-' || RIGHT(phone, 4) AS phone,
    -- Replace every name, including single-word names, with a synthetic label.
    'User ' || id::text AS full_name,
    encode(sha256(ssn::bytea), 'hex') AS ssn
FROM users_production;

-- Consistent masking across tables

-- Overwrite PII on every row with values derived from the row id, so the
-- same customer gets the same masked values wherever this rule is applied.
UPDATE customers
SET (email, phone, credit_card) = (
    'customer_' || id || '@example.com',
    CONCAT('555-', LPAD((id % 10000)::text, 4, '0')),
    CONCAT('XXXX-XXXX-XXXX-', RIGHT(credit_card, 4))
);

Dynamic Data Masking

Dynamic masking applies real-time transformations to query results without modifying the underlying data.

PostgreSQL Dynamic Masking

-- Create a masked view

-- Role-aware view over users: 'admin' sees raw values, everyone else sees
-- redacted ones. The underlying table is not modified.
CREATE VIEW users_redacted AS
SELECT
    id,
    CASE
        WHEN current_user = 'admin' THEN email
        ELSE '***@***.com'
    END AS email,
    CASE
        WHEN current_user = 'admin' THEN phone
        -- Star out every digit that still has at least four digits after it,
        -- skipping separators, so '555-123-4567' becomes '***-***-4567'.
        ELSE regexp_replace(phone, '\d(?=(\D*\d){4})', '*', 'g')
    END AS phone,
    CASE
        -- NOTE(review): SELECT on this view is granted to support_agent, not
        -- 'support'; confirm which role name is intended here.
        WHEN current_user IN ('admin', 'support') THEN full_name
        ELSE CONCAT(LEFT(full_name, 1), '***')
    END AS full_name
FROM users;

-- Grant access to masked view

-- Both roles read through the redacted view only.
GRANT SELECT ON users_redacted TO app_user, support_agent;

Application-Level Dynamic Masking

from functools import wraps

def _mask_value(field, value):
    """Return the masked form of *value*, chosen by the name of *field*."""
    text = str(value)
    last4 = text[-4:]
    if field in ('email', 'email_address'):
        # rpartition never raises: a missing '@' leaves `sep` empty.
        local, sep, domain = text.rpartition('@')
        if not sep or not local:
            return '***'
        return f"{local[0]}***@{domain}"
    if field in ('phone', 'phone_number'):
        return f"***-***-{last4}"
    if field in ('ssn', 'social_security'):
        return f"***-**-{last4}"
    if 'card' in field.lower():
        return f"****-****-****-{last4}"
    # Fail closed: a field listed for masking is never returned in the clear.
    return '***'


def mask_sensitive_fields(fields_to_mask):
    """Decorator to dynamically mask sensitive fields in API responses.

    Args:
        fields_to_mask: iterable of dictionary keys to mask in the wrapped
            function's return value.

    The wrapped function is called with its arguments unchanged. If it
    returns a dict, each listed key that is present with a non-None value is
    overwritten in that same dict with a masked string:

    * ``email`` / ``email_address``: first character, ``***``, then the domain
    * ``phone`` / ``phone_number``: ``***-***-`` plus the last four characters
    * ``ssn`` / ``social_security``: ``***-**-`` plus the last four characters
    * any key containing ``card``: ``****-****-****-`` plus the last four
    * any other listed key: ``***``

    Non-dict return values are passed through untouched.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            result = func(*args, **kwargs)
            if not isinstance(result, dict):
                return result
            for field in fields_to_mask:
                if result.get(field) is not None:
                    result[field] = _mask_value(field, result[field])
            return result
        return wrapper
    return decorator

@mask_sensitive_fields(['email', 'phone', 'credit_card'])
def get_user_profile(user_id):
    """Return the profile dict for *user_id* (hard-coded sample data here).

    The function itself builds the unmasked record; the decorator masks the
    ``email``, ``phone`` and ``credit_card`` entries before callers see it.
    """
    # Returns full data; masking applied by decorator
    return {
        'user_id': user_id,
        'email': 'john.doe@example.com',
        'phone': '555-123-4567',
        'credit_card': '4111-1111-1111-1111',
    }

Tokenization

Tokenization replaces sensitive data with non-sensitive placeholders (tokens) while storing the mapping in a secure vault.

import logging
import secrets
from datetime import datetime, timezone


class TokenizationService:
    """Swap sensitive values for random tokens, keeping the mapping in a vault.

    ``vault_client`` must provide ``store(path, record)`` and
    ``retrieve(path)``; records are stored under ``tokens/<token>``.
    """

    # Roles that are allowed to turn a token back into the original value.
    DETOKENIZE_ROLES = frozenset({'admin', 'auditor', 'compliance'})

    def __init__(self, vault_client):
        """Bind the service to *vault_client* and set the token prefix."""
        self.vault = vault_client
        self.token_prefix = "tok_"

    @staticmethod
    def _utc_now_iso():
        """Return the current UTC time as a naive ISO-8601 string.

        Produces the same format as ``datetime.utcnow().isoformat()`` without
        calling the deprecated ``utcnow``.
        """
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    def _audit_log(self, action, token, requester_role):
        """Record an access event; override to ship events to a real audit sink."""
        logging.getLogger(__name__).info(
            "token_access action=%s token=%s role=%s",
            action, token, requester_role,
        )

    def tokenize(self, sensitive_value, context):
        """Replace sensitive value with a token.

        Args:
            sensitive_value: the value to protect; stored in the vault.
            context: caller-supplied metadata stored alongside the value.

        Returns:
            The new token: the prefix followed by 32 random hex characters.
        """
        # Generate unique token
        token_id = secrets.token_hex(16)
        token = f"{self.token_prefix}{token_id}"

        # Store mapping in secure vault
        self.vault.store(
            f"tokens/{token}",
            {
                'value': sensitive_value,
                'context': context,
                'created_at': self._utc_now_iso(),
                'access_count': 0,
            },
        )
        return token

    def detokenize(self, token, requester_role):
        """Retrieve original value from token (if authorized).

        Raises:
            ValueError: *token* does not start with the service's prefix.
            PermissionError: *requester_role* is not in ``DETOKENIZE_ROLES``.
            KeyError: the vault has no record for *token*.
        """
        if not token.startswith(self.token_prefix):
            raise ValueError("Invalid token format")

        if requester_role not in self.DETOKENIZE_ROLES:
            # Refused attempts are audit events too.
            self._audit_log('detokenize_denied', token, requester_role)
            raise PermissionError("Not authorized to detokenize")

        record = self.vault.retrieve(f"tokens/{token}")
        # Assumes the vault returns None for an unknown path - TODO confirm.
        if record is None:
            raise KeyError(f"Unknown token: {token}")

        # Increment access counter
        record['access_count'] += 1
        record['last_accessed'] = self._utc_now_iso()
        self.vault.store(f"tokens/{token}", record)

        # Log access
        self._audit_log('detokenize', token, requester_role)
        return record['value']

GDPR Compliance

from datetime import datetime, timedelta, timezone


class GDPRDataProcessor:
    """Helpers for GDPR data-portability exports and erasure requests."""

    # How long each category of data is kept.
    RETENTION_PERIODS = {
        'user_profile': timedelta(days=365),
        'transaction_log': timedelta(days=730),
        'session_log': timedelta(days=90),
        'analytics': timedelta(days=180),
    }

    def mask_for_export(self, user_data):
        """GDPR Article 20: data portability with masking.

        Args:
            user_data: dict with ``email`` and ``username`` keys and optional
                ``transactions`` / ``communications`` lists of dicts.

        Returns:
            A new dict with ``basic_info``, ``transactions`` and
            ``communications``. Transaction references are replaced by
            ``REF-`` plus the first 8 hex characters of their SHA-256 digest;
            communication content is cut to its first 100 characters (``None``
            when absent or empty). Only the listed keys are copied.

        Raises:
            KeyError: a required key is missing from *user_data* or an item.
        """
        basic_info = {
            'email': user_data['email'],
            'username': user_data['username'],
        }
        transactions = [
            {
                'date': t['date'],
                'amount': t['amount'],
                'reference': f"REF-{hashlib.sha256(t['reference'].encode()).hexdigest()[:8]}",
            }
            for t in user_data.get('transactions', [])
        ]
        communications = [
            {
                'date': c['date'],
                'type': c['type'],
                'content_snippet': c['content'][:100] if c.get('content') else None,
            }
            for c in user_data.get('communications', [])
        ]
        return {
            'basic_info': basic_info,
            'transactions': transactions,
            'communications': communications,
        }

    def delete_user_data(self, user_id, databases):
        """GDPR Article 17: right to erasure.

        Calls ``delete_user(user_id)`` on every entry of *databases* and keeps
        going when one of them fails, so a single outage does not block
        erasure elsewhere.

        Returns:
            One log entry per database with ``database``, ``status``
            (``'deleted'`` or ``'failed'``) and ``timestamp``; failed entries
            also carry ``error``.
        """
        deletion_log = []
        for db in databases:
            # Read the label outside the try block so a missing `name`
            # attribute cannot raise from inside the failure handler.
            entry = {'database': getattr(db, 'name', repr(db))}
            try:
                db.delete_user(user_id)
            except Exception as e:
                # Deliberately broad: best-effort erasure across all stores.
                entry['status'] = 'failed'
                entry['error'] = str(e)
            else:
                entry['status'] = 'deleted'
            # Naive UTC string, same format as datetime.utcnow().isoformat().
            entry['timestamp'] = (
                datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
            )
            deletion_log.append(entry)
        return deletion_log

Conclusion

Data masking is essential for privacy compliance and reducing the risk of data exposure. Use static masking for non-production environments, dynamic masking for real-time access control, and tokenization for scenarios requiring reversible de-identification. Always log access to sensitive data, enforce role-based masking policies, and ensure masking is consistent across all data stores and applications.