diff --git a/services/apps/data_sink_worker/src/bin/erase-member.ts b/services/apps/data_sink_worker/src/bin/erase-member.ts index 9a6a734483..c476a54bec 100644 --- a/services/apps/data_sink_worker/src/bin/erase-member.ts +++ b/services/apps/data_sink_worker/src/bin/erase-member.ts @@ -1,36 +1,87 @@ -import fs from 'fs' -import path from 'path' +import * as readline from 'readline' -import { generateUUIDv1 } from '@crowd/common' import { SearchSyncWorkerEmitter } from '@crowd/common_services' import { DbStore, getDbConnection } from '@crowd/data-access-layer/src/database' import { getServiceChildLogger } from '@crowd/logging' import { QueueFactory } from '@crowd/queue' -import { MemberIdentityType } from '@crowd/types' import { DB_CONFIG, QUEUE_CONFIG } from '../conf' +/** + * Member Data Erasure Script (Database) + * + * This script completely removes a member and all associated data from the database + * for GDPR compliance and data deletion requests. It performs a comprehensive cleanup + * across multiple related tables while respecting foreign key constraints. + * + * WHAT THIS SCRIPT DOES: + * 1. Shows a detailed summary of all data to be deleted/modified + * 2. Requests user confirmation before proceeding + * 3. Performs the following operations in order: + * - Archives member identities to requestedForErasureMemberIdentities (separate step) + * - Deletes from maintainersInternal (respects FK constraint with memberIdentities) + * - Deletes from all member-related tables (relations, segments, etc.) + * - Deletes the main member record + * - Triggers search index updates and organization re-sync + * + * FOREIGN KEY HANDLING: + * - maintainersInternal.identityId → memberIdentities.id + * Solution: Delete maintainersInternal records first before memberIdentities + * + * TABLES AFFECTED: + * - maintainersInternal (deleted by identityId from member's identities) + * - requestedForErasureMemberIdentities (memberIdentities are inserted here before deletion) + * - activityRelations, memberNoMerge, memberOrganizationAffiliationOverrides + * - memberOrganizations, memberSegmentAffiliations, memberSegments, memberSegmentsAgg + * - memberEnrichmentCache, memberEnrichments, memberIdentities + * - memberToMerge, memberToMergeRaw, memberBotSuggestions, memberNoBot + * - members (main record) + * + * SEARCH INDEX UPDATES: + * - Removes member from search indexes + * - Re-syncs any affected organizations + * + * USAGE: + * npm run script erase-member + * + * SAFETY FEATURES: + * - Shows detailed deletion summary before proceeding + * - Requires explicit user confirmation (Y/n) + * - Runs in a database transaction for atomicity + * - Comprehensive error handling and logging + */ + /* eslint-disable @typescript-eslint/no-explicit-any */ const log = getServiceChildLogger('erase-member') +/** + * Prompts the user for Y/n confirmation via command line input + */ +async function promptConfirmation(message: string): Promise { + const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, + }) + + return new Promise((resolve) => { + rl.question(`${message} (Y/n): `, (answer) => { + rl.close() + resolve(answer.toLowerCase() === 'y' || answer.toLowerCase() === 'yes' || answer === '') + }) + }) +} + const processArguments = process.argv.slice(2) -if (processArguments.length === 0 || processArguments.length % 2 !== 0) { - log.error( - ` - Expected argument in pairs which can be any of the following: - - ids ", , ..." - - email john@doe.com - - name "John Doe" - - (e.g. lfid someusername) - `, - ) +if (processArguments.length !== 1) { + log.error('Expected exactly one argument: memberId') process.exit(1) } +const memberId = processArguments[0] + setImmediate(async () => { - const manualCheckFile = `manual_check_member_ids.txt` const dbConnection = await getDbConnection(DB_CONFIG()) const store = new DbStore(log, dbConnection) const queueClient = QueueFactory.createQueueService(QUEUE_CONFIG()) @@ -38,268 +89,201 @@ setImmediate(async () => { const searchSyncWorkerEmitter = new SearchSyncWorkerEmitter(queueClient, log) await searchSyncWorkerEmitter.init() - const pairs = [] - for (let i = 0; i < processArguments.length; i += 2) { - pairs.push({ - type: processArguments[i], - value: processArguments[i + 1], - }) - } - - log.info( - `Erasing member based on input data: [${pairs - .map((p) => `${p.type} "${p.value}"`) - .join(', ')}]`, - ) - - const idParams = pairs.filter((p) => p.type === 'ids') - const idsToDelete: string[] = [] - for (const param of idParams) { - idsToDelete.push(...param.value.split(',').map((id) => id.trim())) - } + log.info(`Erasing member with ID: ${memberId}`) const orgDataMap: Map = new Map() const memberDataMap: Map = new Map() - if (idsToDelete.length > 0) { - for (const memberId of idsToDelete) { - try { - await store.transactionally(async (t) => { - // get organization id for a member to sync later - let orgResults: any[] - if (orgDataMap.has(memberId)) { - orgResults = orgDataMap.get(memberId) - } else { - orgResults = await store - .connection() - .any( - `select distinct "organizationId" from "activityRelations" where "memberId" = $(memberId)`, - { - memberId, - }, - ) - orgDataMap.set(memberId, orgResults) - } - - let memberData: any - if (memberDataMap.has(memberId)) { - memberData = memberDataMap.get(memberId) - } else { - memberData = await store - .connection() - .one(`select * from members where id = $(memberId)`, { - memberId, - }) - memberDataMap.set(memberId, memberData) - } + try { + // Show deletion summary and get confirmation + const summary = await getDeletionSummary(store, memberId) + console.log(summary) - log.info('CLEANUP ACTIVITIES...') + const proceed = await promptConfirmation('Do you want to proceed with the deletion?') - // delete the member and everything around it - await deleteMemberFromDb(t, memberId) + if (!proceed) { + log.info('Deletion cancelled by user') + process.exit(0) + } - await searchSyncWorkerEmitter.triggerRemoveMember(memberId, true) + await store.transactionally(async (t) => { + // get organization id for a member to sync later + let orgResults: any[] + if (orgDataMap.has(memberId)) { + orgResults = orgDataMap.get(memberId) + } else { + orgResults = await store + .connection() + .any( + `select distinct "organizationId" from "activityRelations" where "memberId" = $(memberId)`, + { + memberId, + }, + ) + orgDataMap.set(memberId, orgResults) + } - if (orgResults.length > 0) { - for (const orgResult of orgResults) { - if (orgResult.organizationId) { - await searchSyncWorkerEmitter.triggerOrganizationSync( - orgResult.organizationId, - true, - ) - } - } - } + let memberData: any + if (memberDataMap.has(memberId)) { + memberData = memberDataMap.get(memberId) + } else { + memberData = await store.connection().one(`select * from members where id = $(memberId)`, { + memberId, }) - } catch (err) { - log.error(err, { memberId }, 'Failed to erase member identity!') + memberDataMap.set(memberId, memberData) } - } - } else { - const nameIdentity = pairs.find((p) => p.type === 'name') - const otherIdentities = pairs.filter((p) => p.type !== 'name') - - if (otherIdentities.length > 0) { - const conditions: string[] = [] - const params: any = {} - let index = 0 - for (const pair of otherIdentities) { - params[`value_${index}`] = pair.value - if (pair.type === 'email') { - conditions.push( - `(type = '${MemberIdentityType.EMAIL}' and lower(value) = lower($(value_${index})))`, - ) - } else { - params[`platform_${index}`] = (pair.type as string).toLowerCase() - conditions.push( - `(platform = $(platform_${index}) and lower(value) = lower($(value_${index})))`, - ) - } - index++ - } + log.info('CLEANUP ACTIVITIES...') + + // Archive member identities before deletion + await archiveMemberIdentities(t, memberId) + + // delete the member and everything around it + await deleteMemberFromDb(t, memberId) - const query = `select * from "memberIdentities" where ${conditions.join(' or ')}` - const existingIdentities = await store.connection().any(query, params) - - if (existingIdentities.length > 0) { - log.info(`Found ${existingIdentities.length} existing identities.`) - - const deletedMemberIds = [] - - for (const eIdentity of existingIdentities) { - try { - await store.transactionally(async (t) => { - // get organization id for a member to sync later - let orgResults: any[] - if (orgDataMap.has(eIdentity.memberId)) { - orgResults = orgDataMap.get(eIdentity.memberId) - } else { - orgResults = await store - .connection() - .any( - `select distinct "organizationId" from "activityRelations" where "memberId" = $(memberId)`, - { - memberId: eIdentity.memberId, - }, - ) - orgDataMap.set(eIdentity.memberId, orgResults) - } - - let memberData: any - if (memberDataMap.has(eIdentity.memberId)) { - memberData = memberDataMap.get(eIdentity.memberId) - } else { - memberData = await store - .connection() - .one(`select * from members where id = $(memberId)`, { - memberId: eIdentity.memberId, - }) - memberDataMap.set(eIdentity.memberId, memberData) - } - - // mark identity for erasure - await markIdentityForErasure(t, eIdentity.platform, eIdentity.type, eIdentity.value) - - if (!deletedMemberIds.includes(eIdentity.memberId)) { - if (eIdentity.verified) { - log.info({ tenantId: memberData.tenantId }, 'CLEANUP ACTIVITIES...') - - // delete the member and everything around it - await deleteMemberFromDb(t, eIdentity.memberId) - - // track so we don't delete the same member twice - deletedMemberIds.push(eIdentity.memberId) - - await searchSyncWorkerEmitter.triggerRemoveMember(eIdentity.memberId, true) - } else { - // just delete the identity - await deleteMemberIdentity( - t, - eIdentity.memberId, - eIdentity.platform, - eIdentity.type, - eIdentity.value, - ) - await searchSyncWorkerEmitter.triggerMemberSync(eIdentity.memberId, true) - } - - if (orgResults.length > 0) { - for (const orgResult of orgResults) { - if (orgResult.organizationId) { - await searchSyncWorkerEmitter.triggerOrganizationSync( - orgResult.organizationId, - true, - ) - } - } - } - } - }) - } catch (err) { - log.error(err, { eIdentity }, 'Failed to erase member identity!') + await searchSyncWorkerEmitter.triggerRemoveMember(memberId, true) + + if (orgResults.length > 0) { + for (const orgResult of orgResults) { + if (orgResult.organizationId) { + await searchSyncWorkerEmitter.triggerOrganizationSync(orgResult.organizationId, true) } } } - } + }) + } catch (err) { + log.error(err, { memberId }, 'Failed to erase member!') + } - if (nameIdentity) { - const results = await store - .connection() - .any(`select id from members where lower("displayName") = lower($(name))`, { - name: nameIdentity.value.trim(), - }) + process.exit(0) +}) + +/** + * Generates a comprehensive summary of all data that will be deleted or modified + * for the specified member. Queries each table to provide exact record counts. + * + * @param store - Database store instance + * @param memberId - The member ID to analyze + * @returns Formatted summary string showing what will be affected + */ +async function getDeletionSummary(store: DbStore, memberId: string): Promise { + let summary = `\n=== DELETION SUMMARY FOR MEMBER ${memberId} ===\n` + + // Count activities that will be updated (objectMemberId set to null) + const activityRelationsUpdate = await store + .connection() + .one(`select count(*) as count from "activityRelations" where "objectMemberId" = $(memberId)`, { + memberId, + }) + if (parseInt(activityRelationsUpdate.count) > 0) { + summary += `- ${activityRelationsUpdate.count} activityRelations will have objectMemberId/objectMemberUsername cleared\n` + } + + // Count maintainersInternal records to be deleted + const maintainersCount = await store.connection().one( + `select count(*) as count from "maintainersInternal" where "identityId" in ( + select id from "memberIdentities" where "memberId" = $(memberId) + )`, + { memberId }, + ) + if (parseInt(maintainersCount.count) > 0) { + summary += `- ${maintainersCount.count} maintainersInternal records will be deleted\n` + } + + // Count records in each table to be deleted + const tablesToDelete: Map = new Map([ + ['activityRelations', ['memberId']], + ['memberNoMerge', ['memberId', 'noMergeId']], + ['memberOrganizationAffiliationOverrides', ['memberId']], + ['memberOrganizations', ['memberId']], + ['memberSegmentAffiliations', ['memberId']], + ['memberSegments', ['memberId']], + ['memberSegmentsAgg', ['memberId']], + ['memberEnrichmentCache', ['memberId']], + ['memberEnrichments', ['memberId']], + ['memberIdentities', ['memberId']], + ['memberToMerge', ['memberId', 'toMergeId']], + ['memberToMergeRaw', ['memberId', 'toMergeId']], + ['memberBotSuggestions', ['memberId']], + ['memberNoBot', ['memberId']], + ]) - if (results.length > 0) { - addLinesToFile(manualCheckFile, [ - `name: ${nameIdentity.value}, member ids: [${results.map((r) => r.id).join(', ')}]`, - ]) - log.warn( - `Found ${results.length} members with name: ${ - nameIdentity.value - }! Manual check required for member ids: [${results.map((r) => r.id).join(', ')}]!`, - ) + for (const [table, memberIdColumns] of tablesToDelete) { + const condition = memberIdColumns.map((c) => `"${c}" = $(memberId)`).join(' or ') + const result = await store + .connection() + .one(`select count(*) as count from "${table}" where ${condition}`, { memberId }) + if (parseInt(result.count) > 0) { + if (table === 'memberIdentities') { + summary += `- ${result.count} records from ${table} (will be inserted into requestedForErasureMemberIdentities first)\n` + } else { + summary += `- ${result.count} records from ${table}\n` } } } - process.exit(0) -}) + // Count main member record + const memberExists = await store + .connection() + .one(`select count(*) as count from members where id = $(memberId)`, { memberId }) + if (parseInt(memberExists.count) > 0) { + summary += `- 1 member record\n` + } -async function markIdentityForErasure( - store: DbStore, - platform: string, - type: string, - value: string, -): Promise { - await store.connection().none( - ` - insert into "requestedForErasureMemberIdentities" (id, platform, type, value) - values ($(id), $(platform), $(type), $(value)) - `, - { - id: generateUUIDv1(), - platform, - type, - value, - }, - ) + summary += `\n` + return summary } -async function deleteMemberIdentity( - store: DbStore, - memberId: string, - platform: string, - type: string, - value: string, -): Promise { - const result = await store.connection().result( - `delete from "memberIdentities" where - "memberId" = $(memberId) and - platform = $(platform) and - type = $(type) and - value = $(value)`, - { - memberId, - platform, - type, - value, - }, +/** + * Archives member identities to requestedForErasureMemberIdentities table before deletion. + * This preserves identity data for audit/compliance purposes while allowing for GDPR deletion. + * + * @param store - Database store instance (should be within a transaction) + * @param memberId - The member ID whose identities will be archived + * @returns Number of identities archived + */ +export async function archiveMemberIdentities(store: DbStore, memberId: string): Promise { + const insertResult = await store.connection().result( + ` + INSERT INTO "requestedForErasureMemberIdentities" ( + id, platform, value, type + ) + SELECT id, platform, value, type + FROM "memberIdentities" + WHERE "memberId" = $(memberId) + `, + { memberId }, ) - if (result.rowCount === 0) { - throw new Error( - `Failed to delete member identity - memberId ${memberId}, platform: ${platform}, type: ${type}, value: ${value}!`, + if (insertResult.rowCount > 0) { + log.info( + `Archived ${insertResult.rowCount} memberIdentities to requestedForErasureMemberIdentities for member ${memberId}`, ) } + + return insertResult.rowCount } +/** + * Performs the actual deletion of a member and all associated data from the database. + * This function handles the complex deletion order required by foreign key constraints. + * + * DELETION ORDER: + * 1. Clear activityRelations.objectMemberId references (update, not delete) + * 2. Delete maintainersInternal records (by identityId from memberIdentities) + * 3. Delete from all member-related tables (including memberIdentities) + * 4. Delete the main member record + * + * @param store - Database store instance (should be within a transaction) + * @param memberId - The member ID to delete + */ export async function deleteMemberFromDb(store: DbStore, memberId: string): Promise { let result = await store.connection().result( ` - update activities set + update "activityRelations" set "objectMemberId" = null, - "objectMemberUsername" = null + "objectMemberUsername" = null, + "updatedAt" = now() where "objectMemberId" is not null and "objectMemberId" = $(memberId) `, { @@ -309,21 +293,52 @@ export async function deleteMemberFromDb(store: DbStore, memberId: string): Prom if (result.rowCount > 0) { log.info( - `Cleared ${result.rowCount} activities."objectMemberId" and activities."objectMemberUsername" for memberId ${memberId}!`, + `Cleared ${result.rowCount} activityRelations."objectMemberId" and activityRelations."objectMemberUsername" for memberId ${memberId}!`, + ) + } + + // Delete from maintainersInternal first (foreign key constraint with memberIdentities.id) + const maintainersQuery = `delete from "maintainersInternal" where "identityId" in ( + select id from "memberIdentities" where "memberId" = '${memberId}' + )` + console.log(`\n=== ABOUT TO DELETE FROM MAINTAINERSINTERNAL ===`) + console.log(`Query: ${maintainersQuery}`) + const proceedMaintainers = await promptConfirmation( + 'Proceed with deleting from maintainersInternal?', + ) + if (!proceedMaintainers) { + throw new Error('User cancelled deletion from maintainersInternal') + } + + result = await store.connection().result( + `delete from "maintainersInternal" where "identityId" in ( + select id from "memberIdentities" where "memberId" = $(memberId) + )`, + { memberId }, + ) + + if (result.rowCount > 0) { + log.info( + `Deleted ${result.rowCount} rows from table maintainersInternal for member ${memberId}!`, ) } const tablesToDelete: Map = new Map([ ['activities', ['memberId']], + ['activityRelations', ['memberId']], ['memberNoMerge', ['memberId', 'noMergeId']], + ['memberOrganizationAffiliationOverrides', ['memberId']], ['memberOrganizations', ['memberId']], + ['memberSegmentAffiliations', ['memberId']], ['memberSegments', ['memberId']], ['memberSegmentsAgg', ['memberId']], ['memberEnrichmentCache', ['memberId']], + ['memberEnrichments', ['memberId']], ['memberIdentities', ['memberId']], - ['memberSegmentAffiliations', ['memberId']], ['memberToMerge', ['memberId', 'toMergeId']], ['memberToMergeRaw', ['memberId', 'toMergeId']], + ['memberBotSuggestions', ['memberId']], + ['memberNoBot', ['memberId']], ]) for (const table of Array.from(tablesToDelete.keys())) { @@ -333,7 +348,17 @@ export async function deleteMemberFromDb(store: DbStore, memberId: string): Prom if (memberIdColumns.length === 0) { throw new Error(`No fk columns specified for table ${table}!`) } + const condition = memberIdColumns.map((c) => `"${c}" = $(memberId)`).join(' or ') + const deleteQuery = `delete from "${table}" where ${condition.replace('$(memberId)', `'${memberId}'`)}` + console.log(`\n=== ABOUT TO DELETE FROM ${table.toUpperCase()} ===`) + console.log(`Query: ${deleteQuery}`) + const proceedTable = await promptConfirmation(`Proceed with deleting from ${table}?`) + if (!proceedTable) { + log.info(`Skipped deletion from ${table}`) + continue + } + result = await store .connection() .result(`delete from "${table}" where ${condition}`, { memberId }) @@ -343,6 +368,14 @@ export async function deleteMemberFromDb(store: DbStore, memberId: string): Prom } } + const finalDeleteQuery = `delete from members where id = '${memberId}'` + console.log(`\n=== ABOUT TO DELETE MAIN MEMBER RECORD ===`) + console.log(`Query: ${finalDeleteQuery}`) + const proceedFinal = await promptConfirmation('Proceed with deleting the main member record?') + if (!proceedFinal) { + throw new Error('User cancelled deletion of main member record') + } + result = await store .connection() .result(`delete from members where id = $(memberId)`, { memberId }) @@ -351,24 +384,3 @@ export async function deleteMemberFromDb(store: DbStore, memberId: string): Prom throw new Error(`Failed to delete member - memberId ${memberId}!`) } } - -function addLinesToFile(filePath: string, lines: string[]) { - try { - // Ensure the directory exists - fs.mkdirSync(path.dirname(filePath), { recursive: true }) - - // Check if the file exists - try { - fs.accessSync(filePath) - - // File exists, append lines - fs.appendFileSync(filePath, lines.join('\n') + '\n') - } catch (error) { - // File doesn't exist, create it and write lines - fs.writeFileSync(filePath, lines.join('\n') + '\n') - } - } catch (err) { - log.error(err, { filePath }, 'Error while writing to file!') - throw err - } -} diff --git a/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts new file mode 100644 index 0000000000..b3cbb1f624 --- /dev/null +++ b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts @@ -0,0 +1,251 @@ +import * as readline from 'readline' + +const TINYBIRD_API_URL = 'https://api.us-west-2.aws.tinybird.co/v0/datasources' +const DATA_SOURCES = ['activityRelations', 'members', 'maintainersInternal', 'memberIdentities'] + +/** + * Member Data Erasure Script (Tinybird Analytics Platform) + * + * This script removes member data from Tinybird datasources for GDPR compliance + * and data deletion requests. It complements the database deletion script by + * cleaning up analytical data stored in Tinybird. + * + * WHAT THIS SCRIPT DOES: + * 1. Shows a detailed summary of records to be deleted from each Tinybird datasource + * 2. Requests user confirmation before proceeding + * 3. Deletes data from Tinybird datasources in the correct order to respect dependencies + * 4. Handles special cases like maintainersInternal which requires identityId-based deletion + * + * TINYBIRD INTEGRATION: + * Uses "Delete Data Selectively" API from Tinybird: + * https://www.tinybird.co/docs/classic/get-data-in/data-operations/replace-and-delete-data#delete-data-selectively + * + * DATASOURCES AFFECTED (in deletion order): + * 1. activityRelations - Activity relationship records (deleted by memberId) + * 2. members - Member profile data (deleted by memberId) + * 3. maintainersInternal - Repository maintainer records (deleted by identityId from member's identities) + * 4. memberIdentities - Member identity records (deleted by memberId) + * + * FOREIGN KEY HANDLING: + * - maintainersInternal.identityId → memberIdentities.id + * Solution: Use subquery in delete condition - 'identityId IN (SELECT id FROM memberIdentities WHERE memberId = ?)' + * + * DOWNSTREAM EFFECTS: + * All datasources created from pipes based on these tables will reflect the deletions + * after the relevant copy pipes run (typically scheduled hourly). + * + * USAGE: + * npm run script erase-members-data-tinybird + * + * REQUIREMENTS: + * - Tinybird token must be provided as command line argument + * - Token must have delete permissions on the specified datasources + * + * SAFETY FEATURES: + * - Shows detailed deletion summary with record counts before proceeding + * - Requires explicit user confirmation (Y/n) + * - Graceful error handling for API failures + * - Special validation for maintainersInternal dependencies + */ + +const args = process.argv.slice(2) + +if (args.length !== 2) { + console.error('Usage: erase-members-data-tinybird.ts ') + process.exit(1) +} + +const memberId = args[0] +const TOKEN = args[1] + +/** + * Prompts the user for Y/n confirmation via command line input + */ +async function promptConfirmation(message: string): Promise { + const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, + }) + + return new Promise((resolve) => { + rl.question(`${message} (Y/n): `, (answer) => { + rl.close() + resolve(answer.toLowerCase() === 'y' || answer.toLowerCase() === 'yes' || answer === '') + }) + }) +} + +/** + * Queries Tinybird to get the count of records matching a condition in a specific datasource + * + * @param tableName - The Tinybird datasource name + * @param condition - SQL WHERE condition to count matching records + * @returns Number of matching records, or 0 if query fails + */ +async function getRecordCount(tableName: string, condition: string): Promise { + const query = `SELECT count() as count FROM ${tableName} WHERE ${condition} FORMAT JSON` + const url = `https://api.us-west-2.aws.tinybird.co/v0/sql` + + const response = await fetch(url, { + method: 'POST', + headers: { + Authorization: `Bearer ${TOKEN}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + q: query, + }), + }) + + if (!response.ok) { + const errorText = await response.text() + console.warn(`Failed to get count for ${tableName}: ${response.status} ${response.statusText}`) + console.warn(`Error response: ${errorText}`) + return 0 + } + + const data = (await response.json()) as { data?: Array<{ count: number }> } + return data.data?.[0]?.count || 0 +} + +/** + * Generates a comprehensive summary of all data that will be deleted from Tinybird + * datasources for the specified member. Queries each datasource to provide exact record counts. + * + * Handles special logic for maintainersInternal using a subquery to count records + * by identityId from the member's identities. + * + * @param memberId - The member ID to analyze + * @returns Formatted summary string showing what will be deleted from each datasource + */ +async function getTinybirdDeletionSummary(memberId: string): Promise { + let summary = `\n=== TINYBIRD DELETION SUMMARY FOR MEMBER ${memberId} ===\n` + + for (const table of DATA_SOURCES) { + let condition: string + + if (table === 'maintainersInternal') { + // Use subquery to count maintainersInternal records by identityId + condition = `identityId IN (SELECT id FROM memberIdentities WHERE memberId = '${memberId}')` + } else if (table === 'members') { + // Members table uses 'id' as the primary key, not 'memberId' + condition = `id = '${memberId}'` + } else { + condition = `memberId = '${memberId}'` + } + + console.log(`Checking ${table} with condition: ${condition}`) + const count = await getRecordCount(table, condition) + console.log(`${table}: ${count} records found (type: ${typeof count})`) + + if (count > 0) { + summary += `- ${count} records from ${table}\n` + } else { + console.log(`No records added to summary for ${table} - count was: ${count}`) + } + } + + summary += `\n` + return summary +} + +/** + * Deletes member data from a specific Tinybird datasource using the appropriate condition. + * + * For most datasources, deletes by memberId directly. + * For maintainersInternal, uses a subquery to delete by identityId from the member's identities. + * + * @param tableName - The Tinybird datasource name + * @param memberId - The member ID to delete data for + */ +async function deleteFromDataSource(tableName: string, memberId: string) { + const url = `${TINYBIRD_API_URL}/${tableName}/delete` + let deleteCondition: string + + if (tableName === 'maintainersInternal') { + // Delete maintainersInternal using subquery to get identityIds from memberIdentities + deleteCondition = `identityId IN (SELECT id FROM memberIdentities WHERE memberId = '${memberId}')` + } else if (tableName === 'members') { + // Members table uses 'id' as the primary key, not 'memberId' + deleteCondition = `id = '${memberId}'` + } else { + deleteCondition = `memberId = '${memberId}'` + } + + // Safety check: ensure delete condition is not empty and contains the memberId + if (!deleteCondition || !deleteCondition.includes(memberId)) { + throw new Error(`Invalid delete condition generated: ${deleteCondition}`) + } + + const body = new URLSearchParams({ + delete_condition: deleteCondition, + }) + + // Log the complete request details before execution + console.log(`\n=== ABOUT TO DELETE FROM ${tableName.toUpperCase()} ===`) + console.log(`URL: ${url}`) + console.log(`Method: POST`) + console.log(`Headers:`) + console.log( + ` Authorization: Bearer ${TOKEN.substring(0, 20)}...${TOKEN.substring(TOKEN.length - 10)}`, + ) + console.log(` Content-Type: application/x-www-form-urlencoded`) + console.log(`Body:`) + console.log(` delete_condition: ${deleteCondition}`) + console.log(`\nEquivalent curl command:`) + console.log(`curl -X POST \\`) + console.log(` -H "Authorization: Bearer ${TOKEN}" \\`) + console.log(` -H "Content-Type: application/x-www-form-urlencoded" \\`) + console.log(` --data-urlencode 'delete_condition=${deleteCondition}' \\`) + console.log(` "${url}"`) + + // Ask for final confirmation for this specific deletion + const proceed = await promptConfirmation(`\nProceed with deleting from ${tableName}?`) + if (!proceed) { + console.log(`Skipped deletion from ${tableName}`) + return + } + + const response = await fetch(url, { + method: 'POST', + headers: { + Authorization: `Bearer ${TOKEN}`, + 'Content-Type': 'application/x-www-form-urlencoded', + }, + body, + }) + + const text = await response.text() + if (!response.ok) { + console.error(`Failed to delete from ${tableName}:`, text) + throw new Error(`Delete failed for table ${tableName}`) + } + + console.log(`Deleted from ${tableName}:`, text) +} + +async function main() { + // Show deletion summary and get confirmation + const summary = await getTinybirdDeletionSummary(memberId) + console.log(summary) + + const proceed = await promptConfirmation('Do you want to proceed with the Tinybird deletion?') + + if (!proceed) { + console.log('Deletion cancelled by user') + process.exit(0) + } + + // Process in order to respect foreign key constraints - maintainersInternal before memberIdentities + const orderedTables = ['activityRelations', 'maintainersInternal', 'memberIdentities', 'members'] + + for (const table of orderedTables) { + try { + await deleteFromDataSource(table, memberId) + } catch (err) { + console.error(err) + } + } +} + +main() diff --git a/services/apps/data_sink_worker/src/bin/erasure-members-data-questdb.ts b/services/apps/data_sink_worker/src/bin/erasure-members-data-questdb.ts new file mode 100644 index 0000000000..4db9433984 --- /dev/null +++ b/services/apps/data_sink_worker/src/bin/erasure-members-data-questdb.ts @@ -0,0 +1,235 @@ +import * as readline from 'readline' + +import { generateUUIDv1 } from '@crowd/common' +import { DbStore } from '@crowd/data-access-layer/src/database' +import { getServiceChildLogger } from '@crowd/logging' +import { getClientSQL } from '@crowd/questdb' + +/** + * Member Data Anonymization Script (QuestDB Analytics) + * + * This script performs anonymization and soft deletion of member data in QuestDB for GDPR compliance + * and data deletion requests. Unlike hard deletion scripts, this replaces identifying data with + * anonymous dummy values while preserving analytical structure and marking records as deleted. + * + * WHAT THIS SCRIPT DOES: + * 1. Shows a detailed summary of all data to be anonymized/soft-deleted in QuestDB + * 2. Requests user confirmation before proceeding + * 3. Performs the following operations: + * - Replaces memberId with random UUID and username with deleted-{UUID} + * - Replaces objectMemberId with random UUID and objectMemberUsername with deleted-{UUID} + * - Sets deletedAt timestamp on activities records matching memberId + * - Updates updatedAt timestamp to reflect the changes + * + * ANONYMIZATION APPROACH: + * - Records are not physically deleted, but marked with deletedAt timestamp + * - Identifying data (memberId, username) is replaced with anonymous dummy values + * - Uses random UUIDs to ensure no collision with real data + * - Different UUIDs are used for member vs objectMember references to prevent correlation + * - Preserves referential integrity and analytical structure + * - Queries with proper deletedAt filtering will exclude these records + * + * TABLES AFFECTED: + * - activities: Records where memberId matches are anonymized and marked deleted + * - activities: Records where objectMemberId matches have object references anonymized + * + * QUESTDB CONSIDERATIONS: + * - QuestDB uses update statements to modify existing records + * - deletedAt is set to current timestamp (NOW()) + * - updatedAt is also updated to reflect the modification time + * - Anonymized data uses format: memberId = random-uuid, username = deleted-{random-uuid} + * + * USAGE: + * npm run script erasure-members-data-questdb [--no-summary] + * + * REQUIREMENTS: + * - QuestDB database connection configured via CROWD_QUESTDB_SQL_* environment variables + * - Proper permissions to UPDATE activities table + * + * SAFETY FEATURES: + * - Shows detailed anonymization summary with record counts before proceeding + * - Requires explicit user confirmation (Y/n) + * - Comprehensive error handling and logging + * - Data is anonymized, not destroyed, allowing for analytical continuity + * + * RELATIONSHIP TO OTHER SCRIPTS: + * - Complements erase-member.ts (PostgreSQL hard deletion) + * - Complements erase-members-data-tinybird.ts (Tinybird hard deletion) + * - Can be run independently to anonymize QuestDB data while preserving analytics + */ + +/* eslint-disable @typescript-eslint/no-explicit-any */ + +const log = getServiceChildLogger('erasure-members-data-questdb') + +/** + * Prompts the user for Y/n confirmation via command line input + */ +async function promptConfirmation(message: string): Promise { + const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, + }) + + return new Promise((resolve) => { + rl.question(`${message} (Y/n): `, (answer) => { + rl.close() + resolve(answer.toLowerCase() === 'y' || answer.toLowerCase() === 'yes' || answer === '') + }) + }) +} + +/** + * Generates a comprehensive summary of all data that will be anonymized and soft-deleted + * in QuestDB for the specified member. Queries the activities table to provide exact counts. + * + * @param qdbStore - QuestDB store instance + * @param memberId - The member ID to analyze + * @returns Formatted summary string showing what will be anonymized + */ +async function getQuestDbDeletionSummary(qdbStore: DbStore, memberId: string): Promise { + let summary = `\n=== QUESTDB ANONYMIZATION SUMMARY FOR MEMBER ${memberId} ===\n` + + // Count activities that will be anonymized and marked as deleted (where memberId matches) + const activitiesToAnonymize = await qdbStore + .connection() + .one( + `SELECT count(*) as count FROM activities WHERE "memberId" = $(memberId) AND "deletedAt" IS NULL`, + { memberId }, + ) + if (parseInt(activitiesToAnonymize.count) > 0) { + summary += `- ${activitiesToAnonymize.count} activities will be anonymized (memberId → random UUID, username → deleted-{UUID}) and marked deleted\n` + } + + // Count activities that will have object member references anonymized + const activitiesToAnonymizeObject = await qdbStore + .connection() + .one( + `SELECT count(*) as count FROM activities WHERE "objectMemberId" = $(memberId) AND "deletedAt" IS NULL`, + { memberId }, + ) + if (parseInt(activitiesToAnonymizeObject.count) > 0) { + summary += `- ${activitiesToAnonymizeObject.count} activities will have objectMember references anonymized (objectMemberId → random UUID, objectMemberUsername → deleted-{UUID})\n` + } + + // Check for overlap (records that will have both operations applied) + const overlappingRecords = await qdbStore + .connection() + .one( + `SELECT count(*) as count FROM activities WHERE "memberId" = $(memberId) AND "objectMemberId" = $(memberId) AND "deletedAt" IS NULL`, + { memberId }, + ) + if (parseInt(overlappingRecords.count) > 0) { + summary += `- ${overlappingRecords.count} activities will have both member and objectMember data anonymized\n` + } + + summary += `\nNOTE: Different random UUIDs will be used for member vs objectMember references to prevent correlation.\n\n` + return summary +} + +/** + * Performs anonymization and soft deletion of member data in QuestDB activities table. + * This function replaces identifying data with dummy values and marks records as deleted. + * + * OPERATIONS PERFORMED: + * 1. Replace memberId with random UUID and username with deleted-${uuid} where memberId matches + * 2. Replace objectMemberId with random UUID and objectMemberUsername with deleted-${uuid} where they reference the member + * + * @param qdbStore - QuestDB store instance + * @param memberId - The member ID to anonymize and soft delete + */ +async function softDeleteMemberFromQuestDb(qdbStore: DbStore, memberId: string): Promise { + // Generate random UUID for anonymization + const anonymousUuid = generateUUIDv1() + const anonymousUsername = `deleted-${anonymousUuid}` + + // Anonymize activities where memberId matches + let result = await qdbStore.connection().result( + ` + UPDATE activities SET + "memberId" = $(anonymousUuid), + "username" = $(anonymousUsername), + "deletedAt" = NOW(), + "updatedAt" = NOW() + WHERE "memberId" = $(memberId) AND "deletedAt" IS NULL + `, + { memberId, anonymousUuid, anonymousUsername }, + ) + + // NOTE: QuestDB UPDATE operations may report inaccurate rowCount due to PostgreSQL compatibility issues + // Also, QuestDB updates are asynchronous and may not be immediately visible in subsequent queries + if (result.rowCount > 0) { + log.info( + `QuestDB reported updating ${result.rowCount} activities (note: QuestDB rowCount may be inaccurate for UPDATE operations)`, + ) + } else { + log.info(`No activities found to anonymize for memberId ${memberId}`) + } + + // Generate separate UUID for object member references to avoid correlation + const objectAnonymousUuid = generateUUIDv1() + const objectAnonymousUsername = `deleted-${objectAnonymousUuid}` + + // Anonymize objectMemberId and objectMemberUsername references + result = await qdbStore.connection().result( + ` + UPDATE activities SET + "objectMemberId" = $(objectAnonymousUuid), + "objectMemberUsername" = $(objectAnonymousUsername), + "updatedAt" = NOW() + WHERE "objectMemberId" = $(memberId) AND "deletedAt" IS NULL + `, + { memberId, objectAnonymousUuid, objectAnonymousUsername }, + ) + + if (result.rowCount > 0) { + log.info( + `QuestDB reported updating ${result.rowCount} objectMember activities (note: QuestDB rowCount may be inaccurate for UPDATE operations)`, + ) + } else { + log.info(`No objectMember activities found to anonymize for memberId ${memberId}`) + } +} + +const processArguments = process.argv.slice(2) + +if (processArguments.length < 1 || processArguments.length > 2) { + log.error('Usage: erasure-members-data-questdb [--no-summary]') + process.exit(1) +} + +const memberId = processArguments[0] +const skipSummary = processArguments.includes('--no-summary') + +setImmediate(async () => { + const qdbConnection = await getClientSQL() + const qdbStore = new DbStore(log, qdbConnection) + + log.info(`Anonymizing member data in QuestDB for member ID: ${memberId}`) + + try { + if (!skipSummary) { + // Show anonymization summary and get confirmation + const summary = await getQuestDbDeletionSummary(qdbStore, memberId) + console.log(summary) + } + + const proceed = await promptConfirmation( + 'Do you want to proceed with the QuestDB anonymization and soft deletion?', + ) + + if (!proceed) { + log.info('Anonymization cancelled by user') + process.exit(0) + } + + // Perform anonymization operations + await softDeleteMemberFromQuestDb(qdbStore, memberId) + + log.info('QuestDB member anonymization completed successfully') + } catch (err) { + log.error(err, { memberId }, 'Failed to anonymize member data in QuestDB!') + } + + process.exit(0) +})