feat(validation): relax Instagram URL validation to support all content types

- Create validateInstagramUrl utility using URL constructor
- Replace regex-based validation with hostname and protocol checks
- Support posts, reels, IGTV, and URLs with query parameters
- Add comprehensive unit tests (22 tests, all passing)
- Add integration tests for new URL formats
- Update API documentation with supported URL formats

Closes: #RelaxInstagramUrlValidation
This commit is contained in:
Giancarmine Salucci
2025-12-22 03:10:29 +01:00
parent 8545744bb1
commit 6b022d8348
7 changed files with 1219 additions and 12 deletions

View File

@@ -0,0 +1,79 @@
/**
* Instagram URL Validation Utility
*
* Validates that a URL is from Instagram's domain and uses HTTPS.
* Accepts all Instagram URL formats (posts, reels, IGTV, etc.).
*/
/**
 * Outcome of validating a candidate Instagram URL.
 */
export interface ValidationResult {
  /** `true` when the URL passed every check. */
  valid: boolean;
  /** Human-readable rejection reason; only set when `valid` is `false`. */
  error?: string;
}

/**
 * Matches any whitespace or ASCII control character.
 *
 * The URL parser silently strips tabs, newlines and surrounding whitespace,
 * so without this check a raw string such as "https://insta\ngram.com/p/x"
 * would parse to a valid Instagram URL even though the string the caller
 * keeps (and later uses) is different from what was validated.
 */
// eslint-disable-next-line no-control-regex
const UNSAFE_URL_CHARS = /[\s\u0000-\u001f\u007f]/;

/** Hostnames accepted as Instagram; the URL parser lowercases hostnames before comparison. */
const INSTAGRAM_HOSTNAMES: ReadonlySet<string> = new Set(['instagram.com', 'www.instagram.com']);

/**
 * Validate Instagram URL
 *
 * Accepts:
 * - https://instagram.com/p/{post-id}
 * - https://www.instagram.com/p/{post-id}
 * - https://instagram.com/reel/{reel-id}
 * - https://instagram.com/tv/{tv-id}
 * - Any other path, query string or fragment on those two hostnames
 * - An explicit default port (`:443`), which the URL parser normalizes away
 *
 * Rejects:
 * - Non-string, empty or whitespace-only input
 * - Strings containing whitespace or control characters anywhere
 * - Invalid URL format
 * - Non-HTTPS URLs (http://, ftp://, ...)
 * - Non-Instagram domains, including subdomains other than www
 * - URLs carrying credentials (`user:pass@instagram.com`)
 * - URLs with an explicit non-default port
 *
 * The path is intentionally not inspected, so any Instagram content type
 * is accepted.
 *
 * @param url - The URL to validate
 * @returns Validation result with valid flag and optional error message
 *
 * @example
 * ```typescript
 * const result = validateInstagramUrl('https://instagram.com/reel/ABC123?utm_source=share');
 * if (!result.valid) {
 *   console.error(result.error);
 * }
 * ```
 */
export function validateInstagramUrl(url: string): ValidationResult {
  // Runtime guard: the value may come from untyped JSON despite the signature.
  if (typeof url !== 'string' || url.trim() === '') {
    return { valid: false, error: 'URL must be a non-empty string' };
  }

  // Reject characters the parser would silently drop (see UNSAFE_URL_CHARS).
  if (UNSAFE_URL_CHARS.test(url)) {
    return { valid: false, error: 'Invalid URL format' };
  }

  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    // Relative paths and scheme-less strings end up here.
    return { valid: false, error: 'Invalid URL format' };
  }

  if (parsed.protocol !== 'https:') {
    return { valid: false, error: 'Instagram URL must use HTTPS protocol' };
  }

  // Exact match only: look-alikes such as instagram.com.evil.com must fail.
  if (!INSTAGRAM_HOSTNAMES.has(parsed.hostname)) {
    return { valid: false, error: 'URL must be from instagram.com domain' };
  }

  if (parsed.username !== '' || parsed.password !== '') {
    return { valid: false, error: 'Instagram URL must not contain credentials' };
  }

  // `port` is '' when absent or equal to the scheme default (443 for https).
  if (parsed.port !== '') {
    return { valid: false, error: 'Instagram URL must not specify a custom port' };
  }

  return { valid: true };
}

View File

@@ -8,6 +8,7 @@
import { json, error } from '@sveltejs/kit';
import { queueManager } from '$lib/server/queue/QueueManager';
import { validateInstagramUrl } from '$lib/server/validation/instagram-url';
import type { RequestHandler } from './$types';
/**
@@ -41,12 +42,10 @@ export const POST: RequestHandler = async ({ request }) => {
return error(400, { message: 'URL is required and must be a string' });
}
// Validate Instagram URL format
const instagramUrlPattern = /^https:\/\/(www\.)?instagram\.com\/p\/[a-zA-Z0-9_-]+\/?$/;
if (!instagramUrlPattern.test(url)) {
return error(400, {
message: 'Invalid Instagram URL format. Expected: https://instagram.com/p/{post-id}'
});
// Validate Instagram URL format using utility
const validation = validateInstagramUrl(url);
if (!validation.valid) {
return error(400, { message: validation.error || 'Invalid Instagram URL' });
}
// Enqueue the URL

View File

@@ -0,0 +1,139 @@
import { describe, it, expect } from 'vitest';
import { validateInstagramUrl } from '$lib/server/validation/instagram-url';
/**
 * Unit tests for `validateInstagramUrl`.
 *
 * Cases are table-driven: every accepted URL must yield `valid: true` with no
 * error, and every rejected URL must yield `valid: false` with an error that
 * contains the expected fragment.
 */
describe('Instagram URL Validation', () => {
  /** [test title, URL passed to the validator] */
  type AcceptedCase = readonly [string, string];
  /** [test title, URL passed to the validator, substring expected in `error`] */
  type RejectedCase = readonly [string, string, string];

  /** Registers one test per case asserting that the URL is accepted. */
  const expectAccepted = (cases: ReadonlyArray<AcceptedCase>): void => {
    for (const [title, url] of cases) {
      it(title, () => {
        const result = validateInstagramUrl(url);
        expect(result.valid).toBe(true);
        expect(result.error).toBeUndefined();
      });
    }
  };

  /** Registers one test per case asserting that the URL is rejected with the given message fragment. */
  const expectRejected = (cases: ReadonlyArray<RejectedCase>): void => {
    for (const [title, url, errorFragment] of cases) {
      it(title, () => {
        const result = validateInstagramUrl(url);
        expect(result.valid).toBe(false);
        expect(result.error).toContain(errorFragment);
      });
    }
  };

  describe('Valid URLs', () => {
    expectAccepted([
      ['should accept post URLs without www', 'https://instagram.com/p/ABC123'],
      ['should accept post URLs with www', 'https://www.instagram.com/p/XYZ789'],
      ['should accept reel URLs', 'https://instagram.com/reel/DSevV5CDcNm'],
      [
        'should accept reel URLs with query parameters',
        'https://www.instagram.com/reel/DSevV5CDcNm/?utm_source=ig_web_copy_link'
      ],
      ['should accept IGTV URLs', 'https://instagram.com/tv/ABC123'],
      [
        'should accept URLs with multiple query parameters',
        'https://instagram.com/p/ABC123?utm_source=share&utm_medium=social'
      ],
      ['should accept URLs with trailing slash', 'https://instagram.com/p/ABC123/'],
      ['should accept URLs with hash fragments', 'https://instagram.com/p/ABC123#section']
    ]);
  });

  describe('Invalid Protocol', () => {
    expectRejected([
      ['should reject HTTP URLs', 'http://instagram.com/p/ABC123', 'HTTPS'],
      ['should reject FTP URLs', 'ftp://instagram.com/p/ABC123', 'HTTPS']
    ]);
  });

  describe('Invalid Domain', () => {
    expectRejected([
      ['should reject non-Instagram domains', 'https://facebook.com/post/123', 'instagram.com'],
      [
        'should reject malicious look-alike domains',
        'https://instagram.com.evil.com/p/ABC123',
        'instagram.com'
      ],
      [
        'should reject subdomains other than www',
        'https://api.instagram.com/p/ABC123',
        'instagram.com'
      ],
      ['should reject completely different domains', 'https://example.com', 'instagram.com']
    ]);
  });

  describe('Invalid URL Format', () => {
    expectRejected([
      ['should reject invalid URL strings', 'not-a-url', 'Invalid URL format'],
      ['should reject empty strings', '', 'non-empty string'],
      ['should reject whitespace-only strings', ' ', 'non-empty string'],
      ['should reject relative URLs', '/p/ABC123', 'Invalid URL format']
    ]);
  });

  describe('Edge Cases', () => {
    expectAccepted([
      // Mixes raw non-ASCII text with percent-encoded UTF-8 so the test
      // actually exercises Unicode rather than only an encoded ASCII space.
      [
        'should handle URLs with Unicode characters in query params',
        'https://instagram.com/p/ABC123?text=caf%C3%A9%20%E2%98%95&q=日本語'
      ],
      // 443 is the HTTPS default port; the URL parser drops it, so the
      // hostname check sees plain "instagram.com".
      ['should handle URLs with port numbers', 'https://instagram.com:443/p/ABC123'],
      ['should accept stories URLs', 'https://instagram.com/stories/username/123456789'],
      ['should accept any Instagram path', 'https://instagram.com/any/path/here']
    ]);
  });
});

View File

@@ -71,10 +71,72 @@ describe('Queue API Endpoints', () => {
expect(item?.url).toBe('https://www.instagram.com/p/XYZ789');
});
// A reel permalink must be accepted (HTTP 200) and returned unchanged in the
// response body's `url` field.
it('should accept Instagram reel URLs', async () => {
  const reelUrl = 'https://instagram.com/reel/ABC123';
  const payload = JSON.stringify({ url: reelUrl });
  const request = new Request('http://localhost/api/queue', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: payload
  });

  const response = await queuePOST({ request } as any);
  expect(response.status).toBe(200);

  const { url: queuedUrl } = await response.json();
  expect(queuedUrl).toBe(reelUrl);
});
// A share link with a trailing slash and tracking query string must be
// accepted (HTTP 200) and returned verbatim, query string included.
it('should accept Instagram URLs with query parameters', async () => {
  const sharedUrl = 'https://www.instagram.com/reel/DSevV5CDcNm/?utm_source=ig_web_copy_link';
  const payload = JSON.stringify({ url: sharedUrl });
  const request = new Request('http://localhost/api/queue', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: payload
  });

  const response = await queuePOST({ request } as any);
  expect(response.status).toBe(200);

  const { url: queuedUrl } = await response.json();
  expect(queuedUrl).toBe(sharedUrl);
});
// An IGTV permalink (/tv/{id}) must be accepted; only the status is checked.
it('should accept Instagram IGTV URLs', async () => {
  const payload = JSON.stringify({ url: 'https://instagram.com/tv/XYZ789' });
  const init = {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: payload
  };
  const request = new Request('http://localhost/api/queue', init);

  const response = await queuePOST({ request } as any);

  expect(response.status).toBe(200);
});
// A plain-HTTP Instagram URL must be rejected with status 400 and a message
// mentioning HTTPS.
it('should reject HTTP (non-HTTPS) URLs', async () => {
  const request = new Request('http://localhost/api/queue', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      url: 'http://instagram.com/p/ABC123'
    })
  });

  // The handler may report the failure either as a returned response or as a
  // thrown error carrying `status` and `body.message`; capture both shapes
  // here and assert afterwards. Keeping `expect` out of the try block stops a
  // failed assertion from being caught and re-checked against the
  // AssertionError, which would hide the real mismatch.
  let status: unknown;
  let message: unknown;
  try {
    const response = await queuePOST({ request } as any);
    const data = await response.json();
    status = response.status;
    message = data.message;
  } catch (err: any) {
    status = err?.status;
    message = err?.body?.message;
  }

  expect(status).toBe(400);
  expect(message).toContain('HTTPS');
});
it('should reject invalid Instagram URL formats', async () => {
// Inputs the relaxed validator must still reject: a foreign domain, a
// scheme-less string, a non-URL and another foreign domain.
// 'https://instagram.com/user/profile' used to be listed here, but any path
// on instagram.com is now valid, so it no longer belongs in a rejection list.
const invalidUrls = [
  'https://facebook.com/post/123',
  'instagram.com/p/ABC123',
  'not-a-url',
  'https://other-site.com'
];
@@ -93,11 +155,12 @@ describe('Queue API Endpoints', () => {
// If we get here, check the response status
expect(response.status).toBe(400);
const data = await response.json();
expect(data.message).toBe('Invalid Instagram URL format. Expected: https://instagram.com/p/{post-id}');
// Updated to check for new error messages
expect(data.message).toBeTruthy();
} catch (err: any) {
// SvelteKit's error() throws - check the error
expect(err.status).toBe(400);
expect(err.body.message).toBe('Invalid Instagram URL format. Expected: https://instagram.com/p/{post-id}');
expect(err.body.message).toBeTruthy();
}
}
@@ -105,6 +168,33 @@ describe('Queue API Endpoints', () => {
expect(queueManager.getAll()).toHaveLength(0);
});
// URLs on foreign or look-alike hosts must be rejected with status 400 and a
// message that names instagram.com.
it('should reject non-Instagram domains', async () => {
  const invalidUrls = [
    'https://facebook.com/post/123',
    'https://twitter.com/status/456',
    'https://example.com',
    'https://instagram.com.evil.com/p/123'
  ];

  for (const url of invalidUrls) {
    const request = new Request('http://localhost/api/queue', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ url })
    });

    // The handler may report the failure either as a returned response or as
    // a thrown error carrying `status` and `body.message`; capture both
    // shapes here and assert afterwards. Keeping `expect` out of the try
    // block stops a failed assertion from being caught and re-checked
    // against the AssertionError, which would hide the real mismatch.
    let status: unknown;
    let message: unknown;
    try {
      const response = await queuePOST({ request } as any);
      const data = await response.json();
      status = response.status;
      message = data.message;
    } catch (err: any) {
      status = err?.status;
      message = err?.body?.message;
    }

    expect(status).toBe(400);
    expect(message).toContain('instagram.com');
  }
});
it('should reject missing URL', async () => {
const request = new Request('http://localhost/api/queue', {
method: 'POST',