// Listing metadata: 379 lines, 14 KiB, C#
using FruitBank.Common.Entities;
|
|
using Nop.Plugin.Misc.FruitBankPlugin.Domains.DataLayer;
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.IO;
|
|
using System.IO.Compression;
|
|
using System.Linq;
|
|
using System.Security.Cryptography;
|
|
using System.Threading.Tasks;
|
|
|
|
namespace Nop.Plugin.Misc.FruitBankPlugin.Services.FileStorage
|
|
{
|
|
/// <summary>
|
|
/// Generic file storage service with compression, hash calculation, and duplicate detection
|
|
/// </summary>
|
|
public class FileStorageService
|
|
{
|
|
/// <summary>Storage backend used to save, read and delete the file contents.</summary>
private readonly IFileStorageProvider _storageProvider;

/// <summary>Database context whose <c>Files</c> member holds the file records.</summary>
private readonly FruitBankDbContext _dbContext;

/// <summary>
/// Extensions (leading dot included, compared case-insensitively) of formats that are
/// already compressed. Files with one of these extensions are stored without GZip.
/// </summary>
private static readonly HashSet<string> CompressedExtensions = new HashSet<string>(
    new[]
    {
        // Images
        ".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp",
        // PDFs
        ".pdf",
        // Archives
        ".zip", ".rar", ".7z", ".gz", ".bz2",
        // Videos
        ".mp4", ".avi", ".mov", ".mkv",
        // Audio
        ".mp3", ".flac", ".aac", ".ogg"
    },
    StringComparer.OrdinalIgnoreCase);
|
|
|
|
/// <summary>
/// Creates the service on top of a storage provider and a database context.
/// </summary>
/// <param name="storageProvider">Backend that stores the file contents.</param>
/// <param name="dbContext">Database context that stores the file records.</param>
/// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
public FileStorageService(IFileStorageProvider storageProvider, FruitBankDbContext dbContext)
{
    if (storageProvider is null)
    {
        throw new ArgumentNullException(nameof(storageProvider));
    }

    if (dbContext is null)
    {
        throw new ArgumentNullException(nameof(dbContext));
    }

    _storageProvider = storageProvider;
    _dbContext = dbContext;
}
|
|
|
|
/// <summary>
/// Saves a file with optional compression, hash calculation, and duplicate detection.
/// </summary>
/// <remarks>
/// The whole content of <paramref name="fileStream"/> is hashed and stored. A seekable
/// stream is rewound to its start first; a forward-only stream is buffered in memory so
/// it can be read twice (once for the hash, once for storage). The caller keeps ownership
/// of <paramref name="fileStream"/>; it is not disposed here.
/// When a duplicate is found the existing record is returned unchanged and nothing is
/// written for the supplied user/feature/entity. The lookup matches on hash only.
/// </remarks>
/// <param name="fileStream">The file stream to save</param>
/// <param name="fileName">Original filename with extension</param>
/// <param name="userId">User ID for path organization</param>
/// <param name="featureName">Feature name (e.g., "ShippingDocumentProcessing")</param>
/// <param name="entityType">Entity type (e.g., "ShippingDocuments")</param>
/// <param name="entityId">Entity ID</param>
/// <param name="rawText">Optional raw text content (for AI-extracted documents)</param>
/// <param name="checkForDuplicates">If true, checks if file already exists by hash</param>
/// <returns>Created or existing Files entity with ID</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="fileStream"/> is null, or <paramref name="fileName"/> is null, empty or whitespace.
/// </exception>
public async Task<Files> SaveFileAsync(
    Stream fileStream,
    string fileName,
    int userId,
    string featureName,
    string entityType,
    int entityId,
    string rawText = null,
    bool checkForDuplicates = true)
{
    if (fileStream == null)
    {
        throw new ArgumentNullException(nameof(fileStream));
    }

    if (string.IsNullOrWhiteSpace(fileName))
    {
        // Exception type kept as-is so existing callers that catch it keep working.
        throw new ArgumentNullException(nameof(fileName));
    }

    // Hashing and storing both read the content from the start, so a forward-only
    // stream has to be copied into a rewindable buffer first.
    MemoryStream bufferedCopy = null;
    Stream content = fileStream;
    if (!fileStream.CanSeek)
    {
        bufferedCopy = new MemoryStream();
        await fileStream.CopyToAsync(bufferedCopy);
        content = bufferedCopy;
    }

    try
    {
        return await SaveSeekableFileAsync(
            content, fileName, userId, featureName, entityType, entityId, rawText, checkForDuplicates);
    }
    finally
    {
        bufferedCopy?.Dispose();
    }
}

/// <summary>
/// Hashes, de-duplicates, records and stores the content of a seekable stream.
/// </summary>
private async Task<Files> SaveSeekableFileAsync(
    Stream content,
    string fileName,
    int userId,
    string featureName,
    string entityType,
    int entityId,
    string rawText,
    bool checkForDuplicates)
{
    // STEP 1: Hash the full content. Rewinding first keeps the hash in sync with the
    // bytes that are stored below, which are always read from position 0.
    content.Position = 0;
    string fileHash = await CalculateFileHashAsync(content);
    content.Position = 0; // Reset stream position after hashing

    Console.WriteLine($"📝 File hash calculated: {fileHash}");

    // STEP 2: Check for duplicate file by hash
    if (checkForDuplicates)
    {
        var existingFile = await _dbContext.Files
            .GetAll()
            .FirstOrDefaultAsync(f => f.FileHash == fileHash);

        if (existingFile != null)
        {
            Console.WriteLine($"♻️ Duplicate file detected! Reusing existing file ID: {existingFile.Id}");
            return existingFile; // Return existing file instead of creating new one
        }
    }

    // STEP 3: Create database record first to get ID
    var fileExtension = Path.GetExtension(fileName);
    bool shouldCompress = !IsAlreadyCompressed(fileExtension);

    var fileEntity = new Files
    {
        FileName = Path.GetFileNameWithoutExtension(fileName),
        FileExtension = fileExtension,
        RawText = rawText,
        FileHash = fileHash,
        Created = DateTime.UtcNow,
        Modified = DateTime.UtcNow,
        IsCompressed = shouldCompress
    };

    await _dbContext.Files.InsertAsync(fileEntity);

    Console.WriteLine($"✅ File record created - ID: {fileEntity.Id}, Hash: {fileHash}");

    // STEP 4: Build storage path with file ID
    var fileNameWithId = $"{fileEntity.FileName}_{fileEntity.Id}{fileExtension}";
    var relativePath = BuildRelativePath(userId, featureName, entityType, entityId, fileNameWithId);

    // STEP 5: Compress when needed and hand the bytes to the storage provider
    Stream compressedStream = null;
    try
    {
        Stream streamToSave = content;

        if (shouldCompress)
        {
            compressedStream = await CompressStreamAsync(content);
            streamToSave = compressedStream;
            // The stored file carries a ".gz" suffix to indicate compression
            relativePath += ".gz";
        }

        await _storageProvider.SaveFileAsync(streamToSave, relativePath);

        Console.WriteLine($"💾 File saved: {relativePath} (Compressed: {shouldCompress})");
        return fileEntity;
    }
    catch (Exception ex)
    {
        Console.Error.WriteLine($"❌ Error saving file: {ex.Message}");

        // Rollback database record if file save fails. A failing rollback is logged
        // but must not replace the original exception, which is rethrown below.
        try
        {
            await _dbContext.Files.DeleteAsync(fileEntity);
        }
        catch (Exception rollbackEx)
        {
            Console.Error.WriteLine($"❌ Rollback of file record {fileEntity.Id} failed: {rollbackEx.Message}");
        }

        throw;
    }
    finally
    {
        // Dispose the compressed copy on success and on failure alike
        if (compressedStream != null)
        {
            await compressedStream.DisposeAsync();
        }
    }
}
|
|
|
|
/// <summary>
/// Looks up the first file record whose stored hash equals <paramref name="fileHash"/>.
/// </summary>
/// <param name="fileHash">Hash value to look for.</param>
/// <returns>The first matching record as returned by <c>FirstOrDefaultAsync</c>.</returns>
public async Task<Files> FindFileByHashAsync(string fileHash)
{
    var allFiles = _dbContext.Files.GetAll();
    var firstMatch = await allFiles.FirstOrDefaultAsync(f => f.FileHash == fileHash);
    return firstMatch;
}
|
|
|
|
/// <summary>
/// Returns every file record whose stored hash equals <paramref name="fileHash"/>.
/// </summary>
/// <param name="fileHash">Hash value to look for.</param>
/// <returns>List of all records sharing that hash.</returns>
public async Task<List<Files>> FindDuplicateFilesByHashAsync(string fileHash)
{
    var sameHash = _dbContext.Files
        .GetAll()
        .Where(f => f.FileHash == fileHash);

    var duplicates = await sameHash.ToListAsync();
    return duplicates;
}
|
|
|
|
/// <summary>
/// Calculates the SHA256 hash of a stream, reading from its current position to the end.
/// </summary>
/// <remarks>
/// The stream is read with asynchronous I/O and fed to the hash incrementally, so no
/// thread-pool thread is blocked on synchronous reads. The stream is left positioned at
/// its end and is not disposed.
/// </remarks>
/// <param name="stream">Readable stream to hash.</param>
/// <returns>The hash as a lowercase hexadecimal string without separators.</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
private async Task<string> CalculateFileHashAsync(Stream stream)
{
    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }

    using (var hasher = IncrementalHash.CreateHash(HashAlgorithmName.SHA256))
    {
        var buffer = new byte[81920];
        int bytesRead;

        while ((bytesRead = await stream.ReadAsync(buffer, 0, buffer.Length)) > 0)
        {
            hasher.AppendData(buffer, 0, bytesRead);
        }

        var hashBytes = hasher.GetHashAndReset();
        return BitConverter.ToString(hashBytes).Replace("-", "").ToLowerInvariant();
    }
}
|
|
|
|
/// <summary>
/// Retrieves a file by ID with automatic decompression.
/// </summary>
/// <remarks>
/// The storage path is rebuilt from the record plus the supplied user/feature/entity
/// values. Whether the stored file is GZip-compressed is derived from the record's
/// extension, the same rule the save path applies. The caller owns the returned stream.
/// </remarks>
/// <param name="fileId">ID of the file record.</param>
/// <param name="userId">User ID used when the file was stored.</param>
/// <param name="featureName">Feature name used when the file was stored.</param>
/// <param name="entityType">Entity type used when the file was stored.</param>
/// <param name="entityId">Entity ID used when the file was stored.</param>
/// <returns>The (decompressed) content stream together with the file record.</returns>
/// <exception cref="FileNotFoundException">No record with <paramref name="fileId"/> exists.</exception>
public async Task<(Stream FileStream, Files FileInfo)> GetFileByIdAsync(
    int fileId,
    int userId,
    string featureName,
    string entityType,
    int entityId)
{
    // Get file record from database
    var fileEntity = await _dbContext.Files.GetByIdAsync(fileId);

    if (fileEntity == null)
    {
        throw new FileNotFoundException($"File with ID {fileId} not found in database");
    }

    // Build path
    var isCompressed = !IsAlreadyCompressed(fileEntity.FileExtension);
    var fileNameWithId = $"{fileEntity.FileName}_{fileEntity.Id}{fileEntity.FileExtension}";

    if (isCompressed)
    {
        fileNameWithId += ".gz";
    }

    var relativePath = BuildRelativePath(userId, featureName, entityType, entityId, fileNameWithId);

    // Get file from storage
    var storedStream = await _storageProvider.GetFileAsync(relativePath);

    if (!isCompressed)
    {
        return (storedStream, fileEntity);
    }

    // Decompress into a new stream. The provider stream is released in all cases,
    // including when decompression fails, so it cannot leak.
    try
    {
        var decompressedStream = await DecompressStreamAsync(storedStream);
        return (decompressedStream, fileEntity);
    }
    finally
    {
        if (storedStream != null)
        {
            await storedStream.DisposeAsync();
        }
    }
}
|
|
|
|
/// <summary>
/// Loads every file record from the database into a list.
/// </summary>
/// <returns>All file records.</returns>
public async Task<List<Files>> GetAllFilesAsync()
{
    var everyFile = _dbContext.Files.GetAll();
    return await everyFile.ToListAsync();
}
|
|
|
|
/// <summary>
/// Searches files by filename, extension, hash, or raw text content.
/// </summary>
/// <remarks>
/// All records are loaded first and filtered in memory with a case-insensitive
/// ordinal substring match. A null, empty or whitespace term returns every file.
/// </remarks>
/// <param name="searchTerm">Text to look for.</param>
/// <returns>The records in which at least one of the searched fields contains the term.</returns>
public async Task<List<Files>> SearchFilesAsync(string searchTerm)
{
    if (string.IsNullOrWhiteSpace(searchTerm))
    {
        return await GetAllFilesAsync();
    }

    // True when the field has a value and contains the term (ignoring case).
    bool ContainsTerm(string field) =>
        !string.IsNullOrEmpty(field) &&
        field.Contains(searchTerm, StringComparison.OrdinalIgnoreCase);

    var candidates = await _dbContext.Files.GetAll().ToListAsync();

    var matches = new List<Files>();
    foreach (var candidate in candidates)
    {
        var isHit =
            ContainsTerm(candidate.FileName) ||      // filename
            ContainsTerm(candidate.FileExtension) || // file extension
            ContainsTerm(candidate.FileHash) ||      // file hash
            ContainsTerm(candidate.RawText);         // full text, skipped when RawText is null

        if (isHit)
        {
            matches.Add(candidate);
        }
    }

    Console.WriteLine($"🔍 Search for '{searchTerm}' returned {matches.Count} results");
    return matches;
}
|
|
|
|
/// <summary>
/// Inserts the record when it has no ID yet (<c>Id</c> not greater than 0), otherwise updates it.
/// </summary>
/// <remarks>
/// <c>Modified</c> is always set to the current UTC time; <c>Created</c> is set only on insert.
/// </remarks>
/// <param name="fileEntity">Record to insert or update.</param>
/// <returns>The same <paramref name="fileEntity"/> instance.</returns>
/// <exception cref="ArgumentNullException"><paramref name="fileEntity"/> is null.</exception>
public async Task<Files> AddOrUpdateFileAsync(Files fileEntity)
{
    if (fileEntity == null)
    {
        throw new ArgumentNullException(nameof(fileEntity));
    }

    var isNewRecord = !(fileEntity.Id > 0);

    if (isNewRecord)
    {
        fileEntity.Created = DateTime.UtcNow;
        fileEntity.Modified = DateTime.UtcNow;
        await _dbContext.Files.InsertAsync(fileEntity);
        return fileEntity;
    }

    fileEntity.Modified = DateTime.UtcNow;
    await _dbContext.Files.UpdateAsync(fileEntity);
    return fileEntity;
}
|
|
|
|
/// <summary>
/// Deletes a file from storage first and then removes its database record.
/// </summary>
/// <param name="fileId">ID of the file record.</param>
/// <param name="userId">User ID used when the file was stored.</param>
/// <param name="featureName">Feature name used when the file was stored.</param>
/// <param name="entityType">Entity type used when the file was stored.</param>
/// <param name="entityId">Entity ID used when the file was stored.</param>
/// <returns><c>false</c> when no record with that ID exists; otherwise <c>true</c>.</returns>
public async Task<bool> DeleteFileAsync(
    int fileId,
    int userId,
    string featureName,
    string entityType,
    int entityId)
{
    var record = await _dbContext.Files.GetByIdAsync(fileId);
    if (record == null)
    {
        return false;
    }

    // Files whose extension is not in the already-compressed list carry a ".gz" suffix in storage.
    var gzipSuffix = IsAlreadyCompressed(record.FileExtension) ? string.Empty : ".gz";
    var storedName = $"{record.FileName}_{record.Id}{record.FileExtension}{gzipSuffix}";
    var storedPath = BuildRelativePath(userId, featureName, entityType, entityId, storedName);

    await _storageProvider.DeleteFileAsync(storedPath);
    await _dbContext.Files.DeleteAsync(record);

    return true;
}
|
|
|
|
#region Private Helper Methods
|
|
|
|
/// <summary>
/// Combines the path segments <c>userId / featureName / entityType-entityId / fileName</c>
/// into a relative storage path using <see cref="Path.Combine(string, string, string, string)"/>.
/// </summary>
private string BuildRelativePath(int userId, string featureName, string entityType, int entityId, string fileName)
{
    var userFolder = userId.ToString();
    var entityFolder = $"{entityType}-{entityId}";

    return Path.Combine(userFolder, featureName, entityFolder, fileName);
}
|
|
|
|
/// <summary>
/// Tells whether <paramref name="extension"/> is listed in <c>CompressedExtensions</c>,
/// i.e. belongs to a format that is not GZip-compressed again before storage.
/// </summary>
/// <param name="extension">File extension to look up in the set.</param>
private bool IsAlreadyCompressed(string extension) => CompressedExtensions.Contains(extension);
|
|
|
|
/// <summary>
/// GZip-compresses the rest of <paramref name="inputStream"/> into a new in-memory stream.
/// </summary>
/// <param name="inputStream">Stream to read from; it is not disposed here.</param>
/// <returns>A <see cref="MemoryStream"/> with the compressed bytes, positioned at its start.</returns>
private async Task<Stream> CompressStreamAsync(Stream inputStream)
{
    var gzippedBuffer = new MemoryStream();

    // The GZip writer must be closed before the buffer is rewound so that the
    // trailing GZip data is flushed into it; leaveOpen keeps the buffer usable.
    var gzipWriter = new GZipStream(gzippedBuffer, CompressionMode.Compress, leaveOpen: true);
    try
    {
        await inputStream.CopyToAsync(gzipWriter);
    }
    finally
    {
        gzipWriter.Dispose();
    }

    gzippedBuffer.Seek(0, SeekOrigin.Begin);
    return gzippedBuffer;
}
|
|
|
|
/// <summary>
/// Expands a GZip stream into a new in-memory stream.
/// </summary>
/// <param name="compressedStream">GZip data to read; left open for the caller to dispose.</param>
/// <returns>A <see cref="MemoryStream"/> with the decompressed bytes, positioned at its start.</returns>
private async Task<Stream> DecompressStreamAsync(Stream compressedStream)
{
    var plainBuffer = new MemoryStream();

    // leaveOpen: closing the GZip reader must not close the caller's stream.
    var gzipReader = new GZipStream(compressedStream, CompressionMode.Decompress, leaveOpen: true);
    try
    {
        await gzipReader.CopyToAsync(plainBuffer);
    }
    finally
    {
        gzipReader.Dispose();
    }

    plainBuffer.Seek(0, SeekOrigin.Begin);
    return plainBuffer;
}
|
|
|
|
#endregion
|
|
}
|
|
}
|