# PHP大文件处理与流式上传技术

处理大文件是Web开发中的挑战。PHP的默认配置限制了上传大小和内存使用。今天说说大文件处理和流式上传的实现方案。

PHP配置文件限制上传大小。

```php
<?php
// php.ini 配置
// upload_max_filesize = 200M
// post_max_size = 200M
// max_execution_time = 300
// memory_limit = 256M
?>
```

分片上传是大文件上传的标准方案。前端将文件切分成多个小片段,后端逐个接收,最后合并。

```php
<?php
class ChunkedUploadHandler
{
    private string $uploadDir;
    private string $tempDir;

    public function __construct(string $uploadDir = '/var/www/uploads')
    {
        $this->uploadDir = rtrim($uploadDir, '/');
        $this->tempDir = $this->uploadDir . '/temp';
        foreach ([$this->uploadDir, $this->tempDir] as $dir) {
            if (!is_dir($dir)) mkdir($dir, 0755, true);
        }
    }

    public function handleChunk(array $params): array
    {
        $fileId = $params['file_id'] ?? '';
        $chunkIndex = (int)($params['chunk_index'] ?? 0);
        $totalChunks = (int)($params['total_chunks'] ?? 1);
        $originalName = $params['original_name'] ?? 'unknown';
        $totalSize = (int)($params['total_size'] ?? 0);

        if (empty($fileId)) {
            return ['error' => '缺少文件ID'];
        }

        // 保存分片
        $chunkData = file_get_contents('php://input');
        $chunkDir = $this->tempDir . '/' . $fileId;
        if (!is_dir($chunkDir)) mkdir($chunkDir, 0755, true);
        $chunkFile = $chunkDir . '/' . $chunkIndex;
        file_put_contents($chunkFile, $chunkData);

        // 检查是否所有分片都上传完成
        $receivedChunks = count(glob($chunkDir . '/*'));
        if ($receivedChunks >= $totalChunks) {
            return $this->mergeChunks($fileId, $originalName);
        }

        return [
            'success' => true,
            'received' => $receivedChunks,
            'total' => $totalChunks,
            'progress' => round($receivedChunks / $totalChunks * 100, 1) . '%',
        ];
    }

    private function mergeChunks(string $fileId, string $originalName): array
    {
        $chunkDir = $this->tempDir . '/' . $fileId;
        $files = glob($chunkDir . '/*');
        sort($files, SORT_NUMERIC);

        $ext = pathinfo($originalName, PATHINFO_EXTENSION);
        $newName = bin2hex(random_bytes(16)) . ($ext ? ".{$ext}" : '');
        $destPath = $this->uploadDir . '/' . $newName;
        $destFile = fopen($destPath, 'wb');

        foreach ($files as $file) {
            $chunkData = file_get_contents($file);
            fwrite($destFile, $chunkData);
            unlink($file);
        }
        fclose($destFile);
        rmdir($chunkDir);

        return [
            'success' => true,
            'file_path' => $newName,
            'file_size' => filesize($destPath),
            'original_name' => $originalName,
        ];
    }

    public function getUploadProgress(string $fileId): array
    {
        $chunkDir = $this->tempDir . '/' . $fileId;
        if (!is_dir($chunkDir)) {
            return ['received' => 0, 'total' => 0, 'progress' => '0%'];
        }
        $received = count(glob($chunkDir . '/*'));
        return ['received' => $received, 'progress' => "{$received}个分片已接收"];
    }
}
?>
```

流式处理大文件的核心是逐块处理,不一次性加载到内存。

```php
<?php
class LargeFileProcessor
{
    public function processLargeFile(string $path, callable $lineCallback): int
    {
        $handle = fopen($path, 'r');
        if ($handle === false) {
            throw new RuntimeException("无法打开文件: $path");
        }
        $lineCount = 0;
        while (($line = fgets($handle)) !== false) {
            $lineCount++;
            $lineCallback(trim($line), $lineCount);
        }
        fclose($handle);
        return $lineCount;
    }

    public function processInChunks(string $path, int $chunkSize = 8192, callable $chunkCallback): int
    {
        $handle = fopen($path, 'rb');
        if ($handle === false) {
            throw new RuntimeException("无法打开文件: $path");
        }
        $totalRead = 0;
        while (!feof($handle)) {
            $chunk = fread($handle, $chunkSize);
            $bytes = strlen($chunk);
            if ($bytes === 0) break;
            $totalRead += $bytes;
            $chunkCallback($chunk, $totalRead);
        }
        fclose($handle);
        return $totalRead;
    }

    public function filterLargeFile(string $source, string $dest, callable $filter): void
    {
        $sourceHandle = fopen($source, 'r');
        $destHandle = fopen($dest, 'w');
        if ($sourceHandle === false || $destHandle === false) {
            throw new RuntimeException('文件操作失败');
        }
        while (($line = fgets($sourceHandle)) !== false) {
            $line = trim($line);
            if ($filter($line)) {
                fwrite($destHandle, $line . "\n");
            }
        }
        fclose($sourceHandle);
        fclose($destHandle);
    }
}

$processor = new LargeFileProcessor();

// 生成大文件
$largeFile = '/tmp/large_data.txt';
$handle = fopen($largeFile, 'w');
for ($i = 0; $i < 100000; $i++) {
    fwrite($handle, "行_{$i}," . rand(1000, 9999) . "," . bin2hex(random_bytes(8)) . "\n");
}
fclose($handle);

// 逐行处理
$count = $processor->processLargeFile($largeFile, function ($line, $num) {
    if ($num % 25000 === 0) {
        echo "已处理 {$num} 行\n";
    }
});
echo "共处理 {$count} 行\n";
?>
```

生成器在处理大数据集时能显著减少内存占用。

```php
<?php
class LargeDataSet
{
    public function readCsv(string $path): Generator
    {
        $handle = fopen($path, 'r');
        if ($handle === false) return;
        $headers = fgetcsv($handle);
        if ($headers === false) return;
        yield $headers;
        while (($row = fgetcsv($handle)) !== false) {
            yield array_combine($headers, $row);
        }
        fclose($handle);
    }

    public function generateData(int $count): Generator
    {
        for ($i = 0; $i < $count; $i++) {
            yield [
                'id' => $i + 1,
                'name' => "Item_{$i}",
                'value' => rand(1, 1000),
                'created' => date('Y-m-d H:i:s', time() + $i),
            ];
        }
    }
}

$csvFile = '/tmp/large.csv';
$fp = fopen($csvFile, 'w');
fputcsv($fp, ['id', 'name', 'value', 'created']);
for ($i = 0; $i < 50000; $i++) {
    fputcsv($fp, [$i + 1, "Item_{$i}", rand(1, 1000), date('Y-m-d')]);
}
fclose($fp);

$memoryStart = memory_get_usage(true);
$reader = new LargeDataSet();
$count = 0;
foreach ($reader->readCsv($csvFile) as $index => $row) {
    if (is_array($row) && isset($row['id'])) {
        $count++;
    }
}
echo "读取 {$count} 行,内存使用: " . (memory_get_usage(true) - $memoryStart) / 1024 . " KB\n";
?>
```

大文件处理的关键是避免一次性加载到内存。分片上传适合网络传输,流式读取适合本地处理,生成器适合大数据集遍历。PHP的流式处理能力配合生成器,可以处理任意大小的文件。