// fetch-text-by-line.ts
  1. import fs from 'node:fs';
  2. import { Readable } from 'node:stream';
  3. import fsp from 'node:fs/promises';
  4. import type { FileHandle } from 'node:fs/promises';
  5. import readline from 'node:readline';
  6. import { TextLineStream } from './text-line-transform-stream';
  7. import type { ReadableStream } from 'node:stream/web';
  8. import { TextDecoderStream } from 'node:stream/web';
  9. import { processLine, ProcessLineStream } from './process-line';
  10. import { $fetch } from './make-fetch-happen';
  11. import type { NodeFetchResponse } from './make-fetch-happen';
  12. import type { UndiciResponseData } from './fetch-retry';
  13. import type { Response as UnidiciWebResponse } from 'undici';
  14. function getReadableStream(file: string | FileHandle): ReadableStream {
  15. if (typeof file === 'string') {
  16. // return fs.openAsBlob(file).then(blob => blob.stream())
  17. return Readable.toWeb(fs.createReadStream(file/* , { encoding: 'utf-8' } */));
  18. }
  19. return file.readableWebStream();
  20. }
// TODO: use FileHandle.readLines()
  22. export const readFileByLineLegacy: ((file: string /* | FileHandle */) => AsyncIterable<string>) = (file: string | FileHandle) => getReadableStream(file)
  23. .pipeThrough(new TextDecoderStream())
  24. .pipeThrough(new TextLineStream());
  25. export function readFileByLine(file: string): AsyncIterable<string> {
  26. return readline.createInterface({
  27. input: fs.createReadStream(file/* , { encoding: 'utf-8' } */),
  28. crlfDelay: Infinity
  29. });
  30. }
  31. const fdReadLines = (fd: FileHandle) => fd.readLines();
  32. export async function readFileByLineNew(file: string): Promise<AsyncIterable<string>> {
  33. return fsp.open(file, 'r').then(fdReadLines);
  34. }
  35. function ensureResponseBody<T extends NodeFetchResponse | UndiciResponseData | UnidiciWebResponse>(resp: T): NonNullable<T['body']> {
  36. if (resp.body == null) {
  37. throw new Error('Failed to fetch remote text');
  38. }
  39. if ('bodyUsed' in resp && resp.bodyUsed) {
  40. throw new Error('Body has already been consumed.');
  41. }
  42. return resp.body;
  43. }
  44. export const createReadlineInterfaceFromResponse: ((resp: NodeFetchResponse | UndiciResponseData | UnidiciWebResponse, processLine?: boolean) => ReadableStream<string>) = (resp, processLine = false) => {
  45. const stream = ensureResponseBody(resp);
  46. const webStream: ReadableStream<Uint8Array> = 'getReader' in stream
  47. ? stream
  48. : (
  49. 'text' in stream
  50. ? stream.body as any
  51. : Readable.toWeb(new Readable().wrap(stream))
  52. );
  53. const resultStream = webStream
  54. .pipeThrough(new TextDecoderStream())
  55. .pipeThrough(new TextLineStream());
  56. if (processLine) {
  57. return resultStream.pipeThrough(new ProcessLineStream());
  58. }
  59. return resultStream;
  60. };
  61. export function fetchRemoteTextByLine(url: string, processLine = false): Promise<AsyncIterable<string>> {
  62. return $fetch(url).then(resp => createReadlineInterfaceFromResponse(resp, processLine));
  63. }
  64. export async function readFileIntoProcessedArray(file: string /* | FileHandle */) {
  65. const results = [];
  66. for await (const line of readFileByLine(file)) {
  67. if (processLine(line)) {
  68. results.push(line);
  69. }
  70. }
  71. return results;
  72. }