// fetch-text-by-line.ts
  1. import fs from 'node:fs';
  2. import { Readable } from 'node:stream';
  3. import { fetchWithRetry, defaultRequestInit } from './fetch-retry';
  4. import type { FileHandle } from 'node:fs/promises';
  5. import { TextLineStream } from './text-line-transform-stream';
  6. import type { ReadableStream } from 'node:stream/web';
  7. import { TextDecoderStream } from 'node:stream/web';
  8. import { processLine } from './process-line';
  9. const getReadableStream = (file: string | FileHandle): ReadableStream => {
  10. if (typeof file === 'string') {
  11. // return fs.openAsBlob(file).then(blob => blob.stream())
  12. return Readable.toWeb(fs.createReadStream(file/* , { encoding: 'utf-8' } */));
  13. }
  14. return file.readableWebStream();
  15. };
// TODO: use FileHandle.readLines()
  17. export const readFileByLine: ((file: string | FileHandle) => AsyncIterable<string>) = (file: string | FileHandle) => getReadableStream(file)
  18. .pipeThrough(new TextDecoderStream())
  19. .pipeThrough(new TextLineStream());
  20. const ensureResponseBody = (resp: Response) => {
  21. if (!resp.body) {
  22. throw new Error('Failed to fetch remote text');
  23. }
  24. if (resp.bodyUsed) {
  25. throw new Error('Body has already been consumed.');
  26. }
  27. return resp.body;
  28. };
  29. export const createReadlineInterfaceFromResponse: ((resp: Response) => AsyncIterable<string>) = (resp) => ensureResponseBody(resp)
  30. .pipeThrough(new TextDecoderStream())
  31. .pipeThrough(new TextLineStream());
  32. export function fetchRemoteTextByLine(url: string | URL) {
  33. return fetchWithRetry(url, defaultRequestInit).then(createReadlineInterfaceFromResponse);
  34. }
  35. export async function readFileIntoProcessedArray(file: string | FileHandle) {
  36. const results = [];
  37. for await (const line of readFileByLine(file)) {
  38. if (processLine(line)) {
  39. results.push(line);
  40. }
  41. }
  42. return results;
  43. }