fetch-text-by-line.ts 2.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  1. import fs from 'node:fs';
  2. import { Readable } from 'node:stream';
  3. import type { FileHandle } from 'node:fs/promises';
  4. import readline from 'node:readline';
  5. import { TextLineStream } from './text-line-transform-stream';
  6. import type { ReadableStream } from 'node:stream/web';
  7. import { TextDecoderStream } from 'node:stream/web';
  8. import { processLine } from './process-line';
  9. import { $fetch } from './make-fetch-happen';
  10. import type { NodeFetchResponse } from './make-fetch-happen';
  11. import type { UndiciResponseData } from './fetch-retry';
  12. import type { Response as UnidiciWebResponse } from 'undici';
  13. function getReadableStream(file: string | FileHandle): ReadableStream {
  14. if (typeof file === 'string') {
  15. // return fs.openAsBlob(file).then(blob => blob.stream())
  16. return Readable.toWeb(fs.createReadStream(file/* , { encoding: 'utf-8' } */));
  17. }
  18. return file.readableWebStream();
  19. }
  20. // TODO: use FileHandle.readLine()
  21. export const readFileByLineLegacy: ((file: string /* | FileHandle */) => AsyncIterable<string>) = (file: string | FileHandle) => getReadableStream(file)
  22. .pipeThrough(new TextDecoderStream())
  23. .pipeThrough(new TextLineStream());
  24. export const readFileByLine: ((file: string /* | FileHandle */) => AsyncIterable<string>) = (file: string) => readline.createInterface({
  25. input: fs.createReadStream(file/* , { encoding: 'utf-8' } */),
  26. crlfDelay: Infinity
  27. });
  28. function ensureResponseBody<T extends NodeFetchResponse | UndiciResponseData | UnidiciWebResponse>(resp: T): NonNullable<T['body']> {
  29. if (resp.body == null) {
  30. throw new Error('Failed to fetch remote text');
  31. }
  32. if ('bodyUsed' in resp && resp.bodyUsed) {
  33. throw new Error('Body has already been consumed.');
  34. }
  35. return resp.body;
  36. }
  37. export const createReadlineInterfaceFromResponse: ((resp: NodeFetchResponse | UndiciResponseData | UnidiciWebResponse) => AsyncIterable<string>) = (resp) => {
  38. const stream = ensureResponseBody(resp);
  39. const webStream: ReadableStream<Uint8Array> = 'getReader' in stream
  40. ? stream
  41. : (
  42. 'text' in stream
  43. ? stream.body as any
  44. : Readable.toWeb(new Readable().wrap(stream))
  45. );
  46. return webStream
  47. .pipeThrough(new TextDecoderStream())
  48. .pipeThrough(new TextLineStream());
  49. };
  50. export function fetchRemoteTextByLine(url: string) {
  51. return $fetch(url).then(createReadlineInterfaceFromResponse);
  52. }
  53. export async function readFileIntoProcessedArray(file: string /* | FileHandle */) {
  54. const results = [];
  55. for await (const line of readFileByLine(file)) {
  56. if (processLine(line)) {
  57. results.push(line);
  58. }
  59. }
  60. return results;
  61. }