fetch-text-by-line.ts 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. import type { BunFile } from 'bun';
  2. import { fetchWithRetry, defaultRequestInit } from './fetch-retry';
  3. import { TextLineStream } from './text-line-transform-stream';
  4. import { PolyfillTextDecoderStream } from './text-decoder-stream';
  5. import { processLine } from './process-line';
  // Opt-in switch: true when the ENABLE_TEXT_LINE_STREAM environment variable is set to a non-empty value.
  const enableTextLineStream = Boolean(process.env.ENABLE_TEXT_LINE_STREAM);
  // Module-level UTF-8 decoder used when turning raw byte chunks into text.
  const decoder = new TextDecoder('utf-8');
  /**
   * Turns a stream of UTF-8 bytes into an async iterable of text lines.
   *
   * A line ends at `\n`; a `\r` directly in front of that `\n` is treated as part of
   * the terminator and dropped. Empty lines are yielded as empty strings. Text that
   * follows the last `\n` is yielded as a final line only when it is non-empty.
   *
   * Chunk boundaries are transparent: a multi-byte character or a `\r\n` pair that is
   * split across two chunks is handled the same as if it had arrived in one piece.
   *
   * @param stream - Source of UTF-8 encoded bytes.
   * @returns An async iterable that yields each line without its terminator.
   */
  async function *createTextLineAsyncIterableFromStreamSource(stream: ReadableStream<Uint8Array>): AsyncIterable<string> {
    // Streaming decode keeps partial-character state between calls, so each
    // invocation needs its own decoder instead of a shared module-level one.
    const streamDecoder = new TextDecoder('utf-8');
    // Text received so far that has not yet been terminated by '\n'.
    let buf = '';

    // @ts-expect-error -- ReadableStream<Uint8Array> should be AsyncIterable<Uint8Array>
    for await (const chunk of stream) {
      // Everything already in `buf` is known to contain no '\n', so only the
      // newly appended text needs to be scanned.
      const scanFrom = buf.length;
      buf += streamDecoder.decode(chunk, { stream: true });

      let lineStart = 0;
      let newlineAt = buf.indexOf('\n', scanFrom);
      while (newlineAt !== -1) {
        // Drop a single '\r' that immediately precedes the '\n' (CRLF terminator).
        const hasCarriageReturn = newlineAt > lineStart && buf.charCodeAt(newlineAt - 1) === 13;
        yield buf.slice(lineStart, hasCarriageReturn ? newlineAt - 1 : newlineAt);
        lineStart = newlineAt + 1;
        newlineAt = buf.indexOf('\n', lineStart);
      }

      if (lineStart > 0) {
        buf = buf.slice(lineStart);
      }
    }

    // Flush any bytes the decoder was still holding on to at end of stream.
    buf += streamDecoder.decode();
    if (buf) {
      yield buf;
    }
  }
  /**
   * Normalises a file reference into a Bun file handle.
   *
   * A value that is not a string and already exposes a `writer` member is returned
   * untouched; anything else (a path string or a URL) is passed to `Bun.file`.
   */
  const getBunBlob = (file: string | URL | BunFile) => (
    typeof file !== 'string' && 'writer' in file
      ? file
      : Bun.file(file)
  );
  /**
   * Reads a file and exposes its content as an async iterable of lines.
   *
   * Which implementation is used is fixed once, at module load, by `enableTextLineStream`:
   * either the stream is piped through `PolyfillTextDecoderStream` and `TextLineStream`,
   * or the bytes are fed to `createTextLineAsyncIterableFromStreamSource`.
   */
  // @ts-expect-error -- ReadableStream<string> should be AsyncIterable<string>
  export const readFileByLine: ((file: string | URL | BunFile) => AsyncIterable<string>) = enableTextLineStream
    ? (file: string | URL | BunFile) => {
      const decodedText = getBunBlob(file).stream().pipeThrough(new PolyfillTextDecoderStream());
      return decodedText.pipeThrough(new TextLineStream());
    }
    : (file: string | URL | BunFile) => {
      const rawBytes = getBunBlob(file).stream();
      return createTextLineAsyncIterableFromStreamSource(rawBytes);
    };
  /**
   * Returns the body stream of a response, refusing responses that cannot be read.
   *
   * @param resp - The response whose body is wanted.
   * @returns The response's body stream.
   * @throws Error when the response has no body, or when the body was already consumed.
   */
  const ensureResponseBody = (resp: Response) => {
    const { body } = resp;

    // The missing-body check comes first so it wins when both conditions hold.
    if (!body) {
      throw new Error('Failed to fetch remote text');
    }
    if (resp.bodyUsed) {
      throw new Error('Body has already been consumed.');
    }

    return body;
  };
  /**
   * Exposes the body of a response as an async iterable of lines.
   *
   * The body is first validated with `ensureResponseBody`. Depending on
   * `enableTextLineStream` (evaluated once at module load) it is then either piped
   * through `PolyfillTextDecoderStream` and `TextLineStream`, or handed to
   * `createTextLineAsyncIterableFromStreamSource`.
   */
  // @ts-expect-error -- ReadableStream<string> should be AsyncIterable<string>
  export const createReadlineInterfaceFromResponse: ((resp: Response) => AsyncIterable<string>) = enableTextLineStream
    ? (resp) => {
      const decodedText = ensureResponseBody(resp).pipeThrough(new PolyfillTextDecoderStream());
      return decodedText.pipeThrough(new TextLineStream());
    }
    : (resp) => {
      const rawBytes = ensureResponseBody(resp);
      return createTextLineAsyncIterableFromStreamSource(rawBytes);
    };
  /**
   * Requests `url` via `fetchWithRetry` (using `defaultRequestInit`) and resolves to
   * a line-by-line view of the response produced by `createReadlineInterfaceFromResponse`.
   *
   * @param url - Address of the remote text resource.
   */
  export function fetchRemoteTextByLine(url: string | URL) {
    const pendingResponse = fetchWithRetry(url, defaultRequestInit);
    return pendingResponse.then((resp) => createReadlineInterfaceFromResponse(resp));
  }
  /**
   * Reads a whole file into memory and returns the lines accepted by `processLine`.
   *
   * The content is split on `\n` and `processLine` is used as the filter predicate,
   * so the returned array holds the original line strings for which it returned a
   * truthy value.
   *
   * @param file - A path string, a URL, or an existing Bun file handle.
   * @returns The lines of the file that passed the `processLine` filter.
   */
  export async function readFileIntoProcessedArray(file: string | URL | BunFile) {
    // Reuse the shared normaliser instead of duplicating its checks and
    // reassigning the parameter.
    const content = await getBunBlob(file).text();
    // NOTE(review): unlike the streaming line reader in this file, '\r\n' is not
    // normalised here, so lines from CRLF files keep a trailing '\r' — confirm
    // whether callers / processLine expect that.
    return content.split('\n').filter(processLine);
  }