fetch-text-by-line.ts 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. import type { BunFile } from 'bun';
  2. import { fetchWithRetry, defaultRequestInit } from './fetch-retry';
  3. import { TextLineStream } from './text-line-transform-stream';
  4. import { PolyfillTextDecoderStream } from './text-decoder-stream';
  5. import { processLine } from './process-line';
  // Opt-in switch: any non-empty ENABLE_TEXT_LINE_STREAM environment value
  // selects the TransformStream-based line readers defined further down.
  const enableTextLineStream = Boolean(process.env.ENABLE_TEXT_LINE_STREAM);
  // Shared UTF-8 text decoder instance.
  const decoder = new TextDecoder('utf-8');
  /**
   * Turns a byte stream into an async iterable of text lines.
   *
   * Bytes are decoded as UTF-8 and split on `\n`; a `\r` directly in front of
   * the `\n` is dropped so CRLF input yields the same lines as LF input. The
   * line terminator itself is never part of a yielded line. A trailing fragment
   * without a final newline is yielded only when it is non-empty.
   *
   * @param stream Source of raw bytes; it is locked for the duration of the iteration.
   * @returns Lines in the order they appear in the stream.
   */
  async function *createTextLineAsyncIterableFromStreamSource(stream: ReadableStream<Uint8Array>): AsyncIterable<string> {
    // One decoder per call: streaming decode keeps partial multi-byte sequences
    // between chunks, so the state must not be shared by concurrent iterations.
    const streamDecoder = new TextDecoder('utf-8');
    const reader = stream.getReader();
    // Text received so far that has not been terminated by a newline yet.
    let buf = '';

    try {
      while (true) {
        const res = await reader.read();
        if (res.done) {
          break;
        }

        // Everything before `scanFrom` was already searched and holds no '\n'.
        const scanFrom = buf.length;
        // `stream: true` keeps a character split across two chunks intact
        // instead of decoding each half to U+FFFD.
        buf += streamDecoder.decode(res.value, { stream: true });

        let lineStart = 0;
        let newlineAt = buf.indexOf('\n', scanFrom);
        while (newlineAt !== -1) {
          // Strip the '\r' of a CRLF pair, including one whose '\r' arrived at
          // the end of the previous chunk (char code 13 === '\r').
          const lineEnd = newlineAt > lineStart && buf.charCodeAt(newlineAt - 1) === 13
            ? newlineAt - 1
            : newlineAt;
          yield buf.slice(lineStart, lineEnd);
          lineStart = newlineAt + 1;
          newlineAt = buf.indexOf('\n', lineStart);
        }
        if (lineStart > 0) {
          buf = buf.slice(lineStart);
        }
      }
      // Flush whatever the decoder is still holding back at end of input.
      buf += streamDecoder.decode();
    } finally {
      // Runs on normal completion, on read errors and when the consumer stops
      // iterating early, so the stream is never left locked.
      reader.releaseLock();
    }

    if (buf) {
      yield buf;
    }
  }
  /**
   * Normalises a file reference into a `BunFile`.
   *
   * A value that is not a string and exposes a `writer` property is treated as
   * an existing `BunFile` and returned untouched; a string path or any other
   * object (e.g. a `URL`) is opened through `Bun.file`.
   */
  const getBunBlob = (file: string | URL | BunFile) => {
    if (typeof file !== 'string' && 'writer' in file) {
      return file;
    }
    return Bun.file(file);
  };
  /**
   * Opens a local file and exposes its content as an async iterable of lines.
   *
   * The implementation is picked once at module load: with
   * `enableTextLineStream` set, the file stream is piped through
   * `PolyfillTextDecoderStream` and `TextLineStream`; otherwise it is handed to
   * `createTextLineAsyncIterableFromStreamSource`.
   */
  // @ts-expect-error -- ReadableStream<string> should be AsyncIterable<string>
  export const readFileByLine: ((file: string | URL | BunFile) => AsyncIterable<string>) = enableTextLineStream
    ? (file) => getBunBlob(file)
      .stream()
      .pipeThrough(new PolyfillTextDecoderStream())
      .pipeThrough(new TextLineStream())
    : (file) => createTextLineAsyncIterableFromStreamSource(
      getBunBlob(file).stream()
    );
  /**
   * Returns the body stream of a response after checking that it can be read.
   *
   * @param resp Response whose body is about to be consumed.
   * @returns The response's body stream.
   * @throws Error when the response has no body, or when its body was already consumed.
   */
  const ensureResponseBody = (resp: Response) => {
    const body = resp.body;
    if (!body) {
      throw new Error('Failed to fetch remote text');
    }
    if (resp.bodyUsed) {
      throw new Error('Body has already been consumed.');
    }
    return body;
  };
  /**
   * Exposes the body of a fetch `Response` as an async iterable of lines.
   *
   * The body is first validated with `ensureResponseBody`, which throws when
   * it is missing or already consumed. With `enableTextLineStream` set, the
   * body is piped through `PolyfillTextDecoderStream` and `TextLineStream`;
   * otherwise it is handed to `createTextLineAsyncIterableFromStreamSource`.
   */
  // @ts-expect-error -- ReadableStream<string> should be AsyncIterable<string>
  export const createReadlineInterfaceFromResponse: ((resp: Response) => AsyncIterable<string>) = enableTextLineStream
    ? (response) => ensureResponseBody(response)
      .pipeThrough(new PolyfillTextDecoderStream())
      .pipeThrough(new TextLineStream())
    : (response) => createTextLineAsyncIterableFromStreamSource(
      ensureResponseBody(response)
    );
  /**
   * Fetches a remote resource and exposes its body line by line.
   *
   * The request is issued through `fetchWithRetry` with `defaultRequestInit`;
   * the resulting response is converted by `createReadlineInterfaceFromResponse`.
   *
   * @param url Location of the remote text.
   * @returns A promise for the line iterable of the response body.
   */
  export function fetchRemoteTextByLine(url: string | URL) {
    const pendingResponse = fetchWithRetry(url, defaultRequestInit);
    return pendingResponse.then(createReadlineInterfaceFromResponse);
  }
  /**
   * Reads a whole file into memory and returns the lines accepted by `processLine`.
   *
   * The file reference is normalised with `getBunBlob`, so a path, a `URL` and
   * a `BunFile` are handled the same way as in `readFileByLine`, and the
   * `file` parameter is no longer reassigned.
   *
   * Lines are split on `\n` with an optional preceding `\r`, so CRLF files do
   * not leave a trailing `\r` on every line; this matches the CRLF handling of
   * the streaming line reader in this file.
   *
   * @param file Path, URL or `BunFile` to read.
   * @returns The lines for which `processLine` returned a truthy value, in file order.
   */
  export async function readFileIntoProcessedArray(file: string | URL | BunFile) {
    const content = await getBunBlob(file).text();
    return content.split(/\r?\n/).filter(processLine);
  }