fetch-text-by-line.ts 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. import type { BunFile } from 'bun';
  2. import { fetchWithRetry, defaultRequestInit } from './fetch-retry';
  3. import type { FileHandle } from 'fs/promises';
  4. import { TextLineStream } from './text-line-transform-stream';
  5. import { PolyfillTextDecoderStream } from './text-decoder-stream';
  6. import { processLine } from './process-line';
  7. const enableTextLineStream = !!process.env.ENABLE_TEXT_LINE_STREAM;
  8. const decoder = new TextDecoder('utf-8');
  9. async function *createTextLineAsyncIterableFromStreamSource(stream: ReadableStream<Uint8Array>): AsyncIterable<string> {
  10. let buf = '';
  11. const reader = stream.getReader();
  12. while (true) {
  13. const res = await reader.read();
  14. if (res.done) {
  15. break;
  16. }
  17. const chunkStr = decoder.decode(res.value).replaceAll('\r\n', '\n');
  18. for (let i = 0, len = chunkStr.length; i < len; i++) {
  19. const char = chunkStr[i];
  20. if (char === '\n') {
  21. yield buf;
  22. buf = '';
  23. } else {
  24. buf += char;
  25. }
  26. }
  27. }
  28. if (buf) {
  29. yield buf;
  30. }
  31. }
  32. const getReadableStream = (file: string | BunFile | FileHandle): ReadableStream => {
  33. if (typeof file === 'string') {
  34. return Bun.file(file).stream();
  35. }
  36. if ('writer' in file) {
  37. return file.stream();
  38. }
  39. return file.readableWebStream();
  40. };
// TODO: use FileHandle.readLines()
  42. export const readFileByLine: ((file: string | BunFile | FileHandle) => AsyncIterable<string>) = enableTextLineStream
  43. ? (file: string | BunFile | FileHandle) => getReadableStream(file).pipeThrough(new PolyfillTextDecoderStream()).pipeThrough(new TextLineStream())
  44. : (file: string | BunFile | FileHandle) => createTextLineAsyncIterableFromStreamSource(getReadableStream(file));
  45. const ensureResponseBody = (resp: Response) => {
  46. if (!resp.body) {
  47. throw new Error('Failed to fetch remote text');
  48. }
  49. if (resp.bodyUsed) {
  50. throw new Error('Body has already been consumed.');
  51. }
  52. return resp.body;
  53. };
  54. export const createReadlineInterfaceFromResponse: ((resp: Response) => AsyncIterable<string>) = enableTextLineStream
  55. ? (resp) => ensureResponseBody(resp).pipeThrough(new PolyfillTextDecoderStream()).pipeThrough(new TextLineStream())
  56. : (resp) => createTextLineAsyncIterableFromStreamSource(ensureResponseBody(resp));
  57. export function fetchRemoteTextByLine(url: string | URL) {
  58. return fetchWithRetry(url, defaultRequestInit).then(createReadlineInterfaceFromResponse);
  59. }
  60. export async function readFileIntoProcessedArray(file: string | URL | BunFile) {
  61. if (typeof file === 'string') {
  62. file = Bun.file(file);
  63. } else if (!('writer' in file)) {
  64. file = Bun.file(file);
  65. }
  66. const content = await file.text();
  67. return content.split('\n').filter(processLine);
  68. }