summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Rohan Godha <dev@rohangodha.com> 2024-09-29 14:41:15 -0400
committerGravatar GitHub <noreply@github.com> 2024-09-29 19:41:15 +0100
commit21b5e806c5df37c6b01da63487568a6ed351ba7d (patch)
treedf4c67413c56e5fa55119bfd18e8b96d5cd65180
parent0a1036eef62f13c9609362874c5b88434d1e9300 (diff)
downloadastro-21b5e806c5df37c6b01da63487568a6ed351ba7d.tar.gz
astro-21b5e806c5df37c6b01da63487568a6ed351ba7d.tar.zst
astro-21b5e806c5df37c6b01da63487568a6ed351ba7d.zip
feat: custom file formats in file content loader (#12047)
* add custom file format support * add tests * lint/format * changeset * nits * finish tests * add nested json test * requested changes * update changeset with @sarah11918 suggestions * typos/formatting * add map<id, data> yaml test * fix tests and rebase
-rw-r--r--.changeset/lovely-pianos-breathe.md68
-rw-r--r--packages/astro/src/content/loaders/file.ts61
-rw-r--r--packages/astro/test/content-layer.test.js70
-rw-r--r--packages/astro/test/fixtures/content-layer/package.json3
-rw-r--r--packages/astro/test/fixtures/content-layer/src/content/config.ts44
-rw-r--r--packages/astro/test/fixtures/content-layer/src/data/birds.json34
-rw-r--r--packages/astro/test/fixtures/content-layer/src/data/fish.yaml42
-rw-r--r--packages/astro/test/fixtures/content-layer/src/data/music.toml89
-rw-r--r--packages/astro/test/fixtures/content-layer/src/pages/collections.json.js18
-rw-r--r--pnpm-lock.yaml8
10 files changed, 408 insertions, 29 deletions
diff --git a/.changeset/lovely-pianos-breathe.md b/.changeset/lovely-pianos-breathe.md
new file mode 100644
index 000000000..d0d2df792
--- /dev/null
+++ b/.changeset/lovely-pianos-breathe.md
@@ -0,0 +1,68 @@
+---
+'astro': minor
+---
+
+Adds a new optional `parser` property to the built-in `file()` loader for content collections to support additional file types such as `toml` and `csv`.
+
+The `file()` loader now accepts a second argument that defines a `parser` function. This allows you to specify a custom parser (e.g. `toml.parse` or `csv-parse`) to create a collection from a file's contents. The `file()` loader will automatically detect and parse JSON and YAML files (based on their file extension) with no need for a `parser`.
+
+This works with any custom file format, including `csv` and `toml`. The following example defines a content collection `dogs` using a `.toml` file:
+```toml
+[[dogs]]
+id = "..."
+age = "..."
+
+[[dogs]]
+id = "..."
+age = "..."
+```
+After importing TOML's parser, you can load the `dogs` collection into your project by passing both a file path and `parser` to the `file()` loader.
+```typescript
+import { defineCollection } from "astro:content"
+import { file } from "astro/loaders"
+import { parse as parseToml } from "toml"
+
+const dogs = defineCollection({
+ loader: file("src/data/dogs.toml", { parser: (text) => parseToml(text).dogs }),
+ schema: /* ... */
+})
+
+// it also works with CSVs!
+import { parse as parseCsv } from "csv-parse/sync";
+
+const cats = defineCollection({
+ loader: file("src/data/cats.csv", { parser: (text) => parseCsv(text, { columns: true, skipEmptyLines: true })})
+});
+```
+
+The `parser` argument also allows you to load a single collection from a nested JSON document. For example, this JSON file contains multiple collections:
+```json
+{"dogs": [{}], "cats": [{}]}
+```
+
+You can separate these collections by passing a custom `parser` to the `file()` loader like so:
+```typescript
+const dogs = defineCollection({
+ loader: file("src/data/pets.json", { parser: (text) => JSON.parse(text).dogs })
+});
+const cats = defineCollection({
+ loader: file("src/data/pets.json", { parser: (text) => JSON.parse(text).cats })
+});
+```
+
+And it continues to work with maps of `id` to `data`:
+```yaml
+bubbles:
+ breed: "Goldfish"
+ age: 2
+finn:
+ breed: "Betta"
+ age: 1
+```
+
+```typescript
+const fish = defineCollection({
+ loader: file("src/data/fish.yaml"),
+ schema: z.object({ breed: z.string(), age: z.number() })
+});
+```
diff --git a/packages/astro/src/content/loaders/file.ts b/packages/astro/src/content/loaders/file.ts
index 22d498b12..d109f95b6 100644
--- a/packages/astro/src/content/loaders/file.ts
+++ b/packages/astro/src/content/loaders/file.ts
@@ -1,25 +1,56 @@
import { promises as fs, existsSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
+import yaml from 'js-yaml';
import { posixRelative } from '../utils.js';
import type { Loader, LoaderContext } from './types.js';
+export interface FileOptions {
+ /**
+ * the parsing function to use for this data
+ * @default JSON.parse or yaml.load, depending on the extension of the file
+ * */
+ parser?: (
+ text: string,
+ ) => Record<string, Record<string, unknown>> | Array<Record<string, unknown>>;
+}
+
/**
* Loads entries from a JSON file. The file must contain an array of objects that contain unique `id` fields, or an object with string keys.
- * @todo Add support for other file types, such as YAML, CSV etc.
* @param fileName The path to the JSON file to load, relative to the content directory.
+ * @param options Additional options for the file loader
*/
-export function file(fileName: string): Loader {
+export function file(fileName: string, options?: FileOptions): Loader {
if (fileName.includes('*')) {
// TODO: AstroError
throw new Error('Glob patterns are not supported in `file` loader. Use `glob` loader instead.');
}
+ let parse: ((text: string) => any) | null = null;
+
+ const ext = fileName.split('.').at(-1);
+ if (ext === 'json') {
+ parse = JSON.parse;
+ } else if (ext === 'yml' || ext === 'yaml') {
+ parse = (text) =>
+ yaml.load(text, {
+ filename: fileName,
+ });
+ }
+ if (options?.parser) parse = options.parser;
+
+ if (parse === null) {
+ // TODO: AstroError
+ throw new Error(
+ `No parser found for file '${fileName}'. Try passing a parser to the \`file\` loader.`,
+ );
+ }
+
async function syncData(filePath: string, { logger, parseData, store, config }: LoaderContext) {
- let json: Array<Record<string, unknown>>;
+ let data: Array<Record<string, unknown>> | Record<string, Record<string, unknown>>;
try {
- const data = await fs.readFile(filePath, 'utf-8');
- json = JSON.parse(data);
+ const contents = await fs.readFile(filePath, 'utf-8');
+ data = parse!(contents);
} catch (error: any) {
logger.error(`Error reading data from ${fileName}`);
logger.debug(error.message);
@@ -28,28 +59,28 @@ export function file(fileName: string): Loader {
const normalizedFilePath = posixRelative(fileURLToPath(config.root), filePath);
- if (Array.isArray(json)) {
- if (json.length === 0) {
+ if (Array.isArray(data)) {
+ if (data.length === 0) {
logger.warn(`No items found in ${fileName}`);
}
- logger.debug(`Found ${json.length} item array in ${fileName}`);
+ logger.debug(`Found ${data.length} item array in ${fileName}`);
store.clear();
- for (const rawItem of json) {
+ for (const rawItem of data) {
const id = (rawItem.id ?? rawItem.slug)?.toString();
if (!id) {
logger.error(`Item in ${fileName} is missing an id or slug field.`);
continue;
}
- const data = await parseData({ id, data: rawItem, filePath });
- store.set({ id, data, filePath: normalizedFilePath });
+ const parsedData = await parseData({ id, data: rawItem, filePath });
+ store.set({ id, data: parsedData, filePath: normalizedFilePath });
}
- } else if (typeof json === 'object') {
- const entries = Object.entries<Record<string, unknown>>(json);
+ } else if (typeof data === 'object') {
+ const entries = Object.entries<Record<string, unknown>>(data);
logger.debug(`Found object with ${entries.length} entries in ${fileName}`);
store.clear();
for (const [id, rawItem] of entries) {
- const data = await parseData({ id, data: rawItem, filePath });
- store.set({ id, data, filePath: normalizedFilePath });
+ const parsedData = await parseData({ id, data: rawItem, filePath });
+ store.set({ id, data: parsedData, filePath: normalizedFilePath });
}
} else {
logger.error(`Invalid data in ${fileName}. Must be an array or object.`);
diff --git a/packages/astro/test/content-layer.test.js b/packages/astro/test/content-layer.test.js
index 5be395386..abf91f363 100644
--- a/packages/astro/test/content-layer.test.js
+++ b/packages/astro/test/content-layer.test.js
@@ -53,11 +53,11 @@ describe('Content Layer', () => {
assert.equal(json.customLoader.length, 5);
});
- it('Returns `file()` loader collection', async () => {
- assert.ok(json.hasOwnProperty('fileLoader'));
- assert.ok(Array.isArray(json.fileLoader));
+ it('Returns json `file()` loader collection', async () => {
+ assert.ok(json.hasOwnProperty('jsonLoader'));
+ assert.ok(Array.isArray(json.jsonLoader));
- const ids = json.fileLoader.map((item) => item.data.id);
+ const ids = json.jsonLoader.map((item) => item.data.id);
assert.deepEqual(ids, [
'labrador-retriever',
'german-shepherd',
@@ -97,6 +97,58 @@ describe('Content Layer', () => {
);
});
+ it('Returns nested json `file()` loader collection', async () => {
+ assert.ok(json.hasOwnProperty('nestedJsonLoader'));
+ assert.ok(Array.isArray(json.nestedJsonLoader));
+
+ const ids = json.nestedJsonLoader.map((item) => item.data.id);
+ assert.deepEqual(ids, ['bluejay', 'robin', 'sparrow', 'cardinal', 'goldfinch']);
+ });
+
+ it('Returns yaml `file()` loader collection', async () => {
+ assert.ok(json.hasOwnProperty('yamlLoader'));
+ assert.ok(Array.isArray(json.yamlLoader));
+
+ const ids = json.yamlLoader.map((item) => item.id);
+ assert.deepEqual(ids, [
+ 'bubbles',
+ 'finn',
+ 'shadow',
+ 'spark',
+ 'splash',
+ 'nemo',
+ 'angel-fish',
+ 'gold-stripe',
+ 'blue-tail',
+ 'bubble-buddy',
+ ]);
+ });
+
+ it('Returns toml `file()` loader collection', async () => {
+ assert.ok(json.hasOwnProperty('tomlLoader'));
+ assert.ok(Array.isArray(json.tomlLoader));
+
+ const ids = json.tomlLoader.map((item) => item.data.id);
+ assert.deepEqual(ids, [
+ 'crown',
+ 'nikes-on-my-feet',
+ 'stars',
+ 'never-let-me-down',
+ 'no-church-in-the-wild',
+ 'family-ties',
+ 'somebody',
+ 'honest',
+ ]);
+ });
+
+ it('Returns nested json `file()` loader collection', async () => {
+ assert.ok(json.hasOwnProperty('nestedJsonLoader'));
+ assert.ok(Array.isArray(json.nestedJsonLoader));
+
+ const ids = json.nestedJsonLoader.map((item) => item.data.id);
+ assert.deepEqual(ids, ['bluejay', 'robin', 'sparrow', 'cardinal', 'goldfinch']);
+ });
+
it('Returns data entry by id', async () => {
assert.ok(json.hasOwnProperty('dataEntry'));
assert.equal(json.dataEntry.filePath?.split(sep).join(posixSep), 'src/data/dogs.json');
@@ -276,10 +328,10 @@ describe('Content Layer', () => {
});
it('Returns `file()` loader collection', async () => {
- assert.ok(json.hasOwnProperty('fileLoader'));
- assert.ok(Array.isArray(json.fileLoader));
+ assert.ok(json.hasOwnProperty('jsonLoader'));
+ assert.ok(Array.isArray(json.jsonLoader));
- const ids = json.fileLoader.map((item) => item.data.id);
+ const ids = json.jsonLoader.map((item) => item.data.id);
assert.deepEqual(ids, [
'labrador-retriever',
'german-shepherd',
@@ -348,7 +400,7 @@ describe('Content Layer', () => {
it('updates collection when data file is changed', async () => {
const rawJsonResponse = await fixture.fetch('/collections.json');
const initialJson = devalue.parse(await rawJsonResponse.text());
- assert.equal(initialJson.fileLoader[0].data.temperament.includes('Bouncy'), false);
+ assert.equal(initialJson.jsonLoader[0].data.temperament.includes('Bouncy'), false);
await fixture.editFile('/src/data/dogs.json', (prev) => {
const data = JSON.parse(prev);
@@ -359,7 +411,7 @@ describe('Content Layer', () => {
await fixture.onNextDataStoreChange();
const updatedJsonResponse = await fixture.fetch('/collections.json');
const updated = devalue.parse(await updatedJsonResponse.text());
- assert.ok(updated.fileLoader[0].data.temperament.includes('Bouncy'));
+ assert.ok(updated.jsonLoader[0].data.temperament.includes('Bouncy'));
await fixture.resetAllFiles();
});
});
diff --git a/packages/astro/test/fixtures/content-layer/package.json b/packages/astro/test/fixtures/content-layer/package.json
index fc73ce6f7..4057b1c35 100644
--- a/packages/astro/test/fixtures/content-layer/package.json
+++ b/packages/astro/test/fixtures/content-layer/package.json
@@ -4,6 +4,7 @@
"private": true,
"dependencies": {
"astro": "workspace:*",
- "@astrojs/mdx": "workspace:*"
+ "@astrojs/mdx": "workspace:*",
+ "toml": "^3.0.0"
}
}
diff --git a/packages/astro/test/fixtures/content-layer/src/content/config.ts b/packages/astro/test/fixtures/content-layer/src/content/config.ts
index 402bad7fc..776c44f68 100644
--- a/packages/astro/test/fixtures/content-layer/src/content/config.ts
+++ b/packages/astro/test/fixtures/content-layer/src/content/config.ts
@@ -1,6 +1,7 @@
import { defineCollection, z, reference } from 'astro:content';
import { file, glob } from 'astro/loaders';
import { loader } from '../loaders/post-loader.js';
+import { parse as parseToml } from 'toml';
const blog = defineCollection({
loader: loader({ url: 'https://jsonplaceholder.typicode.com/posts' }),
@@ -118,6 +119,27 @@ const cats = defineCollection({
}),
});
+const fish = defineCollection({
+ loader: file('src/data/fish.yaml'),
+ schema: z.object({
+ name: z.string(),
+ breed: z.string(),
+ age: z.number(),
+ }),
+});
+
+const birds = defineCollection({
+ loader: file('src/data/birds.json', {
+ parser: (text) => JSON.parse(text).birds,
+ }),
+ schema: z.object({
+ id: z.string(),
+ name: z.string(),
+ breed: z.string(),
+ age: z.number(),
+ }),
+});
+
// Absolute paths should also work
const absoluteRoot = new URL('../../content/space', import.meta.url);
@@ -198,14 +220,36 @@ const increment = defineCollection({
},
});
+const artists = defineCollection({
+ loader: file('src/data/music.toml', { parser: (text) => parseToml(text).artists }),
+ schema: z.object({
+ id: z.string(),
+ name: z.string(),
+ genre: z.string().array(),
+ }),
+});
+
+const songs = defineCollection({
+ loader: file('src/data/music.toml', { parser: (text) => parseToml(text).songs }),
+ schema: z.object({
+ id: z.string(),
+ name: z.string(),
+ artists: z.array(reference('artists')),
+ }),
+});
+
export const collections = {
blog,
dogs,
cats,
+ fish,
+ birds,
numbers,
spacecraft,
increment,
images,
+ artists,
+ songs,
probes,
rodents,
};
diff --git a/packages/astro/test/fixtures/content-layer/src/data/birds.json b/packages/astro/test/fixtures/content-layer/src/data/birds.json
new file mode 100644
index 000000000..3e7d83795
--- /dev/null
+++ b/packages/astro/test/fixtures/content-layer/src/data/birds.json
@@ -0,0 +1,34 @@
+{
+ "birds": [
+ {
+ "id": "bluejay",
+ "name": "Blue Jay",
+ "breed": "Cyanocitta cristata",
+ "age": 3
+ },
+ {
+ "id": "robin",
+ "name": "Robin",
+ "breed": "Turdus migratorius",
+ "age": 2
+ },
+ {
+ "id": "sparrow",
+ "name": "Sparrow",
+ "breed": "Passer domesticus",
+ "age": 1
+ },
+ {
+ "id": "cardinal",
+ "name": "Cardinal",
+ "breed": "Cardinalis cardinalis",
+ "age": 4
+ },
+ {
+ "id": "goldfinch",
+ "name": "Goldfinch",
+ "breed": "Spinus tristis",
+ "age": 2
+ }
+ ]
+}
diff --git a/packages/astro/test/fixtures/content-layer/src/data/fish.yaml b/packages/astro/test/fixtures/content-layer/src/data/fish.yaml
new file mode 100644
index 000000000..a9ac4e435
--- /dev/null
+++ b/packages/astro/test/fixtures/content-layer/src/data/fish.yaml
@@ -0,0 +1,42 @@
+# map of ids to data
+bubbles:
+ name: "Bubbles"
+ breed: "Goldfish"
+ age: 2
+finn:
+ name: "Finn"
+ breed: "Betta"
+ age: 1
+shadow:
+ name: "Shadow"
+ breed: "Catfish"
+ age: 3
+spark:
+ name: "Spark"
+ breed: "Tetra"
+ age: 1
+splash:
+ name: "Splash"
+ breed: "Guppy"
+ age: 2
+nemo:
+ name: "Nemo"
+ breed: "Clownfish"
+ age: 3
+angel-fish:
+ name: "Angel Fish"
+ breed: "Angelfish"
+ age: 4
+gold-stripe:
+ name: "Gold Stripe"
+ breed: "Molly"
+ age: 1
+blue-tail:
+ name: "Blue Tail"
+ breed: "Swordtail"
+ age: 2
+bubble-buddy:
+ name: "Bubble Buddy"
+ breed: "Betta"
+ age: 3
+
diff --git a/packages/astro/test/fixtures/content-layer/src/data/music.toml b/packages/astro/test/fixtures/content-layer/src/data/music.toml
new file mode 100644
index 000000000..89e15c9bb
--- /dev/null
+++ b/packages/astro/test/fixtures/content-layer/src/data/music.toml
@@ -0,0 +1,89 @@
+[[artists]]
+id = "kendrick-lamar"
+name = "Kendrick Lamar"
+genre = ["Hip-Hop", "Rap"]
+
+[[artists]]
+id = "mac-miller"
+name = "Mac Miller"
+genre = ["Hip-Hop", "Rap"]
+
+[[artists]]
+id = "jid"
+name = "JID"
+genre = ["Hip-Hop", "Rap"]
+
+[[artists]]
+id = "yasiin-bey"
+name = "Yasiin Bey"
+genre = ["Hip-Hop", "Rap"]
+
+[[artists]]
+id = "kanye-west"
+name = "Kanye West"
+genre = ["Hip-Hop", "Rap"]
+
+[[artists]]
+id = "jay-z"
+name = "JAY-Z"
+genre = ["Hip-Hop", "Rap"]
+
+[[artists]]
+id = "j-ivy"
+name = "J. Ivy"
+genre = ["Spoken Word", "Rap"]
+
+[[artists]]
+id = "frank-ocean"
+name = "Frank Ocean"
+genre = ["R&B", "Hip-Hop"]
+
+[[artists]]
+id = "the-dream"
+name = "The-Dream"
+genre = ["R&B", "Hip-Hop"]
+
+[[artists]]
+id = "baby-keem"
+name = "Baby Keem"
+genre = ["Hip-Hop", "Rap"]
+
+[[songs]]
+id = "crown"
+name = "Crown"
+artists = ["kendrick-lamar"]
+
+[[songs]]
+id = "nikes-on-my-feet"
+name = "Nikes on My Feet"
+artists = ["mac-miller"]
+
+[[songs]]
+id = "stars"
+name = "Stars"
+artists = ["jid", "yasiin-bey"]
+
+[[songs]]
+id = "never-let-me-down"
+name = "Never Let Me Down"
+artists = ["kanye-west", "jay-z", "j-ivy"]
+
+[[songs]]
+id = "no-church-in-the-wild"
+name = "No Church In The Wild"
+artists = ["jay-z", "kanye-west", "frank-ocean", "the-dream"]
+
+[[songs]]
+id = "family-ties"
+name = "family ties"
+artists = ["kendrick-lamar", "baby-keem"]
+
+[[songs]]
+id = "somebody"
+name = "Somebody"
+artists = ["jid"]
+
+[[songs]]
+id = "honest"
+name = "HONEST"
+artists = ["baby-keem"]
diff --git a/packages/astro/test/fixtures/content-layer/src/pages/collections.json.js b/packages/astro/test/fixtures/content-layer/src/pages/collections.json.js
index 761ff7dba..6bced27e4 100644
--- a/packages/astro/test/fixtures/content-layer/src/pages/collections.json.js
+++ b/packages/astro/test/fixtures/content-layer/src/pages/collections.json.js
@@ -5,7 +5,7 @@ export async function GET() {
const customLoader = await getCollection('blog', (entry) => {
return entry.data.id < 6;
});
- const fileLoader = await getCollection('dogs');
+ const jsonLoader = await getCollection('dogs');
const dataEntry = await getEntry('dogs', 'beagle');
@@ -23,10 +23,17 @@ export async function GET() {
const simpleLoaderObject = await getCollection('rodents')
const probes = await getCollection('probes');
+
+ const yamlLoader = await getCollection('fish');
+
+ const tomlLoader = await getCollection('songs');
+
+ const nestedJsonLoader = await getCollection('birds');
+
return new Response(
devalue.stringify({
customLoader,
- fileLoader,
+ jsonLoader,
dataEntry,
simpleLoader,
simpleLoaderObject,
@@ -35,7 +42,10 @@ export async function GET() {
referencedEntry,
increment,
images,
- probes
- })
+ probes,
+ yamlLoader,
+ tomlLoader,
+ nestedJsonLoader,
+ }),
);
}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 7a5356446..dffaf1f8e 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -2713,6 +2713,9 @@ importers:
astro:
specifier: workspace:*
version: link:../../..
+ toml:
+ specifier: ^3.0.0
+ version: 3.0.0
packages/astro/test/fixtures/content-layer-markdoc:
dependencies:
@@ -10327,6 +10330,9 @@ packages:
resolution: {integrity: sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==}
engines: {node: '>=0.6'}
+ toml@3.0.0:
+ resolution: {integrity: sha512-y/mWCZinnvxjTKYhJ+pYxwD0mRLVvOtdS2Awbgxln6iEnt4rk0yBxeSBHkGJcPucRiG0e55mwWp+g/05rsrd6w==}
+
totalist@3.0.1:
resolution: {integrity: sha512-sf4i37nQ2LBx4m3wB74y+ubopq6W/dIzXg0FDGjsYnZHVa1Da8FH853wlL2gtUhg+xJXjfk3kUZS3BRoQeoQBQ==}
engines: {node: '>=6'}
@@ -16512,6 +16518,8 @@ snapshots:
toidentifier@1.0.1: {}
+ toml@3.0.0: {}
+
totalist@3.0.1: {}
tough-cookie@4.1.3: