From 006e6d0e442df9c4932ca83b4c66a7719adf0fbe Mon Sep 17 00:00:00 2001 From: Minor Gordon Date: Wed, 31 Jan 2024 16:48:14 -0500 Subject: [PATCH] handle 403 on lookup file --- __tests__/SchemaDotOrgDataSet.test.ts | 12 ++++++++++ package.json | 2 +- src/SchemaDotOrgDataSet.ts | 34 ++++++++++++++++----------- 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/__tests__/SchemaDotOrgDataSet.test.ts b/__tests__/SchemaDotOrgDataSet.test.ts index be3f47c..52afa39 100644 --- a/__tests__/SchemaDotOrgDataSet.test.ts +++ b/__tests__/SchemaDotOrgDataSet.test.ts @@ -137,4 +137,16 @@ describe("SchemaDotOrgDataSet", () => { await administrativeAreaClassSubset.samplePagesByIri(); expect(Object.keys(samplePagesByIri)).toHaveLength(1); }); + + it("handles 4xx on a class lookup file", async () => { + if (process.env.CI) { + return; + } + + const paintingClassSubset = classSubsets.find( + (classSubset) => classSubset.className === "Painting" + ); + expect(paintingClassSubset).not.toBeUndefined(); + expect(await paintingClassSubset?.payLevelDomainSubsets()).toHaveLength(0); + }); }); diff --git a/package.json b/package.json index 574c0e3..8ba55e8 100644 --- a/package.json +++ b/package.json @@ -63,5 +63,5 @@ }, "type": "module", "types": "dist/index.d.ts", - "version": "3.0.4" + "version": "3.0.5" } diff --git a/src/SchemaDotOrgDataSet.ts b/src/SchemaDotOrgDataSet.ts index 453ca48..d3ad7b5 100644 --- a/src/SchemaDotOrgDataSet.ts +++ b/src/SchemaDotOrgDataSet.ts @@ -267,12 +267,6 @@ namespace SchemaDotOrgDataSet { this.showProgress = showProgress; } - private async lookupCsvString(): Promise { - return ( - await streamToBuffer(await this.httpClient.get(this.lookupFileUrl)) - ).toString("utf8"); - } - async *dataset() { for (let fileI = 0; fileI < this.numberOfFiles; fileI++) { for await (const quad of parseNQuadsStream( @@ -308,14 +302,26 @@ namespace SchemaDotOrgDataSet { return {}; } - const pldDataFileNames = Papa.parse(await this.lookupCsvString(), { - header: true, - }).data.reduce((map: Record, row: any) => { - if (row["pld"].length > 0) { - map[row["pld"]] = row["file_lookup"]; - } - return map; - }, {}); + let pldDataFileNames: Record; + try { + pldDataFileNames = Papa.parse( + ( + await streamToBuffer(await this.httpClient.get(this.lookupFileUrl)) + ).toString("utf8"), + { + header: true, + } + ).data.reduce((map: Record, row: any) => { + if (row["pld"].length > 0) { + map[row["pld"]] = row["file_lookup"]; + } + return map; + }, {}); + } catch (e) { + // The 2022-12 Painting lookup returns 403 + logger.error("error getting and parsing %s: %s", this.lookupFileUrl, e); + return {}; + } return ( Papa.parse(