import { Labels, LabelStudio } from "@humansignal/frontend-test/helpers/LSF";
import { RichText } from "@humansignal/frontend-test/helpers/LSF/RichText";
import {
  multilineTextData,
  simpleHyperTextConfig,
  simpleHyperTextData,
  simpleTextConfig,
  simpleTextData,
} from "../../data/ner/emoji";

/**
 * Checks the offsets of serialized regions when the labeled text contains an
 * emoji.
 *
 * Two different units are asserted throughout the suite:
 *  - `start`/`end` of plain-text regions and `globalOffsets` of hypertext
 *    regions are expected in code points (the emoji counts as 1);
 *  - `startOffset`/`endOffset` of hypertext regions are expected in string
 *    length units, i.e. UTF-16 code units (the emoji counts as 2).
 */
describe("NER - Emoji - Text", () => {
  // The emoji is written as a code point escape so the expectations survive a
  // mis-encoded checkout of this file. It has to be a single astral code point
  // for the offsets below to add up.
  // NOTE(review): assumed to be U+1F431 (cat face) - confirm that it matches
  // the emoji used by the fixtures in ../../data/ner/emoji.
  const CAT = "\u{1F431}";

  /**
   * Initializes Label Studio and waits until its objects are ready.
   *
   * @param {*} config - labeling config passed to `.config()`
   * @param {*} data - task data passed to `.data()`
   * @param {Array} result - annotation result passed to `.withResult()`
   */
  const initLabelStudio = (config, data, result) => {
    LabelStudio.params().config(config).data(data).withResult(result).init();
    LabelStudio.waitForObjectsReady();
  };

  /**
   * Asserts that a region exists for each of the given texts.
   *
   * @param {string[]} texts - texts handed to `RichText.hasRegionWithText`
   */
  const expectRegionsWithText = (texts) => {
    for (const text of texts) {
      RichText.hasRegionWithText(text);
    }
  };

  /**
   * Compares the fields asserted for a plain-text region value.
   *
   * @param {object} actual - `value` of a serialized result
   * @param {object} expected - reference `{ start, end, text }`
   */
  const expectTextValue = (actual, expected) => {
    expect(actual.start).to.eq(expected.start);
    expect(actual.end).to.eq(expected.end);
    expect(actual.text).to.eq(expected.text);
  };

  /**
   * Compares the fields asserted for a hypertext region value.
   *
   * @param {object} actual - `value` of a serialized result
   * @param {object} expected - reference with `start`, `end`, `globalOffsets`,
   *   `startOffset`, `endOffset` and `text`
   */
  const expectHyperTextValue = (actual, expected) => {
    expect(actual.start).to.eq(expected.start);
    expect(actual.end).to.eq(expected.end);
    expect(actual.globalOffsets.start).to.eq(expected.globalOffsets.start);
    expect(actual.globalOffsets.end).to.eq(expected.globalOffsets.end);
    expect(actual.startOffset).to.eq(expected.startOffset);
    expect(actual.endOffset).to.eq(expected.endOffset);
    expect(actual.text).to.eq(expected.text);
  };

  /**
   * Serializes the current annotation, checks its first result, then
   * re-initializes Label Studio with that serialized result and checks that
   * the regions are present and that serializing again gives the same values.
   *
   * @param {object} options
   * @param {*} options.config - labeling config used for the reload
   * @param {*} options.data - task data used for the reload
   * @param {string[]} options.regionTexts - region texts expected after reload
   * @param {(value: object) => void} options.expectValue - assertions applied
   *   to `results[0].value` both before and after the reload
   */
  const expectSerializedRoundTrip = ({ config, data, regionTexts, expectValue }) => {
    LabelStudio.serialize().then((results) => {
      expectValue(results[0].value);

      initLabelStudio(config, data, results);
      expectRegionsWithText(regionTexts);

      LabelStudio.serialize().then((reloadedResults) => {
        expectValue(reloadedResults[0].value);
      });
    });
  };

  const refTextResultValue = {
    start: 21,
    end: 25,
    text: "test",
  };

  it("Should calculate offsets by code points in text", () => {
    initLabelStudio(simpleTextConfig, simpleTextData, []);

    Labels.select("region");
    RichText.selectText("test");
    expectRegionsWithText(["test"]);

    expectSerializedRoundTrip({
      config: simpleTextConfig,
      data: simpleTextData,
      regionTexts: ["test"],
      expectValue: (value) => expectTextValue(value, refTextResultValue),
    });
  });

  const refMultilineTextResultValue = {
    start: 2,
    end: 27,
    // The expected text contains a literal backslash followed by "n".
    text: `Warning:\\n${CAT} This is a test`,
  };

  it("Should calculate offsets by code points in multiline text", () => {
    const regionTexts = ["Warning:", `${CAT} This is a test`];

    initLabelStudio(simpleTextConfig, multilineTextData, []);

    Labels.select("region");
    RichText.selectBetweenTexts("Warning", "test");
    expectRegionsWithText(regionTexts);

    expectSerializedRoundTrip({
      config: simpleTextConfig,
      data: multilineTextData,
      regionTexts,
      expectValue: (value) => expectTextValue(value, refMultilineTextResultValue),
    });
  });

  const refHyperTextResultValue = {
    start: "/article[1]/p[1]/text()[1]",
    end: "/article[1]/p[1]/text()[1]",
    text: "test",
    globalOffsets: {
      start: 23,
      end: 27,
    },
    startOffset: 13,
    endOffset: 17,
  };

  it("Should calculate global offsets by code points and relative offsets by string length in hypertext", () => {
    initLabelStudio(simpleHyperTextConfig, simpleHyperTextData, []);

    Labels.select("region");
    RichText.selectText("test");
    expectRegionsWithText(["test"]);

    expectSerializedRoundTrip({
      config: simpleHyperTextConfig,
      data: simpleHyperTextData,
      regionTexts: ["test"],
      expectValue: (value) => expectHyperTextValue(value, refHyperTextResultValue),
    });
  });

  const refHyperTextMultilineResultValue = {
    start: "/article[1]/h2[1]/text()[1]",
    end: "/article[1]/p[1]/text()[1]",
    // The expected text contains a literal backslash followed by "n".
    text: `Warning:\\n${CAT} This is a test`,
    globalOffsets: {
      // Offsets in code points (emoji + " " = 2 code points).
      start: 2,
      end: 27,
    },
    // Offsets in in-browser characters (the emoji is 2 characters, + " " = 3).
    startOffset: 3,
    endOffset: 17,
  };

  it("Should calculate global offsets by code points and relative offsets by string length in multiline hypertext", () => {
    const regionTexts = ["Warning:", `${CAT} This is a test`];

    initLabelStudio(simpleHyperTextConfig, simpleHyperTextData, []);

    Labels.select("region");
    RichText.selectBetweenTexts("Warning", "test");
    expectRegionsWithText(regionTexts);

    expectSerializedRoundTrip({
      config: simpleHyperTextConfig,
      data: simpleHyperTextData,
      regionTexts,
      expectValue: (value) => expectHyperTextValue(value, refHyperTextMultilineResultValue),
    });
  });

  it("Heuristic edge case", () => {
    // The markup has to stay on one line inside the literal: a quoted string
    // cannot span physical lines. The <p> wrapper is what the asserted
    // "/p[1]/text()[1]" XPath refers to, and the text node has to start with
    // the emoji followed by a newline for the offsets below to hold
    // (2 code points vs. 3 UTF-16 units before "means").
    // NOTE(review): the <p> wrapper is reconstructed from the assertions -
    // confirm against the upstream test.
    const data = { text: `<p>${CAT}\nmeans cat</p>` };
    const expectedValue = {
      start: "/p[1]/text()[1]",
      end: "/p[1]/text()[1]",
      text: "means",
      globalOffsets: {
        start: 2,
        end: 7,
      },
      startOffset: 3,
      endOffset: 8,
    };

    initLabelStudio(simpleHyperTextConfig, data, []);

    Labels.select("region");
    RichText.selectText("means");
    expectRegionsWithText(["means"]);

    LabelStudio.serialize().then((results) => {
      expectHyperTextValue(results[0].value, expectedValue);
    });
  });
});