| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192 |
const { serialize } = require("./helpers");
const assert = require("assert");

// Opens the "NER" suite. `Feature` is not defined in this file; it is presumably
// the CodeceptJS global that groups the scenario declared below.
Feature("NER");
// Labeling config passed to LabelStudio.init() by the scenario below:
// a HyperTextLabels control named "ner" with two labels ("Term", "Abbr"),
// attached to the HyperText object named "text" whose content comes from `$text`.
const configSimple = `
<View>
<HyperTextLabels name="ner" toName="text">
<Label value="Term"></Label>
<Label value="Abbr"></Label>
</HyperTextLabels>
<HyperText name="text" value="$text"></HyperText>
</View>
`;
// CodeceptJS can only click in the middle of an element.
// Block elements take 100% width, so their midpoint can fall outside the text
// depending on screen size. To click at an exact point, the markup wraps the
// reference characters in spans carrying a data-testid.
// A `click()` on a two-character span lands between its two characters.
//
// NOTE: the two-space indentation inside the literal is significant. It is part
// of the document's text content, and the `globalOffsets` expected in `results`
// (4..28 for the heading selection, 58..61 for "NER") only line up with it present.
const text = `<div>
  <h2><span data-testid="r1-start">.N</span>amed-<span data-testid="r1-mid">entity</span> recognitio<span data-testid="r1-end">n.</span></h2>
  <p><b>Named-entity recognition</b> (<b>NER</b>) (also known as <b>entity identification</b>, <b>entity chunking</b> and <b>entity extraction</b>) is a subtask of <a href="/wiki/Information_extraction" title="Information extraction">information extraction</a> that seeks to locate and classify <a href="/wiki/Named_entity" title="Named entity">named entities</a> mentioned in <a href="/wiki/Unstructured_data" title="Unstructured data">unstructured text</a> into pre-defined categories such as person names, organizations, locations, <a href="/wiki/Medical_classification" title="Medical classification">medical codes</a>, time expressions, quantities, monetary values, percentages, etc.</p>
  <p>Most research on NER systems has been structured as taking an unannotated block of text, such as this one:</p>
  <blockquote class="templatequote"><p>Jim bought 300 shares of Acme Corp. in 2006.</p></blockquote>
  <p>And producing an annotated block of text that highlights the names of entities:</p>
  <blockquote class="templatequote"><p>[Jim]<sub>Person</sub> bought 300 shares of [Acme Corp.]<sub>Organization</sub> in [2006]<sub>Time</sub>.</p></blockquote>
</div>`;
// Expected `value` payloads of the two regions the scenario creates, in creation order.
// `start`/`end` are XPaths to text nodes inside the fixture, with character offsets
// within those nodes; `globalOffsets` are character positions of the same selection
// (their span equals the length of `text` for both entries).
const results = [
  {
    // Heading selection: from after "." in the first span to after "n" in the third.
    start: "/div[1]/h2[1]/span[1]/text()[1]",
    startOffset: 1,
    end: "/div[1]/h2[1]/span[3]/text()[1]",
    endOffset: 1,
    hypertextlabels: ["Term"],
    text: "Named-entity recognition",
    globalOffsets: {
      end: 28,
      start: 4,
    },
  },
  {
    // The whole content of the second <b> in the first paragraph.
    start: "/div[1]/p[1]/b[2]/text()[1]",
    startOffset: 0,
    end: "/div[1]/p[1]/b[2]/text()[1]",
    endOffset: 3,
    hypertextlabels: ["Abbr"],
    text: "NER",
    globalOffsets: {
      end: 61,
      start: 58,
    },
  },
];
// End-to-end check: create two labeled regions in the HyperText iframe, link them
// with a relation, then verify the serialized annotation.
Scenario("NER labeling for HyperText", async ({ I, LabelStudio }) => {
  const params = {
    config: configSimple,
    data: { text },
  };

  I.amOnPage("/");
  LabelStudio.init(params);

  // create regions inside iframe
  I.switchTo("iframe");

  // First region: "1" presumably is the hotkey for the first label; the expected
  // result for this region carries "Term". Click, then Shift+click, to select
  // from the r1-start reference span to the r1-end one.
  I.pressKey("1");
  I.click("[data-testid=r1-start]");
  I.pressKeyDown("Shift");
  I.click("[data-testid=r1-end]");
  I.pressKeyUp("Shift");

  // Second region: "2" presumably picks the second label ("Abbr" in the expected
  // result); the double click targets the <b> matched by b:nth-child(2).
  I.pressKey("2");
  I.doubleClick("b:nth-child(2)");

  // Relation: click inside the first region, press Alt+R (presumably the
  // "create relation" hotkey), then click the second region as the target.
  I.click("[data-testid=r1-mid]");
  I.pressKey(["alt", "r"]);
  I.click("b:nth-child(2)");

  // Back to the top-level document, where the relations counter is checked.
  I.switchTo();
  I.see("Relations (1)");

  const result = await I.executeScript(serialize);

  // Strict assertions: no type coercion when comparing against the expected payloads.
  assert.strictEqual(result.length, 3);
  assert.deepStrictEqual(result[0].value, results[0]);
  assert.deepStrictEqual(result[1].value, results[1]);
  assert.strictEqual(result[2].type, "relation");
  assert.strictEqual(result[2].from_id, result[0].id);
  assert.strictEqual(result[2].to_id, result[1].id);
});
|