// ner.test.js
  1. const { serialize } = require("./helpers");
  2. const assert = require("assert");
  // Register the CodeceptJS feature (suite) that the scenario in this file belongs to.
  Feature("NER");

  // Labeling config used by the scenario: a HyperTextLabels control named "ner"
  // offering the labels "Term" and "Abbr", bound to the HyperText object named "text"
  // (value="$text"). The markup is assembled line by line; joining with "\n" keeps the
  // leading and trailing newline of the equivalent multi-line literal.
  const configSimple = [
    "",
    "<View>",
    '<HyperTextLabels name="ner" toName="text">',
    '<Label value="Term"></Label>',
    '<Label value="Abbr"></Label>',
    "</HyperTextLabels>",
    '<HyperText name="text" value="$text"></HyperText>',
    "</View>",
    "",
  ].join("\n");
  // HTML fixture that is labeled by the scenario.
  //
  // CodeceptJS can only click the middle of an element. Block elements are 100% wide,
  // so their middle point may fall outside the text depending on the screen size.
  // To click at an exact character position, the fixture wraps reference points in
  // two-character spans carrying a data-testid; a `click()` on such a span lands
  // between its two characters.
  //
  // Whitespace inside the fixture is significant: every child of <div> is indented by
  // exactly two spaces. With the markup stripped, this puts "Named-entity recognition"
  // at character offsets 4..28 and "NER" at 58..61, which are the `globalOffsets`
  // expected in `results`. Do not re-indent or reformat the template body.
  const text = `<div>
  <h2><span data-testid="r1-start">.N</span>amed-<span data-testid="r1-mid">entity</span> recognitio<span data-testid="r1-end">n.</span></h2>
  <p><b>Named-entity recognition</b> (<b>NER</b>) (also known as <b>entity identification</b>, <b>entity chunking</b> and <b>entity extraction</b>) is a subtask of <a href="/wiki/Information_extraction" title="Information extraction">information extraction</a> that seeks to locate and classify <a href="/wiki/Named_entity" title="Named entity">named entities</a> mentioned in <a href="/wiki/Unstructured_data" title="Unstructured data">unstructured text</a> into pre-defined categories such as person names, organizations, locations, <a href="/wiki/Medical_classification" title="Medical classification">medical codes</a>, time expressions, quantities, monetary values, percentages, etc.</p>
  <p>Most research on NER systems has been structured as taking an unannotated block of text, such as this one:</p>
  <blockquote class="templatequote"><p>Jim bought 300 shares of Acme Corp. in 2006.</p></blockquote>
  <p>And producing an annotated block of text that highlights the names of entities:</p>
  <blockquote class="templatequote"><p>[Jim]<sub>Person</sub> bought 300 shares of [Acme Corp.]<sub>Organization</sub> in [2006]<sub>Time</sub>.</p></blockquote>
</div>`;
  // Expected `value` payloads of the two serialized regions, in creation order.
  // `start`/`end` are XPaths into the fixture; the offsets are positions inside the
  // text nodes those XPaths point at.
  const results = [
    // Region 1: the heading text, labeled "Term".
    {
      start: "/div[1]/h2[1]/span[1]/text()[1]",
      startOffset: 1,
      end: "/div[1]/h2[1]/span[3]/text()[1]",
      endOffset: 1,
      hypertextlabels: ["Term"],
      text: "Named-entity recognition",
      globalOffsets: { end: 28, start: 4 },
    },
    // Region 2: the second <b> of the first paragraph, labeled "Abbr".
    {
      start: "/div[1]/p[1]/b[2]/text()[1]",
      startOffset: 0,
      end: "/div[1]/p[1]/b[2]/text()[1]",
      endOffset: 3,
      hypertextlabels: ["Abbr"],
      text: "NER",
      globalOffsets: { end: 61, start: 58 },
    },
  ];
  // Scenario: label two regions in the HyperText fixture, link them with a relation,
  // and verify the serialized annotation.
  //
  // The `I.*` steps are not awaited individually; the awaited `I.executeScript` call
  // is the only point where the scenario waits for a value.
  Scenario("NER labeling for HyperText", async ({ I, LabelStudio }) => {
    const params = {
      config: configSimple,
      data: { text },
    };

    I.amOnPage("/");
    LabelStudio.init(params);

    // create regions inside iframe
    I.switchTo("iframe");

    // Region 1: press "1" (presumably the hotkey of the first label, "Term"), then
    // click at the start anchor and shift-click at the end anchor of the heading.
    I.pressKey("1");
    I.click("[data-testid=r1-start]");
    I.pressKeyDown("Shift");
    I.click("[data-testid=r1-end]");
    I.pressKeyUp("Shift");

    // Region 2: press "2" (presumably the hotkey of the second label, "Abbr"), then
    // double-click the second <b> element ("NER").
    I.pressKey("2");
    I.doubleClick("b:nth-child(2)");

    // Relation: click inside region 1, press alt+r (presumably the relation hotkey —
    // the UI is expected to show one relation afterwards), then click region 2.
    I.click("[data-testid=r1-mid]");
    I.pressKey(["alt", "r"]);
    I.click("b:nth-child(2)");

    // Leave the iframe before checking the surrounding UI.
    I.switchTo();
    I.see("Relations (1)");

    const result = await I.executeScript(serialize);

    // Strict assertions: the legacy loose variants compare with `==` and would let
    // type-coerced mismatches (e.g. "3" vs 3) pass silently.
    assert.strictEqual(result.length, 3);
    assert.deepStrictEqual(result[0].value, results[0]);
    assert.deepStrictEqual(result[1].value, results[1]);
    assert.strictEqual(result[2].type, "relation");
    assert.strictEqual(result[2].from_id, result[0].id);
    assert.strictEqual(result[2].to_id, result[1].id);
  });