// emoji.cy.ts (8.2 KB)
  1. import { Labels, LabelStudio } from "@humansignal/frontend-test/helpers/LSF";
  2. import { RichText } from "@humansignal/frontend-test/helpers/LSF/RichText";
  3. import {
  4. multilineTextData,
  5. simpleHyperTextConfig,
  6. simpleHyperTextData,
  7. simpleTextConfig,
  8. simpleTextData,
  9. } from "../../data/ner/emoji";
  10. describe("NER - Emoji - Text", () => {
  11. const refTextResultValue = {
  12. start: 21,
  13. end: 25,
  14. text: "test",
  15. };
  16. it("Should calculate offsets by code points in text", () => {
  17. LabelStudio.params().config(simpleTextConfig).data(simpleTextData).withResult([]).init();
  18. LabelStudio.waitForObjectsReady();
  19. Labels.select("region");
  20. RichText.selectText("test");
  21. RichText.hasRegionWithText("test");
  22. LabelStudio.serialize().then((results) => {
  23. const resultValue = results[0].value;
  24. expect(resultValue.start).to.eq(refTextResultValue.start);
  25. expect(resultValue.end).to.eq(refTextResultValue.end);
  26. expect(resultValue.text).to.eq(refTextResultValue.text);
  27. LabelStudio.params().config(simpleTextConfig).data(simpleTextData).withResult(results).init();
  28. LabelStudio.waitForObjectsReady();
  29. RichText.hasRegionWithText("test");
  30. LabelStudio.serialize().then((results) => {
  31. const resultValue = results[0].value;
  32. expect(resultValue.start).to.eq(refTextResultValue.start);
  33. expect(resultValue.end).to.eq(refTextResultValue.end);
  34. expect(resultValue.text).to.eq(refTextResultValue.text);
  35. });
  36. });
  37. });
  38. const refMultilineTextResultValue = {
  39. start: 2,
  40. end: 27,
  41. text: "Warning:\\n🐱 This is a test",
  42. };
  43. it("Should calculate offsets by code points in multiline text", () => {
  44. LabelStudio.params().config(simpleTextConfig).data(multilineTextData).withResult([]).init();
  45. LabelStudio.waitForObjectsReady();
  46. Labels.select("region");
  47. RichText.selectBetweenTexts("Warning", "test");
  48. RichText.hasRegionWithText("Warning:");
  49. RichText.hasRegionWithText("🐱 This is a test");
  50. LabelStudio.serialize().then((results) => {
  51. const resultValue = results[0].value;
  52. expect(resultValue.start).to.eq(refMultilineTextResultValue.start);
  53. expect(resultValue.end).to.eq(refMultilineTextResultValue.end);
  54. expect(resultValue.text).to.eq(refMultilineTextResultValue.text);
  55. LabelStudio.params().config(simpleTextConfig).data(multilineTextData).withResult(results).init();
  56. LabelStudio.waitForObjectsReady();
  57. RichText.hasRegionWithText("Warning:");
  58. RichText.hasRegionWithText("🐱 This is a test");
  59. LabelStudio.serialize().then((results) => {
  60. const resultValue = results[0].value;
  61. expect(resultValue.start).to.eq(refMultilineTextResultValue.start);
  62. expect(resultValue.end).to.eq(refMultilineTextResultValue.end);
  63. expect(resultValue.text).to.eq(refMultilineTextResultValue.text);
  64. });
  65. });
  66. });
  67. const refHyperTextResultValue = {
  68. start: "/article[1]/p[1]/text()[1]",
  69. end: "/article[1]/p[1]/text()[1]",
  70. text: "test",
  71. globalOffsets: {
  72. start: 23,
  73. end: 27,
  74. },
  75. startOffset: 13,
  76. endOffset: 17,
  77. };
  78. it("Should calculate global offsets by code points and relative offsets by string length in hypertext", () => {
  79. LabelStudio.params().config(simpleHyperTextConfig).data(simpleHyperTextData).withResult([]).init();
  80. LabelStudio.waitForObjectsReady();
  81. Labels.select("region");
  82. RichText.selectText("test");
  83. RichText.hasRegionWithText("test");
  84. LabelStudio.serialize().then((results) => {
  85. const resultValue = results[0].value;
  86. expect(resultValue.start).to.eq(refHyperTextResultValue.start);
  87. expect(resultValue.end).to.eq(refHyperTextResultValue.end);
  88. expect(resultValue.globalOffsets.start).to.eq(refHyperTextResultValue.globalOffsets.start);
  89. expect(resultValue.globalOffsets.end).to.eq(refHyperTextResultValue.globalOffsets.end);
  90. expect(resultValue.startOffset).to.eq(refHyperTextResultValue.startOffset);
  91. expect(resultValue.endOffset).to.eq(refHyperTextResultValue.endOffset);
  92. expect(resultValue.text).to.eq(refHyperTextResultValue.text);
  93. LabelStudio.params().config(simpleHyperTextConfig).data(simpleHyperTextData).withResult(results).init();
  94. LabelStudio.waitForObjectsReady();
  95. RichText.hasRegionWithText("test");
  96. LabelStudio.serialize().then((results) => {
  97. const resultValue = results[0].value;
  98. expect(resultValue.start).to.eq(refHyperTextResultValue.start);
  99. expect(resultValue.end).to.eq(refHyperTextResultValue.end);
  100. expect(resultValue.globalOffsets.start).to.eq(refHyperTextResultValue.globalOffsets.start);
  101. expect(resultValue.globalOffsets.end).to.eq(refHyperTextResultValue.globalOffsets.end);
  102. expect(resultValue.startOffset).to.eq(refHyperTextResultValue.startOffset);
  103. expect(resultValue.endOffset).to.eq(refHyperTextResultValue.endOffset);
  104. expect(resultValue.text).to.eq(refHyperTextResultValue.text);
  105. });
  106. });
  107. });
  108. const refHyperTextMultilineResultValue = {
  109. start: "/article[1]/h2[1]/text()[1]",
  110. end: "/article[1]/p[1]/text()[1]",
  111. text: "Warning:\\n🐱 This is a test",
  112. globalOffsets: {
  113. // this is offset in codepoints ("🐱" + " " = 2 codepoints)
  114. start: 2,
  115. end: 27,
  116. },
  117. // this is offset in in-browser characters ("🐱" is 2 characters + " " = 3)
  118. startOffset: 3,
  119. endOffset: 17,
  120. };
  121. it("Should calculate global offsets by code points and relative offsets by string length in multiline hypertext", () => {
  122. LabelStudio.params().config(simpleHyperTextConfig).data(simpleHyperTextData).withResult([]).init();
  123. LabelStudio.waitForObjectsReady();
  124. Labels.select("region");
  125. RichText.selectBetweenTexts("Warning", "test");
  126. RichText.hasRegionWithText("Warning:");
  127. RichText.hasRegionWithText("🐱 This is a test");
  128. LabelStudio.serialize().then((results) => {
  129. const resultValue = results[0].value;
  130. expect(resultValue.start).to.eq(refHyperTextMultilineResultValue.start);
  131. expect(resultValue.end).to.eq(refHyperTextMultilineResultValue.end);
  132. expect(resultValue.globalOffsets.start).to.eq(refHyperTextMultilineResultValue.globalOffsets.start);
  133. expect(resultValue.globalOffsets.end).to.eq(refHyperTextMultilineResultValue.globalOffsets.end);
  134. expect(resultValue.startOffset).to.eq(refHyperTextMultilineResultValue.startOffset);
  135. expect(resultValue.endOffset).to.eq(refHyperTextMultilineResultValue.endOffset);
  136. expect(resultValue.text).to.eq(refHyperTextMultilineResultValue.text);
  137. LabelStudio.params().config(simpleHyperTextConfig).data(simpleHyperTextData).withResult(results).init();
  138. LabelStudio.waitForObjectsReady();
  139. RichText.hasRegionWithText("Warning:");
  140. RichText.hasRegionWithText("🐱 This is a test");
  141. LabelStudio.serialize().then((results) => {
  142. const resultValue = results[0].value;
  143. expect(resultValue.start).to.eq(refHyperTextMultilineResultValue.start);
  144. expect(resultValue.end).to.eq(refHyperTextMultilineResultValue.end);
  145. expect(resultValue.globalOffsets.start).to.eq(refHyperTextMultilineResultValue.globalOffsets.start);
  146. expect(resultValue.globalOffsets.end).to.eq(refHyperTextMultilineResultValue.globalOffsets.end);
  147. expect(resultValue.startOffset).to.eq(refHyperTextMultilineResultValue.startOffset);
  148. expect(resultValue.endOffset).to.eq(refHyperTextMultilineResultValue.endOffset);
  149. expect(resultValue.text).to.eq(refHyperTextMultilineResultValue.text);
  150. });
  151. });
  152. });
  153. it("Heuristic edge case", () => {
  154. LabelStudio.params().config(simpleHyperTextConfig).data({ text: "<p>🐱\nmeans cat</p>" }).withResult([]).init();
  155. LabelStudio.waitForObjectsReady();
  156. Labels.select("region");
  157. RichText.selectText("means");
  158. RichText.hasRegionWithText("means");
  159. LabelStudio.serialize().then((results) => {
  160. const resultValue = results[0].value;
  161. expect(resultValue.start).to.eq("/p[1]/text()[1]");
  162. expect(resultValue.end).to.eq("/p[1]/text()[1]");
  163. expect(resultValue.globalOffsets.start).to.eq(2);
  164. expect(resultValue.globalOffsets.end).to.eq(7);
  165. expect(resultValue.startOffset).to.eq(3);
  166. expect(resultValue.endOffset).to.eq(8);
  167. expect(resultValue.text).to.eq("means");
  168. });
  169. });
  170. });