base_parse_qa_handle.py 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. # coding=utf-8
  2. """
  3. @project: maxkb
  4. @Author:虎
  5. @file: base_parse_qa_handle.py
  6. @date:2024/5/21 14:56
  7. @desc:
  8. """
  9. from abc import ABC, abstractmethod
  10. def get_row_value(row, title_row_index_dict, field):
  11. index = title_row_index_dict.get(field)
  12. if index is None:
  13. return None
  14. if (len(row) - 1) >= index:
  15. return row[index]
  16. return None
  17. def get_title_row_index_dict(title_row_list):
  18. title_row_index_dict = {}
  19. if len(title_row_list) == 1:
  20. title_row_index_dict['content'] = 0
  21. elif len(title_row_list) == 1:
  22. title_row_index_dict['title'] = 0
  23. title_row_index_dict['content'] = 1
  24. else:
  25. title_row_index_dict['title'] = 0
  26. title_row_index_dict['content'] = 1
  27. title_row_index_dict['problem_list'] = 2
  28. for index in range(len(title_row_list)):
  29. title_row = title_row_list[index]
  30. if title_row is None:
  31. title_row = ''
  32. if title_row.startswith('分段标题'):
  33. title_row_index_dict['title'] = index
  34. if title_row.startswith('分段内容'):
  35. title_row_index_dict['content'] = index
  36. if title_row.startswith('问题'):
  37. title_row_index_dict['problem_list'] = index
  38. return title_row_index_dict
  39. class BaseParseQAHandle(ABC):
  40. @abstractmethod
  41. def support(self, file, get_buffer):
  42. pass
  43. @abstractmethod
  44. def handle(self, file, get_buffer, save_image):
  45. pass