| 1234567891011121314151617181920212223242526 |
# Dump the paragraph text of a .docx file to a UTF-8 text file, then print the
# paragraphs surrounding every paragraph that mentions one of KEYWORDS.
import sys
import xml.etree.ElementTree as ET
import zipfile
from pathlib import Path
from typing import List, Sequence

docx_path = r'D:\我的文件\JeecgBoot-main\doc\湛江市人力资源和社会保障局智慧人社运营运维(2025-2027年)项目需求规格说明书-就业一湛通服务平台.docx'
output_path = r'D:\我的文件\JeecgBoot-main\.docs\_proto_docx.txt'

# WordprocessingML namespace in the "{uri}" prefix form ElementTree uses for tags.
NS = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'

# A paragraph containing any of these substrings is printed with its neighbours
# (roughly: "solution design", "page prototype", "prototype design").
KEYWORDS = ('方案设计', '页面原型', '原型设计')


def extract_paragraphs(path) -> List[str]:
    """Return the text of every non-empty paragraph in the .docx at *path*.

    The text of a paragraph is the concatenation of all ``w:t`` elements found
    anywhere below its ``w:p`` element; paragraphs without text are skipped.
    Because ``Element.iter`` walks all descendants, a ``w:p`` nested inside
    another ``w:p`` contributes its text to the outer paragraph and is also
    returned as a paragraph of its own.

    Raises:
        OSError: the file cannot be opened.
        zipfile.BadZipFile: the file is not a zip archive.
        KeyError: the archive has no ``word/document.xml`` member.
        xml.etree.ElementTree.ParseError: the member is not well-formed XML.
    """
    with zipfile.ZipFile(path) as archive:
        xml_content = archive.read('word/document.xml')
    root = ET.fromstring(xml_content)

    paragraphs = []
    for para in root.iter(NS + 'p'):
        text = ''.join(node.text for node in para.iter(NS + 't') if node.text)
        if text:
            paragraphs.append(text)
    return paragraphs


def find_keyword_windows(
    paragraphs: Sequence[str],
    keywords: Sequence[str] = KEYWORDS,
    before: int = 2,
    after: int = 15,
) -> List[range]:
    """Return one index range per paragraph that contains any of *keywords*.

    For a match at index ``i`` the range is ``[i - before, i + after)``,
    clamped to the bounds of *paragraphs*. Ranges of nearby matches may
    overlap; they are returned separately, in match order.
    """
    total = len(paragraphs)
    return [
        range(max(0, i - before), min(total, i + after))
        for i, text in enumerate(paragraphs)
        if any(keyword in text for keyword in keywords)
    ]


def main(docx=docx_path, out=output_path) -> int:
    """Write the paragraph dump to *out* and print the keyword context windows.

    Returns 0 on success. Returns 1, after reporting the error on stderr, when
    the document cannot be read or parsed or the dump cannot be written.
    """
    try:
        paragraphs = extract_paragraphs(docx)
        # The target folder may not exist yet; create it instead of failing.
        Path(out).parent.mkdir(parents=True, exist_ok=True)
        with open(out, 'w', encoding='utf-8') as f:
            f.write('\n'.join(paragraphs))
    except (OSError, zipfile.BadZipFile, KeyError, ET.ParseError) as e:
        # Only expected I/O and format failures are handled here; anything
        # else is a bug and should surface with its traceback.
        print(f'Error: {e}', file=sys.stderr)
        return 1

    for window in find_keyword_windows(paragraphs):
        for j in window:
            # Long paragraphs are truncated to 200 characters to keep output readable.
            print(f'{j}: [{paragraphs[j][:200]}]')
        print('---')
    return 0


if __name__ == '__main__':
    sys.exit(main())
|