zjrs
/
zjrs-jeecgBoot


			
				
					
						
						
							1234567891011121314151617181920212223242526
							import zipfile, xml.etree.ElementTree as ET
# Dump the paragraph text of a .docx file to a UTF-8 text file, then print
# every paragraph that mentions a design/prototype keyword together with a
# few neighbouring paragraphs for context.
import os

docx_path = r'D:\我的文件\JeecgBoot-main\doc\湛江市人力资源和社会保障局智慧人社运营运维（2025-2027年）项目需求规格说明书-就业一湛通服务平台.docx'
output_path = r'D:\我的文件\JeecgBoot-main\.docs\_proto_docx.txt'

# ElementTree (Clark notation) prefix of the WordprocessingML main namespace.
NS = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
# A paragraph containing any of these substrings is reported with context.
KEYWORDS = ('方案设计', '页面原型', '原型设计')


def paragraph_text(para):
    """Return the concatenated ``w:t`` text that belongs to *para* itself.

    Nested ``w:p`` elements are not descended into: ``root.iter`` in
    ``extract_paragraphs`` visits them on their own, so including their
    text here as well would emit it twice.
    """
    parts = []
    # Explicit stack, children pushed reversed so they pop in document order.
    stack = list(reversed(list(para)))
    while stack:
        node = stack.pop()
        if node.tag == NS + 'p':
            continue
        if node.tag == NS + 't':
            if node.text:
                parts.append(node.text)
            continue
        stack.extend(reversed(list(node)))
    return ''.join(parts)


def extract_paragraphs(path):
    """Return the non-empty paragraph texts of the .docx at *path*.

    Reads ``word/document.xml`` from the archive and returns one string per
    ``w:p`` element, in document order, skipping paragraphs without text.

    Raises whatever ``zipfile`` / ``ElementTree`` raise for a missing file,
    a missing archive member, or malformed XML.
    """
    with zipfile.ZipFile(path) as z:
        xml_content = z.read('word/document.xml')
    root = ET.fromstring(xml_content)
    texts = (paragraph_text(p) for p in root.iter(NS + 'p'))
    return [text for text in texts if text]


def iter_match_report(paragraphs, keywords=KEYWORDS):
    """Yield the report lines for every paragraph containing a keyword.

    For a match at index ``i`` the paragraphs from ``i - 2`` up to (but not
    including) ``i + 15`` are yielded as ``'<index>: [<first 200 chars>]'``,
    clamped to the list bounds, followed by a ``'---'`` separator line.
    """
    total = len(paragraphs)
    for i, text in enumerate(paragraphs):
        if any(keyword in text for keyword in keywords):
            for j in range(max(0, i - 2), min(total, i + 15)):
                yield f'{j}: [{paragraphs[j][:200]}]'
            yield '---'


try:
    all_text = extract_paragraphs(docx_path)
    # Create the output directory first so a missing folder does not abort
    # the dump.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(all_text))
    # Print each keyword match with its surrounding paragraphs.
    for line in iter_match_report(all_text):
        print(line)
except Exception as e:
    # Top-level boundary of a one-off script: report the failure and stop.
    print(f'Error: {e}')