From 3770ccdcfd6d3146abb891b2cbaaf5f9d4d7dabb Mon Sep 17 00:00:00 2001 From: Jason Date: Thu, 24 Apr 2025 15:44:32 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=B9=E7=94=A8PaddleOCR,=20=E8=B7=9F?= =?UTF-8?q?=E9=9A=8F=E4=B8=BB=E7=BA=BF=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + backend/config.py | 26 +- backend/main.py | 70 +- backend/ppocr/__init__.py | 16 - backend/ppocr/data/__init__.py | 109 - backend/ppocr/data/collate_fn.py | 72 - backend/ppocr/data/imaug/ColorJitter.py | 26 - backend/ppocr/data/imaug/__init__.py | 74 - backend/ppocr/data/imaug/copy_paste.py | 170 - backend/ppocr/data/imaug/east_process.py | 436 - backend/ppocr/data/imaug/fce_aug.py | 564 -- backend/ppocr/data/imaug/fce_targets.py | 658 -- backend/ppocr/data/imaug/gen_table_mask.py | 244 - backend/ppocr/data/imaug/iaa_augment.py | 105 - backend/ppocr/data/imaug/label_ops.py | 1041 -- backend/ppocr/data/imaug/make_border_map.py | 173 - backend/ppocr/data/imaug/make_pse_gt.py | 106 - backend/ppocr/data/imaug/make_shrink_map.py | 123 - backend/ppocr/data/imaug/operators.py | 468 - backend/ppocr/data/imaug/pg_process.py | 906 -- backend/ppocr/data/imaug/randaugment.py | 143 - backend/ppocr/data/imaug/random_crop_data.py | 234 - backend/ppocr/data/imaug/rec_img_aug.py | 601 -- backend/ppocr/data/imaug/sast_process.py | 777 -- backend/ppocr/data/imaug/ssl_img_aug.py | 60 - .../data/imaug/text_image_aug/__init__.py | 17 - .../data/imaug/text_image_aug/augment.py | 120 - .../data/imaug/text_image_aug/warp_mls.py | 168 - backend/ppocr/data/imaug/vqa/__init__.py | 19 - .../ppocr/data/imaug/vqa/token/__init__.py | 17 - .../data/imaug/vqa/token/vqa_token_chunk.py | 122 - .../data/imaug/vqa/token/vqa_token_pad.py | 104 - .../imaug/vqa/token/vqa_token_relation.py | 67 - backend/ppocr/data/lmdb_dataset.py | 118 - backend/ppocr/data/pgnet_dataset.py | 106 - backend/ppocr/data/pubtab_dataset.py | 114 - backend/ppocr/data/simple_dataset.py | 151 - backend/ppocr/losses/__init__.py | 71 - backend/ppocr/losses/ace_loss.py | 52 - backend/ppocr/losses/basic_loss.py | 155 - backend/ppocr/losses/center_loss.py | 88 - backend/ppocr/losses/cls_loss.py | 30 - backend/ppocr/losses/combined_loss.py | 69 - backend/ppocr/losses/det_basic_loss.py | 153 - backend/ppocr/losses/det_db_loss.py | 76 - backend/ppocr/losses/det_east_loss.py | 63 - backend/ppocr/losses/det_fce_loss.py | 227 - backend/ppocr/losses/det_pse_loss.py | 149 - backend/ppocr/losses/det_sast_loss.py | 121 - backend/ppocr/losses/distillation_loss.py | 324 - backend/ppocr/losses/e2e_pg_loss.py | 140 - backend/ppocr/losses/kie_sdmgr_loss.py | 115 - backend/ppocr/losses/rec_aster_loss.py | 99 - backend/ppocr/losses/rec_att_loss.py | 39 - backend/ppocr/losses/rec_ctc_loss.py | 45 - backend/ppocr/losses/rec_enhanced_ctc_loss.py | 70 - backend/ppocr/losses/rec_multi_loss.py | 58 - backend/ppocr/losses/rec_nrtr_loss.py | 30 - backend/ppocr/losses/rec_pren_loss.py | 30 - backend/ppocr/losses/rec_sar_loss.py | 29 - backend/ppocr/losses/rec_srn_loss.py | 47 - backend/ppocr/losses/table_att_loss.py | 109 - .../ppocr/losses/vqa_token_layoutlm_loss.py | 42 - backend/ppocr/metrics/__init__.py | 47 - backend/ppocr/metrics/cls_metric.py | 46 - backend/ppocr/metrics/det_metric.py | 154 - backend/ppocr/metrics/distillation_metric.py | 73 - backend/ppocr/metrics/e2e_metric.py | 86 - backend/ppocr/metrics/eval_det_iou.py | 225 - backend/ppocr/metrics/kie_metric.py | 71 - backend/ppocr/metrics/rec_metric.py | 76 - backend/ppocr/metrics/table_metric.py | 51 - backend/ppocr/metrics/vqa_token_re_metric.py | 176 - backend/ppocr/metrics/vqa_token_ser_metric.py | 47 - .../ppocr/modeling/architectures/__init__.py | 32 - .../modeling/architectures/base_model.py | 100 - .../architectures/distillation_model.py | 60 - backend/ppocr/modeling/backbones/__init__.py | 64 - .../modeling/backbones/det_mobilenet_v3.py | 268 - .../ppocr/modeling/backbones/det_resnet_vd.py | 351 - .../modeling/backbones/det_resnet_vd_sast.py | 285 - .../modeling/backbones/e2e_resnet_vd_pg.py | 265 - .../modeling/backbones/kie_unet_sdmgr.py | 186 - .../backbones/rec_efficientb3_pren.py | 228 - .../ppocr/modeling/backbones/rec_micronet.py | 528 -- .../modeling/backbones/rec_mobilenet_v3.py | 138 - .../modeling/backbones/rec_mv1_enhance.py | 256 - .../ppocr/modeling/backbones/rec_nrtr_mtb.py | 48 - .../ppocr/modeling/backbones/rec_resnet_31.py | 210 - .../modeling/backbones/rec_resnet_aster.py | 143 - .../modeling/backbones/rec_resnet_fpn.py | 307 - .../ppocr/modeling/backbones/rec_resnet_vd.py | 286 - .../ppocr/modeling/backbones/rec_svtrnet.py | 584 -- .../ppocr/modeling/backbones/vqa_layoutlm.py | 172 - backend/ppocr/modeling/heads/__init__.py | 58 - backend/ppocr/modeling/heads/cls_head.py | 52 - backend/ppocr/modeling/heads/det_db_head.py | 118 - backend/ppocr/modeling/heads/det_east_head.py | 121 - backend/ppocr/modeling/heads/det_fce_head.py | 99 - backend/ppocr/modeling/heads/det_pse_head.py | 37 - backend/ppocr/modeling/heads/det_sast_head.py | 128 - backend/ppocr/modeling/heads/e2e_pg_head.py | 253 - .../ppocr/modeling/heads/kie_sdmgr_head.py | 207 - .../modeling/heads/multiheadAttention.py | 163 - .../ppocr/modeling/heads/rec_aster_head.py | 393 - backend/ppocr/modeling/heads/rec_att_head.py | 202 - backend/ppocr/modeling/heads/rec_ctc_head.py | 87 - .../ppocr/modeling/heads/rec_multi_head.py | 73 - backend/ppocr/modeling/heads/rec_nrtr_head.py | 826 -- backend/ppocr/modeling/heads/rec_pren_head.py | 34 - backend/ppocr/modeling/heads/rec_sar_head.py | 410 - backend/ppocr/modeling/heads/rec_srn_head.py | 280 - .../ppocr/modeling/heads/self_attention.py | 406 - .../ppocr/modeling/heads/table_att_head.py | 246 - backend/ppocr/modeling/necks/__init__.py | 37 - backend/ppocr/modeling/necks/db_fpn.py | 358 - backend/ppocr/modeling/necks/east_fpn.py | 188 - backend/ppocr/modeling/necks/fce_fpn.py | 280 - backend/ppocr/modeling/necks/fpn.py | 138 - backend/ppocr/modeling/necks/pg_fpn.py | 314 - backend/ppocr/modeling/necks/pren_fpn.py | 163 - backend/ppocr/modeling/necks/rnn.py | 191 - backend/ppocr/modeling/necks/sast_fpn.py | 284 - backend/ppocr/modeling/necks/table_fpn.py | 110 - backend/ppocr/modeling/transforms/__init__.py | 28 - backend/ppocr/modeling/transforms/stn.py | 135 - backend/ppocr/modeling/transforms/tps.py | 308 - .../transforms/tps_spatial_transformer.py | 156 - backend/ppocr/optimizer/__init__.py | 62 - backend/ppocr/optimizer/learning_rate.py | 310 - backend/ppocr/optimizer/lr_scheduler.py | 162 - backend/ppocr/optimizer/optimizer.py | 234 - backend/ppocr/optimizer/regularizer.py | 51 - backend/ppocr/postprocess/__init__.py | 61 - backend/ppocr/postprocess/cls_postprocess.py | 42 - backend/ppocr/postprocess/db_postprocess.py | 220 - backend/ppocr/postprocess/east_postprocess.py | 143 - backend/ppocr/postprocess/fce_postprocess.py | 241 - .../ppocr/postprocess/locality_aware_nms.py | 200 - backend/ppocr/postprocess/pg_postprocess.py | 52 - .../postprocess/pse_postprocess/__init__.py | 15 - .../postprocess/pse_postprocess/pse/README.md | 6 - .../pse_postprocess/pse/__init__.py | 29 - .../postprocess/pse_postprocess/pse/pse.pyx | 70 - .../postprocess/pse_postprocess/pse/setup.py | 14 - .../pse_postprocess/pse_postprocess.py | 118 - backend/ppocr/postprocess/rec_postprocess.py | 754 -- backend/ppocr/postprocess/sast_postprocess.py | 355 - .../vqa_token_re_layoutlm_postprocess.py | 51 - .../vqa_token_ser_layoutlm_postprocess.py | 93 - backend/ppocr/utils/__init__.py | 13 - backend/ppocr/utils/dict/ar_dict.txt | 117 - backend/ppocr/utils/dict/arabic_dict.txt | 161 - backend/ppocr/utils/dict/be_dict.txt | 145 - backend/ppocr/utils/dict/bg_dict.txt | 140 - backend/ppocr/utils/dict/ch_dict.txt | 6623 ------------- backend/ppocr/utils/dict/chinese_cht_dict.txt | 8421 ----------------- backend/ppocr/utils/dict/cyrillic_dict.txt | 163 - backend/ppocr/utils/dict/devanagari_dict.txt | 167 - backend/ppocr/utils/dict/en_dict.txt | 95 - backend/ppocr/utils/dict/es_dict.txt | 110 - backend/ppocr/utils/dict/fa_dict.txt | 136 - backend/ppocr/utils/dict/french_dict.txt | 136 - backend/ppocr/utils/dict/german_dict.txt | 143 - backend/ppocr/utils/dict/hi_dict.txt | 162 - backend/ppocr/utils/dict/it_dict.txt | 118 - backend/ppocr/utils/dict/japan_dict.txt | 4399 --------- backend/ppocr/utils/dict/ka_dict.txt | 153 - .../utils/dict/kie_dict/xfund_class_list.txt | 4 - backend/ppocr/utils/dict/kn_dict.txt | 153 - backend/ppocr/utils/dict/korean_dict.txt | 3688 -------- backend/ppocr/utils/dict/latin_dict.txt | 185 - .../dict/layout_dict/layout_cdla_dict.txt | 10 - .../layout_dict/layout_publaynet_dict.txt | 5 - .../dict/layout_dict/layout_table_dict.txt | 1 - backend/ppocr/utils/dict/mr_dict.txt | 153 - backend/ppocr/utils/dict/ne_dict.txt | 153 - backend/ppocr/utils/dict/oc_dict.txt | 96 - backend/ppocr/utils/dict/pt_dict.txt | 130 - backend/ppocr/utils/dict/pu_dict.txt | 130 - backend/ppocr/utils/dict/rs_cyrillic_dict.txt | 134 - backend/ppocr/utils/dict/rs_dict.txt | 91 - backend/ppocr/utils/dict/rs_latin_dict.txt | 91 - backend/ppocr/utils/dict/rsc_dict.txt | 134 - backend/ppocr/utils/dict/ru_dict.txt | 163 - backend/ppocr/utils/dict/spin_dict.txt | 68 - backend/ppocr/utils/dict/ta_dict.txt | 128 - backend/ppocr/utils/dict/table_dict.txt | 277 - .../dict/table_master_structure_dict.txt | 39 - .../ppocr/utils/dict/table_structure_dict.txt | 28 - .../utils/dict/table_structure_dict_ch.txt | 48 - backend/ppocr/utils/dict/te_dict.txt | 151 - backend/ppocr/utils/dict/ug_dict.txt | 114 - backend/ppocr/utils/dict/uk_dict.txt | 142 - backend/ppocr/utils/dict/ur_dict.txt | 137 - backend/ppocr/utils/dict/xi_dict.txt | 110 - backend/ppocr/utils/e2e_metric/Deteval.py | 574 -- .../ppocr/utils/e2e_metric/polygon_fast.py | 83 - .../utils/e2e_utils/extract_batchsize.py | 87 - .../utils/e2e_utils/extract_textpoint_fast.py | 457 - .../utils/e2e_utils/extract_textpoint_slow.py | 592 -- .../ppocr/utils/e2e_utils/pgnet_pp_utils.py | 162 - backend/ppocr/utils/e2e_utils/visual.py | 162 - backend/ppocr/utils/iou.py | 54 - backend/ppocr/utils/loggers/__init__.py | 3 - backend/ppocr/utils/loggers/base_logger.py | 15 - backend/ppocr/utils/loggers/loggers.py | 18 - backend/ppocr/utils/loggers/vdl_logger.py | 21 - backend/ppocr/utils/loggers/wandb_logger.py | 78 - backend/ppocr/utils/logging.py | 71 - backend/ppocr/utils/network.py | 84 - backend/ppocr/utils/poly_nms.py | 146 - backend/ppocr/utils/profiler.py | 110 - backend/ppocr/utils/save_load.py | 185 - backend/ppocr/utils/stats.py | 72 - backend/ppocr/utils/utility.py | 131 - backend/ppocr/utils/visual.py | 98 - backend/tools/infer/predict_cls.py | 151 - backend/tools/infer/predict_det.py | 302 - backend/tools/infer/predict_e2e.py | 169 - backend/tools/infer/predict_rec.py | 442 - backend/tools/infer/predict_system.py | 210 - backend/tools/infer/utility.py | 645 -- gui.py | 3 - requirements.txt | 4 + 225 files changed, 93 insertions(+), 59487 deletions(-) delete mode 100755 backend/ppocr/__init__.py delete mode 100644 backend/ppocr/data/__init__.py delete mode 100644 backend/ppocr/data/collate_fn.py delete mode 100644 backend/ppocr/data/imaug/ColorJitter.py delete mode 100644 backend/ppocr/data/imaug/__init__.py delete mode 100644 backend/ppocr/data/imaug/copy_paste.py delete mode 100644 backend/ppocr/data/imaug/east_process.py delete mode 100644 backend/ppocr/data/imaug/fce_aug.py delete mode 100644 backend/ppocr/data/imaug/fce_targets.py delete mode 100644 backend/ppocr/data/imaug/gen_table_mask.py delete mode 100644 backend/ppocr/data/imaug/iaa_augment.py delete mode 100644 backend/ppocr/data/imaug/label_ops.py delete mode 100644 backend/ppocr/data/imaug/make_border_map.py delete mode 100644 backend/ppocr/data/imaug/make_pse_gt.py delete mode 100644 backend/ppocr/data/imaug/make_shrink_map.py delete mode 100644 backend/ppocr/data/imaug/operators.py delete mode 100644 backend/ppocr/data/imaug/pg_process.py delete mode 100644 backend/ppocr/data/imaug/randaugment.py delete mode 100644 backend/ppocr/data/imaug/random_crop_data.py delete mode 100644 backend/ppocr/data/imaug/rec_img_aug.py delete mode 100644 backend/ppocr/data/imaug/sast_process.py delete mode 100644 backend/ppocr/data/imaug/ssl_img_aug.py delete mode 100644 backend/ppocr/data/imaug/text_image_aug/__init__.py delete mode 100644 backend/ppocr/data/imaug/text_image_aug/augment.py delete mode 100644 backend/ppocr/data/imaug/text_image_aug/warp_mls.py delete mode 100644 backend/ppocr/data/imaug/vqa/__init__.py delete mode 100644 backend/ppocr/data/imaug/vqa/token/__init__.py delete mode 100644 backend/ppocr/data/imaug/vqa/token/vqa_token_chunk.py delete mode 100644 backend/ppocr/data/imaug/vqa/token/vqa_token_pad.py delete mode 100644 backend/ppocr/data/imaug/vqa/token/vqa_token_relation.py delete mode 100644 backend/ppocr/data/lmdb_dataset.py delete mode 100644 backend/ppocr/data/pgnet_dataset.py delete mode 100644 backend/ppocr/data/pubtab_dataset.py delete mode 100644 backend/ppocr/data/simple_dataset.py delete mode 100755 backend/ppocr/losses/__init__.py delete mode 100644 backend/ppocr/losses/ace_loss.py delete mode 100644 backend/ppocr/losses/basic_loss.py delete mode 100644 backend/ppocr/losses/center_loss.py delete mode 100755 backend/ppocr/losses/cls_loss.py delete mode 100644 backend/ppocr/losses/combined_loss.py delete mode 100644 backend/ppocr/losses/det_basic_loss.py delete mode 100755 backend/ppocr/losses/det_db_loss.py delete mode 100644 backend/ppocr/losses/det_east_loss.py delete mode 100644 backend/ppocr/losses/det_fce_loss.py delete mode 100644 backend/ppocr/losses/det_pse_loss.py delete mode 100644 backend/ppocr/losses/det_sast_loss.py delete mode 100644 backend/ppocr/losses/distillation_loss.py delete mode 100644 backend/ppocr/losses/e2e_pg_loss.py delete mode 100644 backend/ppocr/losses/kie_sdmgr_loss.py delete mode 100644 backend/ppocr/losses/rec_aster_loss.py delete mode 100644 backend/ppocr/losses/rec_att_loss.py delete mode 100755 backend/ppocr/losses/rec_ctc_loss.py delete mode 100644 backend/ppocr/losses/rec_enhanced_ctc_loss.py delete mode 100644 backend/ppocr/losses/rec_multi_loss.py delete mode 100644 backend/ppocr/losses/rec_nrtr_loss.py delete mode 100644 backend/ppocr/losses/rec_pren_loss.py delete mode 100644 backend/ppocr/losses/rec_sar_loss.py delete mode 100644 backend/ppocr/losses/rec_srn_loss.py delete mode 100644 backend/ppocr/losses/table_att_loss.py delete mode 100755 backend/ppocr/losses/vqa_token_layoutlm_loss.py delete mode 100644 backend/ppocr/metrics/__init__.py delete mode 100644 backend/ppocr/metrics/cls_metric.py delete mode 100644 backend/ppocr/metrics/det_metric.py delete mode 100644 backend/ppocr/metrics/distillation_metric.py delete mode 100644 backend/ppocr/metrics/e2e_metric.py delete mode 100644 backend/ppocr/metrics/eval_det_iou.py delete mode 100644 backend/ppocr/metrics/kie_metric.py delete mode 100644 backend/ppocr/metrics/rec_metric.py delete mode 100644 backend/ppocr/metrics/table_metric.py delete mode 100644 backend/ppocr/metrics/vqa_token_re_metric.py delete mode 100644 backend/ppocr/metrics/vqa_token_ser_metric.py delete mode 100755 backend/ppocr/modeling/architectures/__init__.py delete mode 100644 backend/ppocr/modeling/architectures/base_model.py delete mode 100644 backend/ppocr/modeling/architectures/distillation_model.py delete mode 100755 backend/ppocr/modeling/backbones/__init__.py delete mode 100755 backend/ppocr/modeling/backbones/det_mobilenet_v3.py delete mode 100644 backend/ppocr/modeling/backbones/det_resnet_vd.py delete mode 100644 backend/ppocr/modeling/backbones/det_resnet_vd_sast.py delete mode 100644 backend/ppocr/modeling/backbones/e2e_resnet_vd_pg.py delete mode 100644 backend/ppocr/modeling/backbones/kie_unet_sdmgr.py delete mode 100644 backend/ppocr/modeling/backbones/rec_efficientb3_pren.py delete mode 100644 backend/ppocr/modeling/backbones/rec_micronet.py delete mode 100644 backend/ppocr/modeling/backbones/rec_mobilenet_v3.py delete mode 100644 backend/ppocr/modeling/backbones/rec_mv1_enhance.py delete mode 100644 backend/ppocr/modeling/backbones/rec_nrtr_mtb.py delete mode 100644 backend/ppocr/modeling/backbones/rec_resnet_31.py delete mode 100644 backend/ppocr/modeling/backbones/rec_resnet_aster.py delete mode 100644 backend/ppocr/modeling/backbones/rec_resnet_fpn.py delete mode 100644 backend/ppocr/modeling/backbones/rec_resnet_vd.py delete mode 100644 backend/ppocr/modeling/backbones/rec_svtrnet.py delete mode 100644 backend/ppocr/modeling/backbones/vqa_layoutlm.py delete mode 100755 backend/ppocr/modeling/heads/__init__.py delete mode 100644 backend/ppocr/modeling/heads/cls_head.py delete mode 100644 backend/ppocr/modeling/heads/det_db_head.py delete mode 100644 backend/ppocr/modeling/heads/det_east_head.py delete mode 100644 backend/ppocr/modeling/heads/det_fce_head.py delete mode 100644 backend/ppocr/modeling/heads/det_pse_head.py delete mode 100644 backend/ppocr/modeling/heads/det_sast_head.py delete mode 100644 backend/ppocr/modeling/heads/e2e_pg_head.py delete mode 100644 backend/ppocr/modeling/heads/kie_sdmgr_head.py delete mode 100755 backend/ppocr/modeling/heads/multiheadAttention.py delete mode 100644 backend/ppocr/modeling/heads/rec_aster_head.py delete mode 100644 backend/ppocr/modeling/heads/rec_att_head.py delete mode 100755 backend/ppocr/modeling/heads/rec_ctc_head.py delete mode 100644 backend/ppocr/modeling/heads/rec_multi_head.py delete mode 100644 backend/ppocr/modeling/heads/rec_nrtr_head.py delete mode 100644 backend/ppocr/modeling/heads/rec_pren_head.py delete mode 100644 backend/ppocr/modeling/heads/rec_sar_head.py delete mode 100644 backend/ppocr/modeling/heads/rec_srn_head.py delete mode 100644 backend/ppocr/modeling/heads/self_attention.py delete mode 100644 backend/ppocr/modeling/heads/table_att_head.py delete mode 100644 backend/ppocr/modeling/necks/__init__.py delete mode 100644 backend/ppocr/modeling/necks/db_fpn.py delete mode 100644 backend/ppocr/modeling/necks/east_fpn.py delete mode 100644 backend/ppocr/modeling/necks/fce_fpn.py delete mode 100644 backend/ppocr/modeling/necks/fpn.py delete mode 100644 backend/ppocr/modeling/necks/pg_fpn.py delete mode 100644 backend/ppocr/modeling/necks/pren_fpn.py delete mode 100644 backend/ppocr/modeling/necks/rnn.py delete mode 100644 backend/ppocr/modeling/necks/sast_fpn.py delete mode 100644 backend/ppocr/modeling/necks/table_fpn.py delete mode 100755 backend/ppocr/modeling/transforms/__init__.py delete mode 100644 backend/ppocr/modeling/transforms/stn.py delete mode 100644 backend/ppocr/modeling/transforms/tps.py delete mode 100644 backend/ppocr/modeling/transforms/tps_spatial_transformer.py delete mode 100644 backend/ppocr/optimizer/__init__.py delete mode 100644 backend/ppocr/optimizer/learning_rate.py delete mode 100644 backend/ppocr/optimizer/lr_scheduler.py delete mode 100644 backend/ppocr/optimizer/optimizer.py delete mode 100644 backend/ppocr/optimizer/regularizer.py delete mode 100644 backend/ppocr/postprocess/__init__.py delete mode 100644 backend/ppocr/postprocess/cls_postprocess.py delete mode 100755 backend/ppocr/postprocess/db_postprocess.py delete mode 100755 backend/ppocr/postprocess/east_postprocess.py delete mode 100755 backend/ppocr/postprocess/fce_postprocess.py delete mode 100644 backend/ppocr/postprocess/locality_aware_nms.py delete mode 100644 backend/ppocr/postprocess/pg_postprocess.py delete mode 100644 backend/ppocr/postprocess/pse_postprocess/__init__.py delete mode 100644 backend/ppocr/postprocess/pse_postprocess/pse/README.md delete mode 100644 backend/ppocr/postprocess/pse_postprocess/pse/__init__.py delete mode 100644 backend/ppocr/postprocess/pse_postprocess/pse/pse.pyx delete mode 100644 backend/ppocr/postprocess/pse_postprocess/pse/setup.py delete mode 100755 backend/ppocr/postprocess/pse_postprocess/pse_postprocess.py delete mode 100644 backend/ppocr/postprocess/rec_postprocess.py delete mode 100755 backend/ppocr/postprocess/sast_postprocess.py delete mode 100644 backend/ppocr/postprocess/vqa_token_re_layoutlm_postprocess.py delete mode 100644 backend/ppocr/postprocess/vqa_token_ser_layoutlm_postprocess.py delete mode 100755 backend/ppocr/utils/__init__.py delete mode 100644 backend/ppocr/utils/dict/ar_dict.txt delete mode 100644 backend/ppocr/utils/dict/arabic_dict.txt delete mode 100644 backend/ppocr/utils/dict/be_dict.txt delete mode 100644 backend/ppocr/utils/dict/bg_dict.txt delete mode 100644 backend/ppocr/utils/dict/ch_dict.txt delete mode 100644 backend/ppocr/utils/dict/chinese_cht_dict.txt delete mode 100644 backend/ppocr/utils/dict/cyrillic_dict.txt delete mode 100644 backend/ppocr/utils/dict/devanagari_dict.txt delete mode 100644 backend/ppocr/utils/dict/en_dict.txt delete mode 100644 backend/ppocr/utils/dict/es_dict.txt delete mode 100644 backend/ppocr/utils/dict/fa_dict.txt delete mode 100644 backend/ppocr/utils/dict/french_dict.txt delete mode 100644 backend/ppocr/utils/dict/german_dict.txt delete mode 100644 backend/ppocr/utils/dict/hi_dict.txt delete mode 100644 backend/ppocr/utils/dict/it_dict.txt delete mode 100644 backend/ppocr/utils/dict/japan_dict.txt delete mode 100644 backend/ppocr/utils/dict/ka_dict.txt delete mode 100644 backend/ppocr/utils/dict/kie_dict/xfund_class_list.txt delete mode 100644 backend/ppocr/utils/dict/kn_dict.txt delete mode 100644 backend/ppocr/utils/dict/korean_dict.txt delete mode 100644 backend/ppocr/utils/dict/latin_dict.txt delete mode 100644 backend/ppocr/utils/dict/layout_dict/layout_cdla_dict.txt delete mode 100644 backend/ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt delete mode 100644 backend/ppocr/utils/dict/layout_dict/layout_table_dict.txt delete mode 100644 backend/ppocr/utils/dict/mr_dict.txt delete mode 100644 backend/ppocr/utils/dict/ne_dict.txt delete mode 100644 backend/ppocr/utils/dict/oc_dict.txt delete mode 100644 backend/ppocr/utils/dict/pt_dict.txt delete mode 100644 backend/ppocr/utils/dict/pu_dict.txt delete mode 100644 backend/ppocr/utils/dict/rs_cyrillic_dict.txt delete mode 100644 backend/ppocr/utils/dict/rs_dict.txt delete mode 100644 backend/ppocr/utils/dict/rs_latin_dict.txt delete mode 100644 backend/ppocr/utils/dict/rsc_dict.txt delete mode 100644 backend/ppocr/utils/dict/ru_dict.txt delete mode 100644 backend/ppocr/utils/dict/spin_dict.txt delete mode 100644 backend/ppocr/utils/dict/ta_dict.txt delete mode 100644 backend/ppocr/utils/dict/table_dict.txt delete mode 100644 backend/ppocr/utils/dict/table_master_structure_dict.txt delete mode 100644 backend/ppocr/utils/dict/table_structure_dict.txt delete mode 100644 backend/ppocr/utils/dict/table_structure_dict_ch.txt delete mode 100644 backend/ppocr/utils/dict/te_dict.txt delete mode 100644 backend/ppocr/utils/dict/ug_dict.txt delete mode 100644 backend/ppocr/utils/dict/uk_dict.txt delete mode 100644 backend/ppocr/utils/dict/ur_dict.txt delete mode 100644 backend/ppocr/utils/dict/xi_dict.txt delete mode 100755 backend/ppocr/utils/e2e_metric/Deteval.py delete mode 100755 backend/ppocr/utils/e2e_metric/polygon_fast.py delete mode 100644 backend/ppocr/utils/e2e_utils/extract_batchsize.py delete mode 100644 backend/ppocr/utils/e2e_utils/extract_textpoint_fast.py delete mode 100644 backend/ppocr/utils/e2e_utils/extract_textpoint_slow.py delete mode 100644 backend/ppocr/utils/e2e_utils/pgnet_pp_utils.py delete mode 100644 backend/ppocr/utils/e2e_utils/visual.py delete mode 100644 backend/ppocr/utils/iou.py delete mode 100644 backend/ppocr/utils/loggers/__init__.py delete mode 100644 backend/ppocr/utils/loggers/base_logger.py delete mode 100644 backend/ppocr/utils/loggers/loggers.py delete mode 100644 backend/ppocr/utils/loggers/vdl_logger.py delete mode 100644 backend/ppocr/utils/loggers/wandb_logger.py delete mode 100644 backend/ppocr/utils/logging.py delete mode 100644 backend/ppocr/utils/network.py delete mode 100644 backend/ppocr/utils/poly_nms.py delete mode 100644 backend/ppocr/utils/profiler.py delete mode 100644 backend/ppocr/utils/save_load.py delete mode 100755 backend/ppocr/utils/stats.py delete mode 100755 backend/ppocr/utils/utility.py delete mode 100644 backend/ppocr/utils/visual.py delete mode 100755 backend/tools/infer/predict_cls.py delete mode 100755 backend/tools/infer/predict_det.py delete mode 100755 backend/tools/infer/predict_e2e.py delete mode 100755 backend/tools/infer/predict_rec.py delete mode 100755 backend/tools/infer/predict_system.py delete mode 100644 backend/tools/infer/utility.py diff --git a/.gitignore b/.gitignore index b977997..9a3671c 100644 --- a/.gitignore +++ b/.gitignore @@ -373,3 +373,4 @@ test*_no_sub*.mp4 /backend/models/big-lama/big-lama.pt /test/debug/ /backend/tools/train/release_model/ +model.onnx \ No newline at end of file diff --git a/backend/config.py b/backend/config.py index f366f8c..a6c891e 100644 --- a/backend/config.py +++ b/backend/config.py @@ -7,9 +7,9 @@ import logging import platform import stat from fsplit.filesplit import Filesplit -import paddle +import onnxruntime as ort + # ×××××××××××××××××××× [不要改] start ×××××××××××××××××××× -paddle.disable_signal_handler() logging.disable(logging.DEBUG) # 关闭DEBUG日志的打印 logging.disable(logging.WARNING) # 关闭WARNING日志的打印 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -50,6 +50,28 @@ if 'ffmpeg.exe' not in os.listdir(os.path.join(BASE_DIR, '', 'ffmpeg', 'win_x64' # 将ffmpeg添加可执行权限 os.chmod(FFMPEG_PATH, stat.S_IRWXU + stat.S_IRWXG + stat.S_IRWXO) os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' + +# 是否使用ONNX(DirectML/AMD/Intel) +ONNX_PROVIDERS = [] +available_providers = ort.get_available_providers() +for provider in available_providers: + if provider in [ + "CPUExecutionProvider" + ]: + continue + if provider not in [ + "DmlExecutionProvider", # DirectML,适用于 Windows GPU + "ROCMExecutionProvider", # AMD ROCm + "MIGraphXExecutionProvider", # AMD MIGraphX + "VitisAIExecutionProvider", # AMD VitisAI,适用于 RyzenAI & Windows, 实测和DirectML性能似乎差不多 + "OpenVINOExecutionProvider", # Intel GPU + "MetalExecutionProvider", # Apple macOS + "CoreMLExecutionProvider", # Apple macOS + "CUDAExecutionProvider", # Nvidia GPU + ]: + continue + print(f"Detected execution provider: {provider}") + ONNX_PROVIDERS.append(provider) # ×××××××××××××××××××× [不要改] end ×××××××××××××××××××× diff --git a/backend/main.py b/backend/main.py index 3aa0a23..e8c29d9 100644 --- a/backend/main.py +++ b/backend/main.py @@ -6,6 +6,7 @@ from pathlib import Path import threading import cv2 import sys +from functools import cached_property sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -24,8 +25,6 @@ import multiprocessing from shapely.geometry import Polygon import time from tqdm import tqdm -from tools.infer import utility -from tools.infer.predict_det import TextDetector class SubtitleDetect: @@ -34,14 +33,23 @@ class SubtitleDetect: """ def __init__(self, video_path, sub_area=None): + self.video_path = video_path + self.sub_area = sub_area + + @cached_property + def text_detector(self): + import paddle + paddle.disable_signal_handler() + from paddleocr.tools.infer import utility + from paddleocr.tools.infer.predict_det import TextDetector # 获取参数对象 importlib.reload(config) args = utility.parse_args() args.det_algorithm = 'DB' - args.det_model_dir = config.DET_MODEL_PATH - self.text_detector = TextDetector(args) - self.video_path = video_path - self.sub_area = sub_area + args.det_model_dir = self.convertToOnnxModelIfNeeded(config.DET_MODEL_PATH) + args.use_onnx=len(config.ONNX_PROVIDERS) > 0 + args.onnx_providers=config.ONNX_PROVIDERS + return TextDetector(args) def detect_subtitle(self, img): dt_boxes, elapse = self.text_detector(img) @@ -121,6 +129,52 @@ class SubtitleDetect: new_subtitle_frame_no_box_dict[key] = subtitle_frame_no_box_dict[key] return new_subtitle_frame_no_box_dict + def convertToOnnxModelIfNeeded(self, model_dir, model_filename="inference.pdmodel", params_filename="inference.pdiparams", opset_version=14): + """Converts a Paddle model to ONNX if ONNX providers are available and the model does not already exist.""" + + if not config.ONNX_PROVIDERS: + return model_dir + + onnx_model_path = os.path.join(model_dir, "model.onnx") + + if os.path.exists(onnx_model_path): + print(f"ONNX model already exists: {onnx_model_path}. Skipping conversion.") + return onnx_model_path + + print(f"Converting Paddle model {model_dir} to ONNX...") + model_file = os.path.join(model_dir, model_filename) + params_file = os.path.join(model_dir, params_filename) if params_filename else "" + + try: + import paddle2onnx + # Ensure the target directory exists + os.makedirs(os.path.dirname(onnx_model_path), exist_ok=True) + + # Convert and save the model + onnx_model = paddle2onnx.export( + model_filename=model_file, + params_filename=params_file, + save_file=onnx_model_path, + opset_version=opset_version, + auto_upgrade_opset=True, + verbose=True, + enable_onnx_checker=True, + enable_experimental_op=True, + enable_optimize=True, + custom_op_info={}, + deploy_backend="onnxruntime", + calibration_file="calibration.cache", + external_file=os.path.join(model_dir, "external_data"), + export_fp16_model=False, + ) + + print(f"Conversion successful. ONNX model saved to: {onnx_model_path}") + return onnx_model_path + except Exception as e: + print(f"Error during conversion: {e}") + return model_dir + + @staticmethod def split_range_by_scene(intervals, points): # 确保离散值列表是有序的 @@ -553,6 +607,10 @@ class SubtitleRemover: self.video_out_name = os.path.join(pic_dir, f'{self.vd_name}{self.ext}') if torch.cuda.is_available(): print('use GPU for acceleration') + for provider in config.ONNX_PROVIDERS: + print(f"Detected execution provider: {provider}") + + # 总处理进度 self.progress_total = 0 self.progress_remover = 0 diff --git a/backend/ppocr/__init__.py b/backend/ppocr/__init__.py deleted file mode 100755 index e438e53..0000000 --- a/backend/ppocr/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import warnings -warnings.filterwarnings("ignore", category=Warning) -warnings.filterwarnings("ignore", category=DeprecationWarning) diff --git a/backend/ppocr/data/__init__.py b/backend/ppocr/data/__init__.py deleted file mode 100644 index 78c3279..0000000 --- a/backend/ppocr/data/__init__.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import os -import sys -import numpy as np -import skimage -import paddle -import signal -import random - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(os.path.abspath(os.path.join(__dir__, '../..'))) - -import copy -from paddle.io import Dataset, DataLoader, BatchSampler, DistributedBatchSampler -import paddle.distributed as dist - -from ppocr.data.imaug import transform, create_operators -from ppocr.data.simple_dataset import SimpleDataSet -from ppocr.data.lmdb_dataset import LMDBDataSet -from ppocr.data.pgnet_dataset import PGDataSet -from ppocr.data.pubtab_dataset import PubTabDataSet - -__all__ = ['build_dataloader', 'transform', 'create_operators'] - - -def term_mp(sig_num, frame): - """ kill all child processes - """ - pid = os.getpid() - pgid = os.getpgid(os.getpid()) - print("main proc {} exit, kill process group " "{}".format(pid, pgid)) - os.killpg(pgid, signal.SIGKILL) - - -def build_dataloader(config, mode, device, logger, seed=None): - config = copy.deepcopy(config) - - support_dict = [ - 'SimpleDataSet', 'LMDBDataSet', 'PGDataSet', 'PubTabDataSet' - ] - module_name = config[mode]['dataset']['name'] - assert module_name in support_dict, Exception( - 'DataSet only support {}'.format(support_dict)) - assert mode in ['Train', 'Eval', 'Test' - ], "Mode should be Train, Eval or Test." - - dataset = eval(module_name)(config, mode, logger, seed) - loader_config = config[mode]['loader'] - batch_size = loader_config['batch_size_per_card'] - drop_last = loader_config['drop_last'] - shuffle = loader_config['shuffle'] - num_workers = loader_config['num_workers'] - if 'use_shared_memory' in loader_config.keys(): - use_shared_memory = loader_config['use_shared_memory'] - else: - use_shared_memory = True - - if mode == "Train": - # Distribute data to multiple cards - batch_sampler = DistributedBatchSampler( - dataset=dataset, - batch_size=batch_size, - shuffle=shuffle, - drop_last=drop_last) - else: - # Distribute data to single card - batch_sampler = BatchSampler( - dataset=dataset, - batch_size=batch_size, - shuffle=shuffle, - drop_last=drop_last) - - if 'collate_fn' in loader_config: - from . import collate_fn - collate_fn = getattr(collate_fn, loader_config['collate_fn'])() - else: - collate_fn = None - data_loader = DataLoader( - dataset=dataset, - batch_sampler=batch_sampler, - places=device, - num_workers=num_workers, - return_list=True, - use_shared_memory=use_shared_memory, - collate_fn=collate_fn) - - # support exit using ctrl+c - signal.signal(signal.SIGINT, term_mp) - signal.signal(signal.SIGTERM, term_mp) - - return data_loader diff --git a/backend/ppocr/data/collate_fn.py b/backend/ppocr/data/collate_fn.py deleted file mode 100644 index 0da6060..0000000 --- a/backend/ppocr/data/collate_fn.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle -import numbers -import numpy as np -from collections import defaultdict - - -class DictCollator(object): - """ - data batch - """ - - def __call__(self, batch): - # todo:support batch operators - data_dict = defaultdict(list) - to_tensor_keys = [] - for sample in batch: - for k, v in sample.items(): - if isinstance(v, (np.ndarray, paddle.Tensor, numbers.Number)): - if k not in to_tensor_keys: - to_tensor_keys.append(k) - data_dict[k].append(v) - for k in to_tensor_keys: - data_dict[k] = paddle.to_tensor(data_dict[k]) - return data_dict - - -class ListCollator(object): - """ - data batch - """ - - def __call__(self, batch): - # todo:support batch operators - data_dict = defaultdict(list) - to_tensor_idxs = [] - for sample in batch: - for idx, v in enumerate(sample): - if isinstance(v, (np.ndarray, paddle.Tensor, numbers.Number)): - if idx not in to_tensor_idxs: - to_tensor_idxs.append(idx) - data_dict[idx].append(v) - for idx in to_tensor_idxs: - data_dict[idx] = paddle.to_tensor(data_dict[idx]) - return list(data_dict.values()) - - -class SSLRotateCollate(object): - """ - bach: [ - [(4*3xH*W), (4,)] - [(4*3xH*W), (4,)] - ... - ] - """ - - def __call__(self, batch): - output = [np.concatenate(d, axis=0) for d in zip(*batch)] - return output diff --git a/backend/ppocr/data/imaug/ColorJitter.py b/backend/ppocr/data/imaug/ColorJitter.py deleted file mode 100644 index 4b542ab..0000000 --- a/backend/ppocr/data/imaug/ColorJitter.py +++ /dev/null @@ -1,26 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from paddle.vision.transforms import ColorJitter as pp_ColorJitter - -__all__ = ['ColorJitter'] - -class ColorJitter(object): - def __init__(self, brightness=0, contrast=0, saturation=0, hue=0,**kwargs): - self.aug = pp_ColorJitter(brightness, contrast, saturation, hue) - - def __call__(self, data): - image = data['image'] - image = self.aug(image) - data['image'] = image - return data diff --git a/backend/ppocr/data/imaug/__init__.py b/backend/ppocr/data/imaug/__init__.py deleted file mode 100644 index 548832f..0000000 --- a/backend/ppocr/data/imaug/__init__.py +++ /dev/null @@ -1,74 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from .iaa_augment import IaaAugment -from .make_border_map import MakeBorderMap -from .make_shrink_map import MakeShrinkMap -from .random_crop_data import EastRandomCropData, RandomCropImgMask -from .make_pse_gt import MakePseGt - -from .rec_img_aug import RecAug, RecConAug, RecResizeImg, ClsResizeImg, \ - SRNRecResizeImg, NRTRRecResizeImg, SARRecResizeImg, PRENResizeImg -from .ssl_img_aug import SSLRotateResize -from .randaugment import RandAugment -from .copy_paste import CopyPaste -from .ColorJitter import ColorJitter -from .operators import * -from .label_ops import * - -from .east_process import * -from .sast_process import * -from .pg_process import * -from .gen_table_mask import * - -from .vqa import * - -from .fce_aug import * -from .fce_targets import FCENetTargets - - -def transform(data, ops=None): - """ transform """ - if ops is None: - ops = [] - for op in ops: - data = op(data) - if data is None: - return None - return data - - -def create_operators(op_param_list, global_config=None): - """ - create operators based on the config - - Args: - params(list): a dict list, used to create some operators - """ - assert isinstance(op_param_list, list), ('operator config should be a list') - ops = [] - for operator in op_param_list: - assert isinstance(operator, - dict) and len(operator) == 1, "yaml format error" - op_name = list(operator)[0] - param = {} if operator[op_name] is None else operator[op_name] - if global_config is not None: - param.update(global_config) - op = eval(op_name)(**param) - ops.append(op) - return ops diff --git a/backend/ppocr/data/imaug/copy_paste.py b/backend/ppocr/data/imaug/copy_paste.py deleted file mode 100644 index 0b3386c..0000000 --- a/backend/ppocr/data/imaug/copy_paste.py +++ /dev/null @@ -1,170 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import copy -import cv2 -import random -import numpy as np -from PIL import Image -from shapely.geometry import Polygon - -from ppocr.data.imaug.iaa_augment import IaaAugment -from ppocr.data.imaug.random_crop_data import is_poly_outside_rect -from tools.infer.utility import get_rotate_crop_image - - -class CopyPaste(object): - def __init__(self, objects_paste_ratio=0.2, limit_paste=True, **kwargs): - self.ext_data_num = 1 - self.objects_paste_ratio = objects_paste_ratio - self.limit_paste = limit_paste - augmenter_args = [{'type': 'Resize', 'args': {'size': [0.5, 3]}}] - self.aug = IaaAugment(augmenter_args) - - def __call__(self, data): - point_num = data['polys'].shape[1] - src_img = data['image'] - src_polys = data['polys'].tolist() - src_ignores = data['ignore_tags'].tolist() - ext_data = data['ext_data'][0] - ext_image = ext_data['image'] - ext_polys = ext_data['polys'] - ext_ignores = ext_data['ignore_tags'] - - indexs = [i for i in range(len(ext_ignores)) if not ext_ignores[i]] - select_num = max( - 1, min(int(self.objects_paste_ratio * len(ext_polys)), 30)) - - random.shuffle(indexs) - select_idxs = indexs[:select_num] - select_polys = ext_polys[select_idxs] - select_ignores = ext_ignores[select_idxs] - - src_img = cv2.cvtColor(src_img, cv2.COLOR_BGR2RGB) - ext_image = cv2.cvtColor(ext_image, cv2.COLOR_BGR2RGB) - src_img = Image.fromarray(src_img).convert('RGBA') - for poly, tag in zip(select_polys, select_ignores): - box_img = get_rotate_crop_image(ext_image, poly) - - src_img, box = self.paste_img(src_img, box_img, src_polys) - if box is not None: - box = box.tolist() - for _ in range(len(box), point_num): - box.append(box[-1]) - src_polys.append(box) - src_ignores.append(tag) - src_img = cv2.cvtColor(np.array(src_img), cv2.COLOR_RGB2BGR) - h, w = src_img.shape[:2] - src_polys = np.array(src_polys) - src_polys[:, :, 0] = np.clip(src_polys[:, :, 0], 0, w) - src_polys[:, :, 1] = np.clip(src_polys[:, :, 1], 0, h) - data['image'] = src_img - data['polys'] = src_polys - data['ignore_tags'] = np.array(src_ignores) - return data - - def paste_img(self, src_img, box_img, src_polys): - box_img_pil = Image.fromarray(box_img).convert('RGBA') - src_w, src_h = src_img.size - box_w, box_h = box_img_pil.size - - angle = np.random.randint(0, 360) - box = np.array([[[0, 0], [box_w, 0], [box_w, box_h], [0, box_h]]]) - box = rotate_bbox(box_img, box, angle)[0] - box_img_pil = box_img_pil.rotate(angle, expand=1) - box_w, box_h = box_img_pil.width, box_img_pil.height - if src_w - box_w < 0 or src_h - box_h < 0: - return src_img, None - - paste_x, paste_y = self.select_coord(src_polys, box, src_w - box_w, - src_h - box_h) - if paste_x is None: - return src_img, None - box[:, 0] += paste_x - box[:, 1] += paste_y - r, g, b, A = box_img_pil.split() - src_img.paste(box_img_pil, (paste_x, paste_y), mask=A) - - return src_img, box - - def select_coord(self, src_polys, box, endx, endy): - if self.limit_paste: - xmin, ymin, xmax, ymax = box[:, 0].min(), box[:, 1].min( - ), box[:, 0].max(), box[:, 1].max() - for _ in range(50): - paste_x = random.randint(0, endx) - paste_y = random.randint(0, endy) - xmin1 = xmin + paste_x - xmax1 = xmax + paste_x - ymin1 = ymin + paste_y - ymax1 = ymax + paste_y - - num_poly_in_rect = 0 - for poly in src_polys: - if not is_poly_outside_rect(poly, xmin1, ymin1, - xmax1 - xmin1, ymax1 - ymin1): - num_poly_in_rect += 1 - break - if num_poly_in_rect == 0: - return paste_x, paste_y - return None, None - else: - paste_x = random.randint(0, endx) - paste_y = random.randint(0, endy) - return paste_x, paste_y - - -def get_union(pD, pG): - return Polygon(pD).union(Polygon(pG)).area - - -def get_intersection_over_union(pD, pG): - return get_intersection(pD, pG) / get_union(pD, pG) - - -def get_intersection(pD, pG): - return Polygon(pD).intersection(Polygon(pG)).area - - -def rotate_bbox(img, text_polys, angle, scale=1): - """ - from https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/augment.py - Args: - img: np.ndarray - text_polys: np.ndarray N*4*2 - angle: int - scale: int - - Returns: - - """ - w = img.shape[1] - h = img.shape[0] - - rangle = np.deg2rad(angle) - nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)) - nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)) - rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale) - rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0])) - rot_mat[0, 2] += rot_move[0] - rot_mat[1, 2] += rot_move[1] - - # ---------------------- rotate box ---------------------- - rot_text_polys = list() - for bbox in text_polys: - point1 = np.dot(rot_mat, np.array([bbox[0, 0], bbox[0, 1], 1])) - point2 = np.dot(rot_mat, np.array([bbox[1, 0], bbox[1, 1], 1])) - point3 = np.dot(rot_mat, np.array([bbox[2, 0], bbox[2, 1], 1])) - point4 = np.dot(rot_mat, np.array([bbox[3, 0], bbox[3, 1], 1])) - rot_text_polys.append([point1, point2, point3, point4]) - return np.array(rot_text_polys, dtype=np.float32) diff --git a/backend/ppocr/data/imaug/east_process.py b/backend/ppocr/data/imaug/east_process.py deleted file mode 100644 index df08adf..0000000 --- a/backend/ppocr/data/imaug/east_process.py +++ /dev/null @@ -1,436 +0,0 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -""" -This code is refered from: -https://github.com/songdejia/EAST/blob/master/data_utils.py -""" -import math -import cv2 -import numpy as np -import json -import sys -import os - -__all__ = ['EASTProcessTrain'] - - -class EASTProcessTrain(object): - def __init__(self, - image_shape=[512, 512], - background_ratio=0.125, - min_crop_side_ratio=0.1, - min_text_size=10, - **kwargs): - self.input_size = image_shape[1] - self.random_scale = np.array([0.5, 1, 2.0, 3.0]) - self.background_ratio = background_ratio - self.min_crop_side_ratio = min_crop_side_ratio - self.min_text_size = min_text_size - - def preprocess(self, im): - input_size = self.input_size - im_shape = im.shape - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - im_scale = float(input_size) / float(im_size_max) - im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale) - img_mean = [0.485, 0.456, 0.406] - img_std = [0.229, 0.224, 0.225] - # im = im[:, :, ::-1].astype(np.float32) - im = im / 255 - im -= img_mean - im /= img_std - new_h, new_w, _ = im.shape - im_padded = np.zeros((input_size, input_size, 3), dtype=np.float32) - im_padded[:new_h, :new_w, :] = im - im_padded = im_padded.transpose((2, 0, 1)) - im_padded = im_padded[np.newaxis, :] - return im_padded, im_scale - - def rotate_im_poly(self, im, text_polys): - """ - rotate image with 90 / 180 / 270 degre - """ - im_w, im_h = im.shape[1], im.shape[0] - dst_im = im.copy() - dst_polys = [] - rand_degree_ratio = np.random.rand() - rand_degree_cnt = 1 - if 0.333 < rand_degree_ratio < 0.666: - rand_degree_cnt = 2 - elif rand_degree_ratio > 0.666: - rand_degree_cnt = 3 - for i in range(rand_degree_cnt): - dst_im = np.rot90(dst_im) - rot_degree = -90 * rand_degree_cnt - rot_angle = rot_degree * math.pi / 180.0 - n_poly = text_polys.shape[0] - cx, cy = 0.5 * im_w, 0.5 * im_h - ncx, ncy = 0.5 * dst_im.shape[1], 0.5 * dst_im.shape[0] - for i in range(n_poly): - wordBB = text_polys[i] - poly = [] - for j in range(4): - sx, sy = wordBB[j][0], wordBB[j][1] - dx = math.cos(rot_angle) * (sx - cx)\ - - math.sin(rot_angle) * (sy - cy) + ncx - dy = math.sin(rot_angle) * (sx - cx)\ - + math.cos(rot_angle) * (sy - cy) + ncy - poly.append([dx, dy]) - dst_polys.append(poly) - dst_polys = np.array(dst_polys, dtype=np.float32) - return dst_im, dst_polys - - def polygon_area(self, poly): - """ - compute area of a polygon - :param poly: - :return: - """ - edge = [(poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]), - (poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]), - (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]), - (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])] - return np.sum(edge) / 2. - - def check_and_validate_polys(self, polys, tags, img_height, img_width): - """ - check so that the text poly is in the same direction, - and also filter some invalid polygons - :param polys: - :param tags: - :return: - """ - h, w = img_height, img_width - if polys.shape[0] == 0: - return polys - polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1) - polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1) - - validated_polys = [] - validated_tags = [] - for poly, tag in zip(polys, tags): - p_area = self.polygon_area(poly) - #invalid poly - if abs(p_area) < 1: - continue - if p_area > 0: - #'poly in wrong direction' - if not tag: - tag = True #reversed cases should be ignore - poly = poly[(0, 3, 2, 1), :] - validated_polys.append(poly) - validated_tags.append(tag) - return np.array(validated_polys), np.array(validated_tags) - - def draw_img_polys(self, img, polys): - if len(img.shape) == 4: - img = np.squeeze(img, axis=0) - if img.shape[0] == 3: - img = img.transpose((1, 2, 0)) - img[:, :, 2] += 123.68 - img[:, :, 1] += 116.78 - img[:, :, 0] += 103.94 - cv2.imwrite("tmp.jpg", img) - img = cv2.imread("tmp.jpg") - for box in polys: - box = box.astype(np.int32).reshape((-1, 1, 2)) - cv2.polylines(img, [box], True, color=(255, 255, 0), thickness=2) - import random - ino = random.randint(0, 100) - cv2.imwrite("tmp_%d.jpg" % ino, img) - return - - def shrink_poly(self, poly, r): - """ - fit a poly inside the origin poly, maybe bugs here... - used for generate the score map - :param poly: the text poly - :param r: r in the paper - :return: the shrinked poly - """ - # shrink ratio - R = 0.3 - # find the longer pair - dist0 = np.linalg.norm(poly[0] - poly[1]) - dist1 = np.linalg.norm(poly[2] - poly[3]) - dist2 = np.linalg.norm(poly[0] - poly[3]) - dist3 = np.linalg.norm(poly[1] - poly[2]) - if dist0 + dist1 > dist2 + dist3: - # first move (p0, p1), (p2, p3), then (p0, p3), (p1, p2) - ## p0, p1 - theta = np.arctan2((poly[1][1] - poly[0][1]), - (poly[1][0] - poly[0][0])) - poly[0][0] += R * r[0] * np.cos(theta) - poly[0][1] += R * r[0] * np.sin(theta) - poly[1][0] -= R * r[1] * np.cos(theta) - poly[1][1] -= R * r[1] * np.sin(theta) - ## p2, p3 - theta = np.arctan2((poly[2][1] - poly[3][1]), - (poly[2][0] - poly[3][0])) - poly[3][0] += R * r[3] * np.cos(theta) - poly[3][1] += R * r[3] * np.sin(theta) - poly[2][0] -= R * r[2] * np.cos(theta) - poly[2][1] -= R * r[2] * np.sin(theta) - ## p0, p3 - theta = np.arctan2((poly[3][0] - poly[0][0]), - (poly[3][1] - poly[0][1])) - poly[0][0] += R * r[0] * np.sin(theta) - poly[0][1] += R * r[0] * np.cos(theta) - poly[3][0] -= R * r[3] * np.sin(theta) - poly[3][1] -= R * r[3] * np.cos(theta) - ## p1, p2 - theta = np.arctan2((poly[2][0] - poly[1][0]), - (poly[2][1] - poly[1][1])) - poly[1][0] += R * r[1] * np.sin(theta) - poly[1][1] += R * r[1] * np.cos(theta) - poly[2][0] -= R * r[2] * np.sin(theta) - poly[2][1] -= R * r[2] * np.cos(theta) - else: - ## p0, p3 - # print poly - theta = np.arctan2((poly[3][0] - poly[0][0]), - (poly[3][1] - poly[0][1])) - poly[0][0] += R * r[0] * np.sin(theta) - poly[0][1] += R * r[0] * np.cos(theta) - poly[3][0] -= R * r[3] * np.sin(theta) - poly[3][1] -= R * r[3] * np.cos(theta) - ## p1, p2 - theta = np.arctan2((poly[2][0] - poly[1][0]), - (poly[2][1] - poly[1][1])) - poly[1][0] += R * r[1] * np.sin(theta) - poly[1][1] += R * r[1] * np.cos(theta) - poly[2][0] -= R * r[2] * np.sin(theta) - poly[2][1] -= R * r[2] * np.cos(theta) - ## p0, p1 - theta = np.arctan2((poly[1][1] - poly[0][1]), - (poly[1][0] - poly[0][0])) - poly[0][0] += R * r[0] * np.cos(theta) - poly[0][1] += R * r[0] * np.sin(theta) - poly[1][0] -= R * r[1] * np.cos(theta) - poly[1][1] -= R * r[1] * np.sin(theta) - ## p2, p3 - theta = np.arctan2((poly[2][1] - poly[3][1]), - (poly[2][0] - poly[3][0])) - poly[3][0] += R * r[3] * np.cos(theta) - poly[3][1] += R * r[3] * np.sin(theta) - poly[2][0] -= R * r[2] * np.cos(theta) - poly[2][1] -= R * r[2] * np.sin(theta) - return poly - - def generate_quad(self, im_size, polys, tags): - """ - Generate quadrangle. - """ - h, w = im_size - poly_mask = np.zeros((h, w), dtype=np.uint8) - score_map = np.zeros((h, w), dtype=np.uint8) - # (x1, y1, ..., x4, y4, short_edge_norm) - geo_map = np.zeros((h, w, 9), dtype=np.float32) - # mask used during traning, to ignore some hard areas - training_mask = np.ones((h, w), dtype=np.uint8) - for poly_idx, poly_tag in enumerate(zip(polys, tags)): - poly = poly_tag[0] - tag = poly_tag[1] - - r = [None, None, None, None] - for i in range(4): - dist1 = np.linalg.norm(poly[i] - poly[(i + 1) % 4]) - dist2 = np.linalg.norm(poly[i] - poly[(i - 1) % 4]) - r[i] = min(dist1, dist2) - # score map - shrinked_poly = self.shrink_poly( - poly.copy(), r).astype(np.int32)[np.newaxis, :, :] - cv2.fillPoly(score_map, shrinked_poly, 1) - cv2.fillPoly(poly_mask, shrinked_poly, poly_idx + 1) - # if the poly is too small, then ignore it during training - poly_h = min( - np.linalg.norm(poly[0] - poly[3]), - np.linalg.norm(poly[1] - poly[2])) - poly_w = min( - np.linalg.norm(poly[0] - poly[1]), - np.linalg.norm(poly[2] - poly[3])) - if min(poly_h, poly_w) < self.min_text_size: - cv2.fillPoly(training_mask, - poly.astype(np.int32)[np.newaxis, :, :], 0) - - if tag: - cv2.fillPoly(training_mask, - poly.astype(np.int32)[np.newaxis, :, :], 0) - - xy_in_poly = np.argwhere(poly_mask == (poly_idx + 1)) - # geo map. - y_in_poly = xy_in_poly[:, 0] - x_in_poly = xy_in_poly[:, 1] - poly[:, 0] = np.minimum(np.maximum(poly[:, 0], 0), w) - poly[:, 1] = np.minimum(np.maximum(poly[:, 1], 0), h) - for pno in range(4): - geo_channel_beg = pno * 2 - geo_map[y_in_poly, x_in_poly, geo_channel_beg] =\ - x_in_poly - poly[pno, 0] - geo_map[y_in_poly, x_in_poly, geo_channel_beg+1] =\ - y_in_poly - poly[pno, 1] - geo_map[y_in_poly, x_in_poly, 8] = \ - 1.0 / max(min(poly_h, poly_w), 1.0) - return score_map, geo_map, training_mask - - def crop_area(self, im, polys, tags, crop_background=False, max_tries=50): - """ - make random crop from the input image - :param im: - :param polys: - :param tags: - :param crop_background: - :param max_tries: - :return: - """ - h, w, _ = im.shape - pad_h = h // 10 - pad_w = w // 10 - h_array = np.zeros((h + pad_h * 2), dtype=np.int32) - w_array = np.zeros((w + pad_w * 2), dtype=np.int32) - for poly in polys: - poly = np.round(poly, decimals=0).astype(np.int32) - minx = np.min(poly[:, 0]) - maxx = np.max(poly[:, 0]) - w_array[minx + pad_w:maxx + pad_w] = 1 - miny = np.min(poly[:, 1]) - maxy = np.max(poly[:, 1]) - h_array[miny + pad_h:maxy + pad_h] = 1 - # ensure the cropped area not across a text - h_axis = np.where(h_array == 0)[0] - w_axis = np.where(w_array == 0)[0] - if len(h_axis) == 0 or len(w_axis) == 0: - return im, polys, tags - - for i in range(max_tries): - xx = np.random.choice(w_axis, size=2) - xmin = np.min(xx) - pad_w - xmax = np.max(xx) - pad_w - xmin = np.clip(xmin, 0, w - 1) - xmax = np.clip(xmax, 0, w - 1) - yy = np.random.choice(h_axis, size=2) - ymin = np.min(yy) - pad_h - ymax = np.max(yy) - pad_h - ymin = np.clip(ymin, 0, h - 1) - ymax = np.clip(ymax, 0, h - 1) - if xmax - xmin < self.min_crop_side_ratio * w or \ - ymax - ymin < self.min_crop_side_ratio * h: - # area too small - continue - if polys.shape[0] != 0: - poly_axis_in_area = (polys[:, :, 0] >= xmin)\ - & (polys[:, :, 0] <= xmax)\ - & (polys[:, :, 1] >= ymin)\ - & (polys[:, :, 1] <= ymax) - selected_polys = np.where( - np.sum(poly_axis_in_area, axis=1) == 4)[0] - else: - selected_polys = [] - - if len(selected_polys) == 0: - # no text in this area - if crop_background: - im = im[ymin:ymax + 1, xmin:xmax + 1, :] - polys = [] - tags = [] - return im, polys, tags - else: - continue - - im = im[ymin:ymax + 1, xmin:xmax + 1, :] - polys = polys[selected_polys] - tags = tags[selected_polys] - polys[:, :, 0] -= xmin - polys[:, :, 1] -= ymin - return im, polys, tags - return im, polys, tags - - def crop_background_infor(self, im, text_polys, text_tags): - im, text_polys, text_tags = self.crop_area( - im, text_polys, text_tags, crop_background=True) - - if len(text_polys) > 0: - return None - # pad and resize image - input_size = self.input_size - im, ratio = self.preprocess(im) - score_map = np.zeros((input_size, input_size), dtype=np.float32) - geo_map = np.zeros((input_size, input_size, 9), dtype=np.float32) - training_mask = np.ones((input_size, input_size), dtype=np.float32) - return im, score_map, geo_map, training_mask - - def crop_foreground_infor(self, im, text_polys, text_tags): - im, text_polys, text_tags = self.crop_area( - im, text_polys, text_tags, crop_background=False) - - if text_polys.shape[0] == 0: - return None - #continue for all ignore case - if np.sum((text_tags * 1.0)) >= text_tags.size: - return None - # pad and resize image - input_size = self.input_size - im, ratio = self.preprocess(im) - text_polys[:, :, 0] *= ratio - text_polys[:, :, 1] *= ratio - _, _, new_h, new_w = im.shape - # print(im.shape) - # self.draw_img_polys(im, text_polys) - score_map, geo_map, training_mask = self.generate_quad( - (new_h, new_w), text_polys, text_tags) - return im, score_map, geo_map, training_mask - - def __call__(self, data): - im = data['image'] - text_polys = data['polys'] - text_tags = data['ignore_tags'] - if im is None: - return None - if text_polys.shape[0] == 0: - return None - - #add rotate cases - if np.random.rand() < 0.5: - im, text_polys = self.rotate_im_poly(im, text_polys) - h, w, _ = im.shape - text_polys, text_tags = self.check_and_validate_polys(text_polys, - text_tags, h, w) - if text_polys.shape[0] == 0: - return None - - # random scale this image - rd_scale = np.random.choice(self.random_scale) - im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale) - text_polys *= rd_scale - if np.random.rand() < self.background_ratio: - outs = self.crop_background_infor(im, text_polys, text_tags) - else: - outs = self.crop_foreground_infor(im, text_polys, text_tags) - - if outs is None: - return None - im, score_map, geo_map, training_mask = outs - score_map = score_map[np.newaxis, ::4, ::4].astype(np.float32) - geo_map = np.swapaxes(geo_map, 1, 2) - geo_map = np.swapaxes(geo_map, 1, 0) - geo_map = geo_map[:, ::4, ::4].astype(np.float32) - training_mask = training_mask[np.newaxis, ::4, ::4] - training_mask = training_mask.astype(np.float32) - - data['image'] = im[0] - data['score_map'] = score_map - data['geo_map'] = geo_map - data['training_mask'] = training_mask - return data diff --git a/backend/ppocr/data/imaug/fce_aug.py b/backend/ppocr/data/imaug/fce_aug.py deleted file mode 100644 index 66bafef..0000000 --- a/backend/ppocr/data/imaug/fce_aug.py +++ /dev/null @@ -1,564 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/open-mmlab/mmocr/blob/main/mmocr/datasets/pipelines/transforms.py -""" -import numpy as np -from PIL import Image, ImageDraw -import cv2 -from shapely.geometry import Polygon -import math -from ppocr.utils.poly_nms import poly_intersection - - -class RandomScaling: - def __init__(self, size=800, scale=(3. / 4, 5. / 2), **kwargs): - """Random scale the image while keeping aspect. - - Args: - size (int) : Base size before scaling. - scale (tuple(float)) : The range of scaling. - """ - assert isinstance(size, int) - assert isinstance(scale, float) or isinstance(scale, tuple) - self.size = size - self.scale = scale if isinstance(scale, tuple) \ - else (1 - scale, 1 + scale) - - def __call__(self, data): - image = data['image'] - text_polys = data['polys'] - h, w, _ = image.shape - - aspect_ratio = np.random.uniform(min(self.scale), max(self.scale)) - scales = self.size * 1.0 / max(h, w) * aspect_ratio - scales = np.array([scales, scales]) - out_size = (int(h * scales[1]), int(w * scales[0])) - image = cv2.resize(image, out_size[::-1]) - - data['image'] = image - text_polys[:, :, 0::2] = text_polys[:, :, 0::2] * scales[1] - text_polys[:, :, 1::2] = text_polys[:, :, 1::2] * scales[0] - data['polys'] = text_polys - - return data - - -class RandomCropFlip: - def __init__(self, - pad_ratio=0.1, - crop_ratio=0.5, - iter_num=1, - min_area_ratio=0.2, - **kwargs): - """Random crop and flip a patch of the image. - - Args: - crop_ratio (float): The ratio of cropping. - iter_num (int): Number of operations. - min_area_ratio (float): Minimal area ratio between cropped patch - and original image. - """ - assert isinstance(crop_ratio, float) - assert isinstance(iter_num, int) - assert isinstance(min_area_ratio, float) - - self.pad_ratio = pad_ratio - self.epsilon = 1e-2 - self.crop_ratio = crop_ratio - self.iter_num = iter_num - self.min_area_ratio = min_area_ratio - - def __call__(self, results): - for i in range(self.iter_num): - results = self.random_crop_flip(results) - - return results - - def random_crop_flip(self, results): - image = results['image'] - polygons = results['polys'] - ignore_tags = results['ignore_tags'] - if len(polygons) == 0: - return results - - if np.random.random() >= self.crop_ratio: - return results - - h, w, _ = image.shape - area = h * w - pad_h = int(h * self.pad_ratio) - pad_w = int(w * self.pad_ratio) - h_axis, w_axis = self.generate_crop_target(image, polygons, pad_h, - pad_w) - if len(h_axis) == 0 or len(w_axis) == 0: - return results - - attempt = 0 - while attempt < 50: - attempt += 1 - polys_keep = [] - polys_new = [] - ignore_tags_keep = [] - ignore_tags_new = [] - xx = np.random.choice(w_axis, size=2) - xmin = np.min(xx) - pad_w - xmax = np.max(xx) - pad_w - xmin = np.clip(xmin, 0, w - 1) - xmax = np.clip(xmax, 0, w - 1) - yy = np.random.choice(h_axis, size=2) - ymin = np.min(yy) - pad_h - ymax = np.max(yy) - pad_h - ymin = np.clip(ymin, 0, h - 1) - ymax = np.clip(ymax, 0, h - 1) - if (xmax - xmin) * (ymax - ymin) < area * self.min_area_ratio: - # area too small - continue - - pts = np.stack([[xmin, xmax, xmax, xmin], - [ymin, ymin, ymax, ymax]]).T.astype(np.int32) - pp = Polygon(pts) - fail_flag = False - for polygon, ignore_tag in zip(polygons, ignore_tags): - ppi = Polygon(polygon.reshape(-1, 2)) - ppiou, _ = poly_intersection(ppi, pp, buffer=0) - if np.abs(ppiou - float(ppi.area)) > self.epsilon and \ - np.abs(ppiou) > self.epsilon: - fail_flag = True - break - elif np.abs(ppiou - float(ppi.area)) < self.epsilon: - polys_new.append(polygon) - ignore_tags_new.append(ignore_tag) - else: - polys_keep.append(polygon) - ignore_tags_keep.append(ignore_tag) - - if fail_flag: - continue - else: - break - - cropped = image[ymin:ymax, xmin:xmax, :] - select_type = np.random.randint(3) - if select_type == 0: - img = np.ascontiguousarray(cropped[:, ::-1]) - elif select_type == 1: - img = np.ascontiguousarray(cropped[::-1, :]) - else: - img = np.ascontiguousarray(cropped[::-1, ::-1]) - image[ymin:ymax, xmin:xmax, :] = img - results['img'] = image - - if len(polys_new) != 0: - height, width, _ = cropped.shape - if select_type == 0: - for idx, polygon in enumerate(polys_new): - poly = polygon.reshape(-1, 2) - poly[:, 0] = width - poly[:, 0] + 2 * xmin - polys_new[idx] = poly - elif select_type == 1: - for idx, polygon in enumerate(polys_new): - poly = polygon.reshape(-1, 2) - poly[:, 1] = height - poly[:, 1] + 2 * ymin - polys_new[idx] = poly - else: - for idx, polygon in enumerate(polys_new): - poly = polygon.reshape(-1, 2) - poly[:, 0] = width - poly[:, 0] + 2 * xmin - poly[:, 1] = height - poly[:, 1] + 2 * ymin - polys_new[idx] = poly - polygons = polys_keep + polys_new - ignore_tags = ignore_tags_keep + ignore_tags_new - results['polys'] = np.array(polygons) - results['ignore_tags'] = ignore_tags - - return results - - def generate_crop_target(self, image, all_polys, pad_h, pad_w): - """Generate crop target and make sure not to crop the polygon - instances. - - Args: - image (ndarray): The image waited to be crop. - all_polys (list[list[ndarray]]): All polygons including ground - truth polygons and ground truth ignored polygons. - pad_h (int): Padding length of height. - pad_w (int): Padding length of width. - Returns: - h_axis (ndarray): Vertical cropping range. - w_axis (ndarray): Horizontal cropping range. - """ - h, w, _ = image.shape - h_array = np.zeros((h + pad_h * 2), dtype=np.int32) - w_array = np.zeros((w + pad_w * 2), dtype=np.int32) - - text_polys = [] - for polygon in all_polys: - rect = cv2.minAreaRect(polygon.astype(np.int32).reshape(-1, 2)) - box = cv2.boxPoints(rect) - box = np.int0(box) - text_polys.append([box[0], box[1], box[2], box[3]]) - - polys = np.array(text_polys, dtype=np.int32) - for poly in polys: - poly = np.round(poly, decimals=0).astype(np.int32) - minx = np.min(poly[:, 0]) - maxx = np.max(poly[:, 0]) - w_array[minx + pad_w:maxx + pad_w] = 1 - miny = np.min(poly[:, 1]) - maxy = np.max(poly[:, 1]) - h_array[miny + pad_h:maxy + pad_h] = 1 - - h_axis = np.where(h_array == 0)[0] - w_axis = np.where(w_array == 0)[0] - return h_axis, w_axis - - -class RandomCropPolyInstances: - """Randomly crop images and make sure to contain at least one intact - instance.""" - - def __init__(self, crop_ratio=5.0 / 8.0, min_side_ratio=0.4, **kwargs): - super().__init__() - self.crop_ratio = crop_ratio - self.min_side_ratio = min_side_ratio - - def sample_valid_start_end(self, valid_array, min_len, max_start, min_end): - - assert isinstance(min_len, int) - assert len(valid_array) > min_len - - start_array = valid_array.copy() - max_start = min(len(start_array) - min_len, max_start) - start_array[max_start:] = 0 - start_array[0] = 1 - diff_array = np.hstack([0, start_array]) - np.hstack([start_array, 0]) - region_starts = np.where(diff_array < 0)[0] - region_ends = np.where(diff_array > 0)[0] - region_ind = np.random.randint(0, len(region_starts)) - start = np.random.randint(region_starts[region_ind], - region_ends[region_ind]) - - end_array = valid_array.copy() - min_end = max(start + min_len, min_end) - end_array[:min_end] = 0 - end_array[-1] = 1 - diff_array = np.hstack([0, end_array]) - np.hstack([end_array, 0]) - region_starts = np.where(diff_array < 0)[0] - region_ends = np.where(diff_array > 0)[0] - region_ind = np.random.randint(0, len(region_starts)) - end = np.random.randint(region_starts[region_ind], - region_ends[region_ind]) - return start, end - - def sample_crop_box(self, img_size, results): - """Generate crop box and make sure not to crop the polygon instances. - - Args: - img_size (tuple(int)): The image size (h, w). - results (dict): The results dict. - """ - - assert isinstance(img_size, tuple) - h, w = img_size[:2] - - key_masks = results['polys'] - - x_valid_array = np.ones(w, dtype=np.int32) - y_valid_array = np.ones(h, dtype=np.int32) - - selected_mask = key_masks[np.random.randint(0, len(key_masks))] - selected_mask = selected_mask.reshape((-1, 2)).astype(np.int32) - max_x_start = max(np.min(selected_mask[:, 0]) - 2, 0) - min_x_end = min(np.max(selected_mask[:, 0]) + 3, w - 1) - max_y_start = max(np.min(selected_mask[:, 1]) - 2, 0) - min_y_end = min(np.max(selected_mask[:, 1]) + 3, h - 1) - - for mask in key_masks: - mask = mask.reshape((-1, 2)).astype(np.int32) - clip_x = np.clip(mask[:, 0], 0, w - 1) - clip_y = np.clip(mask[:, 1], 0, h - 1) - min_x, max_x = np.min(clip_x), np.max(clip_x) - min_y, max_y = np.min(clip_y), np.max(clip_y) - - x_valid_array[min_x - 2:max_x + 3] = 0 - y_valid_array[min_y - 2:max_y + 3] = 0 - - min_w = int(w * self.min_side_ratio) - min_h = int(h * self.min_side_ratio) - - x1, x2 = self.sample_valid_start_end(x_valid_array, min_w, max_x_start, - min_x_end) - y1, y2 = self.sample_valid_start_end(y_valid_array, min_h, max_y_start, - min_y_end) - - return np.array([x1, y1, x2, y2]) - - def crop_img(self, img, bbox): - assert img.ndim == 3 - h, w, _ = img.shape - assert 0 <= bbox[1] < bbox[3] <= h - assert 0 <= bbox[0] < bbox[2] <= w - return img[bbox[1]:bbox[3], bbox[0]:bbox[2]] - - def __call__(self, results): - image = results['image'] - polygons = results['polys'] - ignore_tags = results['ignore_tags'] - if len(polygons) < 1: - return results - - if np.random.random_sample() < self.crop_ratio: - - crop_box = self.sample_crop_box(image.shape, results) - img = self.crop_img(image, crop_box) - results['image'] = img - # crop and filter masks - x1, y1, x2, y2 = crop_box - w = max(x2 - x1, 1) - h = max(y2 - y1, 1) - polygons[:, :, 0::2] = polygons[:, :, 0::2] - x1 - polygons[:, :, 1::2] = polygons[:, :, 1::2] - y1 - - valid_masks_list = [] - valid_tags_list = [] - for ind, polygon in enumerate(polygons): - if (polygon[:, ::2] > -4).all() and ( - polygon[:, ::2] < w + 4).all() and ( - polygon[:, 1::2] > -4).all() and ( - polygon[:, 1::2] < h + 4).all(): - polygon[:, ::2] = np.clip(polygon[:, ::2], 0, w) - polygon[:, 1::2] = np.clip(polygon[:, 1::2], 0, h) - valid_masks_list.append(polygon) - valid_tags_list.append(ignore_tags[ind]) - - results['polys'] = np.array(valid_masks_list) - results['ignore_tags'] = valid_tags_list - - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - return repr_str - - -class RandomRotatePolyInstances: - def __init__(self, - rotate_ratio=0.5, - max_angle=10, - pad_with_fixed_color=False, - pad_value=(0, 0, 0), - **kwargs): - """Randomly rotate images and polygon masks. - - Args: - rotate_ratio (float): The ratio of samples to operate rotation. - max_angle (int): The maximum rotation angle. - pad_with_fixed_color (bool): The flag for whether to pad rotated - image with fixed value. If set to False, the rotated image will - be padded onto cropped image. - pad_value (tuple(int)): The color value for padding rotated image. - """ - self.rotate_ratio = rotate_ratio - self.max_angle = max_angle - self.pad_with_fixed_color = pad_with_fixed_color - self.pad_value = pad_value - - def rotate(self, center, points, theta, center_shift=(0, 0)): - # rotate points. - (center_x, center_y) = center - center_y = -center_y - x, y = points[:, ::2], points[:, 1::2] - y = -y - - theta = theta / 180 * math.pi - cos = math.cos(theta) - sin = math.sin(theta) - - x = (x - center_x) - y = (y - center_y) - - _x = center_x + x * cos - y * sin + center_shift[0] - _y = -(center_y + x * sin + y * cos) + center_shift[1] - - points[:, ::2], points[:, 1::2] = _x, _y - return points - - def cal_canvas_size(self, ori_size, degree): - assert isinstance(ori_size, tuple) - angle = degree * math.pi / 180.0 - h, w = ori_size[:2] - - cos = math.cos(angle) - sin = math.sin(angle) - canvas_h = int(w * math.fabs(sin) + h * math.fabs(cos)) - canvas_w = int(w * math.fabs(cos) + h * math.fabs(sin)) - - canvas_size = (canvas_h, canvas_w) - return canvas_size - - def sample_angle(self, max_angle): - angle = np.random.random_sample() * 2 * max_angle - max_angle - return angle - - def rotate_img(self, img, angle, canvas_size): - h, w = img.shape[:2] - rotation_matrix = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1) - rotation_matrix[0, 2] += int((canvas_size[1] - w) / 2) - rotation_matrix[1, 2] += int((canvas_size[0] - h) / 2) - - if self.pad_with_fixed_color: - target_img = cv2.warpAffine( - img, - rotation_matrix, (canvas_size[1], canvas_size[0]), - flags=cv2.INTER_NEAREST, - borderValue=self.pad_value) - else: - mask = np.zeros_like(img) - (h_ind, w_ind) = (np.random.randint(0, h * 7 // 8), - np.random.randint(0, w * 7 // 8)) - img_cut = img[h_ind:(h_ind + h // 9), w_ind:(w_ind + w // 9)] - img_cut = cv2.resize(img_cut, (canvas_size[1], canvas_size[0])) - - mask = cv2.warpAffine( - mask, - rotation_matrix, (canvas_size[1], canvas_size[0]), - borderValue=[1, 1, 1]) - target_img = cv2.warpAffine( - img, - rotation_matrix, (canvas_size[1], canvas_size[0]), - borderValue=[0, 0, 0]) - target_img = target_img + img_cut * mask - - return target_img - - def __call__(self, results): - if np.random.random_sample() < self.rotate_ratio: - image = results['image'] - polygons = results['polys'] - h, w = image.shape[:2] - - angle = self.sample_angle(self.max_angle) - canvas_size = self.cal_canvas_size((h, w), angle) - center_shift = (int((canvas_size[1] - w) / 2), int( - (canvas_size[0] - h) / 2)) - image = self.rotate_img(image, angle, canvas_size) - results['image'] = image - # rotate polygons - rotated_masks = [] - for mask in polygons: - rotated_mask = self.rotate((w / 2, h / 2), mask, angle, - center_shift) - rotated_masks.append(rotated_mask) - results['polys'] = np.array(rotated_masks) - - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - return repr_str - - -class SquareResizePad: - def __init__(self, - target_size, - pad_ratio=0.6, - pad_with_fixed_color=False, - pad_value=(0, 0, 0), - **kwargs): - """Resize or pad images to be square shape. - - Args: - target_size (int): The target size of square shaped image. - pad_with_fixed_color (bool): The flag for whether to pad rotated - image with fixed value. If set to False, the rescales image will - be padded onto cropped image. - pad_value (tuple(int)): The color value for padding rotated image. - """ - assert isinstance(target_size, int) - assert isinstance(pad_ratio, float) - assert isinstance(pad_with_fixed_color, bool) - assert isinstance(pad_value, tuple) - - self.target_size = target_size - self.pad_ratio = pad_ratio - self.pad_with_fixed_color = pad_with_fixed_color - self.pad_value = pad_value - - def resize_img(self, img, keep_ratio=True): - h, w, _ = img.shape - if keep_ratio: - t_h = self.target_size if h >= w else int(h * self.target_size / w) - t_w = self.target_size if h <= w else int(w * self.target_size / h) - else: - t_h = t_w = self.target_size - img = cv2.resize(img, (t_w, t_h)) - return img, (t_h, t_w) - - def square_pad(self, img): - h, w = img.shape[:2] - if h == w: - return img, (0, 0) - pad_size = max(h, w) - if self.pad_with_fixed_color: - expand_img = np.ones((pad_size, pad_size, 3), dtype=np.uint8) - expand_img[:] = self.pad_value - else: - (h_ind, w_ind) = (np.random.randint(0, h * 7 // 8), - np.random.randint(0, w * 7 // 8)) - img_cut = img[h_ind:(h_ind + h // 9), w_ind:(w_ind + w // 9)] - expand_img = cv2.resize(img_cut, (pad_size, pad_size)) - if h > w: - y0, x0 = 0, (h - w) // 2 - else: - y0, x0 = (w - h) // 2, 0 - expand_img[y0:y0 + h, x0:x0 + w] = img - offset = (x0, y0) - - return expand_img, offset - - def square_pad_mask(self, points, offset): - x0, y0 = offset - pad_points = points.copy() - pad_points[::2] = pad_points[::2] + x0 - pad_points[1::2] = pad_points[1::2] + y0 - return pad_points - - def __call__(self, results): - image = results['image'] - polygons = results['polys'] - h, w = image.shape[:2] - - if np.random.random_sample() < self.pad_ratio: - image, out_size = self.resize_img(image, keep_ratio=True) - image, offset = self.square_pad(image) - else: - image, out_size = self.resize_img(image, keep_ratio=False) - offset = (0, 0) - results['image'] = image - try: - polygons[:, :, 0::2] = polygons[:, :, 0::2] * out_size[ - 1] / w + offset[0] - polygons[:, :, 1::2] = polygons[:, :, 1::2] * out_size[ - 0] / h + offset[1] - except: - pass - results['polys'] = polygons - - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - return repr_str diff --git a/backend/ppocr/data/imaug/fce_targets.py b/backend/ppocr/data/imaug/fce_targets.py deleted file mode 100644 index 1818480..0000000 --- a/backend/ppocr/data/imaug/fce_targets.py +++ /dev/null @@ -1,658 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/open-mmlab/mmocr/blob/main/mmocr/datasets/pipelines/textdet_targets/fcenet_targets.py -""" - -import cv2 -import numpy as np -from numpy.fft import fft -from numpy.linalg import norm -import sys - - -class FCENetTargets: - """Generate the ground truth targets of FCENet: Fourier Contour Embedding - for Arbitrary-Shaped Text Detection. - - [https://arxiv.org/abs/2104.10442] - - Args: - fourier_degree (int): The maximum Fourier transform degree k. - resample_step (float): The step size for resampling the text center - line (TCL). It's better not to exceed half of the minimum width. - center_region_shrink_ratio (float): The shrink ratio of text center - region. - level_size_divisors (tuple(int)): The downsample ratio on each level. - level_proportion_range (tuple(tuple(int))): The range of text sizes - assigned to each level. - """ - - def __init__(self, - fourier_degree=5, - resample_step=4.0, - center_region_shrink_ratio=0.3, - level_size_divisors=(8, 16, 32), - level_proportion_range=((0, 0.25), (0.2, 0.65), (0.55, 1.0)), - orientation_thr=2.0, - **kwargs): - - super().__init__() - assert isinstance(level_size_divisors, tuple) - assert isinstance(level_proportion_range, tuple) - assert len(level_size_divisors) == len(level_proportion_range) - self.fourier_degree = fourier_degree - self.resample_step = resample_step - self.center_region_shrink_ratio = center_region_shrink_ratio - self.level_size_divisors = level_size_divisors - self.level_proportion_range = level_proportion_range - - self.orientation_thr = orientation_thr - - def vector_angle(self, vec1, vec2): - if vec1.ndim > 1: - unit_vec1 = vec1 / (norm(vec1, axis=-1) + 1e-8).reshape((-1, 1)) - else: - unit_vec1 = vec1 / (norm(vec1, axis=-1) + 1e-8) - if vec2.ndim > 1: - unit_vec2 = vec2 / (norm(vec2, axis=-1) + 1e-8).reshape((-1, 1)) - else: - unit_vec2 = vec2 / (norm(vec2, axis=-1) + 1e-8) - return np.arccos( - np.clip( - np.sum(unit_vec1 * unit_vec2, axis=-1), -1.0, 1.0)) - - def resample_line(self, line, n): - """Resample n points on a line. - - Args: - line (ndarray): The points composing a line. - n (int): The resampled points number. - - Returns: - resampled_line (ndarray): The points composing the resampled line. - """ - - assert line.ndim == 2 - assert line.shape[0] >= 2 - assert line.shape[1] == 2 - assert isinstance(n, int) - assert n > 0 - - length_list = [ - norm(line[i + 1] - line[i]) for i in range(len(line) - 1) - ] - total_length = sum(length_list) - length_cumsum = np.cumsum([0.0] + length_list) - delta_length = total_length / (float(n) + 1e-8) - - current_edge_ind = 0 - resampled_line = [line[0]] - - for i in range(1, n): - current_line_len = i * delta_length - - while current_line_len >= length_cumsum[current_edge_ind + 1]: - current_edge_ind += 1 - current_edge_end_shift = current_line_len - length_cumsum[ - current_edge_ind] - end_shift_ratio = current_edge_end_shift / length_list[ - current_edge_ind] - current_point = line[current_edge_ind] + (line[current_edge_ind + 1] - - line[current_edge_ind] - ) * end_shift_ratio - resampled_line.append(current_point) - - resampled_line.append(line[-1]) - resampled_line = np.array(resampled_line) - - return resampled_line - - def reorder_poly_edge(self, points): - """Get the respective points composing head edge, tail edge, top - sideline and bottom sideline. - - Args: - points (ndarray): The points composing a text polygon. - - Returns: - head_edge (ndarray): The two points composing the head edge of text - polygon. - tail_edge (ndarray): The two points composing the tail edge of text - polygon. - top_sideline (ndarray): The points composing top curved sideline of - text polygon. - bot_sideline (ndarray): The points composing bottom curved sideline - of text polygon. - """ - - assert points.ndim == 2 - assert points.shape[0] >= 4 - assert points.shape[1] == 2 - - head_inds, tail_inds = self.find_head_tail(points, self.orientation_thr) - head_edge, tail_edge = points[head_inds], points[tail_inds] - - pad_points = np.vstack([points, points]) - if tail_inds[1] < 1: - tail_inds[1] = len(points) - sideline1 = pad_points[head_inds[1]:tail_inds[1]] - sideline2 = pad_points[tail_inds[1]:(head_inds[1] + len(points))] - sideline_mean_shift = np.mean( - sideline1, axis=0) - np.mean( - sideline2, axis=0) - - if sideline_mean_shift[1] > 0: - top_sideline, bot_sideline = sideline2, sideline1 - else: - top_sideline, bot_sideline = sideline1, sideline2 - - return head_edge, tail_edge, top_sideline, bot_sideline - - def find_head_tail(self, points, orientation_thr): - """Find the head edge and tail edge of a text polygon. - - Args: - points (ndarray): The points composing a text polygon. - orientation_thr (float): The threshold for distinguishing between - head edge and tail edge among the horizontal and vertical edges - of a quadrangle. - - Returns: - head_inds (list): The indexes of two points composing head edge. - tail_inds (list): The indexes of two points composing tail edge. - """ - - assert points.ndim == 2 - assert points.shape[0] >= 4 - assert points.shape[1] == 2 - assert isinstance(orientation_thr, float) - - if len(points) > 4: - pad_points = np.vstack([points, points[0]]) - edge_vec = pad_points[1:] - pad_points[:-1] - - theta_sum = [] - adjacent_vec_theta = [] - for i, edge_vec1 in enumerate(edge_vec): - adjacent_ind = [x % len(edge_vec) for x in [i - 1, i + 1]] - adjacent_edge_vec = edge_vec[adjacent_ind] - temp_theta_sum = np.sum( - self.vector_angle(edge_vec1, adjacent_edge_vec)) - temp_adjacent_theta = self.vector_angle(adjacent_edge_vec[0], - adjacent_edge_vec[1]) - theta_sum.append(temp_theta_sum) - adjacent_vec_theta.append(temp_adjacent_theta) - theta_sum_score = np.array(theta_sum) / np.pi - adjacent_theta_score = np.array(adjacent_vec_theta) / np.pi - poly_center = np.mean(points, axis=0) - edge_dist = np.maximum( - norm( - pad_points[1:] - poly_center, axis=-1), - norm( - pad_points[:-1] - poly_center, axis=-1)) - dist_score = edge_dist / np.max(edge_dist) - position_score = np.zeros(len(edge_vec)) - score = 0.5 * theta_sum_score + 0.15 * adjacent_theta_score - score += 0.35 * dist_score - if len(points) % 2 == 0: - position_score[(len(score) // 2 - 1)] += 1 - position_score[-1] += 1 - score += 0.1 * position_score - pad_score = np.concatenate([score, score]) - score_matrix = np.zeros((len(score), len(score) - 3)) - x = np.arange(len(score) - 3) / float(len(score) - 4) - gaussian = 1. / (np.sqrt(2. * np.pi) * 0.5) * np.exp(-np.power( - (x - 0.5) / 0.5, 2.) / 2) - gaussian = gaussian / np.max(gaussian) - for i in range(len(score)): - score_matrix[i, :] = score[i] + pad_score[(i + 2):(i + len( - score) - 1)] * gaussian * 0.3 - - head_start, tail_increment = np.unravel_index(score_matrix.argmax(), - score_matrix.shape) - tail_start = (head_start + tail_increment + 2) % len(points) - head_end = (head_start + 1) % len(points) - tail_end = (tail_start + 1) % len(points) - - if head_end > tail_end: - head_start, tail_start = tail_start, head_start - head_end, tail_end = tail_end, head_end - head_inds = [head_start, head_end] - tail_inds = [tail_start, tail_end] - else: - if self.vector_slope(points[1] - points[0]) + self.vector_slope( - points[3] - points[2]) < self.vector_slope(points[ - 2] - points[1]) + self.vector_slope(points[0] - points[ - 3]): - horizontal_edge_inds = [[0, 1], [2, 3]] - vertical_edge_inds = [[3, 0], [1, 2]] - else: - horizontal_edge_inds = [[3, 0], [1, 2]] - vertical_edge_inds = [[0, 1], [2, 3]] - - vertical_len_sum = norm(points[vertical_edge_inds[0][0]] - points[ - vertical_edge_inds[0][1]]) + norm(points[vertical_edge_inds[1][ - 0]] - points[vertical_edge_inds[1][1]]) - horizontal_len_sum = norm(points[horizontal_edge_inds[0][ - 0]] - points[horizontal_edge_inds[0][1]]) + norm(points[ - horizontal_edge_inds[1][0]] - points[horizontal_edge_inds[1] - [1]]) - - if vertical_len_sum > horizontal_len_sum * orientation_thr: - head_inds = horizontal_edge_inds[0] - tail_inds = horizontal_edge_inds[1] - else: - head_inds = vertical_edge_inds[0] - tail_inds = vertical_edge_inds[1] - - return head_inds, tail_inds - - def resample_sidelines(self, sideline1, sideline2, resample_step): - """Resample two sidelines to be of the same points number according to - step size. - - Args: - sideline1 (ndarray): The points composing a sideline of a text - polygon. - sideline2 (ndarray): The points composing another sideline of a - text polygon. - resample_step (float): The resampled step size. - - Returns: - resampled_line1 (ndarray): The resampled line 1. - resampled_line2 (ndarray): The resampled line 2. - """ - - assert sideline1.ndim == sideline2.ndim == 2 - assert sideline1.shape[1] == sideline2.shape[1] == 2 - assert sideline1.shape[0] >= 2 - assert sideline2.shape[0] >= 2 - assert isinstance(resample_step, float) - - length1 = sum([ - norm(sideline1[i + 1] - sideline1[i]) - for i in range(len(sideline1) - 1) - ]) - length2 = sum([ - norm(sideline2[i + 1] - sideline2[i]) - for i in range(len(sideline2) - 1) - ]) - - total_length = (length1 + length2) / 2 - resample_point_num = max(int(float(total_length) / resample_step), 1) - - resampled_line1 = self.resample_line(sideline1, resample_point_num) - resampled_line2 = self.resample_line(sideline2, resample_point_num) - - return resampled_line1, resampled_line2 - - def generate_center_region_mask(self, img_size, text_polys): - """Generate text center region mask. - - Args: - img_size (tuple): The image size of (height, width). - text_polys (list[list[ndarray]]): The list of text polygons. - - Returns: - center_region_mask (ndarray): The text center region mask. - """ - - assert isinstance(img_size, tuple) - # assert check_argument.is_2dlist(text_polys) - - h, w = img_size - - center_region_mask = np.zeros((h, w), np.uint8) - - center_region_boxes = [] - for poly in text_polys: - # assert len(poly) == 1 - polygon_points = poly.reshape(-1, 2) - _, _, top_line, bot_line = self.reorder_poly_edge(polygon_points) - resampled_top_line, resampled_bot_line = self.resample_sidelines( - top_line, bot_line, self.resample_step) - resampled_bot_line = resampled_bot_line[::-1] - center_line = (resampled_top_line + resampled_bot_line) / 2 - - line_head_shrink_len = norm(resampled_top_line[0] - - resampled_bot_line[0]) / 4.0 - line_tail_shrink_len = norm(resampled_top_line[-1] - - resampled_bot_line[-1]) / 4.0 - head_shrink_num = int(line_head_shrink_len // self.resample_step) - tail_shrink_num = int(line_tail_shrink_len // self.resample_step) - if len(center_line) > head_shrink_num + tail_shrink_num + 2: - center_line = center_line[head_shrink_num:len(center_line) - - tail_shrink_num] - resampled_top_line = resampled_top_line[head_shrink_num:len( - resampled_top_line) - tail_shrink_num] - resampled_bot_line = resampled_bot_line[head_shrink_num:len( - resampled_bot_line) - tail_shrink_num] - - for i in range(0, len(center_line) - 1): - tl = center_line[i] + (resampled_top_line[i] - center_line[i] - ) * self.center_region_shrink_ratio - tr = center_line[i + 1] + (resampled_top_line[i + 1] - - center_line[i + 1] - ) * self.center_region_shrink_ratio - br = center_line[i + 1] + (resampled_bot_line[i + 1] - - center_line[i + 1] - ) * self.center_region_shrink_ratio - bl = center_line[i] + (resampled_bot_line[i] - center_line[i] - ) * self.center_region_shrink_ratio - current_center_box = np.vstack([tl, tr, br, - bl]).astype(np.int32) - center_region_boxes.append(current_center_box) - - cv2.fillPoly(center_region_mask, center_region_boxes, 1) - return center_region_mask - - def resample_polygon(self, polygon, n=400): - """Resample one polygon with n points on its boundary. - - Args: - polygon (list[float]): The input polygon. - n (int): The number of resampled points. - Returns: - resampled_polygon (list[float]): The resampled polygon. - """ - length = [] - - for i in range(len(polygon)): - p1 = polygon[i] - if i == len(polygon) - 1: - p2 = polygon[0] - else: - p2 = polygon[i + 1] - length.append(((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2)**0.5) - - total_length = sum(length) - n_on_each_line = (np.array(length) / (total_length + 1e-8)) * n - n_on_each_line = n_on_each_line.astype(np.int32) - new_polygon = [] - - for i in range(len(polygon)): - num = n_on_each_line[i] - p1 = polygon[i] - if i == len(polygon) - 1: - p2 = polygon[0] - else: - p2 = polygon[i + 1] - - if num == 0: - continue - - dxdy = (p2 - p1) / num - for j in range(num): - point = p1 + dxdy * j - new_polygon.append(point) - - return np.array(new_polygon) - - def normalize_polygon(self, polygon): - """Normalize one polygon so that its start point is at right most. - - Args: - polygon (list[float]): The origin polygon. - Returns: - new_polygon (lost[float]): The polygon with start point at right. - """ - temp_polygon = polygon - polygon.mean(axis=0) - x = np.abs(temp_polygon[:, 0]) - y = temp_polygon[:, 1] - index_x = np.argsort(x) - index_y = np.argmin(y[index_x[:8]]) - index = index_x[index_y] - new_polygon = np.concatenate([polygon[index:], polygon[:index]]) - return new_polygon - - def poly2fourier(self, polygon, fourier_degree): - """Perform Fourier transformation to generate Fourier coefficients ck - from polygon. - - Args: - polygon (ndarray): An input polygon. - fourier_degree (int): The maximum Fourier degree K. - Returns: - c (ndarray(complex)): Fourier coefficients. - """ - points = polygon[:, 0] + polygon[:, 1] * 1j - c_fft = fft(points) / len(points) - c = np.hstack((c_fft[-fourier_degree:], c_fft[:fourier_degree + 1])) - return c - - def clockwise(self, c, fourier_degree): - """Make sure the polygon reconstructed from Fourier coefficients c in - the clockwise direction. - - Args: - polygon (list[float]): The origin polygon. - Returns: - new_polygon (lost[float]): The polygon in clockwise point order. - """ - if np.abs(c[fourier_degree + 1]) > np.abs(c[fourier_degree - 1]): - return c - elif np.abs(c[fourier_degree + 1]) < np.abs(c[fourier_degree - 1]): - return c[::-1] - else: - if np.abs(c[fourier_degree + 2]) > np.abs(c[fourier_degree - 2]): - return c - else: - return c[::-1] - - def cal_fourier_signature(self, polygon, fourier_degree): - """Calculate Fourier signature from input polygon. - - Args: - polygon (ndarray): The input polygon. - fourier_degree (int): The maximum Fourier degree K. - Returns: - fourier_signature (ndarray): An array shaped (2k+1, 2) containing - real part and image part of 2k+1 Fourier coefficients. - """ - resampled_polygon = self.resample_polygon(polygon) - resampled_polygon = self.normalize_polygon(resampled_polygon) - - fourier_coeff = self.poly2fourier(resampled_polygon, fourier_degree) - fourier_coeff = self.clockwise(fourier_coeff, fourier_degree) - - real_part = np.real(fourier_coeff).reshape((-1, 1)) - image_part = np.imag(fourier_coeff).reshape((-1, 1)) - fourier_signature = np.hstack([real_part, image_part]) - - return fourier_signature - - def generate_fourier_maps(self, img_size, text_polys): - """Generate Fourier coefficient maps. - - Args: - img_size (tuple): The image size of (height, width). - text_polys (list[list[ndarray]]): The list of text polygons. - - Returns: - fourier_real_map (ndarray): The Fourier coefficient real part maps. - fourier_image_map (ndarray): The Fourier coefficient image part - maps. - """ - - assert isinstance(img_size, tuple) - - h, w = img_size - k = self.fourier_degree - real_map = np.zeros((k * 2 + 1, h, w), dtype=np.float32) - imag_map = np.zeros((k * 2 + 1, h, w), dtype=np.float32) - - for poly in text_polys: - mask = np.zeros((h, w), dtype=np.uint8) - polygon = np.array(poly).reshape((1, -1, 2)) - cv2.fillPoly(mask, polygon.astype(np.int32), 1) - fourier_coeff = self.cal_fourier_signature(polygon[0], k) - for i in range(-k, k + 1): - if i != 0: - real_map[i + k, :, :] = mask * fourier_coeff[i + k, 0] + ( - 1 - mask) * real_map[i + k, :, :] - imag_map[i + k, :, :] = mask * fourier_coeff[i + k, 1] + ( - 1 - mask) * imag_map[i + k, :, :] - else: - yx = np.argwhere(mask > 0.5) - k_ind = np.ones((len(yx)), dtype=np.int64) * k - y, x = yx[:, 0], yx[:, 1] - real_map[k_ind, y, x] = fourier_coeff[k, 0] - x - imag_map[k_ind, y, x] = fourier_coeff[k, 1] - y - - return real_map, imag_map - - def generate_text_region_mask(self, img_size, text_polys): - """Generate text center region mask and geometry attribute maps. - - Args: - img_size (tuple): The image size (height, width). - text_polys (list[list[ndarray]]): The list of text polygons. - - Returns: - text_region_mask (ndarray): The text region mask. - """ - - assert isinstance(img_size, tuple) - - h, w = img_size - text_region_mask = np.zeros((h, w), dtype=np.uint8) - - for poly in text_polys: - polygon = np.array(poly, dtype=np.int32).reshape((1, -1, 2)) - cv2.fillPoly(text_region_mask, polygon, 1) - - return text_region_mask - - def generate_effective_mask(self, mask_size: tuple, polygons_ignore): - """Generate effective mask by setting the ineffective regions to 0 and - effective regions to 1. - - Args: - mask_size (tuple): The mask size. - polygons_ignore (list[[ndarray]]: The list of ignored text - polygons. - - Returns: - mask (ndarray): The effective mask of (height, width). - """ - - mask = np.ones(mask_size, dtype=np.uint8) - - for poly in polygons_ignore: - instance = poly.reshape(-1, 2).astype(np.int32).reshape(1, -1, 2) - cv2.fillPoly(mask, instance, 0) - - return mask - - def generate_level_targets(self, img_size, text_polys, ignore_polys): - """Generate ground truth target on each level. - - Args: - img_size (list[int]): Shape of input image. - text_polys (list[list[ndarray]]): A list of ground truth polygons. - ignore_polys (list[list[ndarray]]): A list of ignored polygons. - Returns: - level_maps (list(ndarray)): A list of ground target on each level. - """ - h, w = img_size - lv_size_divs = self.level_size_divisors - lv_proportion_range = self.level_proportion_range - lv_text_polys = [[] for i in range(len(lv_size_divs))] - lv_ignore_polys = [[] for i in range(len(lv_size_divs))] - level_maps = [] - for poly in text_polys: - polygon = np.array(poly, dtype=np.int).reshape((1, -1, 2)) - _, _, box_w, box_h = cv2.boundingRect(polygon) - proportion = max(box_h, box_w) / (h + 1e-8) - - for ind, proportion_range in enumerate(lv_proportion_range): - if proportion_range[0] < proportion < proportion_range[1]: - lv_text_polys[ind].append(poly / lv_size_divs[ind]) - - for ignore_poly in ignore_polys: - polygon = np.array(ignore_poly, dtype=np.int).reshape((1, -1, 2)) - _, _, box_w, box_h = cv2.boundingRect(polygon) - proportion = max(box_h, box_w) / (h + 1e-8) - - for ind, proportion_range in enumerate(lv_proportion_range): - if proportion_range[0] < proportion < proportion_range[1]: - lv_ignore_polys[ind].append(ignore_poly / lv_size_divs[ind]) - - for ind, size_divisor in enumerate(lv_size_divs): - current_level_maps = [] - level_img_size = (h // size_divisor, w // size_divisor) - - text_region = self.generate_text_region_mask( - level_img_size, lv_text_polys[ind])[None] - current_level_maps.append(text_region) - - center_region = self.generate_center_region_mask( - level_img_size, lv_text_polys[ind])[None] - current_level_maps.append(center_region) - - effective_mask = self.generate_effective_mask( - level_img_size, lv_ignore_polys[ind])[None] - current_level_maps.append(effective_mask) - - fourier_real_map, fourier_image_maps = self.generate_fourier_maps( - level_img_size, lv_text_polys[ind]) - current_level_maps.append(fourier_real_map) - current_level_maps.append(fourier_image_maps) - - level_maps.append(np.concatenate(current_level_maps)) - - return level_maps - - def generate_targets(self, results): - """Generate the ground truth targets for FCENet. - - Args: - results (dict): The input result dictionary. - - Returns: - results (dict): The output result dictionary. - """ - - assert isinstance(results, dict) - image = results['image'] - polygons = results['polys'] - ignore_tags = results['ignore_tags'] - h, w, _ = image.shape - - polygon_masks = [] - polygon_masks_ignore = [] - for tag, polygon in zip(ignore_tags, polygons): - if tag is True: - polygon_masks_ignore.append(polygon) - else: - polygon_masks.append(polygon) - - level_maps = self.generate_level_targets((h, w), polygon_masks, - polygon_masks_ignore) - - mapping = { - 'p3_maps': level_maps[0], - 'p4_maps': level_maps[1], - 'p5_maps': level_maps[2] - } - for key, value in mapping.items(): - results[key] = value - - return results - - def __call__(self, results): - results = self.generate_targets(results) - return results diff --git a/backend/ppocr/data/imaug/gen_table_mask.py b/backend/ppocr/data/imaug/gen_table_mask.py deleted file mode 100644 index 08e35d5..0000000 --- a/backend/ppocr/data/imaug/gen_table_mask.py +++ /dev/null @@ -1,244 +0,0 @@ -""" -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import sys -import six -import cv2 -import numpy as np - - -class GenTableMask(object): - """ gen table mask """ - - def __init__(self, shrink_h_max, shrink_w_max, mask_type=0, **kwargs): - self.shrink_h_max = 5 - self.shrink_w_max = 5 - self.mask_type = mask_type - - def projection(self, erosion, h, w, spilt_threshold=0): - # 水平投影 - projection_map = np.ones_like(erosion) - project_val_array = [0 for _ in range(0, h)] - - for j in range(0, h): - for i in range(0, w): - if erosion[j, i] == 255: - project_val_array[j] += 1 - # 根据数组,获取切割点 - start_idx = 0 # 记录进入字符区的索引 - end_idx = 0 # 记录进入空白区域的索引 - in_text = False # 是否遍历到了字符区内 - box_list = [] - for i in range(len(project_val_array)): - if in_text == False and project_val_array[i] > spilt_threshold: # 进入字符区了 - in_text = True - start_idx = i - elif project_val_array[i] <= spilt_threshold and in_text == True: # 进入空白区了 - end_idx = i - in_text = False - if end_idx - start_idx <= 2: - continue - box_list.append((start_idx, end_idx + 1)) - - if in_text: - box_list.append((start_idx, h - 1)) - # 绘制投影直方图 - for j in range(0, h): - for i in range(0, project_val_array[j]): - projection_map[j, i] = 0 - return box_list, projection_map - - def projection_cx(self, box_img): - box_gray_img = cv2.cvtColor(box_img, cv2.COLOR_BGR2GRAY) - h, w = box_gray_img.shape - # 灰度图片进行二值化处理 - ret, thresh1 = cv2.threshold(box_gray_img, 200, 255, cv2.THRESH_BINARY_INV) - # 纵向腐蚀 - if h < w: - kernel = np.ones((2, 1), np.uint8) - erode = cv2.erode(thresh1, kernel, iterations=1) - else: - erode = thresh1 - # 水平膨胀 - kernel = np.ones((1, 5), np.uint8) - erosion = cv2.dilate(erode, kernel, iterations=1) - # 水平投影 - projection_map = np.ones_like(erosion) - project_val_array = [0 for _ in range(0, h)] - - for j in range(0, h): - for i in range(0, w): - if erosion[j, i] == 255: - project_val_array[j] += 1 - # 根据数组,获取切割点 - start_idx = 0 # 记录进入字符区的索引 - end_idx = 0 # 记录进入空白区域的索引 - in_text = False # 是否遍历到了字符区内 - box_list = [] - spilt_threshold = 0 - for i in range(len(project_val_array)): - if in_text == False and project_val_array[i] > spilt_threshold: # 进入字符区了 - in_text = True - start_idx = i - elif project_val_array[i] <= spilt_threshold and in_text == True: # 进入空白区了 - end_idx = i - in_text = False - if end_idx - start_idx <= 2: - continue - box_list.append((start_idx, end_idx + 1)) - - if in_text: - box_list.append((start_idx, h - 1)) - # 绘制投影直方图 - for j in range(0, h): - for i in range(0, project_val_array[j]): - projection_map[j, i] = 0 - split_bbox_list = [] - if len(box_list) > 1: - for i, (h_start, h_end) in enumerate(box_list): - if i == 0: - h_start = 0 - if i == len(box_list): - h_end = h - word_img = erosion[h_start:h_end + 1, :] - word_h, word_w = word_img.shape - w_split_list, w_projection_map = self.projection(word_img.T, word_w, word_h) - w_start, w_end = w_split_list[0][0], w_split_list[-1][1] - if h_start > 0: - h_start -= 1 - h_end += 1 - word_img = box_img[h_start:h_end + 1:, w_start:w_end + 1, :] - split_bbox_list.append([w_start, h_start, w_end, h_end]) - else: - split_bbox_list.append([0, 0, w, h]) - return split_bbox_list - - def shrink_bbox(self, bbox): - left, top, right, bottom = bbox - sh_h = min(max(int((bottom - top) * 0.1), 1), self.shrink_h_max) - sh_w = min(max(int((right - left) * 0.1), 1), self.shrink_w_max) - left_new = left + sh_w - right_new = right - sh_w - top_new = top + sh_h - bottom_new = bottom - sh_h - if left_new >= right_new: - left_new = left - right_new = right - if top_new >= bottom_new: - top_new = top - bottom_new = bottom - return [left_new, top_new, right_new, bottom_new] - - def __call__(self, data): - img = data['image'] - cells = data['cells'] - height, width = img.shape[0:2] - if self.mask_type == 1: - mask_img = np.zeros((height, width), dtype=np.float32) - else: - mask_img = np.zeros((height, width, 3), dtype=np.float32) - cell_num = len(cells) - for cno in range(cell_num): - if "bbox" in cells[cno]: - bbox = cells[cno]['bbox'] - left, top, right, bottom = bbox - box_img = img[top:bottom, left:right, :].copy() - split_bbox_list = self.projection_cx(box_img) - for sno in range(len(split_bbox_list)): - split_bbox_list[sno][0] += left - split_bbox_list[sno][1] += top - split_bbox_list[sno][2] += left - split_bbox_list[sno][3] += top - - for sno in range(len(split_bbox_list)): - left, top, right, bottom = split_bbox_list[sno] - left, top, right, bottom = self.shrink_bbox([left, top, right, bottom]) - if self.mask_type == 1: - mask_img[top:bottom, left:right] = 1.0 - data['mask_img'] = mask_img - else: - mask_img[top:bottom, left:right, :] = (255, 255, 255) - data['image'] = mask_img - return data - -class ResizeTableImage(object): - def __init__(self, max_len, **kwargs): - super(ResizeTableImage, self).__init__() - self.max_len = max_len - - def get_img_bbox(self, cells): - bbox_list = [] - if len(cells) == 0: - return bbox_list - cell_num = len(cells) - for cno in range(cell_num): - if "bbox" in cells[cno]: - bbox = cells[cno]['bbox'] - bbox_list.append(bbox) - return bbox_list - - def resize_img_table(self, img, bbox_list, max_len): - height, width = img.shape[0:2] - ratio = max_len / (max(height, width) * 1.0) - resize_h = int(height * ratio) - resize_w = int(width * ratio) - img_new = cv2.resize(img, (resize_w, resize_h)) - bbox_list_new = [] - for bno in range(len(bbox_list)): - left, top, right, bottom = bbox_list[bno].copy() - left = int(left * ratio) - top = int(top * ratio) - right = int(right * ratio) - bottom = int(bottom * ratio) - bbox_list_new.append([left, top, right, bottom]) - return img_new, bbox_list_new - - def __call__(self, data): - img = data['image'] - if 'cells' not in data: - cells = [] - else: - cells = data['cells'] - bbox_list = self.get_img_bbox(cells) - img_new, bbox_list_new = self.resize_img_table(img, bbox_list, self.max_len) - data['image'] = img_new - cell_num = len(cells) - bno = 0 - for cno in range(cell_num): - if "bbox" in data['cells'][cno]: - data['cells'][cno]['bbox'] = bbox_list_new[bno] - bno += 1 - data['max_len'] = self.max_len - return data - -class PaddingTableImage(object): - def __init__(self, **kwargs): - super(PaddingTableImage, self).__init__() - - def __call__(self, data): - img = data['image'] - max_len = data['max_len'] - padding_img = np.zeros((max_len, max_len, 3), dtype=np.float32) - height, width = img.shape[0:2] - padding_img[0:height, 0:width, :] = img.copy() - data['image'] = padding_img - return data - \ No newline at end of file diff --git a/backend/ppocr/data/imaug/iaa_augment.py b/backend/ppocr/data/imaug/iaa_augment.py deleted file mode 100644 index 0aac787..0000000 --- a/backend/ppocr/data/imaug/iaa_augment.py +++ /dev/null @@ -1,105 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/iaa_augment.py -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import numpy as np -import imgaug -import imgaug.augmenters as iaa - - -class AugmenterBuilder(object): - def __init__(self): - pass - - def build(self, args, root=True): - if args is None or len(args) == 0: - return None - elif isinstance(args, list): - if root: - sequence = [self.build(value, root=False) for value in args] - return iaa.Sequential(sequence) - else: - return getattr(iaa, args[0])( - *[self.to_tuple_if_list(a) for a in args[1:]]) - elif isinstance(args, dict): - cls = getattr(iaa, args['type']) - return cls(**{ - k: self.to_tuple_if_list(v) - for k, v in args['args'].items() - }) - else: - raise RuntimeError('unknown augmenter arg: ' + str(args)) - - def to_tuple_if_list(self, obj): - if isinstance(obj, list): - return tuple(obj) - return obj - - -class IaaAugment(): - def __init__(self, augmenter_args=None, **kwargs): - if augmenter_args is None: - augmenter_args = [{ - 'type': 'Fliplr', - 'args': { - 'p': 0.5 - } - }, { - 'type': 'Affine', - 'args': { - 'rotate': [-10, 10] - } - }, { - 'type': 'Resize', - 'args': { - 'size': [0.5, 3] - } - }] - self.augmenter = AugmenterBuilder().build(augmenter_args) - - def __call__(self, data): - image = data['image'] - shape = image.shape - - if self.augmenter: - aug = self.augmenter.to_deterministic() - data['image'] = aug.augment_image(image) - data = self.may_augment_annotation(aug, data, shape) - return data - - def may_augment_annotation(self, aug, data, shape): - if aug is None: - return data - - line_polys = [] - for poly in data['polys']: - new_poly = self.may_augment_poly(aug, shape, poly) - line_polys.append(new_poly) - data['polys'] = np.array(line_polys) - return data - - def may_augment_poly(self, aug, img_shape, poly): - keypoints = [imgaug.Keypoint(p[0], p[1]) for p in poly] - keypoints = aug.augment_keypoints( - [imgaug.KeypointsOnImage( - keypoints, shape=img_shape)])[0].keypoints - poly = [(p.x, p.y) for p in keypoints] - return poly diff --git a/backend/ppocr/data/imaug/label_ops.py b/backend/ppocr/data/imaug/label_ops.py deleted file mode 100644 index c9bc2e7..0000000 --- a/backend/ppocr/data/imaug/label_ops.py +++ /dev/null @@ -1,1041 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import copy -import numpy as np -import string -from shapely.geometry import LineString, Point, Polygon -import json -import copy - -from ppocr.utils.logging import get_logger - - -class ClsLabelEncode(object): - def __init__(self, label_list, **kwargs): - self.label_list = label_list - - def __call__(self, data): - label = data['label'] - if label not in self.label_list: - return None - label = self.label_list.index(label) - data['label'] = label - return data - - -class DetLabelEncode(object): - def __init__(self, **kwargs): - pass - - def __call__(self, data): - label = data['label'] - label = json.loads(label) - nBox = len(label) - boxes, txts, txt_tags = [], [], [] - for bno in range(0, nBox): - box = label[bno]['points'] - txt = label[bno]['transcription'] - boxes.append(box) - txts.append(txt) - if txt in ['*', '###']: - txt_tags.append(True) - else: - txt_tags.append(False) - if len(boxes) == 0: - return None - boxes = self.expand_points_num(boxes) - boxes = np.array(boxes, dtype=np.float32) - txt_tags = np.array(txt_tags, dtype=np.bool) - - data['polys'] = boxes - data['texts'] = txts - data['ignore_tags'] = txt_tags - return data - - def order_points_clockwise(self, pts): - rect = np.zeros((4, 2), dtype="float32") - s = pts.sum(axis=1) - rect[0] = pts[np.argmin(s)] - rect[2] = pts[np.argmax(s)] - diff = np.diff(pts, axis=1) - rect[1] = pts[np.argmin(diff)] - rect[3] = pts[np.argmax(diff)] - return rect - - def expand_points_num(self, boxes): - max_points_num = 0 - for box in boxes: - if len(box) > max_points_num: - max_points_num = len(box) - ex_boxes = [] - for box in boxes: - ex_box = box + [box[-1]] * (max_points_num - len(box)) - ex_boxes.append(ex_box) - return ex_boxes - - -class BaseRecLabelEncode(object): - """ Convert between text-label and text-index """ - - def __init__(self, - max_text_length, - character_dict_path=None, - use_space_char=False): - - self.max_text_len = max_text_length - self.beg_str = "sos" - self.end_str = "eos" - self.lower = False - - if character_dict_path is None: - logger = get_logger() - logger.warning( - "The character_dict_path is None, model can only recognize number and lower letters" - ) - self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" - dict_character = list(self.character_str) - self.lower = True - else: - self.character_str = [] - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - for line in lines: - line = line.decode('utf-8').strip("\n").strip("\r\n") - self.character_str.append(line) - if use_space_char: - self.character_str.append(" ") - dict_character = list(self.character_str) - dict_character = self.add_special_char(dict_character) - self.dict = {} - for i, char in enumerate(dict_character): - self.dict[char] = i - self.character = dict_character - - def add_special_char(self, dict_character): - return dict_character - - def encode(self, text): - """convert text-label into text-index. - input: - text: text labels of each image. [batch_size] - - output: - text: concatenated text index for CTCLoss. - [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)] - length: length of each text. [batch_size] - """ - if len(text) == 0 or len(text) > self.max_text_len: - return None - if self.lower: - text = text.lower() - text_list = [] - for char in text: - if char not in self.dict: - # logger = get_logger() - # logger.warning('{} is not in dict'.format(char)) - continue - text_list.append(self.dict[char]) - if len(text_list) == 0: - return None - return text_list - - -class NRTRLabelEncode(BaseRecLabelEncode): - """ Convert between text-label and text-index """ - - def __init__(self, - max_text_length, - character_dict_path=None, - use_space_char=False, - **kwargs): - - super(NRTRLabelEncode, self).__init__( - max_text_length, character_dict_path, use_space_char) - - def __call__(self, data): - text = data['label'] - text = self.encode(text) - if text is None: - return None - if len(text) >= self.max_text_len - 1: - return None - data['length'] = np.array(len(text)) - text.insert(0, 2) - text.append(3) - text = text + [0] * (self.max_text_len - len(text)) - data['label'] = np.array(text) - return data - - def add_special_char(self, dict_character): - dict_character = ['blank', '', '', ''] + dict_character - return dict_character - - -class CTCLabelEncode(BaseRecLabelEncode): - """ Convert between text-label and text-index """ - - def __init__(self, - max_text_length, - character_dict_path=None, - use_space_char=False, - **kwargs): - super(CTCLabelEncode, self).__init__( - max_text_length, character_dict_path, use_space_char) - - def __call__(self, data): - text = data['label'] - text = self.encode(text) - if text is None: - return None - data['length'] = np.array(len(text)) - text = text + [0] * (self.max_text_len - len(text)) - data['label'] = np.array(text) - - label = [0] * len(self.character) - for x in text: - label[x] += 1 - data['label_ace'] = np.array(label) - return data - - def add_special_char(self, dict_character): - dict_character = ['blank'] + dict_character - return dict_character - - -class E2ELabelEncodeTest(BaseRecLabelEncode): - def __init__(self, - max_text_length, - character_dict_path=None, - use_space_char=False, - **kwargs): - super(E2ELabelEncodeTest, self).__init__( - max_text_length, character_dict_path, use_space_char) - - def __call__(self, data): - import json - padnum = len(self.dict) - label = data['label'] - label = json.loads(label) - nBox = len(label) - boxes, txts, txt_tags = [], [], [] - for bno in range(0, nBox): - box = label[bno]['points'] - txt = label[bno]['transcription'] - boxes.append(box) - txts.append(txt) - if txt in ['*', '###']: - txt_tags.append(True) - else: - txt_tags.append(False) - boxes = np.array(boxes, dtype=np.float32) - txt_tags = np.array(txt_tags, dtype=np.bool) - data['polys'] = boxes - data['ignore_tags'] = txt_tags - temp_texts = [] - for text in txts: - text = text.lower() - text = self.encode(text) - if text is None: - return None - text = text + [padnum] * (self.max_text_len - len(text) - ) # use 36 to pad - temp_texts.append(text) - data['texts'] = np.array(temp_texts) - return data - - -class E2ELabelEncodeTrain(object): - def __init__(self, **kwargs): - pass - - def __call__(self, data): - import json - label = data['label'] - label = json.loads(label) - nBox = len(label) - boxes, txts, txt_tags = [], [], [] - for bno in range(0, nBox): - box = label[bno]['points'] - txt = label[bno]['transcription'] - boxes.append(box) - txts.append(txt) - if txt in ['*', '###']: - txt_tags.append(True) - else: - txt_tags.append(False) - boxes = np.array(boxes, dtype=np.float32) - txt_tags = np.array(txt_tags, dtype=np.bool) - - data['polys'] = boxes - data['texts'] = txts - data['ignore_tags'] = txt_tags - return data - - -class KieLabelEncode(object): - def __init__(self, character_dict_path, norm=10, directed=False, **kwargs): - super(KieLabelEncode, self).__init__() - self.dict = dict({'': 0}) - with open(character_dict_path, 'r', encoding='utf-8') as fr: - idx = 1 - for line in fr: - char = line.strip() - self.dict[char] = idx - idx += 1 - self.norm = norm - self.directed = directed - - def compute_relation(self, boxes): - """Compute relation between every two boxes.""" - x1s, y1s = boxes[:, 0:1], boxes[:, 1:2] - x2s, y2s = boxes[:, 4:5], boxes[:, 5:6] - ws, hs = x2s - x1s + 1, np.maximum(y2s - y1s + 1, 1) - dxs = (x1s[:, 0][None] - x1s) / self.norm - dys = (y1s[:, 0][None] - y1s) / self.norm - xhhs, xwhs = hs[:, 0][None] / hs, ws[:, 0][None] / hs - whs = ws / hs + np.zeros_like(xhhs) - relations = np.stack([dxs, dys, whs, xhhs, xwhs], -1) - bboxes = np.concatenate([x1s, y1s, x2s, y2s], -1).astype(np.float32) - return relations, bboxes - - def pad_text_indices(self, text_inds): - """Pad text index to same length.""" - max_len = 300 - recoder_len = max([len(text_ind) for text_ind in text_inds]) - padded_text_inds = -np.ones((len(text_inds), max_len), np.int32) - for idx, text_ind in enumerate(text_inds): - padded_text_inds[idx, :len(text_ind)] = np.array(text_ind) - return padded_text_inds, recoder_len - - def list_to_numpy(self, ann_infos): - """Convert bboxes, relations, texts and labels to ndarray.""" - boxes, text_inds = ann_infos['points'], ann_infos['text_inds'] - boxes = np.array(boxes, np.int32) - relations, bboxes = self.compute_relation(boxes) - - labels = ann_infos.get('labels', None) - if labels is not None: - labels = np.array(labels, np.int32) - edges = ann_infos.get('edges', None) - if edges is not None: - labels = labels[:, None] - edges = np.array(edges) - edges = (edges[:, None] == edges[None, :]).astype(np.int32) - if self.directed: - edges = (edges & labels == 1).astype(np.int32) - np.fill_diagonal(edges, -1) - labels = np.concatenate([labels, edges], -1) - padded_text_inds, recoder_len = self.pad_text_indices(text_inds) - max_num = 300 - temp_bboxes = np.zeros([max_num, 4]) - h, _ = bboxes.shape - temp_bboxes[:h, :] = bboxes - - temp_relations = np.zeros([max_num, max_num, 5]) - temp_relations[:h, :h, :] = relations - - temp_padded_text_inds = np.zeros([max_num, max_num]) - temp_padded_text_inds[:h, :] = padded_text_inds - - temp_labels = np.zeros([max_num, max_num]) - temp_labels[:h, :h + 1] = labels - - tag = np.array([h, recoder_len]) - return dict( - image=ann_infos['image'], - points=temp_bboxes, - relations=temp_relations, - texts=temp_padded_text_inds, - labels=temp_labels, - tag=tag) - - def convert_canonical(self, points_x, points_y): - - assert len(points_x) == 4 - assert len(points_y) == 4 - - points = [Point(points_x[i], points_y[i]) for i in range(4)] - - polygon = Polygon([(p.x, p.y) for p in points]) - min_x, min_y, _, _ = polygon.bounds - points_to_lefttop = [ - LineString([points[i], Point(min_x, min_y)]) for i in range(4) - ] - distances = np.array([line.length for line in points_to_lefttop]) - sort_dist_idx = np.argsort(distances) - lefttop_idx = sort_dist_idx[0] - - if lefttop_idx == 0: - point_orders = [0, 1, 2, 3] - elif lefttop_idx == 1: - point_orders = [1, 2, 3, 0] - elif lefttop_idx == 2: - point_orders = [2, 3, 0, 1] - else: - point_orders = [3, 0, 1, 2] - - sorted_points_x = [points_x[i] for i in point_orders] - sorted_points_y = [points_y[j] for j in point_orders] - - return sorted_points_x, sorted_points_y - - def sort_vertex(self, points_x, points_y): - - assert len(points_x) == 4 - assert len(points_y) == 4 - - x = np.array(points_x) - y = np.array(points_y) - center_x = np.sum(x) * 0.25 - center_y = np.sum(y) * 0.25 - - x_arr = np.array(x - center_x) - y_arr = np.array(y - center_y) - - angle = np.arctan2(y_arr, x_arr) * 180.0 / np.pi - sort_idx = np.argsort(angle) - - sorted_points_x, sorted_points_y = [], [] - for i in range(4): - sorted_points_x.append(points_x[sort_idx[i]]) - sorted_points_y.append(points_y[sort_idx[i]]) - - return self.convert_canonical(sorted_points_x, sorted_points_y) - - def __call__(self, data): - import json - label = data['label'] - annotations = json.loads(label) - boxes, texts, text_inds, labels, edges = [], [], [], [], [] - for ann in annotations: - box = ann['points'] - x_list = [box[i][0] for i in range(4)] - y_list = [box[i][1] for i in range(4)] - sorted_x_list, sorted_y_list = self.sort_vertex(x_list, y_list) - sorted_box = [] - for x, y in zip(sorted_x_list, sorted_y_list): - sorted_box.append(x) - sorted_box.append(y) - boxes.append(sorted_box) - text = ann['transcription'] - texts.append(ann['transcription']) - text_ind = [self.dict[c] for c in text if c in self.dict] - text_inds.append(text_ind) - labels.append(ann['label']) - edges.append(ann.get('edge', 0)) - ann_infos = dict( - image=data['image'], - points=boxes, - texts=texts, - text_inds=text_inds, - edges=edges, - labels=labels) - - return self.list_to_numpy(ann_infos) - - -class AttnLabelEncode(BaseRecLabelEncode): - """ Convert between text-label and text-index """ - - def __init__(self, - max_text_length, - character_dict_path=None, - use_space_char=False, - **kwargs): - super(AttnLabelEncode, self).__init__( - max_text_length, character_dict_path, use_space_char) - - def add_special_char(self, dict_character): - self.beg_str = "sos" - self.end_str = "eos" - dict_character = [self.beg_str] + dict_character + [self.end_str] - return dict_character - - def __call__(self, data): - text = data['label'] - text = self.encode(text) - if text is None: - return None - if len(text) >= self.max_text_len: - return None - data['length'] = np.array(len(text)) - text = [0] + text + [len(self.character) - 1] + [0] * (self.max_text_len - - len(text) - 2) - data['label'] = np.array(text) - return data - - def get_ignored_tokens(self): - beg_idx = self.get_beg_end_flag_idx("beg") - end_idx = self.get_beg_end_flag_idx("end") - return [beg_idx, end_idx] - - def get_beg_end_flag_idx(self, beg_or_end): - if beg_or_end == "beg": - idx = np.array(self.dict[self.beg_str]) - elif beg_or_end == "end": - idx = np.array(self.dict[self.end_str]) - else: - assert False, "Unsupport type %s in get_beg_end_flag_idx" \ - % beg_or_end - return idx - - -class SEEDLabelEncode(BaseRecLabelEncode): - """ Convert between text-label and text-index """ - - def __init__(self, - max_text_length, - character_dict_path=None, - use_space_char=False, - **kwargs): - super(SEEDLabelEncode, self).__init__( - max_text_length, character_dict_path, use_space_char) - - def add_special_char(self, dict_character): - self.padding = "padding" - self.end_str = "eos" - self.unknown = "unknown" - dict_character = dict_character + [ - self.end_str, self.padding, self.unknown - ] - return dict_character - - def __call__(self, data): - text = data['label'] - text = self.encode(text) - if text is None: - return None - if len(text) >= self.max_text_len: - return None - data['length'] = np.array(len(text)) + 1 # conclude eos - text = text + [len(self.character) - 3] + [len(self.character) - 2] * ( - self.max_text_len - len(text) - 1) - data['label'] = np.array(text) - return data - - -class SRNLabelEncode(BaseRecLabelEncode): - """ Convert between text-label and text-index """ - - def __init__(self, - max_text_length=25, - character_dict_path=None, - use_space_char=False, - **kwargs): - super(SRNLabelEncode, self).__init__( - max_text_length, character_dict_path, use_space_char) - - def add_special_char(self, dict_character): - dict_character = dict_character + [self.beg_str, self.end_str] - return dict_character - - def __call__(self, data): - text = data['label'] - text = self.encode(text) - char_num = len(self.character) - if text is None: - return None - if len(text) > self.max_text_len: - return None - data['length'] = np.array(len(text)) - text = text + [char_num - 1] * (self.max_text_len - len(text)) - data['label'] = np.array(text) - return data - - def get_ignored_tokens(self): - beg_idx = self.get_beg_end_flag_idx("beg") - end_idx = self.get_beg_end_flag_idx("end") - return [beg_idx, end_idx] - - def get_beg_end_flag_idx(self, beg_or_end): - if beg_or_end == "beg": - idx = np.array(self.dict[self.beg_str]) - elif beg_or_end == "end": - idx = np.array(self.dict[self.end_str]) - else: - assert False, "Unsupport type %s in get_beg_end_flag_idx" \ - % beg_or_end - return idx - - -class TableLabelEncode(object): - """ Convert between text-label and text-index """ - - def __init__(self, - max_text_length, - max_elem_length, - max_cell_num, - character_dict_path, - span_weight=1.0, - **kwargs): - self.max_text_length = max_text_length - self.max_elem_length = max_elem_length - self.max_cell_num = max_cell_num - list_character, list_elem = self.load_char_elem_dict( - character_dict_path) - list_character = self.add_special_char(list_character) - list_elem = self.add_special_char(list_elem) - self.dict_character = {} - for i, char in enumerate(list_character): - self.dict_character[char] = i - self.dict_elem = {} - for i, elem in enumerate(list_elem): - self.dict_elem[elem] = i - self.span_weight = span_weight - - def load_char_elem_dict(self, character_dict_path): - list_character = [] - list_elem = [] - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - substr = lines[0].decode('utf-8').strip("\r\n").split("\t") - character_num = int(substr[0]) - elem_num = int(substr[1]) - for cno in range(1, 1 + character_num): - character = lines[cno].decode('utf-8').strip("\r\n") - list_character.append(character) - for eno in range(1 + character_num, 1 + character_num + elem_num): - elem = lines[eno].decode('utf-8').strip("\r\n") - list_elem.append(elem) - return list_character, list_elem - - def add_special_char(self, list_character): - self.beg_str = "sos" - self.end_str = "eos" - list_character = [self.beg_str] + list_character + [self.end_str] - return list_character - - def get_span_idx_list(self): - span_idx_list = [] - for elem in self.dict_elem: - if 'span' in elem: - span_idx_list.append(self.dict_elem[elem]) - return span_idx_list - - def __call__(self, data): - cells = data['cells'] - structure = data['structure']['tokens'] - structure = self.encode(structure, 'elem') - if structure is None: - return None - elem_num = len(structure) - structure = [0] + structure + [len(self.dict_elem) - 1] - structure = structure + [0] * (self.max_elem_length + 2 - len(structure) - ) - structure = np.array(structure) - data['structure'] = structure - elem_char_idx1 = self.dict_elem[''] - elem_char_idx2 = self.dict_elem[' 0: - span_weight = len(td_idx_list) * 1.0 / len(span_idx_list) - span_weight = min(max(span_weight, 1.0), self.span_weight) - for cno in range(len(cells)): - if 'bbox' in cells[cno]: - bbox = cells[cno]['bbox'].copy() - bbox[0] = bbox[0] * 1.0 / img_width - bbox[1] = bbox[1] * 1.0 / img_height - bbox[2] = bbox[2] * 1.0 / img_width - bbox[3] = bbox[3] * 1.0 / img_height - td_idx = td_idx_list[cno] - bbox_list[td_idx] = bbox - bbox_list_mask[td_idx] = 1.0 - cand_span_idx = td_idx + 1 - if cand_span_idx < (self.max_elem_length + 2): - if structure[cand_span_idx] in span_idx_list: - structure_mask[cand_span_idx] = span_weight - - data['bbox_list'] = bbox_list - data['bbox_list_mask'] = bbox_list_mask - data['structure_mask'] = structure_mask - char_beg_idx = self.get_beg_end_flag_idx('beg', 'char') - char_end_idx = self.get_beg_end_flag_idx('end', 'char') - elem_beg_idx = self.get_beg_end_flag_idx('beg', 'elem') - elem_end_idx = self.get_beg_end_flag_idx('end', 'elem') - data['sp_tokens'] = np.array([ - char_beg_idx, char_end_idx, elem_beg_idx, elem_end_idx, - elem_char_idx1, elem_char_idx2, self.max_text_length, - self.max_elem_length, self.max_cell_num, elem_num - ]) - return data - - def encode(self, text, char_or_elem): - """convert text-label into text-index. - """ - if char_or_elem == "char": - max_len = self.max_text_length - current_dict = self.dict_character - else: - max_len = self.max_elem_length - current_dict = self.dict_elem - if len(text) > max_len: - return None - if len(text) == 0: - if char_or_elem == "char": - return [self.dict_character['space']] - else: - return None - text_list = [] - for char in text: - if char not in current_dict: - return None - text_list.append(current_dict[char]) - if len(text_list) == 0: - if char_or_elem == "char": - return [self.dict_character['space']] - else: - return None - return text_list - - def get_ignored_tokens(self, char_or_elem): - beg_idx = self.get_beg_end_flag_idx("beg", char_or_elem) - end_idx = self.get_beg_end_flag_idx("end", char_or_elem) - return [beg_idx, end_idx] - - def get_beg_end_flag_idx(self, beg_or_end, char_or_elem): - if char_or_elem == "char": - if beg_or_end == "beg": - idx = np.array(self.dict_character[self.beg_str]) - elif beg_or_end == "end": - idx = np.array(self.dict_character[self.end_str]) - else: - assert False, "Unsupport type %s in get_beg_end_flag_idx of char" \ - % beg_or_end - elif char_or_elem == "elem": - if beg_or_end == "beg": - idx = np.array(self.dict_elem[self.beg_str]) - elif beg_or_end == "end": - idx = np.array(self.dict_elem[self.end_str]) - else: - assert False, "Unsupport type %s in get_beg_end_flag_idx of elem" \ - % beg_or_end - else: - assert False, "Unsupport type %s in char_or_elem" \ - % char_or_elem - return idx - - -class SARLabelEncode(BaseRecLabelEncode): - """ Convert between text-label and text-index """ - - def __init__(self, - max_text_length, - character_dict_path=None, - use_space_char=False, - **kwargs): - super(SARLabelEncode, self).__init__( - max_text_length, character_dict_path, use_space_char) - - def add_special_char(self, dict_character): - beg_end_str = "" - unknown_str = "" - padding_str = "" - dict_character = dict_character + [unknown_str] - self.unknown_idx = len(dict_character) - 1 - dict_character = dict_character + [beg_end_str] - self.start_idx = len(dict_character) - 1 - self.end_idx = len(dict_character) - 1 - dict_character = dict_character + [padding_str] - self.padding_idx = len(dict_character) - 1 - - return dict_character - - def __call__(self, data): - text = data['label'] - text = self.encode(text) - if text is None: - return None - if len(text) >= self.max_text_len - 1: - return None - data['length'] = np.array(len(text)) - target = [self.start_idx] + text + [self.end_idx] - padded_text = [self.padding_idx for _ in range(self.max_text_len)] - - padded_text[:len(target)] = target - data['label'] = np.array(padded_text) - return data - - def get_ignored_tokens(self): - return [self.padding_idx] - - -class PRENLabelEncode(BaseRecLabelEncode): - def __init__(self, - max_text_length, - character_dict_path, - use_space_char=False, - **kwargs): - super(PRENLabelEncode, self).__init__( - max_text_length, character_dict_path, use_space_char) - - def add_special_char(self, dict_character): - padding_str = '' # 0 - end_str = '' # 1 - unknown_str = '' # 2 - - dict_character = [padding_str, end_str, unknown_str] + dict_character - self.padding_idx = 0 - self.end_idx = 1 - self.unknown_idx = 2 - - return dict_character - - def encode(self, text): - if len(text) == 0 or len(text) >= self.max_text_len: - return None - if self.lower: - text = text.lower() - text_list = [] - for char in text: - if char not in self.dict: - text_list.append(self.unknown_idx) - else: - text_list.append(self.dict[char]) - text_list.append(self.end_idx) - if len(text_list) < self.max_text_len: - text_list += [self.padding_idx] * ( - self.max_text_len - len(text_list)) - return text_list - - def __call__(self, data): - text = data['label'] - encoded_text = self.encode(text) - if encoded_text is None: - return None - data['label'] = np.array(encoded_text) - return data - - -class VQATokenLabelEncode(object): - """ - Label encode for NLP VQA methods - """ - - def __init__(self, - class_path, - contains_re=False, - add_special_ids=False, - algorithm='LayoutXLM', - infer_mode=False, - ocr_engine=None, - **kwargs): - super(VQATokenLabelEncode, self).__init__() - from paddlenlp.transformers import LayoutXLMTokenizer, LayoutLMTokenizer, LayoutLMv2Tokenizer - from ppocr.utils.utility import load_vqa_bio_label_maps - tokenizer_dict = { - 'LayoutXLM': { - 'class': LayoutXLMTokenizer, - 'pretrained_model': 'layoutxlm-base-uncased' - }, - 'LayoutLM': { - 'class': LayoutLMTokenizer, - 'pretrained_model': 'layoutlm-base-uncased' - }, - 'LayoutLMv2': { - 'class': LayoutLMv2Tokenizer, - 'pretrained_model': 'layoutlmv2-base-uncased' - } - } - self.contains_re = contains_re - tokenizer_config = tokenizer_dict[algorithm] - self.tokenizer = tokenizer_config['class'].from_pretrained( - tokenizer_config['pretrained_model']) - self.label2id_map, id2label_map = load_vqa_bio_label_maps(class_path) - self.add_special_ids = add_special_ids - self.infer_mode = infer_mode - self.ocr_engine = ocr_engine - - def __call__(self, data): - # load bbox and label info - ocr_info = self._load_ocr_info(data) - - height, width, _ = data['image'].shape - - words_list = [] - bbox_list = [] - input_ids_list = [] - token_type_ids_list = [] - segment_offset_id = [] - gt_label_list = [] - - entities = [] - - # for re - train_re = self.contains_re and not self.infer_mode - if train_re: - relations = [] - id2label = {} - entity_id_to_index_map = {} - empty_entity = set() - - data['ocr_info'] = copy.deepcopy(ocr_info) - - for info in ocr_info: - if train_re: - # for re - if len(info["text"]) == 0: - empty_entity.add(info["id"]) - continue - id2label[info["id"]] = info["label"] - relations.extend([tuple(sorted(l)) for l in info["linking"]]) - # smooth_box - bbox = self._smooth_box(info["bbox"], height, width) - - text = info["text"] - encode_res = self.tokenizer.encode( - text, pad_to_max_seq_len=False, return_attention_mask=True) - - if not self.add_special_ids: - # TODO: use tok.all_special_ids to remove - encode_res["input_ids"] = encode_res["input_ids"][1:-1] - encode_res["token_type_ids"] = encode_res["token_type_ids"][1: - -1] - encode_res["attention_mask"] = encode_res["attention_mask"][1: - -1] - # parse label - if not self.infer_mode: - label = info['label'] - gt_label = self._parse_label(label, encode_res) - - # construct entities for re - if train_re: - if gt_label[0] != self.label2id_map["O"]: - entity_id_to_index_map[info["id"]] = len(entities) - label = label.upper() - entities.append({ - "start": len(input_ids_list), - "end": - len(input_ids_list) + len(encode_res["input_ids"]), - "label": label.upper(), - }) - else: - entities.append({ - "start": len(input_ids_list), - "end": len(input_ids_list) + len(encode_res["input_ids"]), - "label": 'O', - }) - input_ids_list.extend(encode_res["input_ids"]) - token_type_ids_list.extend(encode_res["token_type_ids"]) - bbox_list.extend([bbox] * len(encode_res["input_ids"])) - words_list.append(text) - segment_offset_id.append(len(input_ids_list)) - if not self.infer_mode: - gt_label_list.extend(gt_label) - - data['input_ids'] = input_ids_list - data['token_type_ids'] = token_type_ids_list - data['bbox'] = bbox_list - data['attention_mask'] = [1] * len(input_ids_list) - data['labels'] = gt_label_list - data['segment_offset_id'] = segment_offset_id - data['tokenizer_params'] = dict( - padding_side=self.tokenizer.padding_side, - pad_token_type_id=self.tokenizer.pad_token_type_id, - pad_token_id=self.tokenizer.pad_token_id) - data['entities'] = entities - - if train_re: - data['relations'] = relations - data['id2label'] = id2label - data['empty_entity'] = empty_entity - data['entity_id_to_index_map'] = entity_id_to_index_map - return data - - def _load_ocr_info(self, data): - def trans_poly_to_bbox(poly): - x1 = np.min([p[0] for p in poly]) - x2 = np.max([p[0] for p in poly]) - y1 = np.min([p[1] for p in poly]) - y2 = np.max([p[1] for p in poly]) - return [x1, y1, x2, y2] - - if self.infer_mode: - ocr_result = self.ocr_engine.ocr(data['image'], cls=False) - ocr_info = [] - for res in ocr_result: - ocr_info.append({ - "text": res[1][0], - "bbox": trans_poly_to_bbox(res[0]), - "poly": res[0], - }) - return ocr_info - else: - info = data['label'] - # read text info - info_dict = json.loads(info) - return info_dict["ocr_info"] - - def _smooth_box(self, bbox, height, width): - bbox[0] = int(bbox[0] * 1000.0 / width) - bbox[2] = int(bbox[2] * 1000.0 / width) - bbox[1] = int(bbox[1] * 1000.0 / height) - bbox[3] = int(bbox[3] * 1000.0 / height) - return bbox - - def _parse_label(self, label, encode_res): - gt_label = [] - if label.lower() == "other": - gt_label.extend([0] * len(encode_res["input_ids"])) - else: - gt_label.append(self.label2id_map[("b-" + label).upper()]) - gt_label.extend([self.label2id_map[("i-" + label).upper()]] * - (len(encode_res["input_ids"]) - 1)) - return gt_label - - -class MultiLabelEncode(BaseRecLabelEncode): - def __init__(self, - max_text_length, - character_dict_path=None, - use_space_char=False, - **kwargs): - super(MultiLabelEncode, self).__init__( - max_text_length, character_dict_path, use_space_char) - - self.ctc_encode = CTCLabelEncode(max_text_length, character_dict_path, - use_space_char, **kwargs) - self.sar_encode = SARLabelEncode(max_text_length, character_dict_path, - use_space_char, **kwargs) - - def __call__(self, data): - - data_ctc = copy.deepcopy(data) - data_sar = copy.deepcopy(data) - data_out = dict() - data_out['img_path'] = data.get('img_path', None) - data_out['image'] = data['image'] - ctc = self.ctc_encode.__call__(data_ctc) - sar = self.sar_encode.__call__(data_sar) - if ctc is None or sar is None: - return None - data_out['label_ctc'] = ctc['label'] - data_out['label_sar'] = sar['label'] - data_out['length'] = ctc['length'] - return data_out diff --git a/backend/ppocr/data/imaug/make_border_map.py b/backend/ppocr/data/imaug/make_border_map.py deleted file mode 100644 index abab383..0000000 --- a/backend/ppocr/data/imaug/make_border_map.py +++ /dev/null @@ -1,173 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/make_border_map.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import numpy as np -import cv2 - -np.seterr(divide='ignore', invalid='ignore') -import pyclipper -from shapely.geometry import Polygon -import sys -import warnings - -warnings.simplefilter("ignore") - -__all__ = ['MakeBorderMap'] - - -class MakeBorderMap(object): - def __init__(self, - shrink_ratio=0.4, - thresh_min=0.3, - thresh_max=0.7, - **kwargs): - self.shrink_ratio = shrink_ratio - self.thresh_min = thresh_min - self.thresh_max = thresh_max - - def __call__(self, data): - - img = data['image'] - text_polys = data['polys'] - ignore_tags = data['ignore_tags'] - - canvas = np.zeros(img.shape[:2], dtype=np.float32) - mask = np.zeros(img.shape[:2], dtype=np.float32) - - for i in range(len(text_polys)): - if ignore_tags[i]: - continue - self.draw_border_map(text_polys[i], canvas, mask=mask) - canvas = canvas * (self.thresh_max - self.thresh_min) + self.thresh_min - - data['threshold_map'] = canvas - data['threshold_mask'] = mask - return data - - def draw_border_map(self, polygon, canvas, mask): - polygon = np.array(polygon) - assert polygon.ndim == 2 - assert polygon.shape[1] == 2 - - polygon_shape = Polygon(polygon) - if polygon_shape.area <= 0: - return - distance = polygon_shape.area * ( - 1 - np.power(self.shrink_ratio, 2)) / polygon_shape.length - subject = [tuple(l) for l in polygon] - padding = pyclipper.PyclipperOffset() - padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) - - padded_polygon = np.array(padding.Execute(distance)[0]) - cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0) - - xmin = padded_polygon[:, 0].min() - xmax = padded_polygon[:, 0].max() - ymin = padded_polygon[:, 1].min() - ymax = padded_polygon[:, 1].max() - width = xmax - xmin + 1 - height = ymax - ymin + 1 - - polygon[:, 0] = polygon[:, 0] - xmin - polygon[:, 1] = polygon[:, 1] - ymin - - xs = np.broadcast_to( - np.linspace( - 0, width - 1, num=width).reshape(1, width), (height, width)) - ys = np.broadcast_to( - np.linspace( - 0, height - 1, num=height).reshape(height, 1), (height, width)) - - distance_map = np.zeros( - (polygon.shape[0], height, width), dtype=np.float32) - for i in range(polygon.shape[0]): - j = (i + 1) % polygon.shape[0] - absolute_distance = self._distance(xs, ys, polygon[i], polygon[j]) - distance_map[i] = np.clip(absolute_distance / distance, 0, 1) - distance_map = distance_map.min(axis=0) - - xmin_valid = min(max(0, xmin), canvas.shape[1] - 1) - xmax_valid = min(max(0, xmax), canvas.shape[1] - 1) - ymin_valid = min(max(0, ymin), canvas.shape[0] - 1) - ymax_valid = min(max(0, ymax), canvas.shape[0] - 1) - canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax( - 1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height, - xmin_valid - xmin:xmax_valid - xmax + width], - canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1]) - - def _distance(self, xs, ys, point_1, point_2): - ''' - compute the distance from point to a line - ys: coordinates in the first axis - xs: coordinates in the second axis - point_1, point_2: (x, y), the end of the line - ''' - height, width = xs.shape[:2] - square_distance_1 = np.square(xs - point_1[0]) + np.square(ys - point_1[ - 1]) - square_distance_2 = np.square(xs - point_2[0]) + np.square(ys - point_2[ - 1]) - square_distance = np.square(point_1[0] - point_2[0]) + np.square( - point_1[1] - point_2[1]) - - cosin = (square_distance - square_distance_1 - square_distance_2) / ( - 2 * np.sqrt(square_distance_1 * square_distance_2)) - square_sin = 1 - np.square(cosin) - square_sin = np.nan_to_num(square_sin) - result = np.sqrt(square_distance_1 * square_distance_2 * square_sin / - square_distance) - - result[cosin < - 0] = np.sqrt(np.fmin(square_distance_1, square_distance_2))[cosin - < 0] - # self.extend_line(point_1, point_2, result) - return result - - def extend_line(self, point_1, point_2, result, shrink_ratio): - ex_point_1 = (int( - round(point_1[0] + (point_1[0] - point_2[0]) * (1 + shrink_ratio))), - int( - round(point_1[1] + (point_1[1] - point_2[1]) * ( - 1 + shrink_ratio)))) - cv2.line( - result, - tuple(ex_point_1), - tuple(point_1), - 4096.0, - 1, - lineType=cv2.LINE_AA, - shift=0) - ex_point_2 = (int( - round(point_2[0] + (point_2[0] - point_1[0]) * (1 + shrink_ratio))), - int( - round(point_2[1] + (point_2[1] - point_1[1]) * ( - 1 + shrink_ratio)))) - cv2.line( - result, - tuple(ex_point_2), - tuple(point_2), - 4096.0, - 1, - lineType=cv2.LINE_AA, - shift=0) - return ex_point_1, ex_point_2 diff --git a/backend/ppocr/data/imaug/make_pse_gt.py b/backend/ppocr/data/imaug/make_pse_gt.py deleted file mode 100644 index 255d076..0000000 --- a/backend/ppocr/data/imaug/make_pse_gt.py +++ /dev/null @@ -1,106 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import cv2 -import numpy as np -import pyclipper -from shapely.geometry import Polygon - -__all__ = ['MakePseGt'] - - -class MakePseGt(object): - def __init__(self, kernel_num=7, size=640, min_shrink_ratio=0.4, **kwargs): - self.kernel_num = kernel_num - self.min_shrink_ratio = min_shrink_ratio - self.size = size - - def __call__(self, data): - - image = data['image'] - text_polys = data['polys'] - ignore_tags = data['ignore_tags'] - - h, w, _ = image.shape - short_edge = min(h, w) - if short_edge < self.size: - # keep short_size >= self.size - scale = self.size / short_edge - image = cv2.resize(image, dsize=None, fx=scale, fy=scale) - text_polys *= scale - - gt_kernels = [] - for i in range(1, self.kernel_num + 1): - # s1->sn, from big to small - rate = 1.0 - (1.0 - self.min_shrink_ratio) / (self.kernel_num - 1 - ) * i - text_kernel, ignore_tags = self.generate_kernel( - image.shape[0:2], rate, text_polys, ignore_tags) - gt_kernels.append(text_kernel) - - training_mask = np.ones(image.shape[0:2], dtype='uint8') - for i in range(text_polys.shape[0]): - if ignore_tags[i]: - cv2.fillPoly(training_mask, - text_polys[i].astype(np.int32)[np.newaxis, :, :], - 0) - - gt_kernels = np.array(gt_kernels) - gt_kernels[gt_kernels > 0] = 1 - - data['image'] = image - data['polys'] = text_polys - data['gt_kernels'] = gt_kernels[0:] - data['gt_text'] = gt_kernels[0] - data['mask'] = training_mask.astype('float32') - return data - - def generate_kernel(self, - img_size, - shrink_ratio, - text_polys, - ignore_tags=None): - """ - Refer to part of the code: - https://github.com/open-mmlab/mmocr/blob/main/mmocr/datasets/pipelines/textdet_targets/base_textdet_targets.py - """ - - h, w = img_size - text_kernel = np.zeros((h, w), dtype=np.float32) - for i, poly in enumerate(text_polys): - polygon = Polygon(poly) - distance = polygon.area * (1 - shrink_ratio * shrink_ratio) / ( - polygon.length + 1e-6) - subject = [tuple(l) for l in poly] - pco = pyclipper.PyclipperOffset() - pco.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) - shrinked = np.array(pco.Execute(-distance)) - - if len(shrinked) == 0 or shrinked.size == 0: - if ignore_tags is not None: - ignore_tags[i] = True - continue - try: - shrinked = np.array(shrinked[0]).reshape(-1, 2) - except: - if ignore_tags is not None: - ignore_tags[i] = True - continue - cv2.fillPoly(text_kernel, [shrinked.astype(np.int32)], i + 1) - return text_kernel, ignore_tags diff --git a/backend/ppocr/data/imaug/make_shrink_map.py b/backend/ppocr/data/imaug/make_shrink_map.py deleted file mode 100644 index 6c65c20..0000000 --- a/backend/ppocr/data/imaug/make_shrink_map.py +++ /dev/null @@ -1,123 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/make_shrink_map.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import numpy as np -import cv2 -from shapely.geometry import Polygon -import pyclipper - -__all__ = ['MakeShrinkMap'] - - -class MakeShrinkMap(object): - r''' - Making binary mask from detection data with ICDAR format. - Typically following the process of class `MakeICDARData`. - ''' - - def __init__(self, min_text_size=8, shrink_ratio=0.4, **kwargs): - self.min_text_size = min_text_size - self.shrink_ratio = shrink_ratio - - def __call__(self, data): - image = data['image'] - text_polys = data['polys'] - ignore_tags = data['ignore_tags'] - - h, w = image.shape[:2] - text_polys, ignore_tags = self.validate_polygons(text_polys, - ignore_tags, h, w) - gt = np.zeros((h, w), dtype=np.float32) - mask = np.ones((h, w), dtype=np.float32) - for i in range(len(text_polys)): - polygon = text_polys[i] - height = max(polygon[:, 1]) - min(polygon[:, 1]) - width = max(polygon[:, 0]) - min(polygon[:, 0]) - if ignore_tags[i] or min(height, width) < self.min_text_size: - cv2.fillPoly(mask, - polygon.astype(np.int32)[np.newaxis, :, :], 0) - ignore_tags[i] = True - else: - polygon_shape = Polygon(polygon) - subject = [tuple(l) for l in polygon] - padding = pyclipper.PyclipperOffset() - padding.AddPath(subject, pyclipper.JT_ROUND, - pyclipper.ET_CLOSEDPOLYGON) - shrinked = [] - - # Increase the shrink ratio every time we get multiple polygon returned back - possible_ratios = np.arange(self.shrink_ratio, 1, - self.shrink_ratio) - np.append(possible_ratios, 1) - # print(possible_ratios) - for ratio in possible_ratios: - # print(f"Change shrink ratio to {ratio}") - distance = polygon_shape.area * ( - 1 - np.power(ratio, 2)) / polygon_shape.length - shrinked = padding.Execute(-distance) - if len(shrinked) == 1: - break - - if shrinked == []: - cv2.fillPoly(mask, - polygon.astype(np.int32)[np.newaxis, :, :], 0) - ignore_tags[i] = True - continue - - for each_shirnk in shrinked: - shirnk = np.array(each_shirnk).reshape(-1, 2) - cv2.fillPoly(gt, [shirnk.astype(np.int32)], 1) - - data['shrink_map'] = gt - data['shrink_mask'] = mask - return data - - def validate_polygons(self, polygons, ignore_tags, h, w): - ''' - polygons (numpy.array, required): of shape (num_instances, num_points, 2) - ''' - if len(polygons) == 0: - return polygons, ignore_tags - assert len(polygons) == len(ignore_tags) - for polygon in polygons: - polygon[:, 0] = np.clip(polygon[:, 0], 0, w - 1) - polygon[:, 1] = np.clip(polygon[:, 1], 0, h - 1) - - for i in range(len(polygons)): - area = self.polygon_area(polygons[i]) - if abs(area) < 1: - ignore_tags[i] = True - if area > 0: - polygons[i] = polygons[i][::-1, :] - return polygons, ignore_tags - - def polygon_area(self, polygon): - """ - compute polygon area - """ - area = 0 - q = polygon[-1] - for p in polygon: - area += p[0] * q[1] - p[1] * q[0] - q = p - return area / 2.0 diff --git a/backend/ppocr/data/imaug/operators.py b/backend/ppocr/data/imaug/operators.py deleted file mode 100644 index 0973651..0000000 --- a/backend/ppocr/data/imaug/operators.py +++ /dev/null @@ -1,468 +0,0 @@ -""" -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import sys -import six -import cv2 -import numpy as np -import math - - -class DecodeImage(object): - """ decode image """ - - def __init__(self, - img_mode='RGB', - channel_first=False, - ignore_orientation=False, - **kwargs): - self.img_mode = img_mode - self.channel_first = channel_first - self.ignore_orientation = ignore_orientation - - def __call__(self, data): - img = data['image'] - if six.PY2: - assert type(img) is str and len( - img) > 0, "invalid input 'img' in DecodeImage" - else: - assert type(img) is bytes and len( - img) > 0, "invalid input 'img' in DecodeImage" - img = np.frombuffer(img, dtype='uint8') - if self.ignore_orientation: - img = cv2.imdecode(img, cv2.IMREAD_IGNORE_ORIENTATION | - cv2.IMREAD_COLOR) - else: - img = cv2.imdecode(img, 1) - if img is None: - return None - if self.img_mode == 'GRAY': - img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - elif self.img_mode == 'RGB': - assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) - img = img[:, :, ::-1] - - if self.channel_first: - img = img.transpose((2, 0, 1)) - - data['image'] = img - return data - - -class NRTRDecodeImage(object): - """ decode image """ - - def __init__(self, img_mode='RGB', channel_first=False, **kwargs): - self.img_mode = img_mode - self.channel_first = channel_first - - def __call__(self, data): - img = data['image'] - if six.PY2: - assert type(img) is str and len( - img) > 0, "invalid input 'img' in DecodeImage" - else: - assert type(img) is bytes and len( - img) > 0, "invalid input 'img' in DecodeImage" - img = np.frombuffer(img, dtype='uint8') - - img = cv2.imdecode(img, 1) - - if img is None: - return None - if self.img_mode == 'GRAY': - img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - elif self.img_mode == 'RGB': - assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) - img = img[:, :, ::-1] - img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - if self.channel_first: - img = img.transpose((2, 0, 1)) - data['image'] = img - return data - - -class NormalizeImage(object): - """ normalize image such as substract mean, divide std - """ - - def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs): - if isinstance(scale, str): - scale = eval(scale) - self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) - mean = mean if mean is not None else [0.485, 0.456, 0.406] - std = std if std is not None else [0.229, 0.224, 0.225] - - shape = (3, 1, 1) if order == 'chw' else (1, 1, 3) - self.mean = np.array(mean).reshape(shape).astype('float32') - self.std = np.array(std).reshape(shape).astype('float32') - - def __call__(self, data): - img = data['image'] - from PIL import Image - if isinstance(img, Image.Image): - img = np.array(img) - assert isinstance(img, - np.ndarray), "invalid input 'img' in NormalizeImage" - data['image'] = ( - img.astype('float32') * self.scale - self.mean) / self.std - return data - - -class ToCHWImage(object): - """ convert hwc image to chw image - """ - - def __init__(self, **kwargs): - pass - - def __call__(self, data): - img = data['image'] - from PIL import Image - if isinstance(img, Image.Image): - img = np.array(img) - data['image'] = img.transpose((2, 0, 1)) - return data - - -class Fasttext(object): - def __init__(self, path="None", **kwargs): - import fasttext - self.fast_model = fasttext.load_model(path) - - def __call__(self, data): - label = data['label'] - fast_label = self.fast_model[label] - data['fast_label'] = fast_label - return data - - -class KeepKeys(object): - def __init__(self, keep_keys, **kwargs): - self.keep_keys = keep_keys - - def __call__(self, data): - data_list = [] - for key in self.keep_keys: - data_list.append(data[key]) - return data_list - - -class Pad(object): - def __init__(self, size=None, size_div=32, **kwargs): - if size is not None and not isinstance(size, (int, list, tuple)): - raise TypeError("Type of target_size is invalid. Now is {}".format( - type(size))) - if isinstance(size, int): - size = [size, size] - self.size = size - self.size_div = size_div - - def __call__(self, data): - - img = data['image'] - img_h, img_w = img.shape[0], img.shape[1] - if self.size: - resize_h2, resize_w2 = self.size - assert ( - img_h < resize_h2 and img_w < resize_w2 - ), '(h, w) of target size should be greater than (img_h, img_w)' - else: - resize_h2 = max( - int(math.ceil(img.shape[0] / self.size_div) * self.size_div), - self.size_div) - resize_w2 = max( - int(math.ceil(img.shape[1] / self.size_div) * self.size_div), - self.size_div) - img = cv2.copyMakeBorder( - img, - 0, - resize_h2 - img_h, - 0, - resize_w2 - img_w, - cv2.BORDER_CONSTANT, - value=0) - data['image'] = img - return data - - -class Resize(object): - def __init__(self, size=(640, 640), **kwargs): - self.size = size - - def resize_image(self, img): - resize_h, resize_w = self.size - ori_h, ori_w = img.shape[:2] # (h, w, c) - ratio_h = float(resize_h) / ori_h - ratio_w = float(resize_w) / ori_w - img = cv2.resize(img, (int(resize_w), int(resize_h))) - return img, [ratio_h, ratio_w] - - def __call__(self, data): - img = data['image'] - if 'polys' in data: - text_polys = data['polys'] - - img_resize, [ratio_h, ratio_w] = self.resize_image(img) - if 'polys' in data: - new_boxes = [] - for box in text_polys: - new_box = [] - for cord in box: - new_box.append([cord[0] * ratio_w, cord[1] * ratio_h]) - new_boxes.append(new_box) - data['polys'] = np.array(new_boxes, dtype=np.float32) - data['image'] = img_resize - return data - - -class DetResizeForTest(object): - def __init__(self, **kwargs): - super(DetResizeForTest, self).__init__() - self.resize_type = 0 - if 'image_shape' in kwargs: - self.image_shape = kwargs['image_shape'] - self.resize_type = 1 - elif 'limit_side_len' in kwargs: - self.limit_side_len = kwargs['limit_side_len'] - self.limit_type = kwargs.get('limit_type', 'min') - elif 'resize_long' in kwargs: - self.resize_type = 2 - self.resize_long = kwargs.get('resize_long', 960) - else: - self.limit_side_len = 736 - self.limit_type = 'min' - - def __call__(self, data): - img = data['image'] - src_h, src_w, _ = img.shape - - if self.resize_type == 0: - # img, shape = self.resize_image_type0(img) - img, [ratio_h, ratio_w] = self.resize_image_type0(img) - elif self.resize_type == 2: - img, [ratio_h, ratio_w] = self.resize_image_type2(img) - else: - # img, shape = self.resize_image_type1(img) - img, [ratio_h, ratio_w] = self.resize_image_type1(img) - data['image'] = img - data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w]) - return data - - def resize_image_type1(self, img): - resize_h, resize_w = self.image_shape - ori_h, ori_w = img.shape[:2] # (h, w, c) - ratio_h = float(resize_h) / ori_h - ratio_w = float(resize_w) / ori_w - img = cv2.resize(img, (int(resize_w), int(resize_h))) - # return img, np.array([ori_h, ori_w]) - return img, [ratio_h, ratio_w] - - def resize_image_type0(self, img): - """ - resize image to a size multiple of 32 which is required by the network - args: - img(array): array with shape [h, w, c] - return(tuple): - img, (ratio_h, ratio_w) - """ - limit_side_len = self.limit_side_len - h, w, c = img.shape - - # limit the max side - if self.limit_type == 'max': - if max(h, w) > limit_side_len: - if h > w: - ratio = float(limit_side_len) / h - else: - ratio = float(limit_side_len) / w - else: - ratio = 1. - elif self.limit_type == 'min': - if min(h, w) < limit_side_len: - if h < w: - ratio = float(limit_side_len) / h - else: - ratio = float(limit_side_len) / w - else: - ratio = 1. - elif self.limit_type == 'resize_long': - ratio = float(limit_side_len) / max(h, w) - else: - raise Exception('not support limit type, image ') - resize_h = int(h * ratio) - resize_w = int(w * ratio) - - resize_h = max(int(round(resize_h / 32) * 32), 32) - resize_w = max(int(round(resize_w / 32) * 32), 32) - - try: - if int(resize_w) <= 0 or int(resize_h) <= 0: - return None, (None, None) - img = cv2.resize(img, (int(resize_w), int(resize_h))) - except: - print(img.shape, resize_w, resize_h) - sys.exit(0) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - return img, [ratio_h, ratio_w] - - def resize_image_type2(self, img): - h, w, _ = img.shape - - resize_w = w - resize_h = h - - if resize_h > resize_w: - ratio = float(self.resize_long) / resize_h - else: - ratio = float(self.resize_long) / resize_w - - resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - - max_stride = 128 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - img = cv2.resize(img, (int(resize_w), int(resize_h))) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - - return img, [ratio_h, ratio_w] - - -class E2EResizeForTest(object): - def __init__(self, **kwargs): - super(E2EResizeForTest, self).__init__() - self.max_side_len = kwargs['max_side_len'] - self.valid_set = kwargs['valid_set'] - - def __call__(self, data): - img = data['image'] - src_h, src_w, _ = img.shape - if self.valid_set == 'totaltext': - im_resized, [ratio_h, ratio_w] = self.resize_image_for_totaltext( - img, max_side_len=self.max_side_len) - else: - im_resized, (ratio_h, ratio_w) = self.resize_image( - img, max_side_len=self.max_side_len) - data['image'] = im_resized - data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w]) - return data - - def resize_image_for_totaltext(self, im, max_side_len=512): - - h, w, _ = im.shape - resize_w = w - resize_h = h - ratio = 1.25 - if h * ratio > max_side_len: - ratio = float(max_side_len) / resize_h - resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - - max_stride = 128 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - im = cv2.resize(im, (int(resize_w), int(resize_h))) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - return im, (ratio_h, ratio_w) - - def resize_image(self, im, max_side_len=512): - """ - resize image to a size multiple of max_stride which is required by the network - :param im: the resized image - :param max_side_len: limit of max image size to avoid out of memory in gpu - :return: the resized image and the resize ratio - """ - h, w, _ = im.shape - - resize_w = w - resize_h = h - - # Fix the longer side - if resize_h > resize_w: - ratio = float(max_side_len) / resize_h - else: - ratio = float(max_side_len) / resize_w - - resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - - max_stride = 128 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - im = cv2.resize(im, (int(resize_w), int(resize_h))) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - - return im, (ratio_h, ratio_w) - - -class KieResize(object): - def __init__(self, **kwargs): - super(KieResize, self).__init__() - self.max_side, self.min_side = kwargs['img_scale'][0], kwargs[ - 'img_scale'][1] - - def __call__(self, data): - img = data['image'] - points = data['points'] - src_h, src_w, _ = img.shape - im_resized, scale_factor, [ratio_h, ratio_w - ], [new_h, new_w] = self.resize_image(img) - resize_points = self.resize_boxes(img, points, scale_factor) - data['ori_image'] = img - data['ori_boxes'] = points - data['points'] = resize_points - data['image'] = im_resized - data['shape'] = np.array([new_h, new_w]) - return data - - def resize_image(self, img): - norm_img = np.zeros([1024, 1024, 3], dtype='float32') - scale = [512, 1024] - h, w = img.shape[:2] - max_long_edge = max(scale) - max_short_edge = min(scale) - scale_factor = min(max_long_edge / max(h, w), - max_short_edge / min(h, w)) - resize_w, resize_h = int(w * float(scale_factor) + 0.5), int(h * float( - scale_factor) + 0.5) - max_stride = 32 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - im = cv2.resize(img, (resize_w, resize_h)) - new_h, new_w = im.shape[:2] - w_scale = new_w / w - h_scale = new_h / h - scale_factor = np.array( - [w_scale, h_scale, w_scale, h_scale], dtype=np.float32) - norm_img[:new_h, :new_w, :] = im - return norm_img, scale_factor, [h_scale, w_scale], [new_h, new_w] - - def resize_boxes(self, im, points, scale_factor): - points = points * scale_factor - img_shape = im.shape[:2] - points[:, 0::2] = np.clip(points[:, 0::2], 0, img_shape[1]) - points[:, 1::2] = np.clip(points[:, 1::2], 0, img_shape[0]) - return points diff --git a/backend/ppocr/data/imaug/pg_process.py b/backend/ppocr/data/imaug/pg_process.py deleted file mode 100644 index 5303106..0000000 --- a/backend/ppocr/data/imaug/pg_process.py +++ /dev/null @@ -1,906 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math -import cv2 -import numpy as np - -__all__ = ['PGProcessTrain'] - - -class PGProcessTrain(object): - def __init__(self, - character_dict_path, - max_text_length, - max_text_nums, - tcl_len, - batch_size=14, - min_crop_size=24, - min_text_size=4, - max_text_size=512, - **kwargs): - self.tcl_len = tcl_len - self.max_text_length = max_text_length - self.max_text_nums = max_text_nums - self.batch_size = batch_size - self.min_crop_size = min_crop_size - self.min_text_size = min_text_size - self.max_text_size = max_text_size - self.Lexicon_Table = self.get_dict(character_dict_path) - self.pad_num = len(self.Lexicon_Table) - self.img_id = 0 - - def get_dict(self, character_dict_path): - character_str = "" - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - for line in lines: - line = line.decode('utf-8').strip("\n").strip("\r\n") - character_str += line - dict_character = list(character_str) - return dict_character - - def quad_area(self, poly): - """ - compute area of a polygon - :param poly: - :return: - """ - edge = [(poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]), - (poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]), - (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]), - (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])] - return np.sum(edge) / 2. - - def gen_quad_from_poly(self, poly): - """ - Generate min area quad from poly. - """ - point_num = poly.shape[0] - min_area_quad = np.zeros((4, 2), dtype=np.float32) - rect = cv2.minAreaRect(poly.astype( - np.int32)) # (center (x,y), (width, height), angle of rotation) - box = np.array(cv2.boxPoints(rect)) - - first_point_idx = 0 - min_dist = 1e4 - for i in range(4): - dist = np.linalg.norm(box[(i + 0) % 4] - poly[0]) + \ - np.linalg.norm(box[(i + 1) % 4] - poly[point_num // 2 - 1]) + \ - np.linalg.norm(box[(i + 2) % 4] - poly[point_num // 2]) + \ - np.linalg.norm(box[(i + 3) % 4] - poly[-1]) - if dist < min_dist: - min_dist = dist - first_point_idx = i - for i in range(4): - min_area_quad[i] = box[(first_point_idx + i) % 4] - - return min_area_quad - - def check_and_validate_polys(self, polys, tags, im_size): - """ - check so that the text poly is in the same direction, - and also filter some invalid polygons - :param polys: - :param tags: - :return: - """ - (h, w) = im_size - if polys.shape[0] == 0: - return polys, np.array([]), np.array([]) - polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1) - polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1) - - validated_polys = [] - validated_tags = [] - hv_tags = [] - for poly, tag in zip(polys, tags): - quad = self.gen_quad_from_poly(poly) - p_area = self.quad_area(quad) - if abs(p_area) < 1: - print('invalid poly') - continue - if p_area > 0: - if tag == False: - print('poly in wrong direction') - tag = True # reversed cases should be ignore - poly = poly[(0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, - 1), :] - quad = quad[(0, 3, 2, 1), :] - - len_w = np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[3] - - quad[2]) - len_h = np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[1] - - quad[2]) - hv_tag = 1 - - if len_w * 2.0 < len_h: - hv_tag = 0 - - validated_polys.append(poly) - validated_tags.append(tag) - hv_tags.append(hv_tag) - return np.array(validated_polys), np.array(validated_tags), np.array( - hv_tags) - - def crop_area(self, - im, - polys, - tags, - hv_tags, - txts, - crop_background=False, - max_tries=25): - """ - make random crop from the input image - :param im: - :param polys: [b,4,2] - :param tags: - :param crop_background: - :param max_tries: 50 -> 25 - :return: - """ - h, w, _ = im.shape - pad_h = h // 10 - pad_w = w // 10 - h_array = np.zeros((h + pad_h * 2), dtype=np.int32) - w_array = np.zeros((w + pad_w * 2), dtype=np.int32) - for poly in polys: - poly = np.round(poly, decimals=0).astype(np.int32) - minx = np.min(poly[:, 0]) - maxx = np.max(poly[:, 0]) - w_array[minx + pad_w:maxx + pad_w] = 1 - miny = np.min(poly[:, 1]) - maxy = np.max(poly[:, 1]) - h_array[miny + pad_h:maxy + pad_h] = 1 - # ensure the cropped area not across a text - h_axis = np.where(h_array == 0)[0] - w_axis = np.where(w_array == 0)[0] - if len(h_axis) == 0 or len(w_axis) == 0: - return im, polys, tags, hv_tags, txts - for i in range(max_tries): - xx = np.random.choice(w_axis, size=2) - xmin = np.min(xx) - pad_w - xmax = np.max(xx) - pad_w - xmin = np.clip(xmin, 0, w - 1) - xmax = np.clip(xmax, 0, w - 1) - yy = np.random.choice(h_axis, size=2) - ymin = np.min(yy) - pad_h - ymax = np.max(yy) - pad_h - ymin = np.clip(ymin, 0, h - 1) - ymax = np.clip(ymax, 0, h - 1) - if xmax - xmin < self.min_crop_size or \ - ymax - ymin < self.min_crop_size: - continue - if polys.shape[0] != 0: - poly_axis_in_area = (polys[:, :, 0] >= xmin) & (polys[:, :, 0] <= xmax) \ - & (polys[:, :, 1] >= ymin) & (polys[:, :, 1] <= ymax) - selected_polys = np.where( - np.sum(poly_axis_in_area, axis=1) == 4)[0] - else: - selected_polys = [] - if len(selected_polys) == 0: - # no text in this area - if crop_background: - txts_tmp = [] - for selected_poly in selected_polys: - txts_tmp.append(txts[selected_poly]) - txts = txts_tmp - return im[ymin: ymax + 1, xmin: xmax + 1, :], \ - polys[selected_polys], tags[selected_polys], hv_tags[selected_polys], txts - else: - continue - im = im[ymin:ymax + 1, xmin:xmax + 1, :] - polys = polys[selected_polys] - tags = tags[selected_polys] - hv_tags = hv_tags[selected_polys] - txts_tmp = [] - for selected_poly in selected_polys: - txts_tmp.append(txts[selected_poly]) - txts = txts_tmp - polys[:, :, 0] -= xmin - polys[:, :, 1] -= ymin - return im, polys, tags, hv_tags, txts - - return im, polys, tags, hv_tags, txts - - def fit_and_gather_tcl_points_v2(self, - min_area_quad, - poly, - max_h, - max_w, - fixed_point_num=64, - img_id=0, - reference_height=3): - """ - Find the center point of poly as key_points, then fit and gather. - """ - key_point_xys = [] - point_num = poly.shape[0] - for idx in range(point_num // 2): - center_point = (poly[idx] + poly[point_num - 1 - idx]) / 2.0 - key_point_xys.append(center_point) - - tmp_image = np.zeros( - shape=( - max_h, - max_w, ), dtype='float32') - cv2.polylines(tmp_image, [np.array(key_point_xys).astype('int32')], - False, 1.0) - ys, xs = np.where(tmp_image > 0) - xy_text = np.array(list(zip(xs, ys)), dtype='float32') - - left_center_pt = ( - (min_area_quad[0] - min_area_quad[1]) / 2.0).reshape(1, 2) - right_center_pt = ( - (min_area_quad[1] - min_area_quad[2]) / 2.0).reshape(1, 2) - proj_unit_vec = (right_center_pt - left_center_pt) / ( - np.linalg.norm(right_center_pt - left_center_pt) + 1e-6) - proj_unit_vec_tile = np.tile(proj_unit_vec, - (xy_text.shape[0], 1)) # (n, 2) - left_center_pt_tile = np.tile(left_center_pt, - (xy_text.shape[0], 1)) # (n, 2) - xy_text_to_left_center = xy_text - left_center_pt_tile - proj_value = np.sum(xy_text_to_left_center * proj_unit_vec_tile, axis=1) - xy_text = xy_text[np.argsort(proj_value)] - - # convert to np and keep the num of point not greater then fixed_point_num - pos_info = np.array(xy_text).reshape(-1, 2)[:, ::-1] # xy-> yx - point_num = len(pos_info) - if point_num > fixed_point_num: - keep_ids = [ - int((point_num * 1.0 / fixed_point_num) * x) - for x in range(fixed_point_num) - ] - pos_info = pos_info[keep_ids, :] - - keep = int(min(len(pos_info), fixed_point_num)) - if np.random.rand() < 0.2 and reference_height >= 3: - dl = (np.random.rand(keep) - 0.5) * reference_height * 0.3 - random_float = np.array([1, 0]).reshape([1, 2]) * dl.reshape( - [keep, 1]) - pos_info += random_float - pos_info[:, 0] = np.clip(pos_info[:, 0], 0, max_h - 1) - pos_info[:, 1] = np.clip(pos_info[:, 1], 0, max_w - 1) - - # padding to fixed length - pos_l = np.zeros((self.tcl_len, 3), dtype=np.int32) - pos_l[:, 0] = np.ones((self.tcl_len, )) * img_id - pos_m = np.zeros((self.tcl_len, 1), dtype=np.float32) - pos_l[:keep, 1:] = np.round(pos_info).astype(np.int32) - pos_m[:keep] = 1.0 - return pos_l, pos_m - - def generate_direction_map(self, poly_quads, n_char, direction_map): - """ - """ - width_list = [] - height_list = [] - for quad in poly_quads: - quad_w = (np.linalg.norm(quad[0] - quad[1]) + - np.linalg.norm(quad[2] - quad[3])) / 2.0 - quad_h = (np.linalg.norm(quad[0] - quad[3]) + - np.linalg.norm(quad[2] - quad[1])) / 2.0 - width_list.append(quad_w) - height_list.append(quad_h) - norm_width = max(sum(width_list) / n_char, 1.0) - average_height = max(sum(height_list) / len(height_list), 1.0) - k = 1 - for quad in poly_quads: - direct_vector_full = ( - (quad[1] + quad[2]) - (quad[0] + quad[3])) / 2.0 - direct_vector = direct_vector_full / ( - np.linalg.norm(direct_vector_full) + 1e-6) * norm_width - direction_label = tuple( - map(float, - [direct_vector[0], direct_vector[1], 1.0 / average_height])) - cv2.fillPoly(direction_map, - quad.round().astype(np.int32)[np.newaxis, :, :], - direction_label) - k += 1 - return direction_map - - def calculate_average_height(self, poly_quads): - """ - """ - height_list = [] - for quad in poly_quads: - quad_h = (np.linalg.norm(quad[0] - quad[3]) + - np.linalg.norm(quad[2] - quad[1])) / 2.0 - height_list.append(quad_h) - average_height = max(sum(height_list) / len(height_list), 1.0) - return average_height - - def generate_tcl_ctc_label(self, - h, - w, - polys, - tags, - text_strs, - ds_ratio, - tcl_ratio=0.3, - shrink_ratio_of_width=0.15): - """ - Generate polygon. - """ - score_map_big = np.zeros( - ( - h, - w, ), dtype=np.float32) - h, w = int(h * ds_ratio), int(w * ds_ratio) - polys = polys * ds_ratio - - score_map = np.zeros( - ( - h, - w, ), dtype=np.float32) - score_label_map = np.zeros( - ( - h, - w, ), dtype=np.float32) - tbo_map = np.zeros((h, w, 5), dtype=np.float32) - training_mask = np.ones( - ( - h, - w, ), dtype=np.float32) - direction_map = np.ones((h, w, 3)) * np.array([0, 0, 1]).reshape( - [1, 1, 3]).astype(np.float32) - - label_idx = 0 - score_label_map_text_label_list = [] - pos_list, pos_mask, label_list = [], [], [] - for poly_idx, poly_tag in enumerate(zip(polys, tags)): - poly = poly_tag[0] - tag = poly_tag[1] - - # generate min_area_quad - min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly) - min_area_quad_h = 0.5 * ( - np.linalg.norm(min_area_quad[0] - min_area_quad[3]) + - np.linalg.norm(min_area_quad[1] - min_area_quad[2])) - min_area_quad_w = 0.5 * ( - np.linalg.norm(min_area_quad[0] - min_area_quad[1]) + - np.linalg.norm(min_area_quad[2] - min_area_quad[3])) - - if min(min_area_quad_h, min_area_quad_w) < self.min_text_size * ds_ratio \ - or min(min_area_quad_h, min_area_quad_w) > self.max_text_size * ds_ratio: - continue - - if tag: - cv2.fillPoly(training_mask, - poly.astype(np.int32)[np.newaxis, :, :], 0.15) - else: - text_label = text_strs[poly_idx] - text_label = self.prepare_text_label(text_label, - self.Lexicon_Table) - - text_label_index_list = [[self.Lexicon_Table.index(c_)] - for c_ in text_label - if c_ in self.Lexicon_Table] - if len(text_label_index_list) < 1: - continue - - tcl_poly = self.poly2tcl(poly, tcl_ratio) - tcl_quads = self.poly2quads(tcl_poly) - poly_quads = self.poly2quads(poly) - - stcl_quads, quad_index = self.shrink_poly_along_width( - tcl_quads, - shrink_ratio_of_width=shrink_ratio_of_width, - expand_height_ratio=1.0 / tcl_ratio) - - cv2.fillPoly(score_map, - np.round(stcl_quads).astype(np.int32), 1.0) - cv2.fillPoly(score_map_big, - np.round(stcl_quads / ds_ratio).astype(np.int32), - 1.0) - - for idx, quad in enumerate(stcl_quads): - quad_mask = np.zeros((h, w), dtype=np.float32) - quad_mask = cv2.fillPoly( - quad_mask, - np.round(quad[np.newaxis, :, :]).astype(np.int32), 1.0) - tbo_map = self.gen_quad_tbo(poly_quads[quad_index[idx]], - quad_mask, tbo_map) - - # score label map and score_label_map_text_label_list for refine - if label_idx == 0: - text_pos_list_ = [[len(self.Lexicon_Table)], ] - score_label_map_text_label_list.append(text_pos_list_) - - label_idx += 1 - cv2.fillPoly(score_label_map, - np.round(poly_quads).astype(np.int32), label_idx) - score_label_map_text_label_list.append(text_label_index_list) - - # direction info, fix-me - n_char = len(text_label_index_list) - direction_map = self.generate_direction_map(poly_quads, n_char, - direction_map) - - # pos info - average_shrink_height = self.calculate_average_height( - stcl_quads) - pos_l, pos_m = self.fit_and_gather_tcl_points_v2( - min_area_quad, - poly, - max_h=h, - max_w=w, - fixed_point_num=64, - img_id=self.img_id, - reference_height=average_shrink_height) - - label_l = text_label_index_list - if len(text_label_index_list) < 2: - continue - - pos_list.append(pos_l) - pos_mask.append(pos_m) - label_list.append(label_l) - - # use big score_map for smooth tcl lines - score_map_big_resized = cv2.resize( - score_map_big, dsize=None, fx=ds_ratio, fy=ds_ratio) - score_map = np.array(score_map_big_resized > 1e-3, dtype='float32') - - return score_map, score_label_map, tbo_map, direction_map, training_mask, \ - pos_list, pos_mask, label_list, score_label_map_text_label_list - - def adjust_point(self, poly): - """ - adjust point order. - """ - point_num = poly.shape[0] - if point_num == 4: - len_1 = np.linalg.norm(poly[0] - poly[1]) - len_2 = np.linalg.norm(poly[1] - poly[2]) - len_3 = np.linalg.norm(poly[2] - poly[3]) - len_4 = np.linalg.norm(poly[3] - poly[0]) - - if (len_1 + len_3) * 1.5 < (len_2 + len_4): - poly = poly[[1, 2, 3, 0], :] - - elif point_num > 4: - vector_1 = poly[0] - poly[1] - vector_2 = poly[1] - poly[2] - cos_theta = np.dot(vector_1, vector_2) / ( - np.linalg.norm(vector_1) * np.linalg.norm(vector_2) + 1e-6) - theta = np.arccos(np.round(cos_theta, decimals=4)) - - if abs(theta) > (70 / 180 * math.pi): - index = list(range(1, point_num)) + [0] - poly = poly[np.array(index), :] - return poly - - def gen_min_area_quad_from_poly(self, poly): - """ - Generate min area quad from poly. - """ - point_num = poly.shape[0] - min_area_quad = np.zeros((4, 2), dtype=np.float32) - if point_num == 4: - min_area_quad = poly - center_point = np.sum(poly, axis=0) / 4 - else: - rect = cv2.minAreaRect(poly.astype( - np.int32)) # (center (x,y), (width, height), angle of rotation) - center_point = rect[0] - box = np.array(cv2.boxPoints(rect)) - - first_point_idx = 0 - min_dist = 1e4 - for i in range(4): - dist = np.linalg.norm(box[(i + 0) % 4] - poly[0]) + \ - np.linalg.norm(box[(i + 1) % 4] - poly[point_num // 2 - 1]) + \ - np.linalg.norm(box[(i + 2) % 4] - poly[point_num // 2]) + \ - np.linalg.norm(box[(i + 3) % 4] - poly[-1]) - if dist < min_dist: - min_dist = dist - first_point_idx = i - - for i in range(4): - min_area_quad[i] = box[(first_point_idx + i) % 4] - - return min_area_quad, center_point - - def shrink_quad_along_width(self, - quad, - begin_width_ratio=0., - end_width_ratio=1.): - """ - Generate shrink_quad_along_width. - """ - ratio_pair = np.array( - [[begin_width_ratio], [end_width_ratio]], dtype=np.float32) - p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair - p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair - return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]]) - - def shrink_poly_along_width(self, - quads, - shrink_ratio_of_width, - expand_height_ratio=1.0): - """ - shrink poly with given length. - """ - upper_edge_list = [] - - def get_cut_info(edge_len_list, cut_len): - for idx, edge_len in enumerate(edge_len_list): - cut_len -= edge_len - if cut_len <= 0.000001: - ratio = (cut_len + edge_len_list[idx]) / edge_len_list[idx] - return idx, ratio - - for quad in quads: - upper_edge_len = np.linalg.norm(quad[0] - quad[1]) - upper_edge_list.append(upper_edge_len) - - # length of left edge and right edge. - left_length = np.linalg.norm(quads[0][0] - quads[0][ - 3]) * expand_height_ratio - right_length = np.linalg.norm(quads[-1][1] - quads[-1][ - 2]) * expand_height_ratio - - shrink_length = min(left_length, right_length, - sum(upper_edge_list)) * shrink_ratio_of_width - # shrinking length - upper_len_left = shrink_length - upper_len_right = sum(upper_edge_list) - shrink_length - - left_idx, left_ratio = get_cut_info(upper_edge_list, upper_len_left) - left_quad = self.shrink_quad_along_width( - quads[left_idx], begin_width_ratio=left_ratio, end_width_ratio=1) - right_idx, right_ratio = get_cut_info(upper_edge_list, upper_len_right) - right_quad = self.shrink_quad_along_width( - quads[right_idx], begin_width_ratio=0, end_width_ratio=right_ratio) - - out_quad_list = [] - if left_idx == right_idx: - out_quad_list.append( - [left_quad[0], right_quad[1], right_quad[2], left_quad[3]]) - else: - out_quad_list.append(left_quad) - for idx in range(left_idx + 1, right_idx): - out_quad_list.append(quads[idx]) - out_quad_list.append(right_quad) - - return np.array(out_quad_list), list(range(left_idx, right_idx + 1)) - - def prepare_text_label(self, label_str, Lexicon_Table): - """ - Prepare text lablel by given Lexicon_Table. - """ - if len(Lexicon_Table) == 36: - return label_str.lower() - else: - return label_str - - def vector_angle(self, A, B): - """ - Calculate the angle between vector AB and x-axis positive direction. - """ - AB = np.array([B[1] - A[1], B[0] - A[0]]) - return np.arctan2(*AB) - - def theta_line_cross_point(self, theta, point): - """ - Calculate the line through given point and angle in ax + by + c =0 form. - """ - x, y = point - cos = np.cos(theta) - sin = np.sin(theta) - return [sin, -cos, cos * y - sin * x] - - def line_cross_two_point(self, A, B): - """ - Calculate the line through given point A and B in ax + by + c =0 form. - """ - angle = self.vector_angle(A, B) - return self.theta_line_cross_point(angle, A) - - def average_angle(self, poly): - """ - Calculate the average angle between left and right edge in given poly. - """ - p0, p1, p2, p3 = poly - angle30 = self.vector_angle(p3, p0) - angle21 = self.vector_angle(p2, p1) - return (angle30 + angle21) / 2 - - def line_cross_point(self, line1, line2): - """ - line1 and line2 in 0=ax+by+c form, compute the cross point of line1 and line2 - """ - a1, b1, c1 = line1 - a2, b2, c2 = line2 - d = a1 * b2 - a2 * b1 - - if d == 0: - print('Cross point does not exist') - return np.array([0, 0], dtype=np.float32) - else: - x = (b1 * c2 - b2 * c1) / d - y = (a2 * c1 - a1 * c2) / d - - return np.array([x, y], dtype=np.float32) - - def quad2tcl(self, poly, ratio): - """ - Generate center line by poly clock-wise point. (4, 2) - """ - ratio_pair = np.array( - [[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32) - p0_3 = poly[0] + (poly[3] - poly[0]) * ratio_pair - p1_2 = poly[1] + (poly[2] - poly[1]) * ratio_pair - return np.array([p0_3[0], p1_2[0], p1_2[1], p0_3[1]]) - - def poly2tcl(self, poly, ratio): - """ - Generate center line by poly clock-wise point. - """ - ratio_pair = np.array( - [[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32) - tcl_poly = np.zeros_like(poly) - point_num = poly.shape[0] - - for idx in range(point_num // 2): - point_pair = poly[idx] + (poly[point_num - 1 - idx] - poly[idx] - ) * ratio_pair - tcl_poly[idx] = point_pair[0] - tcl_poly[point_num - 1 - idx] = point_pair[1] - return tcl_poly - - def gen_quad_tbo(self, quad, tcl_mask, tbo_map): - """ - Generate tbo_map for give quad. - """ - # upper and lower line function: ax + by + c = 0; - up_line = self.line_cross_two_point(quad[0], quad[1]) - lower_line = self.line_cross_two_point(quad[3], quad[2]) - - quad_h = 0.5 * (np.linalg.norm(quad[0] - quad[3]) + - np.linalg.norm(quad[1] - quad[2])) - quad_w = 0.5 * (np.linalg.norm(quad[0] - quad[1]) + - np.linalg.norm(quad[2] - quad[3])) - - # average angle of left and right line. - angle = self.average_angle(quad) - - xy_in_poly = np.argwhere(tcl_mask == 1) - for y, x in xy_in_poly: - point = (x, y) - line = self.theta_line_cross_point(angle, point) - cross_point_upper = self.line_cross_point(up_line, line) - cross_point_lower = self.line_cross_point(lower_line, line) - ##FIX, offset reverse - upper_offset_x, upper_offset_y = cross_point_upper - point - lower_offset_x, lower_offset_y = cross_point_lower - point - tbo_map[y, x, 0] = upper_offset_y - tbo_map[y, x, 1] = upper_offset_x - tbo_map[y, x, 2] = lower_offset_y - tbo_map[y, x, 3] = lower_offset_x - tbo_map[y, x, 4] = 1.0 / max(min(quad_h, quad_w), 1.0) * 2 - return tbo_map - - def poly2quads(self, poly): - """ - Split poly into quads. - """ - quad_list = [] - point_num = poly.shape[0] - - # point pair - point_pair_list = [] - for idx in range(point_num // 2): - point_pair = [poly[idx], poly[point_num - 1 - idx]] - point_pair_list.append(point_pair) - - quad_num = point_num // 2 - 1 - for idx in range(quad_num): - # reshape and adjust to clock-wise - quad_list.append((np.array(point_pair_list)[[idx, idx + 1]] - ).reshape(4, 2)[[0, 2, 3, 1]]) - - return np.array(quad_list) - - def rotate_im_poly(self, im, text_polys): - """ - rotate image with 90 / 180 / 270 degre - """ - im_w, im_h = im.shape[1], im.shape[0] - dst_im = im.copy() - dst_polys = [] - rand_degree_ratio = np.random.rand() - rand_degree_cnt = 1 - if rand_degree_ratio > 0.5: - rand_degree_cnt = 3 - for i in range(rand_degree_cnt): - dst_im = np.rot90(dst_im) - rot_degree = -90 * rand_degree_cnt - rot_angle = rot_degree * math.pi / 180.0 - n_poly = text_polys.shape[0] - cx, cy = 0.5 * im_w, 0.5 * im_h - ncx, ncy = 0.5 * dst_im.shape[1], 0.5 * dst_im.shape[0] - for i in range(n_poly): - wordBB = text_polys[i] - poly = [] - for j in range(4): # 16->4 - sx, sy = wordBB[j][0], wordBB[j][1] - dx = math.cos(rot_angle) * (sx - cx) - math.sin(rot_angle) * ( - sy - cy) + ncx - dy = math.sin(rot_angle) * (sx - cx) + math.cos(rot_angle) * ( - sy - cy) + ncy - poly.append([dx, dy]) - dst_polys.append(poly) - return dst_im, np.array(dst_polys, dtype=np.float32) - - def __call__(self, data): - input_size = 512 - im = data['image'] - text_polys = data['polys'] - text_tags = data['ignore_tags'] - text_strs = data['texts'] - h, w, _ = im.shape - text_polys, text_tags, hv_tags = self.check_and_validate_polys( - text_polys, text_tags, (h, w)) - if text_polys.shape[0] <= 0: - return None - # set aspect ratio and keep area fix - asp_scales = np.arange(1.0, 1.55, 0.1) - asp_scale = np.random.choice(asp_scales) - if np.random.rand() < 0.5: - asp_scale = 1.0 / asp_scale - asp_scale = math.sqrt(asp_scale) - - asp_wx = asp_scale - asp_hy = 1.0 / asp_scale - im = cv2.resize(im, dsize=None, fx=asp_wx, fy=asp_hy) - text_polys[:, :, 0] *= asp_wx - text_polys[:, :, 1] *= asp_hy - - h, w, _ = im.shape - if max(h, w) > 2048: - rd_scale = 2048.0 / max(h, w) - im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale) - text_polys *= rd_scale - h, w, _ = im.shape - if min(h, w) < 16: - return None - - # no background - im, text_polys, text_tags, hv_tags, text_strs = self.crop_area( - im, - text_polys, - text_tags, - hv_tags, - text_strs, - crop_background=False) - - if text_polys.shape[0] == 0: - return None - # # continue for all ignore case - if np.sum((text_tags * 1.0)) >= text_tags.size: - return None - new_h, new_w, _ = im.shape - if (new_h is None) or (new_w is None): - return None - # resize image - std_ratio = float(input_size) / max(new_w, new_h) - rand_scales = np.array( - [0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0, 1.0, 1.0, 1.0, 1.0]) - rz_scale = std_ratio * np.random.choice(rand_scales) - im = cv2.resize(im, dsize=None, fx=rz_scale, fy=rz_scale) - text_polys[:, :, 0] *= rz_scale - text_polys[:, :, 1] *= rz_scale - - # add gaussian blur - if np.random.rand() < 0.1 * 0.5: - ks = np.random.permutation(5)[0] + 1 - ks = int(ks / 2) * 2 + 1 - im = cv2.GaussianBlur(im, ksize=(ks, ks), sigmaX=0, sigmaY=0) - # add brighter - if np.random.rand() < 0.1 * 0.5: - im = im * (1.0 + np.random.rand() * 0.5) - im = np.clip(im, 0.0, 255.0) - # add darker - if np.random.rand() < 0.1 * 0.5: - im = im * (1.0 - np.random.rand() * 0.5) - im = np.clip(im, 0.0, 255.0) - - # Padding the im to [input_size, input_size] - new_h, new_w, _ = im.shape - if min(new_w, new_h) < input_size * 0.5: - return None - im_padded = np.ones((input_size, input_size, 3), dtype=np.float32) - im_padded[:, :, 2] = 0.485 * 255 - im_padded[:, :, 1] = 0.456 * 255 - im_padded[:, :, 0] = 0.406 * 255 - - # Random the start position - del_h = input_size - new_h - del_w = input_size - new_w - sh, sw = 0, 0 - if del_h > 1: - sh = int(np.random.rand() * del_h) - if del_w > 1: - sw = int(np.random.rand() * del_w) - - # Padding - im_padded[sh:sh + new_h, sw:sw + new_w, :] = im.copy() - text_polys[:, :, 0] += sw - text_polys[:, :, 1] += sh - - score_map, score_label_map, border_map, direction_map, training_mask, \ - pos_list, pos_mask, label_list, score_label_map_text_label = self.generate_tcl_ctc_label(input_size, - input_size, - text_polys, - text_tags, - text_strs, 0.25) - if len(label_list) <= 0: # eliminate negative samples - return None - pos_list_temp = np.zeros([64, 3]) - pos_mask_temp = np.zeros([64, 1]) - label_list_temp = np.zeros([self.max_text_length, 1]) + self.pad_num - - for i, label in enumerate(label_list): - n = len(label) - if n > self.max_text_length: - label_list[i] = label[:self.max_text_length] - continue - while n < self.max_text_length: - label.append([self.pad_num]) - n += 1 - - for i in range(len(label_list)): - label_list[i] = np.array(label_list[i]) - - if len(pos_list) <= 0 or len(pos_list) > self.max_text_nums: - return None - for __ in range(self.max_text_nums - len(pos_list), 0, -1): - pos_list.append(pos_list_temp) - pos_mask.append(pos_mask_temp) - label_list.append(label_list_temp) - - if self.img_id == self.batch_size - 1: - self.img_id = 0 - else: - self.img_id += 1 - - im_padded[:, :, 2] -= 0.485 * 255 - im_padded[:, :, 1] -= 0.456 * 255 - im_padded[:, :, 0] -= 0.406 * 255 - im_padded[:, :, 2] /= (255.0 * 0.229) - im_padded[:, :, 1] /= (255.0 * 0.224) - im_padded[:, :, 0] /= (255.0 * 0.225) - im_padded = im_padded.transpose((2, 0, 1)) - images = im_padded[::-1, :, :] - tcl_maps = score_map[np.newaxis, :, :] - tcl_label_maps = score_label_map[np.newaxis, :, :] - border_maps = border_map.transpose((2, 0, 1)) - direction_maps = direction_map.transpose((2, 0, 1)) - training_masks = training_mask[np.newaxis, :, :] - pos_list = np.array(pos_list) - pos_mask = np.array(pos_mask) - label_list = np.array(label_list) - data['images'] = images - data['tcl_maps'] = tcl_maps - data['tcl_label_maps'] = tcl_label_maps - data['border_maps'] = border_maps - data['direction_maps'] = direction_maps - data['training_masks'] = training_masks - data['label_list'] = label_list - data['pos_list'] = pos_list - data['pos_mask'] = pos_mask - return data diff --git a/backend/ppocr/data/imaug/randaugment.py b/backend/ppocr/data/imaug/randaugment.py deleted file mode 100644 index 56f114d..0000000 --- a/backend/ppocr/data/imaug/randaugment.py +++ /dev/null @@ -1,143 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from PIL import Image, ImageEnhance, ImageOps -import numpy as np -import random -import six - - -class RawRandAugment(object): - def __init__(self, - num_layers=2, - magnitude=5, - fillcolor=(128, 128, 128), - **kwargs): - self.num_layers = num_layers - self.magnitude = magnitude - self.max_level = 10 - - abso_level = self.magnitude / self.max_level - self.level_map = { - "shearX": 0.3 * abso_level, - "shearY": 0.3 * abso_level, - "translateX": 150.0 / 331 * abso_level, - "translateY": 150.0 / 331 * abso_level, - "rotate": 30 * abso_level, - "color": 0.9 * abso_level, - "posterize": int(4.0 * abso_level), - "solarize": 256.0 * abso_level, - "contrast": 0.9 * abso_level, - "sharpness": 0.9 * abso_level, - "brightness": 0.9 * abso_level, - "autocontrast": 0, - "equalize": 0, - "invert": 0 - } - - # from https://stackoverflow.com/questions/5252170/ - # specify-image-filling-color-when-rotating-in-python-with-pil-and-setting-expand - def rotate_with_fill(img, magnitude): - rot = img.convert("RGBA").rotate(magnitude) - return Image.composite(rot, - Image.new("RGBA", rot.size, (128, ) * 4), - rot).convert(img.mode) - - rnd_ch_op = random.choice - - self.func = { - "shearX": lambda img, magnitude: img.transform( - img.size, - Image.AFFINE, - (1, magnitude * rnd_ch_op([-1, 1]), 0, 0, 1, 0), - Image.BICUBIC, - fillcolor=fillcolor), - "shearY": lambda img, magnitude: img.transform( - img.size, - Image.AFFINE, - (1, 0, 0, magnitude * rnd_ch_op([-1, 1]), 1, 0), - Image.BICUBIC, - fillcolor=fillcolor), - "translateX": lambda img, magnitude: img.transform( - img.size, - Image.AFFINE, - (1, 0, magnitude * img.size[0] * rnd_ch_op([-1, 1]), 0, 1, 0), - fillcolor=fillcolor), - "translateY": lambda img, magnitude: img.transform( - img.size, - Image.AFFINE, - (1, 0, 0, 0, 1, magnitude * img.size[1] * rnd_ch_op([-1, 1])), - fillcolor=fillcolor), - "rotate": lambda img, magnitude: rotate_with_fill(img, magnitude), - "color": lambda img, magnitude: ImageEnhance.Color(img).enhance( - 1 + magnitude * rnd_ch_op([-1, 1])), - "posterize": lambda img, magnitude: - ImageOps.posterize(img, magnitude), - "solarize": lambda img, magnitude: - ImageOps.solarize(img, magnitude), - "contrast": lambda img, magnitude: - ImageEnhance.Contrast(img).enhance( - 1 + magnitude * rnd_ch_op([-1, 1])), - "sharpness": lambda img, magnitude: - ImageEnhance.Sharpness(img).enhance( - 1 + magnitude * rnd_ch_op([-1, 1])), - "brightness": lambda img, magnitude: - ImageEnhance.Brightness(img).enhance( - 1 + magnitude * rnd_ch_op([-1, 1])), - "autocontrast": lambda img, magnitude: - ImageOps.autocontrast(img), - "equalize": lambda img, magnitude: ImageOps.equalize(img), - "invert": lambda img, magnitude: ImageOps.invert(img) - } - - def __call__(self, img): - avaiable_op_names = list(self.level_map.keys()) - for layer_num in range(self.num_layers): - op_name = np.random.choice(avaiable_op_names) - img = self.func[op_name](img, self.level_map[op_name]) - return img - - -class RandAugment(RawRandAugment): - """ RandAugment wrapper to auto fit different img types """ - - def __init__(self, prob=0.5, *args, **kwargs): - self.prob = prob - if six.PY2: - super(RandAugment, self).__init__(*args, **kwargs) - else: - super().__init__(*args, **kwargs) - - def __call__(self, data): - if np.random.rand() > self.prob: - return data - img = data['image'] - if not isinstance(img, Image.Image): - img = np.ascontiguousarray(img) - img = Image.fromarray(img) - - if six.PY2: - img = super(RandAugment, self).__call__(img) - else: - img = super().__call__(img) - - if isinstance(img, Image.Image): - img = np.asarray(img) - data['image'] = img - return data diff --git a/backend/ppocr/data/imaug/random_crop_data.py b/backend/ppocr/data/imaug/random_crop_data.py deleted file mode 100644 index 64aa110..0000000 --- a/backend/ppocr/data/imaug/random_crop_data.py +++ /dev/null @@ -1,234 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/random_crop_data.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import numpy as np -import cv2 -import random - - -def is_poly_in_rect(poly, x, y, w, h): - poly = np.array(poly) - if poly[:, 0].min() < x or poly[:, 0].max() > x + w: - return False - if poly[:, 1].min() < y or poly[:, 1].max() > y + h: - return False - return True - - -def is_poly_outside_rect(poly, x, y, w, h): - poly = np.array(poly) - if poly[:, 0].max() < x or poly[:, 0].min() > x + w: - return True - if poly[:, 1].max() < y or poly[:, 1].min() > y + h: - return True - return False - - -def split_regions(axis): - regions = [] - min_axis = 0 - for i in range(1, axis.shape[0]): - if axis[i] != axis[i - 1] + 1: - region = axis[min_axis:i] - min_axis = i - regions.append(region) - return regions - - -def random_select(axis, max_size): - xx = np.random.choice(axis, size=2) - xmin = np.min(xx) - xmax = np.max(xx) - xmin = np.clip(xmin, 0, max_size - 1) - xmax = np.clip(xmax, 0, max_size - 1) - return xmin, xmax - - -def region_wise_random_select(regions, max_size): - selected_index = list(np.random.choice(len(regions), 2)) - selected_values = [] - for index in selected_index: - axis = regions[index] - xx = int(np.random.choice(axis, size=1)) - selected_values.append(xx) - xmin = min(selected_values) - xmax = max(selected_values) - return xmin, xmax - - -def crop_area(im, text_polys, min_crop_side_ratio, max_tries): - h, w, _ = im.shape - h_array = np.zeros(h, dtype=np.int32) - w_array = np.zeros(w, dtype=np.int32) - for points in text_polys: - points = np.round(points, decimals=0).astype(np.int32) - minx = np.min(points[:, 0]) - maxx = np.max(points[:, 0]) - w_array[minx:maxx] = 1 - miny = np.min(points[:, 1]) - maxy = np.max(points[:, 1]) - h_array[miny:maxy] = 1 - # ensure the cropped area not across a text - h_axis = np.where(h_array == 0)[0] - w_axis = np.where(w_array == 0)[0] - - if len(h_axis) == 0 or len(w_axis) == 0: - return 0, 0, w, h - - h_regions = split_regions(h_axis) - w_regions = split_regions(w_axis) - - for i in range(max_tries): - if len(w_regions) > 1: - xmin, xmax = region_wise_random_select(w_regions, w) - else: - xmin, xmax = random_select(w_axis, w) - if len(h_regions) > 1: - ymin, ymax = region_wise_random_select(h_regions, h) - else: - ymin, ymax = random_select(h_axis, h) - - if xmax - xmin < min_crop_side_ratio * w or ymax - ymin < min_crop_side_ratio * h: - # area too small - continue - num_poly_in_rect = 0 - for poly in text_polys: - if not is_poly_outside_rect(poly, xmin, ymin, xmax - xmin, - ymax - ymin): - num_poly_in_rect += 1 - break - - if num_poly_in_rect > 0: - return xmin, ymin, xmax - xmin, ymax - ymin - - return 0, 0, w, h - - -class EastRandomCropData(object): - def __init__(self, - size=(640, 640), - max_tries=10, - min_crop_side_ratio=0.1, - keep_ratio=True, - **kwargs): - self.size = size - self.max_tries = max_tries - self.min_crop_side_ratio = min_crop_side_ratio - self.keep_ratio = keep_ratio - - def __call__(self, data): - img = data['image'] - text_polys = data['polys'] - ignore_tags = data['ignore_tags'] - texts = data['texts'] - all_care_polys = [ - text_polys[i] for i, tag in enumerate(ignore_tags) if not tag - ] - # 计算crop区域 - crop_x, crop_y, crop_w, crop_h = crop_area( - img, all_care_polys, self.min_crop_side_ratio, self.max_tries) - # crop 图片 保持比例填充 - scale_w = self.size[0] / crop_w - scale_h = self.size[1] / crop_h - scale = min(scale_w, scale_h) - h = int(crop_h * scale) - w = int(crop_w * scale) - if self.keep_ratio: - padimg = np.zeros((self.size[1], self.size[0], img.shape[2]), - img.dtype) - padimg[:h, :w] = cv2.resize( - img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], (w, h)) - img = padimg - else: - img = cv2.resize( - img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], - tuple(self.size)) - # crop 文本框 - text_polys_crop = [] - ignore_tags_crop = [] - texts_crop = [] - for poly, text, tag in zip(text_polys, texts, ignore_tags): - poly = ((poly - (crop_x, crop_y)) * scale).tolist() - if not is_poly_outside_rect(poly, 0, 0, w, h): - text_polys_crop.append(poly) - ignore_tags_crop.append(tag) - texts_crop.append(text) - data['image'] = img - data['polys'] = np.array(text_polys_crop) - data['ignore_tags'] = ignore_tags_crop - data['texts'] = texts_crop - return data - - -class RandomCropImgMask(object): - def __init__(self, size, main_key, crop_keys, p=3 / 8, **kwargs): - self.size = size - self.main_key = main_key - self.crop_keys = crop_keys - self.p = p - - def __call__(self, data): - image = data['image'] - - h, w = image.shape[0:2] - th, tw = self.size - if w == tw and h == th: - return data - - mask = data[self.main_key] - if np.max(mask) > 0 and random.random() > self.p: - # make sure to crop the text region - tl = np.min(np.where(mask > 0), axis=1) - (th, tw) - tl[tl < 0] = 0 - br = np.max(np.where(mask > 0), axis=1) - (th, tw) - br[br < 0] = 0 - - br[0] = min(br[0], h - th) - br[1] = min(br[1], w - tw) - - i = random.randint(tl[0], br[0]) if tl[0] < br[0] else 0 - j = random.randint(tl[1], br[1]) if tl[1] < br[1] else 0 - else: - i = random.randint(0, h - th) if h - th > 0 else 0 - j = random.randint(0, w - tw) if w - tw > 0 else 0 - - # return i, j, th, tw - for k in data: - if k in self.crop_keys: - if len(data[k].shape) == 3: - if np.argmin(data[k].shape) == 0: - img = data[k][:, i:i + th, j:j + tw] - if img.shape[1] != img.shape[2]: - a = 1 - elif np.argmin(data[k].shape) == 2: - img = data[k][i:i + th, j:j + tw, :] - if img.shape[1] != img.shape[0]: - a = 1 - else: - img = data[k] - else: - img = data[k][i:i + th, j:j + tw] - if img.shape[0] != img.shape[1]: - a = 1 - data[k] = img - return data diff --git a/backend/ppocr/data/imaug/rec_img_aug.py b/backend/ppocr/data/imaug/rec_img_aug.py deleted file mode 100644 index 7483dff..0000000 --- a/backend/ppocr/data/imaug/rec_img_aug.py +++ /dev/null @@ -1,601 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math -import cv2 -import numpy as np -import random -import copy -from PIL import Image -from .text_image_aug import tia_perspective, tia_stretch, tia_distort - - -class RecAug(object): - def __init__(self, use_tia=True, aug_prob=0.4, **kwargs): - self.use_tia = use_tia - self.aug_prob = aug_prob - - def __call__(self, data): - img = data['image'] - img = warp(img, 10, self.use_tia, self.aug_prob) - data['image'] = img - return data - - -class RecConAug(object): - def __init__(self, - prob=0.5, - image_shape=(32, 320, 3), - max_text_length=25, - ext_data_num=1, - **kwargs): - self.ext_data_num = ext_data_num - self.prob = prob - self.max_text_length = max_text_length - self.image_shape = image_shape - self.max_wh_ratio = self.image_shape[1] / self.image_shape[0] - - def merge_ext_data(self, data, ext_data): - ori_w = round(data['image'].shape[1] / data['image'].shape[0] * - self.image_shape[0]) - ext_w = round(ext_data['image'].shape[1] / ext_data['image'].shape[0] * - self.image_shape[0]) - data['image'] = cv2.resize(data['image'], (ori_w, self.image_shape[0])) - ext_data['image'] = cv2.resize(ext_data['image'], - (ext_w, self.image_shape[0])) - data['image'] = np.concatenate( - [data['image'], ext_data['image']], axis=1) - data["label"] += ext_data["label"] - return data - - def __call__(self, data): - rnd_num = random.random() - if rnd_num > self.prob: - return data - for idx, ext_data in enumerate(data["ext_data"]): - if len(data["label"]) + len(ext_data[ - "label"]) > self.max_text_length: - break - concat_ratio = data['image'].shape[1] / data['image'].shape[ - 0] + ext_data['image'].shape[1] / ext_data['image'].shape[0] - if concat_ratio > self.max_wh_ratio: - break - data = self.merge_ext_data(data, ext_data) - data.pop("ext_data") - return data - - -class ClsResizeImg(object): - def __init__(self, image_shape, **kwargs): - self.image_shape = image_shape - - def __call__(self, data): - img = data['image'] - norm_img, _ = resize_norm_img(img, self.image_shape) - data['image'] = norm_img - return data - - -class NRTRRecResizeImg(object): - def __init__(self, image_shape, resize_type, padding=False, **kwargs): - self.image_shape = image_shape - self.resize_type = resize_type - self.padding = padding - - def __call__(self, data): - img = data['image'] - img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - image_shape = self.image_shape - if self.padding: - imgC, imgH, imgW = image_shape - # todo: change to 0 and modified image shape - h = img.shape[0] - w = img.shape[1] - ratio = w / float(h) - if math.ceil(imgH * ratio) > imgW: - resized_w = imgW - else: - resized_w = int(math.ceil(imgH * ratio)) - resized_image = cv2.resize(img, (resized_w, imgH)) - norm_img = np.expand_dims(resized_image, -1) - norm_img = norm_img.transpose((2, 0, 1)) - resized_image = norm_img.astype(np.float32) / 128. - 1. - padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) - padding_im[:, :, 0:resized_w] = resized_image - data['image'] = padding_im - return data - if self.resize_type == 'PIL': - image_pil = Image.fromarray(np.uint8(img)) - img = image_pil.resize(self.image_shape, Image.ANTIALIAS) - img = np.array(img) - if self.resize_type == 'OpenCV': - img = cv2.resize(img, self.image_shape) - norm_img = np.expand_dims(img, -1) - norm_img = norm_img.transpose((2, 0, 1)) - data['image'] = norm_img.astype(np.float32) / 128. - 1. - return data - - -class RecResizeImg(object): - def __init__(self, - image_shape, - infer_mode=False, - character_dict_path='./ppocr/utils/ppocr_keys_v1.txt', - padding=True, - **kwargs): - self.image_shape = image_shape - self.infer_mode = infer_mode - self.character_dict_path = character_dict_path - self.padding = padding - - def __call__(self, data): - img = data['image'] - if self.infer_mode and self.character_dict_path is not None: - norm_img, valid_ratio = resize_norm_img_chinese(img, - self.image_shape) - else: - norm_img, valid_ratio = resize_norm_img(img, self.image_shape, - self.padding) - data['image'] = norm_img - data['valid_ratio'] = valid_ratio - return data - - -class SRNRecResizeImg(object): - def __init__(self, image_shape, num_heads, max_text_length, **kwargs): - self.image_shape = image_shape - self.num_heads = num_heads - self.max_text_length = max_text_length - - def __call__(self, data): - img = data['image'] - norm_img = resize_norm_img_srn(img, self.image_shape) - data['image'] = norm_img - [encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2] = \ - srn_other_inputs(self.image_shape, self.num_heads, self.max_text_length) - - data['encoder_word_pos'] = encoder_word_pos - data['gsrm_word_pos'] = gsrm_word_pos - data['gsrm_slf_attn_bias1'] = gsrm_slf_attn_bias1 - data['gsrm_slf_attn_bias2'] = gsrm_slf_attn_bias2 - return data - - -class SARRecResizeImg(object): - def __init__(self, image_shape, width_downsample_ratio=0.25, **kwargs): - self.image_shape = image_shape - self.width_downsample_ratio = width_downsample_ratio - - def __call__(self, data): - img = data['image'] - norm_img, resize_shape, pad_shape, valid_ratio = resize_norm_img_sar( - img, self.image_shape, self.width_downsample_ratio) - data['image'] = norm_img - data['resized_shape'] = resize_shape - data['pad_shape'] = pad_shape - data['valid_ratio'] = valid_ratio - return data - - -class PRENResizeImg(object): - def __init__(self, image_shape, **kwargs): - """ - Accroding to original paper's realization, it's a hard resize method here. - So maybe you should optimize it to fit for your task better. - """ - self.dst_h, self.dst_w = image_shape - - def __call__(self, data): - img = data['image'] - resized_img = cv2.resize( - img, (self.dst_w, self.dst_h), interpolation=cv2.INTER_LINEAR) - resized_img = resized_img.transpose((2, 0, 1)) / 255 - resized_img -= 0.5 - resized_img /= 0.5 - data['image'] = resized_img.astype(np.float32) - return data - - -def resize_norm_img_sar(img, image_shape, width_downsample_ratio=0.25): - imgC, imgH, imgW_min, imgW_max = image_shape - h = img.shape[0] - w = img.shape[1] - valid_ratio = 1.0 - # make sure new_width is an integral multiple of width_divisor. - width_divisor = int(1 / width_downsample_ratio) - # resize - ratio = w / float(h) - resize_w = math.ceil(imgH * ratio) - if resize_w % width_divisor != 0: - resize_w = round(resize_w / width_divisor) * width_divisor - if imgW_min is not None: - resize_w = max(imgW_min, resize_w) - if imgW_max is not None: - valid_ratio = min(1.0, 1.0 * resize_w / imgW_max) - resize_w = min(imgW_max, resize_w) - resized_image = cv2.resize(img, (resize_w, imgH)) - resized_image = resized_image.astype('float32') - # norm - if image_shape[0] == 1: - resized_image = resized_image / 255 - resized_image = resized_image[np.newaxis, :] - else: - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - resize_shape = resized_image.shape - padding_im = -1.0 * np.ones((imgC, imgH, imgW_max), dtype=np.float32) - padding_im[:, :, 0:resize_w] = resized_image - pad_shape = padding_im.shape - - return padding_im, resize_shape, pad_shape, valid_ratio - - -def resize_norm_img(img, image_shape, padding=True): - imgC, imgH, imgW = image_shape - h = img.shape[0] - w = img.shape[1] - if not padding: - resized_image = cv2.resize( - img, (imgW, imgH), interpolation=cv2.INTER_LINEAR) - resized_w = imgW - else: - ratio = w / float(h) - if math.ceil(imgH * ratio) > imgW: - resized_w = imgW - else: - resized_w = int(math.ceil(imgH * ratio)) - resized_image = cv2.resize(img, (resized_w, imgH)) - resized_image = resized_image.astype('float32') - if image_shape[0] == 1: - resized_image = resized_image / 255 - resized_image = resized_image[np.newaxis, :] - else: - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) - padding_im[:, :, 0:resized_w] = resized_image - valid_ratio = min(1.0, float(resized_w / imgW)) - return padding_im, valid_ratio - - -def resize_norm_img_chinese(img, image_shape): - imgC, imgH, imgW = image_shape - # todo: change to 0 and modified image shape - max_wh_ratio = imgW * 1.0 / imgH - h, w = img.shape[0], img.shape[1] - ratio = w * 1.0 / h - max_wh_ratio = max(max_wh_ratio, ratio) - imgW = int(imgH * max_wh_ratio) - if math.ceil(imgH * ratio) > imgW: - resized_w = imgW - else: - resized_w = int(math.ceil(imgH * ratio)) - resized_image = cv2.resize(img, (resized_w, imgH)) - resized_image = resized_image.astype('float32') - if image_shape[0] == 1: - resized_image = resized_image / 255 - resized_image = resized_image[np.newaxis, :] - else: - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) - padding_im[:, :, 0:resized_w] = resized_image - valid_ratio = min(1.0, float(resized_w / imgW)) - return padding_im, valid_ratio - - -def resize_norm_img_srn(img, image_shape): - imgC, imgH, imgW = image_shape - - img_black = np.zeros((imgH, imgW)) - im_hei = img.shape[0] - im_wid = img.shape[1] - - if im_wid <= im_hei * 1: - img_new = cv2.resize(img, (imgH * 1, imgH)) - elif im_wid <= im_hei * 2: - img_new = cv2.resize(img, (imgH * 2, imgH)) - elif im_wid <= im_hei * 3: - img_new = cv2.resize(img, (imgH * 3, imgH)) - else: - img_new = cv2.resize(img, (imgW, imgH)) - - img_np = np.asarray(img_new) - img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY) - img_black[:, 0:img_np.shape[1]] = img_np - img_black = img_black[:, :, np.newaxis] - - row, col, c = img_black.shape - c = 1 - - return np.reshape(img_black, (c, row, col)).astype(np.float32) - - -def srn_other_inputs(image_shape, num_heads, max_text_length): - - imgC, imgH, imgW = image_shape - feature_dim = int((imgH / 8) * (imgW / 8)) - - encoder_word_pos = np.array(range(0, feature_dim)).reshape( - (feature_dim, 1)).astype('int64') - gsrm_word_pos = np.array(range(0, max_text_length)).reshape( - (max_text_length, 1)).astype('int64') - - gsrm_attn_bias_data = np.ones((1, max_text_length, max_text_length)) - gsrm_slf_attn_bias1 = np.triu(gsrm_attn_bias_data, 1).reshape( - [1, max_text_length, max_text_length]) - gsrm_slf_attn_bias1 = np.tile(gsrm_slf_attn_bias1, - [num_heads, 1, 1]) * [-1e9] - - gsrm_slf_attn_bias2 = np.tril(gsrm_attn_bias_data, -1).reshape( - [1, max_text_length, max_text_length]) - gsrm_slf_attn_bias2 = np.tile(gsrm_slf_attn_bias2, - [num_heads, 1, 1]) * [-1e9] - - return [ - encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, - gsrm_slf_attn_bias2 - ] - - -def flag(): - """ - flag - """ - return 1 if random.random() > 0.5000001 else -1 - - -def cvtColor(img): - """ - cvtColor - """ - hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) - delta = 0.001 * random.random() * flag() - hsv[:, :, 2] = hsv[:, :, 2] * (1 + delta) - new_img = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) - return new_img - - -def blur(img): - """ - blur - """ - h, w, _ = img.shape - if h > 10 and w > 10: - return cv2.GaussianBlur(img, (5, 5), 1) - else: - return img - - -def jitter(img): - """ - jitter - """ - w, h, _ = img.shape - if h > 10 and w > 10: - thres = min(w, h) - s = int(random.random() * thres * 0.01) - src_img = img.copy() - for i in range(s): - img[i:, i:, :] = src_img[:w - i, :h - i, :] - return img - else: - return img - - -def add_gasuss_noise(image, mean=0, var=0.1): - """ - Gasuss noise - """ - - noise = np.random.normal(mean, var**0.5, image.shape) - out = image + 0.5 * noise - out = np.clip(out, 0, 255) - out = np.uint8(out) - return out - - -def get_crop(image): - """ - random crop - """ - h, w, _ = image.shape - top_min = 1 - top_max = 8 - top_crop = int(random.randint(top_min, top_max)) - top_crop = min(top_crop, h - 1) - crop_img = image.copy() - ratio = random.randint(0, 1) - if ratio: - crop_img = crop_img[top_crop:h, :, :] - else: - crop_img = crop_img[0:h - top_crop, :, :] - return crop_img - - -class Config: - """ - Config - """ - - def __init__(self, use_tia): - self.anglex = random.random() * 30 - self.angley = random.random() * 15 - self.anglez = random.random() * 10 - self.fov = 42 - self.r = 0 - self.shearx = random.random() * 0.3 - self.sheary = random.random() * 0.05 - self.borderMode = cv2.BORDER_REPLICATE - self.use_tia = use_tia - - def make(self, w, h, ang): - """ - make - """ - self.anglex = random.random() * 5 * flag() - self.angley = random.random() * 5 * flag() - self.anglez = -1 * random.random() * int(ang) * flag() - self.fov = 42 - self.r = 0 - self.shearx = 0 - self.sheary = 0 - self.borderMode = cv2.BORDER_REPLICATE - self.w = w - self.h = h - - self.perspective = self.use_tia - self.stretch = self.use_tia - self.distort = self.use_tia - - self.crop = True - self.affine = False - self.reverse = True - self.noise = True - self.jitter = True - self.blur = True - self.color = True - - -def rad(x): - """ - rad - """ - return x * np.pi / 180 - - -def get_warpR(config): - """ - get_warpR - """ - anglex, angley, anglez, fov, w, h, r = \ - config.anglex, config.angley, config.anglez, config.fov, config.w, config.h, config.r - if w > 69 and w < 112: - anglex = anglex * 1.5 - - z = np.sqrt(w**2 + h**2) / 2 / np.tan(rad(fov / 2)) - # Homogeneous coordinate transformation matrix - rx = np.array([[1, 0, 0, 0], - [0, np.cos(rad(anglex)), -np.sin(rad(anglex)), 0], [ - 0, - -np.sin(rad(anglex)), - np.cos(rad(anglex)), - 0, - ], [0, 0, 0, 1]], np.float32) - ry = np.array([[np.cos(rad(angley)), 0, np.sin(rad(angley)), 0], - [0, 1, 0, 0], [ - -np.sin(rad(angley)), - 0, - np.cos(rad(angley)), - 0, - ], [0, 0, 0, 1]], np.float32) - rz = np.array([[np.cos(rad(anglez)), np.sin(rad(anglez)), 0, 0], - [-np.sin(rad(anglez)), np.cos(rad(anglez)), 0, 0], - [0, 0, 1, 0], [0, 0, 0, 1]], np.float32) - r = rx.dot(ry).dot(rz) - # generate 4 points - pcenter = np.array([h / 2, w / 2, 0, 0], np.float32) - p1 = np.array([0, 0, 0, 0], np.float32) - pcenter - p2 = np.array([w, 0, 0, 0], np.float32) - pcenter - p3 = np.array([0, h, 0, 0], np.float32) - pcenter - p4 = np.array([w, h, 0, 0], np.float32) - pcenter - dst1 = r.dot(p1) - dst2 = r.dot(p2) - dst3 = r.dot(p3) - dst4 = r.dot(p4) - list_dst = np.array([dst1, dst2, dst3, dst4]) - org = np.array([[0, 0], [w, 0], [0, h], [w, h]], np.float32) - dst = np.zeros((4, 2), np.float32) - # Project onto the image plane - dst[:, 0] = list_dst[:, 0] * z / (z - list_dst[:, 2]) + pcenter[0] - dst[:, 1] = list_dst[:, 1] * z / (z - list_dst[:, 2]) + pcenter[1] - - warpR = cv2.getPerspectiveTransform(org, dst) - - dst1, dst2, dst3, dst4 = dst - r1 = int(min(dst1[1], dst2[1])) - r2 = int(max(dst3[1], dst4[1])) - c1 = int(min(dst1[0], dst3[0])) - c2 = int(max(dst2[0], dst4[0])) - - try: - ratio = min(1.0 * h / (r2 - r1), 1.0 * w / (c2 - c1)) - - dx = -c1 - dy = -r1 - T1 = np.float32([[1., 0, dx], [0, 1., dy], [0, 0, 1.0 / ratio]]) - ret = T1.dot(warpR) - except: - ratio = 1.0 - T1 = np.float32([[1., 0, 0], [0, 1., 0], [0, 0, 1.]]) - ret = T1 - return ret, (-r1, -c1), ratio, dst - - -def get_warpAffine(config): - """ - get_warpAffine - """ - anglez = config.anglez - rz = np.array([[np.cos(rad(anglez)), np.sin(rad(anglez)), 0], - [-np.sin(rad(anglez)), np.cos(rad(anglez)), 0]], np.float32) - return rz - - -def warp(img, ang, use_tia=True, prob=0.4): - """ - warp - """ - h, w, _ = img.shape - config = Config(use_tia=use_tia) - config.make(w, h, ang) - new_img = img - - if config.distort: - img_height, img_width = img.shape[0:2] - if random.random() <= prob and img_height >= 20 and img_width >= 20: - new_img = tia_distort(new_img, random.randint(3, 6)) - - if config.stretch: - img_height, img_width = img.shape[0:2] - if random.random() <= prob and img_height >= 20 and img_width >= 20: - new_img = tia_stretch(new_img, random.randint(3, 6)) - - if config.perspective: - if random.random() <= prob: - new_img = tia_perspective(new_img) - - if config.crop: - img_height, img_width = img.shape[0:2] - if random.random() <= prob and img_height >= 20 and img_width >= 20: - new_img = get_crop(new_img) - - if config.blur: - if random.random() <= prob: - new_img = blur(new_img) - if config.color: - if random.random() <= prob: - new_img = cvtColor(new_img) - if config.jitter: - new_img = jitter(new_img) - if config.noise: - if random.random() <= prob: - new_img = add_gasuss_noise(new_img) - if config.reverse: - if random.random() <= prob: - new_img = 255 - new_img - return new_img diff --git a/backend/ppocr/data/imaug/sast_process.py b/backend/ppocr/data/imaug/sast_process.py deleted file mode 100644 index 08d03b1..0000000 --- a/backend/ppocr/data/imaug/sast_process.py +++ /dev/null @@ -1,777 +0,0 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -""" -This part code is refered from: -https://github.com/songdejia/EAST/blob/master/data_utils.py -""" -import math -import cv2 -import numpy as np -import json -import sys -import os - -__all__ = ['SASTProcessTrain'] - - -class SASTProcessTrain(object): - def __init__(self, - image_shape=[512, 512], - min_crop_size=24, - min_crop_side_ratio=0.3, - min_text_size=10, - max_text_size=512, - **kwargs): - self.input_size = image_shape[1] - self.min_crop_size = min_crop_size - self.min_crop_side_ratio = min_crop_side_ratio - self.min_text_size = min_text_size - self.max_text_size = max_text_size - - def quad_area(self, poly): - """ - compute area of a polygon - :param poly: - :return: - """ - edge = [(poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]), - (poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]), - (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]), - (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])] - return np.sum(edge) / 2. - - def gen_quad_from_poly(self, poly): - """ - Generate min area quad from poly. - """ - point_num = poly.shape[0] - min_area_quad = np.zeros((4, 2), dtype=np.float32) - if True: - rect = cv2.minAreaRect(poly.astype( - np.int32)) # (center (x,y), (width, height), angle of rotation) - center_point = rect[0] - box = np.array(cv2.boxPoints(rect)) - - first_point_idx = 0 - min_dist = 1e4 - for i in range(4): - dist = np.linalg.norm(box[(i + 0) % 4] - poly[0]) + \ - np.linalg.norm(box[(i + 1) % 4] - poly[point_num // 2 - 1]) + \ - np.linalg.norm(box[(i + 2) % 4] - poly[point_num // 2]) + \ - np.linalg.norm(box[(i + 3) % 4] - poly[-1]) - if dist < min_dist: - min_dist = dist - first_point_idx = i - for i in range(4): - min_area_quad[i] = box[(first_point_idx + i) % 4] - - return min_area_quad - - def check_and_validate_polys(self, polys, tags, xxx_todo_changeme): - """ - check so that the text poly is in the same direction, - and also filter some invalid polygons - :param polys: - :param tags: - :return: - """ - (h, w) = xxx_todo_changeme - if polys.shape[0] == 0: - return polys, np.array([]), np.array([]) - polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1) - polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1) - - validated_polys = [] - validated_tags = [] - hv_tags = [] - for poly, tag in zip(polys, tags): - quad = self.gen_quad_from_poly(poly) - p_area = self.quad_area(quad) - if abs(p_area) < 1: - print('invalid poly') - continue - if p_area > 0: - if tag == False: - print('poly in wrong direction') - tag = True # reversed cases should be ignore - poly = poly[(0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, - 1), :] - quad = quad[(0, 3, 2, 1), :] - - len_w = np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[3] - - quad[2]) - len_h = np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[1] - - quad[2]) - hv_tag = 1 - - if len_w * 2.0 < len_h: - hv_tag = 0 - - validated_polys.append(poly) - validated_tags.append(tag) - hv_tags.append(hv_tag) - return np.array(validated_polys), np.array(validated_tags), np.array( - hv_tags) - - def crop_area(self, - im, - polys, - tags, - hv_tags, - crop_background=False, - max_tries=25): - """ - make random crop from the input image - :param im: - :param polys: - :param tags: - :param crop_background: - :param max_tries: 50 -> 25 - :return: - """ - h, w, _ = im.shape - pad_h = h // 10 - pad_w = w // 10 - h_array = np.zeros((h + pad_h * 2), dtype=np.int32) - w_array = np.zeros((w + pad_w * 2), dtype=np.int32) - for poly in polys: - poly = np.round(poly, decimals=0).astype(np.int32) - minx = np.min(poly[:, 0]) - maxx = np.max(poly[:, 0]) - w_array[minx + pad_w:maxx + pad_w] = 1 - miny = np.min(poly[:, 1]) - maxy = np.max(poly[:, 1]) - h_array[miny + pad_h:maxy + pad_h] = 1 - # ensure the cropped area not across a text - h_axis = np.where(h_array == 0)[0] - w_axis = np.where(w_array == 0)[0] - if len(h_axis) == 0 or len(w_axis) == 0: - return im, polys, tags, hv_tags - for i in range(max_tries): - xx = np.random.choice(w_axis, size=2) - xmin = np.min(xx) - pad_w - xmax = np.max(xx) - pad_w - xmin = np.clip(xmin, 0, w - 1) - xmax = np.clip(xmax, 0, w - 1) - yy = np.random.choice(h_axis, size=2) - ymin = np.min(yy) - pad_h - ymax = np.max(yy) - pad_h - ymin = np.clip(ymin, 0, h - 1) - ymax = np.clip(ymax, 0, h - 1) - # if xmax - xmin < ARGS.min_crop_side_ratio * w or \ - # ymax - ymin < ARGS.min_crop_side_ratio * h: - if xmax - xmin < self.min_crop_size or \ - ymax - ymin < self.min_crop_size: - # area too small - continue - if polys.shape[0] != 0: - poly_axis_in_area = (polys[:, :, 0] >= xmin) & (polys[:, :, 0] <= xmax) \ - & (polys[:, :, 1] >= ymin) & (polys[:, :, 1] <= ymax) - selected_polys = np.where( - np.sum(poly_axis_in_area, axis=1) == 4)[0] - else: - selected_polys = [] - if len(selected_polys) == 0: - # no text in this area - if crop_background: - return im[ymin : ymax + 1, xmin : xmax + 1, :], \ - polys[selected_polys], tags[selected_polys], hv_tags[selected_polys] - else: - continue - im = im[ymin:ymax + 1, xmin:xmax + 1, :] - polys = polys[selected_polys] - tags = tags[selected_polys] - hv_tags = hv_tags[selected_polys] - polys[:, :, 0] -= xmin - polys[:, :, 1] -= ymin - return im, polys, tags, hv_tags - - return im, polys, tags, hv_tags - - def generate_direction_map(self, poly_quads, direction_map): - """ - """ - width_list = [] - height_list = [] - for quad in poly_quads: - quad_w = (np.linalg.norm(quad[0] - quad[1]) + - np.linalg.norm(quad[2] - quad[3])) / 2.0 - quad_h = (np.linalg.norm(quad[0] - quad[3]) + - np.linalg.norm(quad[2] - quad[1])) / 2.0 - width_list.append(quad_w) - height_list.append(quad_h) - norm_width = max(sum(width_list) / (len(width_list) + 1e-6), 1.0) - average_height = max(sum(height_list) / (len(height_list) + 1e-6), 1.0) - - for quad in poly_quads: - direct_vector_full = ( - (quad[1] + quad[2]) - (quad[0] + quad[3])) / 2.0 - direct_vector = direct_vector_full / ( - np.linalg.norm(direct_vector_full) + 1e-6) * norm_width - direction_label = tuple( - map(float, [ - direct_vector[0], direct_vector[1], 1.0 / (average_height + - 1e-6) - ])) - cv2.fillPoly(direction_map, - quad.round().astype(np.int32)[np.newaxis, :, :], - direction_label) - return direction_map - - def calculate_average_height(self, poly_quads): - """ - """ - height_list = [] - for quad in poly_quads: - quad_h = (np.linalg.norm(quad[0] - quad[3]) + - np.linalg.norm(quad[2] - quad[1])) / 2.0 - height_list.append(quad_h) - average_height = max(sum(height_list) / len(height_list), 1.0) - return average_height - - def generate_tcl_label(self, - hw, - polys, - tags, - ds_ratio, - tcl_ratio=0.3, - shrink_ratio_of_width=0.15): - """ - Generate polygon. - """ - h, w = hw - h, w = int(h * ds_ratio), int(w * ds_ratio) - polys = polys * ds_ratio - - score_map = np.zeros( - ( - h, - w, ), dtype=np.float32) - tbo_map = np.zeros((h, w, 5), dtype=np.float32) - training_mask = np.ones( - ( - h, - w, ), dtype=np.float32) - direction_map = np.ones((h, w, 3)) * np.array([0, 0, 1]).reshape( - [1, 1, 3]).astype(np.float32) - - for poly_idx, poly_tag in enumerate(zip(polys, tags)): - poly = poly_tag[0] - tag = poly_tag[1] - - # generate min_area_quad - min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly) - min_area_quad_h = 0.5 * ( - np.linalg.norm(min_area_quad[0] - min_area_quad[3]) + - np.linalg.norm(min_area_quad[1] - min_area_quad[2])) - min_area_quad_w = 0.5 * ( - np.linalg.norm(min_area_quad[0] - min_area_quad[1]) + - np.linalg.norm(min_area_quad[2] - min_area_quad[3])) - - if min(min_area_quad_h, min_area_quad_w) < self.min_text_size * ds_ratio \ - or min(min_area_quad_h, min_area_quad_w) > self.max_text_size * ds_ratio: - continue - - if tag: - # continue - cv2.fillPoly(training_mask, - poly.astype(np.int32)[np.newaxis, :, :], 0.15) - else: - tcl_poly = self.poly2tcl(poly, tcl_ratio) - tcl_quads = self.poly2quads(tcl_poly) - poly_quads = self.poly2quads(poly) - # stcl map - stcl_quads, quad_index = self.shrink_poly_along_width( - tcl_quads, - shrink_ratio_of_width=shrink_ratio_of_width, - expand_height_ratio=1.0 / tcl_ratio) - # generate tcl map - cv2.fillPoly(score_map, - np.round(stcl_quads).astype(np.int32), 1.0) - - # generate tbo map - for idx, quad in enumerate(stcl_quads): - quad_mask = np.zeros((h, w), dtype=np.float32) - quad_mask = cv2.fillPoly( - quad_mask, - np.round(quad[np.newaxis, :, :]).astype(np.int32), 1.0) - tbo_map = self.gen_quad_tbo(poly_quads[quad_index[idx]], - quad_mask, tbo_map) - return score_map, tbo_map, training_mask - - def generate_tvo_and_tco(self, - hw, - polys, - tags, - tcl_ratio=0.3, - ds_ratio=0.25): - """ - Generate tcl map, tvo map and tbo map. - """ - h, w = hw - h, w = int(h * ds_ratio), int(w * ds_ratio) - polys = polys * ds_ratio - poly_mask = np.zeros((h, w), dtype=np.float32) - - tvo_map = np.ones((9, h, w), dtype=np.float32) - tvo_map[0:-1:2] = np.tile(np.arange(0, w), (h, 1)) - tvo_map[1:-1:2] = np.tile(np.arange(0, w), (h, 1)).T - poly_tv_xy_map = np.zeros((8, h, w), dtype=np.float32) - - # tco map - tco_map = np.ones((3, h, w), dtype=np.float32) - tco_map[0] = np.tile(np.arange(0, w), (h, 1)) - tco_map[1] = np.tile(np.arange(0, w), (h, 1)).T - poly_tc_xy_map = np.zeros((2, h, w), dtype=np.float32) - - poly_short_edge_map = np.ones((h, w), dtype=np.float32) - - for poly, poly_tag in zip(polys, tags): - - if poly_tag == True: - continue - - # adjust point order for vertical poly - poly = self.adjust_point(poly) - - # generate min_area_quad - min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly) - min_area_quad_h = 0.5 * ( - np.linalg.norm(min_area_quad[0] - min_area_quad[3]) + - np.linalg.norm(min_area_quad[1] - min_area_quad[2])) - min_area_quad_w = 0.5 * ( - np.linalg.norm(min_area_quad[0] - min_area_quad[1]) + - np.linalg.norm(min_area_quad[2] - min_area_quad[3])) - - # generate tcl map and text, 128 * 128 - tcl_poly = self.poly2tcl(poly, tcl_ratio) - - # generate poly_tv_xy_map - for idx in range(4): - cv2.fillPoly( - poly_tv_xy_map[2 * idx], - np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), - float(min(max(min_area_quad[idx, 0], 0), w))) - cv2.fillPoly( - poly_tv_xy_map[2 * idx + 1], - np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), - float(min(max(min_area_quad[idx, 1], 0), h))) - - # generate poly_tc_xy_map - for idx in range(2): - cv2.fillPoly( - poly_tc_xy_map[idx], - np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), - float(center_point[idx])) - - # generate poly_short_edge_map - cv2.fillPoly( - poly_short_edge_map, - np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), - float(max(min(min_area_quad_h, min_area_quad_w), 1.0))) - - # generate poly_mask and training_mask - cv2.fillPoly(poly_mask, - np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), - 1) - - tvo_map *= poly_mask - tvo_map[:8] -= poly_tv_xy_map - tvo_map[-1] /= poly_short_edge_map - tvo_map = tvo_map.transpose((1, 2, 0)) - - tco_map *= poly_mask - tco_map[:2] -= poly_tc_xy_map - tco_map[-1] /= poly_short_edge_map - tco_map = tco_map.transpose((1, 2, 0)) - - return tvo_map, tco_map - - def adjust_point(self, poly): - """ - adjust point order. - """ - point_num = poly.shape[0] - if point_num == 4: - len_1 = np.linalg.norm(poly[0] - poly[1]) - len_2 = np.linalg.norm(poly[1] - poly[2]) - len_3 = np.linalg.norm(poly[2] - poly[3]) - len_4 = np.linalg.norm(poly[3] - poly[0]) - - if (len_1 + len_3) * 1.5 < (len_2 + len_4): - poly = poly[[1, 2, 3, 0], :] - - elif point_num > 4: - vector_1 = poly[0] - poly[1] - vector_2 = poly[1] - poly[2] - cos_theta = np.dot(vector_1, vector_2) / ( - np.linalg.norm(vector_1) * np.linalg.norm(vector_2) + 1e-6) - theta = np.arccos(np.round(cos_theta, decimals=4)) - - if abs(theta) > (70 / 180 * math.pi): - index = list(range(1, point_num)) + [0] - poly = poly[np.array(index), :] - return poly - - def gen_min_area_quad_from_poly(self, poly): - """ - Generate min area quad from poly. - """ - point_num = poly.shape[0] - min_area_quad = np.zeros((4, 2), dtype=np.float32) - if point_num == 4: - min_area_quad = poly - center_point = np.sum(poly, axis=0) / 4 - else: - rect = cv2.minAreaRect(poly.astype( - np.int32)) # (center (x,y), (width, height), angle of rotation) - center_point = rect[0] - box = np.array(cv2.boxPoints(rect)) - - first_point_idx = 0 - min_dist = 1e4 - for i in range(4): - dist = np.linalg.norm(box[(i + 0) % 4] - poly[0]) + \ - np.linalg.norm(box[(i + 1) % 4] - poly[point_num // 2 - 1]) + \ - np.linalg.norm(box[(i + 2) % 4] - poly[point_num // 2]) + \ - np.linalg.norm(box[(i + 3) % 4] - poly[-1]) - if dist < min_dist: - min_dist = dist - first_point_idx = i - - for i in range(4): - min_area_quad[i] = box[(first_point_idx + i) % 4] - - return min_area_quad, center_point - - def shrink_quad_along_width(self, - quad, - begin_width_ratio=0., - end_width_ratio=1.): - """ - Generate shrink_quad_along_width. - """ - ratio_pair = np.array( - [[begin_width_ratio], [end_width_ratio]], dtype=np.float32) - p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair - p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair - return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]]) - - def shrink_poly_along_width(self, - quads, - shrink_ratio_of_width, - expand_height_ratio=1.0): - """ - shrink poly with given length. - """ - upper_edge_list = [] - - def get_cut_info(edge_len_list, cut_len): - for idx, edge_len in enumerate(edge_len_list): - cut_len -= edge_len - if cut_len <= 0.000001: - ratio = (cut_len + edge_len_list[idx]) / edge_len_list[idx] - return idx, ratio - - for quad in quads: - upper_edge_len = np.linalg.norm(quad[0] - quad[1]) - upper_edge_list.append(upper_edge_len) - - # length of left edge and right edge. - left_length = np.linalg.norm(quads[0][0] - quads[0][ - 3]) * expand_height_ratio - right_length = np.linalg.norm(quads[-1][1] - quads[-1][ - 2]) * expand_height_ratio - - shrink_length = min(left_length, right_length, - sum(upper_edge_list)) * shrink_ratio_of_width - # shrinking length - upper_len_left = shrink_length - upper_len_right = sum(upper_edge_list) - shrink_length - - left_idx, left_ratio = get_cut_info(upper_edge_list, upper_len_left) - left_quad = self.shrink_quad_along_width( - quads[left_idx], begin_width_ratio=left_ratio, end_width_ratio=1) - right_idx, right_ratio = get_cut_info(upper_edge_list, upper_len_right) - right_quad = self.shrink_quad_along_width( - quads[right_idx], begin_width_ratio=0, end_width_ratio=right_ratio) - - out_quad_list = [] - if left_idx == right_idx: - out_quad_list.append( - [left_quad[0], right_quad[1], right_quad[2], left_quad[3]]) - else: - out_quad_list.append(left_quad) - for idx in range(left_idx + 1, right_idx): - out_quad_list.append(quads[idx]) - out_quad_list.append(right_quad) - - return np.array(out_quad_list), list(range(left_idx, right_idx + 1)) - - def vector_angle(self, A, B): - """ - Calculate the angle between vector AB and x-axis positive direction. - """ - AB = np.array([B[1] - A[1], B[0] - A[0]]) - return np.arctan2(*AB) - - def theta_line_cross_point(self, theta, point): - """ - Calculate the line through given point and angle in ax + by + c =0 form. - """ - x, y = point - cos = np.cos(theta) - sin = np.sin(theta) - return [sin, -cos, cos * y - sin * x] - - def line_cross_two_point(self, A, B): - """ - Calculate the line through given point A and B in ax + by + c =0 form. - """ - angle = self.vector_angle(A, B) - return self.theta_line_cross_point(angle, A) - - def average_angle(self, poly): - """ - Calculate the average angle between left and right edge in given poly. - """ - p0, p1, p2, p3 = poly - angle30 = self.vector_angle(p3, p0) - angle21 = self.vector_angle(p2, p1) - return (angle30 + angle21) / 2 - - def line_cross_point(self, line1, line2): - """ - line1 and line2 in 0=ax+by+c form, compute the cross point of line1 and line2 - """ - a1, b1, c1 = line1 - a2, b2, c2 = line2 - d = a1 * b2 - a2 * b1 - - if d == 0: - #print("line1", line1) - #print("line2", line2) - print('Cross point does not exist') - return np.array([0, 0], dtype=np.float32) - else: - x = (b1 * c2 - b2 * c1) / d - y = (a2 * c1 - a1 * c2) / d - - return np.array([x, y], dtype=np.float32) - - def quad2tcl(self, poly, ratio): - """ - Generate center line by poly clock-wise point. (4, 2) - """ - ratio_pair = np.array( - [[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32) - p0_3 = poly[0] + (poly[3] - poly[0]) * ratio_pair - p1_2 = poly[1] + (poly[2] - poly[1]) * ratio_pair - return np.array([p0_3[0], p1_2[0], p1_2[1], p0_3[1]]) - - def poly2tcl(self, poly, ratio): - """ - Generate center line by poly clock-wise point. - """ - ratio_pair = np.array( - [[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32) - tcl_poly = np.zeros_like(poly) - point_num = poly.shape[0] - - for idx in range(point_num // 2): - point_pair = poly[idx] + (poly[point_num - 1 - idx] - poly[idx] - ) * ratio_pair - tcl_poly[idx] = point_pair[0] - tcl_poly[point_num - 1 - idx] = point_pair[1] - return tcl_poly - - def gen_quad_tbo(self, quad, tcl_mask, tbo_map): - """ - Generate tbo_map for give quad. - """ - # upper and lower line function: ax + by + c = 0; - up_line = self.line_cross_two_point(quad[0], quad[1]) - lower_line = self.line_cross_two_point(quad[3], quad[2]) - - quad_h = 0.5 * (np.linalg.norm(quad[0] - quad[3]) + - np.linalg.norm(quad[1] - quad[2])) - quad_w = 0.5 * (np.linalg.norm(quad[0] - quad[1]) + - np.linalg.norm(quad[2] - quad[3])) - - # average angle of left and right line. - angle = self.average_angle(quad) - - xy_in_poly = np.argwhere(tcl_mask == 1) - for y, x in xy_in_poly: - point = (x, y) - line = self.theta_line_cross_point(angle, point) - cross_point_upper = self.line_cross_point(up_line, line) - cross_point_lower = self.line_cross_point(lower_line, line) - ##FIX, offset reverse - upper_offset_x, upper_offset_y = cross_point_upper - point - lower_offset_x, lower_offset_y = cross_point_lower - point - tbo_map[y, x, 0] = upper_offset_y - tbo_map[y, x, 1] = upper_offset_x - tbo_map[y, x, 2] = lower_offset_y - tbo_map[y, x, 3] = lower_offset_x - tbo_map[y, x, 4] = 1.0 / max(min(quad_h, quad_w), 1.0) * 2 - return tbo_map - - def poly2quads(self, poly): - """ - Split poly into quads. - """ - quad_list = [] - point_num = poly.shape[0] - - # point pair - point_pair_list = [] - for idx in range(point_num // 2): - point_pair = [poly[idx], poly[point_num - 1 - idx]] - point_pair_list.append(point_pair) - - quad_num = point_num // 2 - 1 - for idx in range(quad_num): - # reshape and adjust to clock-wise - quad_list.append((np.array(point_pair_list)[[idx, idx + 1]] - ).reshape(4, 2)[[0, 2, 3, 1]]) - - return np.array(quad_list) - - def __call__(self, data): - im = data['image'] - text_polys = data['polys'] - text_tags = data['ignore_tags'] - if im is None: - return None - if text_polys.shape[0] == 0: - return None - - h, w, _ = im.shape - text_polys, text_tags, hv_tags = self.check_and_validate_polys( - text_polys, text_tags, (h, w)) - - if text_polys.shape[0] == 0: - return None - - #set aspect ratio and keep area fix - asp_scales = np.arange(1.0, 1.55, 0.1) - asp_scale = np.random.choice(asp_scales) - - if np.random.rand() < 0.5: - asp_scale = 1.0 / asp_scale - asp_scale = math.sqrt(asp_scale) - - asp_wx = asp_scale - asp_hy = 1.0 / asp_scale - im = cv2.resize(im, dsize=None, fx=asp_wx, fy=asp_hy) - text_polys[:, :, 0] *= asp_wx - text_polys[:, :, 1] *= asp_hy - - h, w, _ = im.shape - if max(h, w) > 2048: - rd_scale = 2048.0 / max(h, w) - im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale) - text_polys *= rd_scale - h, w, _ = im.shape - if min(h, w) < 16: - return None - - #no background - im, text_polys, text_tags, hv_tags = self.crop_area(im, \ - text_polys, text_tags, hv_tags, crop_background=False) - - if text_polys.shape[0] == 0: - return None - #continue for all ignore case - if np.sum((text_tags * 1.0)) >= text_tags.size: - return None - new_h, new_w, _ = im.shape - if (new_h is None) or (new_w is None): - return None - #resize image - std_ratio = float(self.input_size) / max(new_w, new_h) - rand_scales = np.array( - [0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0, 1.0, 1.0, 1.0, 1.0]) - rz_scale = std_ratio * np.random.choice(rand_scales) - im = cv2.resize(im, dsize=None, fx=rz_scale, fy=rz_scale) - text_polys[:, :, 0] *= rz_scale - text_polys[:, :, 1] *= rz_scale - - #add gaussian blur - if np.random.rand() < 0.1 * 0.5: - ks = np.random.permutation(5)[0] + 1 - ks = int(ks / 2) * 2 + 1 - im = cv2.GaussianBlur(im, ksize=(ks, ks), sigmaX=0, sigmaY=0) - #add brighter - if np.random.rand() < 0.1 * 0.5: - im = im * (1.0 + np.random.rand() * 0.5) - im = np.clip(im, 0.0, 255.0) - #add darker - if np.random.rand() < 0.1 * 0.5: - im = im * (1.0 - np.random.rand() * 0.5) - im = np.clip(im, 0.0, 255.0) - - # Padding the im to [input_size, input_size] - new_h, new_w, _ = im.shape - if min(new_w, new_h) < self.input_size * 0.5: - return None - - im_padded = np.ones( - (self.input_size, self.input_size, 3), dtype=np.float32) - im_padded[:, :, 2] = 0.485 * 255 - im_padded[:, :, 1] = 0.456 * 255 - im_padded[:, :, 0] = 0.406 * 255 - - # Random the start position - del_h = self.input_size - new_h - del_w = self.input_size - new_w - sh, sw = 0, 0 - if del_h > 1: - sh = int(np.random.rand() * del_h) - if del_w > 1: - sw = int(np.random.rand() * del_w) - - # Padding - im_padded[sh:sh + new_h, sw:sw + new_w, :] = im.copy() - text_polys[:, :, 0] += sw - text_polys[:, :, 1] += sh - - score_map, border_map, training_mask = self.generate_tcl_label( - (self.input_size, self.input_size), text_polys, text_tags, 0.25) - - # SAST head - tvo_map, tco_map = self.generate_tvo_and_tco( - (self.input_size, self.input_size), - text_polys, - text_tags, - tcl_ratio=0.3, - ds_ratio=0.25) - # print("test--------tvo_map shape:", tvo_map.shape) - - im_padded[:, :, 2] -= 0.485 * 255 - im_padded[:, :, 1] -= 0.456 * 255 - im_padded[:, :, 0] -= 0.406 * 255 - im_padded[:, :, 2] /= (255.0 * 0.229) - im_padded[:, :, 1] /= (255.0 * 0.224) - im_padded[:, :, 0] /= (255.0 * 0.225) - im_padded = im_padded.transpose((2, 0, 1)) - - data['image'] = im_padded[::-1, :, :] - data['score_map'] = score_map[np.newaxis, :, :] - data['border_map'] = border_map.transpose((2, 0, 1)) - data['training_mask'] = training_mask[np.newaxis, :, :] - data['tvo_map'] = tvo_map.transpose((2, 0, 1)) - data['tco_map'] = tco_map.transpose((2, 0, 1)) - return data diff --git a/backend/ppocr/data/imaug/ssl_img_aug.py b/backend/ppocr/data/imaug/ssl_img_aug.py deleted file mode 100644 index f9ed6ac..0000000 --- a/backend/ppocr/data/imaug/ssl_img_aug.py +++ /dev/null @@ -1,60 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math -import cv2 -import numpy as np -import random -from PIL import Image - -from .rec_img_aug import resize_norm_img - - -class SSLRotateResize(object): - def __init__(self, - image_shape, - padding=False, - select_all=True, - mode="train", - **kwargs): - self.image_shape = image_shape - self.padding = padding - self.select_all = select_all - self.mode = mode - - def __call__(self, data): - img = data["image"] - - data["image_r90"] = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE) - data["image_r180"] = cv2.rotate(data["image_r90"], - cv2.ROTATE_90_CLOCKWISE) - data["image_r270"] = cv2.rotate(data["image_r180"], - cv2.ROTATE_90_CLOCKWISE) - - images = [] - for key in ["image", "image_r90", "image_r180", "image_r270"]: - images.append( - resize_norm_img( - data.pop(key), - image_shape=self.image_shape, - padding=self.padding)[0]) - data["image"] = np.stack(images, axis=0) - data["label"] = np.array(list(range(4))) - if not self.select_all: - data["image"] = data["image"][0::2] # just choose 0 and 180 - data["label"] = data["label"][0:2] # label needs to be continuous - if self.mode == "test": - data["image"] = data["image"][0] - data["label"] = data["label"][0] - return data diff --git a/backend/ppocr/data/imaug/text_image_aug/__init__.py b/backend/ppocr/data/imaug/text_image_aug/__init__.py deleted file mode 100644 index bca2626..0000000 --- a/backend/ppocr/data/imaug/text_image_aug/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .augment import tia_perspective, tia_distort, tia_stretch - -__all__ = ['tia_distort', 'tia_stretch', 'tia_perspective'] diff --git a/backend/ppocr/data/imaug/text_image_aug/augment.py b/backend/ppocr/data/imaug/text_image_aug/augment.py deleted file mode 100644 index 2d15dd5..0000000 --- a/backend/ppocr/data/imaug/text_image_aug/augment.py +++ /dev/null @@ -1,120 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/RubanSeven/Text-Image-Augmentation-python/blob/master/augment.py -""" - -import numpy as np -from .warp_mls import WarpMLS - - -def tia_distort(src, segment=4): - img_h, img_w = src.shape[:2] - - cut = img_w // segment - thresh = cut // 3 - - src_pts = list() - dst_pts = list() - - src_pts.append([0, 0]) - src_pts.append([img_w, 0]) - src_pts.append([img_w, img_h]) - src_pts.append([0, img_h]) - - dst_pts.append([np.random.randint(thresh), np.random.randint(thresh)]) - dst_pts.append( - [img_w - np.random.randint(thresh), np.random.randint(thresh)]) - dst_pts.append( - [img_w - np.random.randint(thresh), img_h - np.random.randint(thresh)]) - dst_pts.append( - [np.random.randint(thresh), img_h - np.random.randint(thresh)]) - - half_thresh = thresh * 0.5 - - for cut_idx in np.arange(1, segment, 1): - src_pts.append([cut * cut_idx, 0]) - src_pts.append([cut * cut_idx, img_h]) - dst_pts.append([ - cut * cut_idx + np.random.randint(thresh) - half_thresh, - np.random.randint(thresh) - half_thresh - ]) - dst_pts.append([ - cut * cut_idx + np.random.randint(thresh) - half_thresh, - img_h + np.random.randint(thresh) - half_thresh - ]) - - trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h) - dst = trans.generate() - - return dst - - -def tia_stretch(src, segment=4): - img_h, img_w = src.shape[:2] - - cut = img_w // segment - thresh = cut * 4 // 5 - - src_pts = list() - dst_pts = list() - - src_pts.append([0, 0]) - src_pts.append([img_w, 0]) - src_pts.append([img_w, img_h]) - src_pts.append([0, img_h]) - - dst_pts.append([0, 0]) - dst_pts.append([img_w, 0]) - dst_pts.append([img_w, img_h]) - dst_pts.append([0, img_h]) - - half_thresh = thresh * 0.5 - - for cut_idx in np.arange(1, segment, 1): - move = np.random.randint(thresh) - half_thresh - src_pts.append([cut * cut_idx, 0]) - src_pts.append([cut * cut_idx, img_h]) - dst_pts.append([cut * cut_idx + move, 0]) - dst_pts.append([cut * cut_idx + move, img_h]) - - trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h) - dst = trans.generate() - - return dst - - -def tia_perspective(src): - img_h, img_w = src.shape[:2] - - thresh = img_h // 2 - - src_pts = list() - dst_pts = list() - - src_pts.append([0, 0]) - src_pts.append([img_w, 0]) - src_pts.append([img_w, img_h]) - src_pts.append([0, img_h]) - - dst_pts.append([0, np.random.randint(thresh)]) - dst_pts.append([img_w, np.random.randint(thresh)]) - dst_pts.append([img_w, img_h - np.random.randint(thresh)]) - dst_pts.append([0, img_h - np.random.randint(thresh)]) - - trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h) - dst = trans.generate() - - return dst \ No newline at end of file diff --git a/backend/ppocr/data/imaug/text_image_aug/warp_mls.py b/backend/ppocr/data/imaug/text_image_aug/warp_mls.py deleted file mode 100644 index 75de111..0000000 --- a/backend/ppocr/data/imaug/text_image_aug/warp_mls.py +++ /dev/null @@ -1,168 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/RubanSeven/Text-Image-Augmentation-python/blob/master/warp_mls.py -""" - -import numpy as np - - -class WarpMLS: - def __init__(self, src, src_pts, dst_pts, dst_w, dst_h, trans_ratio=1.): - self.src = src - self.src_pts = src_pts - self.dst_pts = dst_pts - self.pt_count = len(self.dst_pts) - self.dst_w = dst_w - self.dst_h = dst_h - self.trans_ratio = trans_ratio - self.grid_size = 100 - self.rdx = np.zeros((self.dst_h, self.dst_w)) - self.rdy = np.zeros((self.dst_h, self.dst_w)) - - @staticmethod - def __bilinear_interp(x, y, v11, v12, v21, v22): - return (v11 * (1 - y) + v12 * y) * (1 - x) + (v21 * - (1 - y) + v22 * y) * x - - def generate(self): - self.calc_delta() - return self.gen_img() - - def calc_delta(self): - w = np.zeros(self.pt_count, dtype=np.float32) - - if self.pt_count < 2: - return - - i = 0 - while 1: - if self.dst_w <= i < self.dst_w + self.grid_size - 1: - i = self.dst_w - 1 - elif i >= self.dst_w: - break - - j = 0 - while 1: - if self.dst_h <= j < self.dst_h + self.grid_size - 1: - j = self.dst_h - 1 - elif j >= self.dst_h: - break - - sw = 0 - swp = np.zeros(2, dtype=np.float32) - swq = np.zeros(2, dtype=np.float32) - new_pt = np.zeros(2, dtype=np.float32) - cur_pt = np.array([i, j], dtype=np.float32) - - k = 0 - for k in range(self.pt_count): - if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]: - break - - w[k] = 1. / ( - (i - self.dst_pts[k][0]) * (i - self.dst_pts[k][0]) + - (j - self.dst_pts[k][1]) * (j - self.dst_pts[k][1])) - - sw += w[k] - swp = swp + w[k] * np.array(self.dst_pts[k]) - swq = swq + w[k] * np.array(self.src_pts[k]) - - if k == self.pt_count - 1: - pstar = 1 / sw * swp - qstar = 1 / sw * swq - - miu_s = 0 - for k in range(self.pt_count): - if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]: - continue - pt_i = self.dst_pts[k] - pstar - miu_s += w[k] * np.sum(pt_i * pt_i) - - cur_pt -= pstar - cur_pt_j = np.array([-cur_pt[1], cur_pt[0]]) - - for k in range(self.pt_count): - if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]: - continue - - pt_i = self.dst_pts[k] - pstar - pt_j = np.array([-pt_i[1], pt_i[0]]) - - tmp_pt = np.zeros(2, dtype=np.float32) - tmp_pt[0] = np.sum(pt_i * cur_pt) * self.src_pts[k][0] - \ - np.sum(pt_j * cur_pt) * self.src_pts[k][1] - tmp_pt[1] = -np.sum(pt_i * cur_pt_j) * self.src_pts[k][0] + \ - np.sum(pt_j * cur_pt_j) * self.src_pts[k][1] - tmp_pt *= (w[k] / miu_s) - new_pt += tmp_pt - - new_pt += qstar - else: - new_pt = self.src_pts[k] - - self.rdx[j, i] = new_pt[0] - i - self.rdy[j, i] = new_pt[1] - j - - j += self.grid_size - i += self.grid_size - - def gen_img(self): - src_h, src_w = self.src.shape[:2] - dst = np.zeros_like(self.src, dtype=np.float32) - - for i in np.arange(0, self.dst_h, self.grid_size): - for j in np.arange(0, self.dst_w, self.grid_size): - ni = i + self.grid_size - nj = j + self.grid_size - w = h = self.grid_size - if ni >= self.dst_h: - ni = self.dst_h - 1 - h = ni - i + 1 - if nj >= self.dst_w: - nj = self.dst_w - 1 - w = nj - j + 1 - - di = np.reshape(np.arange(h), (-1, 1)) - dj = np.reshape(np.arange(w), (1, -1)) - delta_x = self.__bilinear_interp( - di / h, dj / w, self.rdx[i, j], self.rdx[i, nj], - self.rdx[ni, j], self.rdx[ni, nj]) - delta_y = self.__bilinear_interp( - di / h, dj / w, self.rdy[i, j], self.rdy[i, nj], - self.rdy[ni, j], self.rdy[ni, nj]) - nx = j + dj + delta_x * self.trans_ratio - ny = i + di + delta_y * self.trans_ratio - nx = np.clip(nx, 0, src_w - 1) - ny = np.clip(ny, 0, src_h - 1) - nxi = np.array(np.floor(nx), dtype=np.int32) - nyi = np.array(np.floor(ny), dtype=np.int32) - nxi1 = np.array(np.ceil(nx), dtype=np.int32) - nyi1 = np.array(np.ceil(ny), dtype=np.int32) - - if len(self.src.shape) == 3: - x = np.tile(np.expand_dims(ny - nyi, axis=-1), (1, 1, 3)) - y = np.tile(np.expand_dims(nx - nxi, axis=-1), (1, 1, 3)) - else: - x = ny - nyi - y = nx - nxi - dst[i:i + h, j:j + w] = self.__bilinear_interp( - x, y, self.src[nyi, nxi], self.src[nyi, nxi1], - self.src[nyi1, nxi], self.src[nyi1, nxi1]) - - dst = np.clip(dst, 0, 255) - dst = np.array(dst, dtype=np.uint8) - - return dst diff --git a/backend/ppocr/data/imaug/vqa/__init__.py b/backend/ppocr/data/imaug/vqa/__init__.py deleted file mode 100644 index a5025e7..0000000 --- a/backend/ppocr/data/imaug/vqa/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .token import VQATokenPad, VQASerTokenChunk, VQAReTokenChunk, VQAReTokenRelation - -__all__ = [ - 'VQATokenPad', 'VQASerTokenChunk', 'VQAReTokenChunk', 'VQAReTokenRelation' -] diff --git a/backend/ppocr/data/imaug/vqa/token/__init__.py b/backend/ppocr/data/imaug/vqa/token/__init__.py deleted file mode 100644 index 7c11566..0000000 --- a/backend/ppocr/data/imaug/vqa/token/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .vqa_token_chunk import VQASerTokenChunk, VQAReTokenChunk -from .vqa_token_pad import VQATokenPad -from .vqa_token_relation import VQAReTokenRelation diff --git a/backend/ppocr/data/imaug/vqa/token/vqa_token_chunk.py b/backend/ppocr/data/imaug/vqa/token/vqa_token_chunk.py deleted file mode 100644 index 1fa949e..0000000 --- a/backend/ppocr/data/imaug/vqa/token/vqa_token_chunk.py +++ /dev/null @@ -1,122 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from collections import defaultdict - - -class VQASerTokenChunk(object): - def __init__(self, max_seq_len=512, infer_mode=False, **kwargs): - self.max_seq_len = max_seq_len - self.infer_mode = infer_mode - - def __call__(self, data): - encoded_inputs_all = [] - seq_len = len(data['input_ids']) - for index in range(0, seq_len, self.max_seq_len): - chunk_beg = index - chunk_end = min(index + self.max_seq_len, seq_len) - encoded_inputs_example = {} - for key in data: - if key in [ - 'label', 'input_ids', 'labels', 'token_type_ids', - 'bbox', 'attention_mask' - ]: - if self.infer_mode and key == 'labels': - encoded_inputs_example[key] = data[key] - else: - encoded_inputs_example[key] = data[key][chunk_beg: - chunk_end] - else: - encoded_inputs_example[key] = data[key] - - encoded_inputs_all.append(encoded_inputs_example) - if len(encoded_inputs_all) == 0: - return None - return encoded_inputs_all[0] - - -class VQAReTokenChunk(object): - def __init__(self, - max_seq_len=512, - entities_labels=None, - infer_mode=False, - **kwargs): - self.max_seq_len = max_seq_len - self.entities_labels = { - 'HEADER': 0, - 'QUESTION': 1, - 'ANSWER': 2 - } if entities_labels is None else entities_labels - self.infer_mode = infer_mode - - def __call__(self, data): - # prepare data - entities = data.pop('entities') - relations = data.pop('relations') - encoded_inputs_all = [] - for index in range(0, len(data["input_ids"]), self.max_seq_len): - item = {} - for key in data: - if key in [ - 'label', 'input_ids', 'labels', 'token_type_ids', - 'bbox', 'attention_mask' - ]: - if self.infer_mode and key == 'labels': - item[key] = data[key] - else: - item[key] = data[key][index:index + self.max_seq_len] - else: - item[key] = data[key] - # select entity in current chunk - entities_in_this_span = [] - global_to_local_map = {} # - for entity_id, entity in enumerate(entities): - if (index <= entity["start"] < index + self.max_seq_len and - index <= entity["end"] < index + self.max_seq_len): - entity["start"] = entity["start"] - index - entity["end"] = entity["end"] - index - global_to_local_map[entity_id] = len(entities_in_this_span) - entities_in_this_span.append(entity) - - # select relations in current chunk - relations_in_this_span = [] - for relation in relations: - if (index <= relation["start_index"] < index + self.max_seq_len - and index <= relation["end_index"] < - index + self.max_seq_len): - relations_in_this_span.append({ - "head": global_to_local_map[relation["head"]], - "tail": global_to_local_map[relation["tail"]], - "start_index": relation["start_index"] - index, - "end_index": relation["end_index"] - index, - }) - item.update({ - "entities": self.reformat(entities_in_this_span), - "relations": self.reformat(relations_in_this_span), - }) - if len(item['entities']) > 0: - item['entities']['label'] = [ - self.entities_labels[x] for x in item['entities']['label'] - ] - encoded_inputs_all.append(item) - if len(encoded_inputs_all) == 0: - return None - return encoded_inputs_all[0] - - def reformat(self, data): - new_data = defaultdict(list) - for item in data: - for k, v in item.items(): - new_data[k].append(v) - return new_data diff --git a/backend/ppocr/data/imaug/vqa/token/vqa_token_pad.py b/backend/ppocr/data/imaug/vqa/token/vqa_token_pad.py deleted file mode 100644 index 8e5a20f..0000000 --- a/backend/ppocr/data/imaug/vqa/token/vqa_token_pad.py +++ /dev/null @@ -1,104 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import paddle -import numpy as np - - -class VQATokenPad(object): - def __init__(self, - max_seq_len=512, - pad_to_max_seq_len=True, - return_attention_mask=True, - return_token_type_ids=True, - truncation_strategy="longest_first", - return_overflowing_tokens=False, - return_special_tokens_mask=False, - infer_mode=False, - **kwargs): - self.max_seq_len = max_seq_len - self.pad_to_max_seq_len = max_seq_len - self.return_attention_mask = return_attention_mask - self.return_token_type_ids = return_token_type_ids - self.truncation_strategy = truncation_strategy - self.return_overflowing_tokens = return_overflowing_tokens - self.return_special_tokens_mask = return_special_tokens_mask - self.pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index - self.infer_mode = infer_mode - - def __call__(self, data): - needs_to_be_padded = self.pad_to_max_seq_len and len(data[ - "input_ids"]) < self.max_seq_len - - if needs_to_be_padded: - if 'tokenizer_params' in data: - tokenizer_params = data.pop('tokenizer_params') - else: - tokenizer_params = dict( - padding_side='right', pad_token_type_id=0, pad_token_id=1) - - difference = self.max_seq_len - len(data["input_ids"]) - if tokenizer_params['padding_side'] == 'right': - if self.return_attention_mask: - data["attention_mask"] = [1] * len(data[ - "input_ids"]) + [0] * difference - if self.return_token_type_ids: - data["token_type_ids"] = ( - data["token_type_ids"] + - [tokenizer_params['pad_token_type_id']] * difference) - if self.return_special_tokens_mask: - data["special_tokens_mask"] = data[ - "special_tokens_mask"] + [1] * difference - data["input_ids"] = data["input_ids"] + [ - tokenizer_params['pad_token_id'] - ] * difference - if not self.infer_mode: - data["labels"] = data[ - "labels"] + [self.pad_token_label_id] * difference - data["bbox"] = data["bbox"] + [[0, 0, 0, 0]] * difference - elif tokenizer_params['padding_side'] == 'left': - if self.return_attention_mask: - data["attention_mask"] = [0] * difference + [ - 1 - ] * len(data["input_ids"]) - if self.return_token_type_ids: - data["token_type_ids"] = ( - [tokenizer_params['pad_token_type_id']] * difference + - data["token_type_ids"]) - if self.return_special_tokens_mask: - data["special_tokens_mask"] = [ - 1 - ] * difference + data["special_tokens_mask"] - data["input_ids"] = [tokenizer_params['pad_token_id'] - ] * difference + data["input_ids"] - if not self.infer_mode: - data["labels"] = [self.pad_token_label_id - ] * difference + data["labels"] - data["bbox"] = [[0, 0, 0, 0]] * difference + data["bbox"] - else: - if self.return_attention_mask: - data["attention_mask"] = [1] * len(data["input_ids"]) - - for key in data: - if key in [ - 'input_ids', 'labels', 'token_type_ids', 'bbox', - 'attention_mask' - ]: - if self.infer_mode: - if key != 'labels': - length = min(len(data[key]), self.max_seq_len) - data[key] = data[key][:length] - else: - continue - data[key] = np.array(data[key], dtype='int64') - return data diff --git a/backend/ppocr/data/imaug/vqa/token/vqa_token_relation.py b/backend/ppocr/data/imaug/vqa/token/vqa_token_relation.py deleted file mode 100644 index 293988f..0000000 --- a/backend/ppocr/data/imaug/vqa/token/vqa_token_relation.py +++ /dev/null @@ -1,67 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -class VQAReTokenRelation(object): - def __init__(self, **kwargs): - pass - - def __call__(self, data): - """ - build relations - """ - entities = data['entities'] - relations = data['relations'] - id2label = data.pop('id2label') - empty_entity = data.pop('empty_entity') - entity_id_to_index_map = data.pop('entity_id_to_index_map') - - relations = list(set(relations)) - relations = [ - rel for rel in relations - if rel[0] not in empty_entity and rel[1] not in empty_entity - ] - kv_relations = [] - for rel in relations: - pair = [id2label[rel[0]], id2label[rel[1]]] - if pair == ["question", "answer"]: - kv_relations.append({ - "head": entity_id_to_index_map[rel[0]], - "tail": entity_id_to_index_map[rel[1]] - }) - elif pair == ["answer", "question"]: - kv_relations.append({ - "head": entity_id_to_index_map[rel[1]], - "tail": entity_id_to_index_map[rel[0]] - }) - else: - continue - relations = sorted( - [{ - "head": rel["head"], - "tail": rel["tail"], - "start_index": self.get_relation_span(rel, entities)[0], - "end_index": self.get_relation_span(rel, entities)[1], - } for rel in kv_relations], - key=lambda x: x["head"], ) - - data['relations'] = relations - return data - - def get_relation_span(self, rel, entities): - bound = [] - for entity_index in [rel["head"], rel["tail"]]: - bound.append(entities[entity_index]["start"]) - bound.append(entities[entity_index]["end"]) - return min(bound), max(bound) diff --git a/backend/ppocr/data/lmdb_dataset.py b/backend/ppocr/data/lmdb_dataset.py deleted file mode 100644 index e1b4980..0000000 --- a/backend/ppocr/data/lmdb_dataset.py +++ /dev/null @@ -1,118 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -import os -from paddle.io import Dataset -import lmdb -import cv2 - -from .imaug import transform, create_operators - - -class LMDBDataSet(Dataset): - def __init__(self, config, mode, logger, seed=None): - super(LMDBDataSet, self).__init__() - - global_config = config['Global'] - dataset_config = config[mode]['dataset'] - loader_config = config[mode]['loader'] - batch_size = loader_config['batch_size_per_card'] - data_dir = dataset_config['data_dir'] - self.do_shuffle = loader_config['shuffle'] - - self.lmdb_sets = self.load_hierarchical_lmdb_dataset(data_dir) - logger.info("Initialize indexs of datasets:%s" % data_dir) - self.data_idx_order_list = self.dataset_traversal() - if self.do_shuffle: - np.random.shuffle(self.data_idx_order_list) - self.ops = create_operators(dataset_config['transforms'], global_config) - - ratio_list = dataset_config.get("ratio_list", [1.0]) - self.need_reset = True in [x < 1 for x in ratio_list] - - def load_hierarchical_lmdb_dataset(self, data_dir): - lmdb_sets = {} - dataset_idx = 0 - for dirpath, dirnames, filenames in os.walk(data_dir + '/'): - if not dirnames: - env = lmdb.open( - dirpath, - max_readers=32, - readonly=True, - lock=False, - readahead=False, - meminit=False) - txn = env.begin(write=False) - num_samples = int(txn.get('num-samples'.encode())) - lmdb_sets[dataset_idx] = {"dirpath":dirpath, "env":env, \ - "txn":txn, "num_samples":num_samples} - dataset_idx += 1 - return lmdb_sets - - def dataset_traversal(self): - lmdb_num = len(self.lmdb_sets) - total_sample_num = 0 - for lno in range(lmdb_num): - total_sample_num += self.lmdb_sets[lno]['num_samples'] - data_idx_order_list = np.zeros((total_sample_num, 2)) - beg_idx = 0 - for lno in range(lmdb_num): - tmp_sample_num = self.lmdb_sets[lno]['num_samples'] - end_idx = beg_idx + tmp_sample_num - data_idx_order_list[beg_idx:end_idx, 0] = lno - data_idx_order_list[beg_idx:end_idx, 1] \ - = list(range(tmp_sample_num)) - data_idx_order_list[beg_idx:end_idx, 1] += 1 - beg_idx = beg_idx + tmp_sample_num - return data_idx_order_list - - def get_img_data(self, value): - """get_img_data""" - if not value: - return None - imgdata = np.frombuffer(value, dtype='uint8') - if imgdata is None: - return None - imgori = cv2.imdecode(imgdata, 1) - if imgori is None: - return None - return imgori - - def get_lmdb_sample_info(self, txn, index): - label_key = 'label-%09d'.encode() % index - label = txn.get(label_key) - if label is None: - return None - label = label.decode('utf-8') - img_key = 'image-%09d'.encode() % index - imgbuf = txn.get(img_key) - return imgbuf, label - - def __getitem__(self, idx): - lmdb_idx, file_idx = self.data_idx_order_list[idx] - lmdb_idx = int(lmdb_idx) - file_idx = int(file_idx) - sample_info = self.get_lmdb_sample_info(self.lmdb_sets[lmdb_idx]['txn'], - file_idx) - if sample_info is None: - return self.__getitem__(np.random.randint(self.__len__())) - img, label = sample_info - data = {'image': img, 'label': label} - outs = transform(data, self.ops) - if outs is None: - return self.__getitem__(np.random.randint(self.__len__())) - return outs - - def __len__(self): - return self.data_idx_order_list.shape[0] diff --git a/backend/ppocr/data/pgnet_dataset.py b/backend/ppocr/data/pgnet_dataset.py deleted file mode 100644 index 6f80179..0000000 --- a/backend/ppocr/data/pgnet_dataset.py +++ /dev/null @@ -1,106 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -import os -from paddle.io import Dataset -from .imaug import transform, create_operators -import random - - -class PGDataSet(Dataset): - def __init__(self, config, mode, logger, seed=None): - super(PGDataSet, self).__init__() - - self.logger = logger - self.seed = seed - self.mode = mode - global_config = config['Global'] - dataset_config = config[mode]['dataset'] - loader_config = config[mode]['loader'] - - self.delimiter = dataset_config.get('delimiter', '\t') - label_file_list = dataset_config.pop('label_file_list') - data_source_num = len(label_file_list) - ratio_list = dataset_config.get("ratio_list", [1.0]) - if isinstance(ratio_list, (float, int)): - ratio_list = [float(ratio_list)] * int(data_source_num) - assert len( - ratio_list - ) == data_source_num, "The length of ratio_list should be the same as the file_list." - self.data_dir = dataset_config['data_dir'] - self.do_shuffle = loader_config['shuffle'] - - logger.info("Initialize indexs of datasets:%s" % label_file_list) - self.data_lines = self.get_image_info_list(label_file_list, ratio_list) - self.data_idx_order_list = list(range(len(self.data_lines))) - if mode.lower() == "train": - self.shuffle_data_random() - - self.ops = create_operators(dataset_config['transforms'], global_config) - - self.need_reset = True in [x < 1 for x in ratio_list] - - def shuffle_data_random(self): - if self.do_shuffle: - random.seed(self.seed) - random.shuffle(self.data_lines) - return - - def get_image_info_list(self, file_list, ratio_list): - if isinstance(file_list, str): - file_list = [file_list] - data_lines = [] - for idx, file in enumerate(file_list): - with open(file, "rb") as f: - lines = f.readlines() - if self.mode == "train" or ratio_list[idx] < 1.0: - random.seed(self.seed) - lines = random.sample(lines, - round(len(lines) * ratio_list[idx])) - data_lines.extend(lines) - return data_lines - - def __getitem__(self, idx): - file_idx = self.data_idx_order_list[idx] - data_line = self.data_lines[file_idx] - img_id = 0 - try: - data_line = data_line.decode('utf-8') - substr = data_line.strip("\n").split(self.delimiter) - file_name = substr[0] - label = substr[1] - img_path = os.path.join(self.data_dir, file_name) - if self.mode.lower() == 'eval': - try: - img_id = int(data_line.split(".")[0][7:]) - except: - img_id = 0 - data = {'img_path': img_path, 'label': label, 'img_id': img_id} - if not os.path.exists(img_path): - raise Exception("{} does not exist!".format(img_path)) - with open(data['img_path'], 'rb') as f: - img = f.read() - data['image'] = img - outs = transform(data, self.ops) - except Exception as e: - self.logger.error( - "When parsing line {}, error happened with msg: {}".format( - self.data_idx_order_list[idx], e)) - outs = None - if outs is None: - return self.__getitem__(np.random.randint(self.__len__())) - return outs - - def __len__(self): - return len(self.data_idx_order_list) diff --git a/backend/ppocr/data/pubtab_dataset.py b/backend/ppocr/data/pubtab_dataset.py deleted file mode 100644 index 671cda7..0000000 --- a/backend/ppocr/data/pubtab_dataset.py +++ /dev/null @@ -1,114 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -import os -import random -from paddle.io import Dataset -import json - -from .imaug import transform, create_operators - - -class PubTabDataSet(Dataset): - def __init__(self, config, mode, logger, seed=None): - super(PubTabDataSet, self).__init__() - self.logger = logger - - global_config = config['Global'] - dataset_config = config[mode]['dataset'] - loader_config = config[mode]['loader'] - - label_file_path = dataset_config.pop('label_file_path') - - self.data_dir = dataset_config['data_dir'] - self.do_shuffle = loader_config['shuffle'] - self.do_hard_select = False - if 'hard_select' in loader_config: - self.do_hard_select = loader_config['hard_select'] - self.hard_prob = loader_config['hard_prob'] - if self.do_hard_select: - self.img_select_prob = self.load_hard_select_prob() - self.table_select_type = None - if 'table_select_type' in loader_config: - self.table_select_type = loader_config['table_select_type'] - self.table_select_prob = loader_config['table_select_prob'] - - self.seed = seed - logger.info("Initialize indexs of datasets:%s" % label_file_path) - with open(label_file_path, "rb") as f: - self.data_lines = f.readlines() - self.data_idx_order_list = list(range(len(self.data_lines))) - if mode.lower() == "train": - self.shuffle_data_random() - self.ops = create_operators(dataset_config['transforms'], global_config) - - ratio_list = dataset_config.get("ratio_list", [1.0]) - self.need_reset = True in [x < 1 for x in ratio_list] - - def shuffle_data_random(self): - if self.do_shuffle: - random.seed(self.seed) - random.shuffle(self.data_lines) - return - - def __getitem__(self, idx): - try: - data_line = self.data_lines[idx] - data_line = data_line.decode('utf-8').strip("\n") - info = json.loads(data_line) - file_name = info['filename'] - select_flag = True - if self.do_hard_select: - prob = self.img_select_prob[file_name] - if prob < random.uniform(0, 1): - select_flag = False - - if self.table_select_type: - structure = info['html']['structure']['tokens'].copy() - structure_str = ''.join(structure) - table_type = "simple" - if 'colspan' in structure_str or 'rowspan' in structure_str: - table_type = "complex" - if table_type == "complex": - if self.table_select_prob < random.uniform(0, 1): - select_flag = False - - if select_flag: - cells = info['html']['cells'].copy() - structure = info['html']['structure'].copy() - img_path = os.path.join(self.data_dir, file_name) - data = { - 'img_path': img_path, - 'cells': cells, - 'structure': structure - } - if not os.path.exists(img_path): - raise Exception("{} does not exist!".format(img_path)) - with open(data['img_path'], 'rb') as f: - img = f.read() - data['image'] = img - outs = transform(data, self.ops) - else: - outs = None - except Exception as e: - self.logger.error( - "When parsing line {}, error happened with msg: {}".format( - data_line, e)) - outs = None - if outs is None: - return self.__getitem__(np.random.randint(self.__len__())) - return outs - - def __len__(self): - return len(self.data_idx_order_list) diff --git a/backend/ppocr/data/simple_dataset.py b/backend/ppocr/data/simple_dataset.py deleted file mode 100644 index b5da9b8..0000000 --- a/backend/ppocr/data/simple_dataset.py +++ /dev/null @@ -1,151 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -import os -import json -import random -import traceback -from paddle.io import Dataset -from .imaug import transform, create_operators - - -class SimpleDataSet(Dataset): - def __init__(self, config, mode, logger, seed=None): - super(SimpleDataSet, self).__init__() - self.logger = logger - self.mode = mode.lower() - - global_config = config['Global'] - dataset_config = config[mode]['dataset'] - loader_config = config[mode]['loader'] - - self.delimiter = dataset_config.get('delimiter', '\t') - label_file_list = dataset_config.pop('label_file_list') - data_source_num = len(label_file_list) - ratio_list = dataset_config.get("ratio_list", [1.0]) - if isinstance(ratio_list, (float, int)): - ratio_list = [float(ratio_list)] * int(data_source_num) - - assert len( - ratio_list - ) == data_source_num, "The length of ratio_list should be the same as the file_list." - self.data_dir = dataset_config['data_dir'] - self.do_shuffle = loader_config['shuffle'] - self.seed = seed - logger.info("Initialize indexs of datasets:%s" % label_file_list) - self.data_lines = self.get_image_info_list(label_file_list, ratio_list) - self.data_idx_order_list = list(range(len(self.data_lines))) - if self.mode == "train" and self.do_shuffle: - self.shuffle_data_random() - self.ops = create_operators(dataset_config['transforms'], global_config) - self.ext_op_transform_idx = dataset_config.get("ext_op_transform_idx", - 2) - self.need_reset = True in [x < 1 for x in ratio_list] - - def get_image_info_list(self, file_list, ratio_list): - if isinstance(file_list, str): - file_list = [file_list] - data_lines = [] - for idx, file in enumerate(file_list): - with open(file, "rb") as f: - lines = f.readlines() - if self.mode == "train" or ratio_list[idx] < 1.0: - random.seed(self.seed) - lines = random.sample(lines, - round(len(lines) * ratio_list[idx])) - data_lines.extend(lines) - return data_lines - - def shuffle_data_random(self): - random.seed(self.seed) - random.shuffle(self.data_lines) - return - - def _try_parse_filename_list(self, file_name): - # multiple images -> one gt label - if len(file_name) > 0 and file_name[0] == "[": - try: - info = json.loads(file_name) - file_name = random.choice(info) - except: - pass - return file_name - - def get_ext_data(self): - ext_data_num = 0 - for op in self.ops: - if hasattr(op, 'ext_data_num'): - ext_data_num = getattr(op, 'ext_data_num') - break - load_data_ops = self.ops[:self.ext_op_transform_idx] - ext_data = [] - - while len(ext_data) < ext_data_num: - file_idx = self.data_idx_order_list[np.random.randint(self.__len__( - ))] - data_line = self.data_lines[file_idx] - data_line = data_line.decode('utf-8') - substr = data_line.strip("\n").split(self.delimiter) - file_name = substr[0] - file_name = self._try_parse_filename_list(file_name) - label = substr[1] - img_path = os.path.join(self.data_dir, file_name) - data = {'img_path': img_path, 'label': label} - if not os.path.exists(img_path): - continue - with open(data['img_path'], 'rb') as f: - img = f.read() - data['image'] = img - data = transform(data, load_data_ops) - - if data is None: - continue - if 'polys' in data.keys(): - if data['polys'].shape[1] != 4: - continue - ext_data.append(data) - return ext_data - - def __getitem__(self, idx): - file_idx = self.data_idx_order_list[idx] - data_line = self.data_lines[file_idx] - try: - data_line = data_line.decode('utf-8') - substr = data_line.strip("\n").split(self.delimiter) - file_name = substr[0] - file_name = self._try_parse_filename_list(file_name) - label = substr[1] - img_path = os.path.join(self.data_dir, file_name) - data = {'img_path': img_path, 'label': label} - if not os.path.exists(img_path): - raise Exception("{} does not exist!".format(img_path)) - with open(data['img_path'], 'rb') as f: - img = f.read() - data['image'] = img - data['ext_data'] = self.get_ext_data() - outs = transform(data, self.ops) - except: - self.logger.error( - "When parsing line {}, error happened with msg: {}".format( - data_line, traceback.format_exc())) - outs = None - if outs is None: - # during evaluation, we should fix the idx to get same results for many times of evaluation. - rnd_idx = np.random.randint(self.__len__( - )) if self.mode == "train" else (idx + 1) % self.__len__() - return self.__getitem__(rnd_idx) - return outs - - def __len__(self): - return len(self.data_idx_order_list) diff --git a/backend/ppocr/losses/__init__.py b/backend/ppocr/losses/__init__.py deleted file mode 100755 index de8419b..0000000 --- a/backend/ppocr/losses/__init__.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import copy -import paddle -import paddle.nn as nn - -# basic_loss -from .basic_loss import LossFromOutput - -# det loss -from .det_db_loss import DBLoss -from .det_east_loss import EASTLoss -from .det_sast_loss import SASTLoss -from .det_pse_loss import PSELoss -from .det_fce_loss import FCELoss - -# rec loss -from .rec_ctc_loss import CTCLoss -from .rec_att_loss import AttentionLoss -from .rec_srn_loss import SRNLoss -from .rec_nrtr_loss import NRTRLoss -from .rec_sar_loss import SARLoss -from .rec_aster_loss import AsterLoss -from .rec_pren_loss import PRENLoss -from .rec_multi_loss import MultiLoss - -# cls loss -from .cls_loss import ClsLoss - -# e2e loss -from .e2e_pg_loss import PGLoss -from .kie_sdmgr_loss import SDMGRLoss - -# basic loss function -from .basic_loss import DistanceLoss - -# combined loss function -from .combined_loss import CombinedLoss - -# table loss -from .table_att_loss import TableAttentionLoss - -# vqa token loss -from .vqa_token_layoutlm_loss import VQASerTokenLayoutLMLoss - - -def build_loss(config): - support_dict = [ - 'DBLoss', 'PSELoss', 'EASTLoss', 'SASTLoss', 'FCELoss', 'CTCLoss', - 'ClsLoss', 'AttentionLoss', 'SRNLoss', 'PGLoss', 'CombinedLoss', - 'NRTRLoss', 'TableAttentionLoss', 'SARLoss', 'AsterLoss', 'SDMGRLoss', - 'VQASerTokenLayoutLMLoss', 'LossFromOutput', 'PRENLoss', 'MultiLoss' - ] - config = copy.deepcopy(config) - module_name = config.pop('name') - assert module_name in support_dict, Exception('loss only support {}'.format( - support_dict)) - module_class = eval(module_name)(**config) - return module_class diff --git a/backend/ppocr/losses/ace_loss.py b/backend/ppocr/losses/ace_loss.py deleted file mode 100644 index 915b99e..0000000 --- a/backend/ppocr/losses/ace_loss.py +++ /dev/null @@ -1,52 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This code is refer from: https://github.com/viig99/LS-ACELoss - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -import paddle.nn as nn - - -class ACELoss(nn.Layer): - def __init__(self, **kwargs): - super().__init__() - self.loss_func = nn.CrossEntropyLoss( - weight=None, - ignore_index=0, - reduction='none', - soft_label=True, - axis=-1) - - def __call__(self, predicts, batch): - if isinstance(predicts, (list, tuple)): - predicts = predicts[-1] - - B, N = predicts.shape[:2] - div = paddle.to_tensor([N]).astype('float32') - - predicts = nn.functional.softmax(predicts, axis=-1) - aggregation_preds = paddle.sum(predicts, axis=1) - aggregation_preds = paddle.divide(aggregation_preds, div) - - length = batch[2].astype("float32") - batch = batch[3].astype("float32") - batch[:, 0] = paddle.subtract(div, length) - batch = paddle.divide(batch, div) - - loss = self.loss_func(aggregation_preds, batch) - return {"loss_ace": loss} diff --git a/backend/ppocr/losses/basic_loss.py b/backend/ppocr/losses/basic_loss.py deleted file mode 100644 index 2df96ea..0000000 --- a/backend/ppocr/losses/basic_loss.py +++ /dev/null @@ -1,155 +0,0 @@ -#copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -import paddle -import paddle.nn as nn -import paddle.nn.functional as F - -from paddle.nn import L1Loss -from paddle.nn import MSELoss as L2Loss -from paddle.nn import SmoothL1Loss - - -class CELoss(nn.Layer): - def __init__(self, epsilon=None): - super().__init__() - if epsilon is not None and (epsilon <= 0 or epsilon >= 1): - epsilon = None - self.epsilon = epsilon - - def _labelsmoothing(self, target, class_num): - if target.shape[-1] != class_num: - one_hot_target = F.one_hot(target, class_num) - else: - one_hot_target = target - soft_target = F.label_smooth(one_hot_target, epsilon=self.epsilon) - soft_target = paddle.reshape(soft_target, shape=[-1, class_num]) - return soft_target - - def forward(self, x, label): - loss_dict = {} - if self.epsilon is not None: - class_num = x.shape[-1] - label = self._labelsmoothing(label, class_num) - x = -F.log_softmax(x, axis=-1) - loss = paddle.sum(x * label, axis=-1) - else: - if label.shape[-1] == x.shape[-1]: - label = F.softmax(label, axis=-1) - soft_label = True - else: - soft_label = False - loss = F.cross_entropy(x, label=label, soft_label=soft_label) - return loss - - -class KLJSLoss(object): - def __init__(self, mode='kl'): - assert mode in ['kl', 'js', 'KL', 'JS' - ], "mode can only be one of ['kl', 'js', 'KL', 'JS']" - self.mode = mode - - def __call__(self, p1, p2, reduction="mean"): - - loss = paddle.multiply(p2, paddle.log((p2 + 1e-5) / (p1 + 1e-5) + 1e-5)) - - if self.mode.lower() == "js": - loss += paddle.multiply( - p1, paddle.log((p1 + 1e-5) / (p2 + 1e-5) + 1e-5)) - loss *= 0.5 - if reduction == "mean": - loss = paddle.mean(loss, axis=[1, 2]) - elif reduction == "none" or reduction is None: - return loss - else: - loss = paddle.sum(loss, axis=[1, 2]) - - return loss - - -class DMLLoss(nn.Layer): - """ - DMLLoss - """ - - def __init__(self, act=None, use_log=False): - super().__init__() - if act is not None: - assert act in ["softmax", "sigmoid"] - if act == "softmax": - self.act = nn.Softmax(axis=-1) - elif act == "sigmoid": - self.act = nn.Sigmoid() - else: - self.act = None - - self.use_log = use_log - self.jskl_loss = KLJSLoss(mode="js") - - def _kldiv(self, x, target): - eps = 1.0e-10 - loss = target * (paddle.log(target + eps) - x) - # batch mean loss - loss = paddle.sum(loss) / loss.shape[0] - return loss - - def forward(self, out1, out2): - if self.act is not None: - out1 = self.act(out1) + 1e-10 - out2 = self.act(out2) + 1e-10 - if self.use_log: - # for recognition distillation, log is needed for feature map - log_out1 = paddle.log(out1) - log_out2 = paddle.log(out2) - loss = ( - self._kldiv(log_out1, out2) + self._kldiv(log_out2, out1)) / 2.0 - else: - # for detection distillation log is not needed - loss = self.jskl_loss(out1, out2) - return loss - - -class DistanceLoss(nn.Layer): - """ - DistanceLoss: - mode: loss mode - """ - - def __init__(self, mode="l2", **kargs): - super().__init__() - assert mode in ["l1", "l2", "smooth_l1"] - if mode == "l1": - self.loss_func = nn.L1Loss(**kargs) - elif mode == "l2": - self.loss_func = nn.MSELoss(**kargs) - elif mode == "smooth_l1": - self.loss_func = nn.SmoothL1Loss(**kargs) - - def forward(self, x, y): - return self.loss_func(x, y) - - -class LossFromOutput(nn.Layer): - def __init__(self, key='loss', reduction='none'): - super().__init__() - self.key = key - self.reduction = reduction - - def forward(self, predicts, batch): - loss = predicts[self.key] - if self.reduction == 'mean': - loss = paddle.mean(loss) - elif self.reduction == 'sum': - loss = paddle.sum(loss) - return {'loss': loss} diff --git a/backend/ppocr/losses/center_loss.py b/backend/ppocr/losses/center_loss.py deleted file mode 100644 index f62b8af..0000000 --- a/backend/ppocr/losses/center_loss.py +++ /dev/null @@ -1,88 +0,0 @@ -#copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -# This code is refer from: https://github.com/KaiyangZhou/pytorch-center-loss - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -import pickle - -import paddle -import paddle.nn as nn -import paddle.nn.functional as F - - -class CenterLoss(nn.Layer): - """ - Reference: Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016. - """ - - def __init__(self, num_classes=6625, feat_dim=96, center_file_path=None): - super().__init__() - self.num_classes = num_classes - self.feat_dim = feat_dim - self.centers = paddle.randn( - shape=[self.num_classes, self.feat_dim]).astype("float64") - - if center_file_path is not None: - assert os.path.exists( - center_file_path - ), f"center path({center_file_path}) must exist when it is not None." - with open(center_file_path, 'rb') as f: - char_dict = pickle.load(f) - for key in char_dict.keys(): - self.centers[key] = paddle.to_tensor(char_dict[key]) - - def __call__(self, predicts, batch): - assert isinstance(predicts, (list, tuple)) - features, predicts = predicts - - feats_reshape = paddle.reshape( - features, [-1, features.shape[-1]]).astype("float64") - label = paddle.argmax(predicts, axis=2) - label = paddle.reshape(label, [label.shape[0] * label.shape[1]]) - - batch_size = feats_reshape.shape[0] - - #calc l2 distance between feats and centers - square_feat = paddle.sum(paddle.square(feats_reshape), - axis=1, - keepdim=True) - square_feat = paddle.expand(square_feat, [batch_size, self.num_classes]) - - square_center = paddle.sum(paddle.square(self.centers), - axis=1, - keepdim=True) - square_center = paddle.expand( - square_center, [self.num_classes, batch_size]).astype("float64") - square_center = paddle.transpose(square_center, [1, 0]) - - distmat = paddle.add(square_feat, square_center) - feat_dot_center = paddle.matmul(feats_reshape, - paddle.transpose(self.centers, [1, 0])) - distmat = distmat - 2.0 * feat_dot_center - - #generate the mask - classes = paddle.arange(self.num_classes).astype("int64") - label = paddle.expand( - paddle.unsqueeze(label, 1), (batch_size, self.num_classes)) - mask = paddle.equal( - paddle.expand(classes, [batch_size, self.num_classes]), - label).astype("float64") - dist = paddle.multiply(distmat, mask) - - loss = paddle.sum(paddle.clip(dist, min=1e-12, max=1e+12)) / batch_size - return {'loss_center': loss} diff --git a/backend/ppocr/losses/cls_loss.py b/backend/ppocr/losses/cls_loss.py deleted file mode 100755 index abc5e5b..0000000 --- a/backend/ppocr/losses/cls_loss.py +++ /dev/null @@ -1,30 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import nn - - -class ClsLoss(nn.Layer): - def __init__(self, **kwargs): - super(ClsLoss, self).__init__() - self.loss_func = nn.CrossEntropyLoss(reduction='mean') - - def forward(self, predicts, batch): - label = batch[1].astype("int64") - loss = self.loss_func(input=predicts, label=label) - return {'loss': loss} diff --git a/backend/ppocr/losses/combined_loss.py b/backend/ppocr/losses/combined_loss.py deleted file mode 100644 index f4cdee8..0000000 --- a/backend/ppocr/losses/combined_loss.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle -import paddle.nn as nn - -from .rec_ctc_loss import CTCLoss -from .center_loss import CenterLoss -from .ace_loss import ACELoss -from .rec_sar_loss import SARLoss - -from .distillation_loss import DistillationCTCLoss -from .distillation_loss import DistillationSARLoss -from .distillation_loss import DistillationDMLLoss -from .distillation_loss import DistillationDistanceLoss, DistillationDBLoss, DistillationDilaDBLoss - - -class CombinedLoss(nn.Layer): - """ - CombinedLoss: - a combionation of loss function - """ - - def __init__(self, loss_config_list=None): - super().__init__() - self.loss_func = [] - self.loss_weight = [] - assert isinstance(loss_config_list, list), ( - 'operator config should be a list') - for config in loss_config_list: - assert isinstance(config, - dict) and len(config) == 1, "yaml format error" - name = list(config)[0] - param = config[name] - assert "weight" in param, "weight must be in param, but param just contains {}".format( - param.keys()) - self.loss_weight.append(param.pop("weight")) - self.loss_func.append(eval(name)(**param)) - - def forward(self, input, batch, **kargs): - loss_dict = {} - loss_all = 0. - for idx, loss_func in enumerate(self.loss_func): - loss = loss_func(input, batch, **kargs) - if isinstance(loss, paddle.Tensor): - loss = {"loss_{}_{}".format(str(loss), idx): loss} - - weight = self.loss_weight[idx] - - loss = {key: loss[key] * weight for key in loss} - - if "loss" in loss: - loss_all += loss["loss"] - else: - loss_all += paddle.add_n(list(loss.values())) - loss_dict.update(loss) - loss_dict["loss"] = loss_all - return loss_dict diff --git a/backend/ppocr/losses/det_basic_loss.py b/backend/ppocr/losses/det_basic_loss.py deleted file mode 100644 index 61ea579..0000000 --- a/backend/ppocr/losses/det_basic_loss.py +++ /dev/null @@ -1,153 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/WenmuZhou/DBNet.pytorch/blob/master/models/losses/basic_loss.py -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -import paddle -from paddle import nn -import paddle.nn.functional as F - - -class BalanceLoss(nn.Layer): - def __init__(self, - balance_loss=True, - main_loss_type='DiceLoss', - negative_ratio=3, - return_origin=False, - eps=1e-6, - **kwargs): - """ - The BalanceLoss for Differentiable Binarization text detection - args: - balance_loss (bool): whether balance loss or not, default is True - main_loss_type (str): can only be one of ['CrossEntropy','DiceLoss', - 'Euclidean','BCELoss', 'MaskL1Loss'], default is 'DiceLoss'. - negative_ratio (int|float): float, default is 3. - return_origin (bool): whether return unbalanced loss or not, default is False. - eps (float): default is 1e-6. - """ - super(BalanceLoss, self).__init__() - self.balance_loss = balance_loss - self.main_loss_type = main_loss_type - self.negative_ratio = negative_ratio - self.return_origin = return_origin - self.eps = eps - - if self.main_loss_type == "CrossEntropy": - self.loss = nn.CrossEntropyLoss() - elif self.main_loss_type == "Euclidean": - self.loss = nn.MSELoss() - elif self.main_loss_type == "DiceLoss": - self.loss = DiceLoss(self.eps) - elif self.main_loss_type == "BCELoss": - self.loss = BCELoss(reduction='none') - elif self.main_loss_type == "MaskL1Loss": - self.loss = MaskL1Loss(self.eps) - else: - loss_type = [ - 'CrossEntropy', 'DiceLoss', 'Euclidean', 'BCELoss', 'MaskL1Loss' - ] - raise Exception( - "main_loss_type in BalanceLoss() can only be one of {}".format( - loss_type)) - - def forward(self, pred, gt, mask=None): - """ - The BalanceLoss for Differentiable Binarization text detection - args: - pred (variable): predicted feature maps. - gt (variable): ground truth feature maps. - mask (variable): masked maps. - return: (variable) balanced loss - """ - positive = gt * mask - negative = (1 - gt) * mask - - positive_count = int(positive.sum()) - negative_count = int( - min(negative.sum(), positive_count * self.negative_ratio)) - loss = self.loss(pred, gt, mask=mask) - - if not self.balance_loss: - return loss - - positive_loss = positive * loss - negative_loss = negative * loss - negative_loss = paddle.reshape(negative_loss, shape=[-1]) - if negative_count > 0: - sort_loss = negative_loss.sort(descending=True) - negative_loss = sort_loss[:negative_count] - # negative_loss, _ = paddle.topk(negative_loss, k=negative_count_int) - balance_loss = (positive_loss.sum() + negative_loss.sum()) / ( - positive_count + negative_count + self.eps) - else: - balance_loss = positive_loss.sum() / (positive_count + self.eps) - if self.return_origin: - return balance_loss, loss - - return balance_loss - - -class DiceLoss(nn.Layer): - def __init__(self, eps=1e-6): - super(DiceLoss, self).__init__() - self.eps = eps - - def forward(self, pred, gt, mask, weights=None): - """ - DiceLoss function. - """ - - assert pred.shape == gt.shape - assert pred.shape == mask.shape - if weights is not None: - assert weights.shape == mask.shape - mask = weights * mask - intersection = paddle.sum(pred * gt * mask) - - union = paddle.sum(pred * mask) + paddle.sum(gt * mask) + self.eps - loss = 1 - 2.0 * intersection / union - assert loss <= 1 - return loss - - -class MaskL1Loss(nn.Layer): - def __init__(self, eps=1e-6): - super(MaskL1Loss, self).__init__() - self.eps = eps - - def forward(self, pred, gt, mask): - """ - Mask L1 Loss - """ - loss = (paddle.abs(pred - gt) * mask).sum() / (mask.sum() + self.eps) - loss = paddle.mean(loss) - return loss - - -class BCELoss(nn.Layer): - def __init__(self, reduction='mean'): - super(BCELoss, self).__init__() - self.reduction = reduction - - def forward(self, input, label, mask=None, weight=None, name=None): - loss = F.binary_cross_entropy(input, label, reduction=self.reduction) - return loss diff --git a/backend/ppocr/losses/det_db_loss.py b/backend/ppocr/losses/det_db_loss.py deleted file mode 100755 index 708ffbd..0000000 --- a/backend/ppocr/losses/det_db_loss.py +++ /dev/null @@ -1,76 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/WenmuZhou/DBNet.pytorch/blob/master/models/losses/DB_loss.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import nn - -from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss - - -class DBLoss(nn.Layer): - """ - Differentiable Binarization (DB) Loss Function - args: - param (dict): the super paramter for DB Loss - """ - - def __init__(self, - balance_loss=True, - main_loss_type='DiceLoss', - alpha=5, - beta=10, - ohem_ratio=3, - eps=1e-6, - **kwargs): - super(DBLoss, self).__init__() - self.alpha = alpha - self.beta = beta - self.dice_loss = DiceLoss(eps=eps) - self.l1_loss = MaskL1Loss(eps=eps) - self.bce_loss = BalanceLoss( - balance_loss=balance_loss, - main_loss_type=main_loss_type, - negative_ratio=ohem_ratio) - - def forward(self, predicts, labels): - predict_maps = predicts['maps'] - label_threshold_map, label_threshold_mask, label_shrink_map, label_shrink_mask = labels[ - 1:] - shrink_maps = predict_maps[:, 0, :, :] - threshold_maps = predict_maps[:, 1, :, :] - binary_maps = predict_maps[:, 2, :, :] - - loss_shrink_maps = self.bce_loss(shrink_maps, label_shrink_map, - label_shrink_mask) - loss_threshold_maps = self.l1_loss(threshold_maps, label_threshold_map, - label_threshold_mask) - loss_binary_maps = self.dice_loss(binary_maps, label_shrink_map, - label_shrink_mask) - loss_shrink_maps = self.alpha * loss_shrink_maps - loss_threshold_maps = self.beta * loss_threshold_maps - - loss_all = loss_shrink_maps + loss_threshold_maps \ - + loss_binary_maps - losses = {'loss': loss_all, \ - "loss_shrink_maps": loss_shrink_maps, \ - "loss_threshold_maps": loss_threshold_maps, \ - "loss_binary_maps": loss_binary_maps} - return losses diff --git a/backend/ppocr/losses/det_east_loss.py b/backend/ppocr/losses/det_east_loss.py deleted file mode 100644 index bcf5372..0000000 --- a/backend/ppocr/losses/det_east_loss.py +++ /dev/null @@ -1,63 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -from .det_basic_loss import DiceLoss - - -class EASTLoss(nn.Layer): - """ - """ - - def __init__(self, - eps=1e-6, - **kwargs): - super(EASTLoss, self).__init__() - self.dice_loss = DiceLoss(eps=eps) - - def forward(self, predicts, labels): - l_score, l_geo, l_mask = labels[1:] - f_score = predicts['f_score'] - f_geo = predicts['f_geo'] - - dice_loss = self.dice_loss(f_score, l_score, l_mask) - - #smoooth_l1_loss - channels = 8 - l_geo_split = paddle.split( - l_geo, num_or_sections=channels + 1, axis=1) - f_geo_split = paddle.split(f_geo, num_or_sections=channels, axis=1) - smooth_l1 = 0 - for i in range(0, channels): - geo_diff = l_geo_split[i] - f_geo_split[i] - abs_geo_diff = paddle.abs(geo_diff) - smooth_l1_sign = paddle.less_than(abs_geo_diff, l_score) - smooth_l1_sign = paddle.cast(smooth_l1_sign, dtype='float32') - in_loss = abs_geo_diff * abs_geo_diff * smooth_l1_sign + \ - (abs_geo_diff - 0.5) * (1.0 - smooth_l1_sign) - out_loss = l_geo_split[-1] / channels * in_loss * l_score - smooth_l1 += out_loss - smooth_l1_loss = paddle.mean(smooth_l1 * l_score) - - dice_loss = dice_loss * 0.01 - total_loss = dice_loss + smooth_l1_loss - losses = {"loss":total_loss, \ - "dice_loss":dice_loss,\ - "smooth_l1_loss":smooth_l1_loss} - return losses diff --git a/backend/ppocr/losses/det_fce_loss.py b/backend/ppocr/losses/det_fce_loss.py deleted file mode 100644 index d7dfb5a..0000000 --- a/backend/ppocr/losses/det_fce_loss.py +++ /dev/null @@ -1,227 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textdet/losses/fce_loss.py -""" - -import numpy as np -from paddle import nn -import paddle -import paddle.nn.functional as F -from functools import partial - - -def multi_apply(func, *args, **kwargs): - pfunc = partial(func, **kwargs) if kwargs else func - map_results = map(pfunc, *args) - return tuple(map(list, zip(*map_results))) - - -class FCELoss(nn.Layer): - """The class for implementing FCENet loss - FCENet(CVPR2021): Fourier Contour Embedding for Arbitrary-shaped - Text Detection - - [https://arxiv.org/abs/2104.10442] - - Args: - fourier_degree (int) : The maximum Fourier transform degree k. - num_sample (int) : The sampling points number of regression - loss. If it is too small, fcenet tends to be overfitting. - ohem_ratio (float): the negative/positive ratio in OHEM. - """ - - def __init__(self, fourier_degree, num_sample, ohem_ratio=3.): - super().__init__() - self.fourier_degree = fourier_degree - self.num_sample = num_sample - self.ohem_ratio = ohem_ratio - - def forward(self, preds, labels): - assert isinstance(preds, dict) - preds = preds['levels'] - - p3_maps, p4_maps, p5_maps = labels[1:] - assert p3_maps[0].shape[0] == 4 * self.fourier_degree + 5,\ - 'fourier degree not equal in FCEhead and FCEtarget' - - # to tensor - gts = [p3_maps, p4_maps, p5_maps] - for idx, maps in enumerate(gts): - gts[idx] = paddle.to_tensor(np.stack(maps)) - - losses = multi_apply(self.forward_single, preds, gts) - - loss_tr = paddle.to_tensor(0.).astype('float32') - loss_tcl = paddle.to_tensor(0.).astype('float32') - loss_reg_x = paddle.to_tensor(0.).astype('float32') - loss_reg_y = paddle.to_tensor(0.).astype('float32') - loss_all = paddle.to_tensor(0.).astype('float32') - - for idx, loss in enumerate(losses): - loss_all += sum(loss) - if idx == 0: - loss_tr += sum(loss) - elif idx == 1: - loss_tcl += sum(loss) - elif idx == 2: - loss_reg_x += sum(loss) - else: - loss_reg_y += sum(loss) - - results = dict( - loss=loss_all, - loss_text=loss_tr, - loss_center=loss_tcl, - loss_reg_x=loss_reg_x, - loss_reg_y=loss_reg_y, ) - return results - - def forward_single(self, pred, gt): - cls_pred = paddle.transpose(pred[0], (0, 2, 3, 1)) - reg_pred = paddle.transpose(pred[1], (0, 2, 3, 1)) - gt = paddle.transpose(gt, (0, 2, 3, 1)) - - k = 2 * self.fourier_degree + 1 - tr_pred = paddle.reshape(cls_pred[:, :, :, :2], (-1, 2)) - tcl_pred = paddle.reshape(cls_pred[:, :, :, 2:], (-1, 2)) - x_pred = paddle.reshape(reg_pred[:, :, :, 0:k], (-1, k)) - y_pred = paddle.reshape(reg_pred[:, :, :, k:2 * k], (-1, k)) - - tr_mask = gt[:, :, :, :1].reshape([-1]) - tcl_mask = gt[:, :, :, 1:2].reshape([-1]) - train_mask = gt[:, :, :, 2:3].reshape([-1]) - x_map = paddle.reshape(gt[:, :, :, 3:3 + k], (-1, k)) - y_map = paddle.reshape(gt[:, :, :, 3 + k:], (-1, k)) - - tr_train_mask = (train_mask * tr_mask).astype('bool') - tr_train_mask2 = paddle.concat( - [tr_train_mask.unsqueeze(1), tr_train_mask.unsqueeze(1)], axis=1) - # tr loss - loss_tr = self.ohem(tr_pred, tr_mask, train_mask) - # tcl loss - loss_tcl = paddle.to_tensor(0.).astype('float32') - tr_neg_mask = tr_train_mask.logical_not() - tr_neg_mask2 = paddle.concat( - [tr_neg_mask.unsqueeze(1), tr_neg_mask.unsqueeze(1)], axis=1) - if tr_train_mask.sum().item() > 0: - loss_tcl_pos = F.cross_entropy( - tcl_pred.masked_select(tr_train_mask2).reshape([-1, 2]), - tcl_mask.masked_select(tr_train_mask).astype('int64')) - loss_tcl_neg = F.cross_entropy( - tcl_pred.masked_select(tr_neg_mask2).reshape([-1, 2]), - tcl_mask.masked_select(tr_neg_mask).astype('int64')) - loss_tcl = loss_tcl_pos + 0.5 * loss_tcl_neg - - # regression loss - loss_reg_x = paddle.to_tensor(0.).astype('float32') - loss_reg_y = paddle.to_tensor(0.).astype('float32') - if tr_train_mask.sum().item() > 0: - weight = (tr_mask.masked_select(tr_train_mask.astype('bool')) - .astype('float32') + tcl_mask.masked_select( - tr_train_mask.astype('bool')).astype('float32')) / 2 - weight = weight.reshape([-1, 1]) - - ft_x, ft_y = self.fourier2poly(x_map, y_map) - ft_x_pre, ft_y_pre = self.fourier2poly(x_pred, y_pred) - - dim = ft_x.shape[1] - - tr_train_mask3 = paddle.concat( - [tr_train_mask.unsqueeze(1) for i in range(dim)], axis=1) - - loss_reg_x = paddle.mean(weight * F.smooth_l1_loss( - ft_x_pre.masked_select(tr_train_mask3).reshape([-1, dim]), - ft_x.masked_select(tr_train_mask3).reshape([-1, dim]), - reduction='none')) - loss_reg_y = paddle.mean(weight * F.smooth_l1_loss( - ft_y_pre.masked_select(tr_train_mask3).reshape([-1, dim]), - ft_y.masked_select(tr_train_mask3).reshape([-1, dim]), - reduction='none')) - - return loss_tr, loss_tcl, loss_reg_x, loss_reg_y - - def ohem(self, predict, target, train_mask): - - pos = (target * train_mask).astype('bool') - neg = ((1 - target) * train_mask).astype('bool') - - pos2 = paddle.concat([pos.unsqueeze(1), pos.unsqueeze(1)], axis=1) - neg2 = paddle.concat([neg.unsqueeze(1), neg.unsqueeze(1)], axis=1) - - n_pos = pos.astype('float32').sum() - - if n_pos.item() > 0: - loss_pos = F.cross_entropy( - predict.masked_select(pos2).reshape([-1, 2]), - target.masked_select(pos).astype('int64'), - reduction='sum') - loss_neg = F.cross_entropy( - predict.masked_select(neg2).reshape([-1, 2]), - target.masked_select(neg).astype('int64'), - reduction='none') - n_neg = min( - int(neg.astype('float32').sum().item()), - int(self.ohem_ratio * n_pos.astype('float32'))) - else: - loss_pos = paddle.to_tensor(0.) - loss_neg = F.cross_entropy( - predict.masked_select(neg2).reshape([-1, 2]), - target.masked_select(neg).astype('int64'), - reduction='none') - n_neg = 100 - if len(loss_neg) > n_neg: - loss_neg, _ = paddle.topk(loss_neg, n_neg) - - return (loss_pos + loss_neg.sum()) / (n_pos + n_neg).astype('float32') - - def fourier2poly(self, real_maps, imag_maps): - """Transform Fourier coefficient maps to polygon maps. - - Args: - real_maps (tensor): A map composed of the real parts of the - Fourier coefficients, whose shape is (-1, 2k+1) - imag_maps (tensor):A map composed of the imag parts of the - Fourier coefficients, whose shape is (-1, 2k+1) - - Returns - x_maps (tensor): A map composed of the x value of the polygon - represented by n sample points (xn, yn), whose shape is (-1, n) - y_maps (tensor): A map composed of the y value of the polygon - represented by n sample points (xn, yn), whose shape is (-1, n) - """ - - k_vect = paddle.arange( - -self.fourier_degree, self.fourier_degree + 1, - dtype='float32').reshape([-1, 1]) - i_vect = paddle.arange( - 0, self.num_sample, dtype='float32').reshape([1, -1]) - - transform_matrix = 2 * np.pi / self.num_sample * paddle.matmul(k_vect, - i_vect) - - x1 = paddle.einsum('ak, kn-> an', real_maps, - paddle.cos(transform_matrix)) - x2 = paddle.einsum('ak, kn-> an', imag_maps, - paddle.sin(transform_matrix)) - y1 = paddle.einsum('ak, kn-> an', real_maps, - paddle.sin(transform_matrix)) - y2 = paddle.einsum('ak, kn-> an', imag_maps, - paddle.cos(transform_matrix)) - - x_maps = x1 - x2 - y_maps = y1 + y2 - - return x_maps, y_maps diff --git a/backend/ppocr/losses/det_pse_loss.py b/backend/ppocr/losses/det_pse_loss.py deleted file mode 100644 index 6b31343..0000000 --- a/backend/ppocr/losses/det_pse_loss.py +++ /dev/null @@ -1,149 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/whai362/PSENet/blob/python3/models/head/psenet_head.py -""" - -import paddle -from paddle import nn -from paddle.nn import functional as F -import numpy as np -from ppocr.utils.iou import iou - - -class PSELoss(nn.Layer): - def __init__(self, - alpha, - ohem_ratio=3, - kernel_sample_mask='pred', - reduction='sum', - eps=1e-6, - **kwargs): - """Implement PSE Loss. - """ - super(PSELoss, self).__init__() - assert reduction in ['sum', 'mean', 'none'] - self.alpha = alpha - self.ohem_ratio = ohem_ratio - self.kernel_sample_mask = kernel_sample_mask - self.reduction = reduction - self.eps = eps - - def forward(self, outputs, labels): - predicts = outputs['maps'] - predicts = F.interpolate(predicts, scale_factor=4) - - texts = predicts[:, 0, :, :] - kernels = predicts[:, 1:, :, :] - gt_texts, gt_kernels, training_masks = labels[1:] - - # text loss - selected_masks = self.ohem_batch(texts, gt_texts, training_masks) - - loss_text = self.dice_loss(texts, gt_texts, selected_masks) - iou_text = iou((texts > 0).astype('int64'), - gt_texts, - training_masks, - reduce=False) - losses = dict(loss_text=loss_text, iou_text=iou_text) - - # kernel loss - loss_kernels = [] - if self.kernel_sample_mask == 'gt': - selected_masks = gt_texts * training_masks - elif self.kernel_sample_mask == 'pred': - selected_masks = ( - F.sigmoid(texts) > 0.5).astype('float32') * training_masks - - for i in range(kernels.shape[1]): - kernel_i = kernels[:, i, :, :] - gt_kernel_i = gt_kernels[:, i, :, :] - loss_kernel_i = self.dice_loss(kernel_i, gt_kernel_i, - selected_masks) - loss_kernels.append(loss_kernel_i) - loss_kernels = paddle.mean(paddle.stack(loss_kernels, axis=1), axis=1) - iou_kernel = iou((kernels[:, -1, :, :] > 0).astype('int64'), - gt_kernels[:, -1, :, :], - training_masks * gt_texts, - reduce=False) - losses.update(dict(loss_kernels=loss_kernels, iou_kernel=iou_kernel)) - loss = self.alpha * loss_text + (1 - self.alpha) * loss_kernels - losses['loss'] = loss - if self.reduction == 'sum': - losses = {x: paddle.sum(v) for x, v in losses.items()} - elif self.reduction == 'mean': - losses = {x: paddle.mean(v) for x, v in losses.items()} - return losses - - def dice_loss(self, input, target, mask): - input = F.sigmoid(input) - - input = input.reshape([input.shape[0], -1]) - target = target.reshape([target.shape[0], -1]) - mask = mask.reshape([mask.shape[0], -1]) - - input = input * mask - target = target * mask - - a = paddle.sum(input * target, 1) - b = paddle.sum(input * input, 1) + self.eps - c = paddle.sum(target * target, 1) + self.eps - d = (2 * a) / (b + c) - return 1 - d - - def ohem_single(self, score, gt_text, training_mask, ohem_ratio=3): - pos_num = int(paddle.sum((gt_text > 0.5).astype('float32'))) - int( - paddle.sum( - paddle.logical_and((gt_text > 0.5), (training_mask <= 0.5)) - .astype('float32'))) - - if pos_num == 0: - selected_mask = training_mask - selected_mask = selected_mask.reshape( - [1, selected_mask.shape[0], selected_mask.shape[1]]).astype( - 'float32') - return selected_mask - - neg_num = int(paddle.sum((gt_text <= 0.5).astype('float32'))) - neg_num = int(min(pos_num * ohem_ratio, neg_num)) - - if neg_num == 0: - selected_mask = training_mask - selected_mask = selected_mask.reshape( - [1, selected_mask.shape[0], selected_mask.shape[1]]).astype( - 'float32') - return selected_mask - - neg_score = paddle.masked_select(score, gt_text <= 0.5) - neg_score_sorted = paddle.sort(-neg_score) - threshold = -neg_score_sorted[neg_num - 1] - - selected_mask = paddle.logical_and( - paddle.logical_or((score >= threshold), (gt_text > 0.5)), - (training_mask > 0.5)) - selected_mask = selected_mask.reshape( - [1, selected_mask.shape[0], selected_mask.shape[1]]).astype( - 'float32') - return selected_mask - - def ohem_batch(self, scores, gt_texts, training_masks, ohem_ratio=3): - selected_masks = [] - for i in range(scores.shape[0]): - selected_masks.append( - self.ohem_single(scores[i, :, :], gt_texts[i, :, :], - training_masks[i, :, :], ohem_ratio)) - - selected_masks = paddle.concat(selected_masks, 0).astype('float32') - return selected_masks diff --git a/backend/ppocr/losses/det_sast_loss.py b/backend/ppocr/losses/det_sast_loss.py deleted file mode 100644 index 2e0c756..0000000 --- a/backend/ppocr/losses/det_sast_loss.py +++ /dev/null @@ -1,121 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -from .det_basic_loss import DiceLoss -import numpy as np - - -class SASTLoss(nn.Layer): - """ - """ - - def __init__(self, eps=1e-6, **kwargs): - super(SASTLoss, self).__init__() - self.dice_loss = DiceLoss(eps=eps) - - def forward(self, predicts, labels): - """ - tcl_pos: N x 128 x 3 - tcl_mask: N x 128 x 1 - tcl_label: N x X list or LoDTensor - """ - - f_score = predicts['f_score'] - f_border = predicts['f_border'] - f_tvo = predicts['f_tvo'] - f_tco = predicts['f_tco'] - - l_score, l_border, l_mask, l_tvo, l_tco = labels[1:] - - #score_loss - intersection = paddle.sum(f_score * l_score * l_mask) - union = paddle.sum(f_score * l_mask) + paddle.sum(l_score * l_mask) - score_loss = 1.0 - 2 * intersection / (union + 1e-5) - - #border loss - l_border_split, l_border_norm = paddle.split( - l_border, num_or_sections=[4, 1], axis=1) - f_border_split = f_border - border_ex_shape = l_border_norm.shape * np.array([1, 4, 1, 1]) - l_border_norm_split = paddle.expand( - x=l_border_norm, shape=border_ex_shape) - l_border_score = paddle.expand(x=l_score, shape=border_ex_shape) - l_border_mask = paddle.expand(x=l_mask, shape=border_ex_shape) - - border_diff = l_border_split - f_border_split - abs_border_diff = paddle.abs(border_diff) - border_sign = abs_border_diff < 1.0 - border_sign = paddle.cast(border_sign, dtype='float32') - border_sign.stop_gradient = True - border_in_loss = 0.5 * abs_border_diff * abs_border_diff * border_sign + \ - (abs_border_diff - 0.5) * (1.0 - border_sign) - border_out_loss = l_border_norm_split * border_in_loss - border_loss = paddle.sum(border_out_loss * l_border_score * l_border_mask) / \ - (paddle.sum(l_border_score * l_border_mask) + 1e-5) - - #tvo_loss - l_tvo_split, l_tvo_norm = paddle.split( - l_tvo, num_or_sections=[8, 1], axis=1) - f_tvo_split = f_tvo - tvo_ex_shape = l_tvo_norm.shape * np.array([1, 8, 1, 1]) - l_tvo_norm_split = paddle.expand(x=l_tvo_norm, shape=tvo_ex_shape) - l_tvo_score = paddle.expand(x=l_score, shape=tvo_ex_shape) - l_tvo_mask = paddle.expand(x=l_mask, shape=tvo_ex_shape) - # - tvo_geo_diff = l_tvo_split - f_tvo_split - abs_tvo_geo_diff = paddle.abs(tvo_geo_diff) - tvo_sign = abs_tvo_geo_diff < 1.0 - tvo_sign = paddle.cast(tvo_sign, dtype='float32') - tvo_sign.stop_gradient = True - tvo_in_loss = 0.5 * abs_tvo_geo_diff * abs_tvo_geo_diff * tvo_sign + \ - (abs_tvo_geo_diff - 0.5) * (1.0 - tvo_sign) - tvo_out_loss = l_tvo_norm_split * tvo_in_loss - tvo_loss = paddle.sum(tvo_out_loss * l_tvo_score * l_tvo_mask) / \ - (paddle.sum(l_tvo_score * l_tvo_mask) + 1e-5) - - #tco_loss - l_tco_split, l_tco_norm = paddle.split( - l_tco, num_or_sections=[2, 1], axis=1) - f_tco_split = f_tco - tco_ex_shape = l_tco_norm.shape * np.array([1, 2, 1, 1]) - l_tco_norm_split = paddle.expand(x=l_tco_norm, shape=tco_ex_shape) - l_tco_score = paddle.expand(x=l_score, shape=tco_ex_shape) - l_tco_mask = paddle.expand(x=l_mask, shape=tco_ex_shape) - - tco_geo_diff = l_tco_split - f_tco_split - abs_tco_geo_diff = paddle.abs(tco_geo_diff) - tco_sign = abs_tco_geo_diff < 1.0 - tco_sign = paddle.cast(tco_sign, dtype='float32') - tco_sign.stop_gradient = True - tco_in_loss = 0.5 * abs_tco_geo_diff * abs_tco_geo_diff * tco_sign + \ - (abs_tco_geo_diff - 0.5) * (1.0 - tco_sign) - tco_out_loss = l_tco_norm_split * tco_in_loss - tco_loss = paddle.sum(tco_out_loss * l_tco_score * l_tco_mask) / \ - (paddle.sum(l_tco_score * l_tco_mask) + 1e-5) - - # total loss - tvo_lw, tco_lw = 1.5, 1.5 - score_lw, border_lw = 1.0, 1.0 - total_loss = score_loss * score_lw + border_loss * border_lw + \ - tvo_loss * tvo_lw + tco_loss * tco_lw - - losses = {'loss':total_loss, "score_loss":score_loss,\ - "border_loss":border_loss, 'tvo_loss':tvo_loss, 'tco_loss':tco_loss} - return losses diff --git a/backend/ppocr/losses/distillation_loss.py b/backend/ppocr/losses/distillation_loss.py deleted file mode 100644 index 565b066..0000000 --- a/backend/ppocr/losses/distillation_loss.py +++ /dev/null @@ -1,324 +0,0 @@ -#copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -import paddle -import paddle.nn as nn -import numpy as np -import cv2 - -from .rec_ctc_loss import CTCLoss -from .rec_sar_loss import SARLoss -from .basic_loss import DMLLoss -from .basic_loss import DistanceLoss -from .det_db_loss import DBLoss -from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss - - -def _sum_loss(loss_dict): - if "loss" in loss_dict.keys(): - return loss_dict - else: - loss_dict["loss"] = 0. - for k, value in loss_dict.items(): - if k == "loss": - continue - else: - loss_dict["loss"] += value - return loss_dict - - -class DistillationDMLLoss(DMLLoss): - """ - """ - - def __init__(self, - model_name_pairs=[], - act=None, - use_log=False, - key=None, - multi_head=False, - dis_head='ctc', - maps_name=None, - name="dml"): - super().__init__(act=act, use_log=use_log) - assert isinstance(model_name_pairs, list) - self.key = key - self.multi_head = multi_head - self.dis_head = dis_head - self.model_name_pairs = self._check_model_name_pairs(model_name_pairs) - self.name = name - self.maps_name = self._check_maps_name(maps_name) - - def _check_model_name_pairs(self, model_name_pairs): - if not isinstance(model_name_pairs, list): - return [] - elif isinstance(model_name_pairs[0], list) and isinstance( - model_name_pairs[0][0], str): - return model_name_pairs - else: - return [model_name_pairs] - - def _check_maps_name(self, maps_name): - if maps_name is None: - return None - elif type(maps_name) == str: - return [maps_name] - elif type(maps_name) == list: - return [maps_name] - else: - return None - - def _slice_out(self, outs): - new_outs = {} - for k in self.maps_name: - if k == "thrink_maps": - new_outs[k] = outs[:, 0, :, :] - elif k == "threshold_maps": - new_outs[k] = outs[:, 1, :, :] - elif k == "binary_maps": - new_outs[k] = outs[:, 2, :, :] - else: - continue - return new_outs - - def forward(self, predicts, batch): - loss_dict = dict() - for idx, pair in enumerate(self.model_name_pairs): - out1 = predicts[pair[0]] - out2 = predicts[pair[1]] - if self.key is not None: - out1 = out1[self.key] - out2 = out2[self.key] - - if self.maps_name is None: - if self.multi_head: - loss = super().forward(out1[self.dis_head], - out2[self.dis_head]) - else: - loss = super().forward(out1, out2) - if isinstance(loss, dict): - for key in loss: - loss_dict["{}_{}_{}_{}".format(key, pair[0], pair[1], - idx)] = loss[key] - else: - loss_dict["{}_{}".format(self.name, idx)] = loss - else: - outs1 = self._slice_out(out1) - outs2 = self._slice_out(out2) - for _c, k in enumerate(outs1.keys()): - loss = super().forward(outs1[k], outs2[k]) - if isinstance(loss, dict): - for key in loss: - loss_dict["{}_{}_{}_{}_{}".format(key, pair[ - 0], pair[1], self.maps_name, idx)] = loss[key] - else: - loss_dict["{}_{}_{}".format(self.name, self.maps_name[ - _c], idx)] = loss - - loss_dict = _sum_loss(loss_dict) - - return loss_dict - - -class DistillationCTCLoss(CTCLoss): - def __init__(self, - model_name_list=[], - key=None, - multi_head=False, - name="loss_ctc"): - super().__init__() - self.model_name_list = model_name_list - self.key = key - self.name = name - self.multi_head = multi_head - - def forward(self, predicts, batch): - loss_dict = dict() - for idx, model_name in enumerate(self.model_name_list): - out = predicts[model_name] - if self.key is not None: - out = out[self.key] - if self.multi_head: - assert 'ctc' in out, 'multi head has multi out' - loss = super().forward(out['ctc'], batch[:2] + batch[3:]) - else: - loss = super().forward(out, batch) - if isinstance(loss, dict): - for key in loss: - loss_dict["{}_{}_{}".format(self.name, model_name, - idx)] = loss[key] - else: - loss_dict["{}_{}".format(self.name, model_name)] = loss - return loss_dict - - -class DistillationSARLoss(SARLoss): - def __init__(self, - model_name_list=[], - key=None, - multi_head=False, - name="loss_sar", - **kwargs): - ignore_index = kwargs.get('ignore_index', 92) - super().__init__(ignore_index=ignore_index) - self.model_name_list = model_name_list - self.key = key - self.name = name - self.multi_head = multi_head - - def forward(self, predicts, batch): - loss_dict = dict() - for idx, model_name in enumerate(self.model_name_list): - out = predicts[model_name] - if self.key is not None: - out = out[self.key] - if self.multi_head: - assert 'sar' in out, 'multi head has multi out' - loss = super().forward(out['sar'], batch[:1] + batch[2:]) - else: - loss = super().forward(out, batch) - if isinstance(loss, dict): - for key in loss: - loss_dict["{}_{}_{}".format(self.name, model_name, - idx)] = loss[key] - else: - loss_dict["{}_{}".format(self.name, model_name)] = loss - return loss_dict - - -class DistillationDBLoss(DBLoss): - def __init__(self, - model_name_list=[], - balance_loss=True, - main_loss_type='DiceLoss', - alpha=5, - beta=10, - ohem_ratio=3, - eps=1e-6, - name="db", - **kwargs): - super().__init__() - self.model_name_list = model_name_list - self.name = name - self.key = None - - def forward(self, predicts, batch): - loss_dict = {} - for idx, model_name in enumerate(self.model_name_list): - out = predicts[model_name] - if self.key is not None: - out = out[self.key] - loss = super().forward(out, batch) - - if isinstance(loss, dict): - for key in loss.keys(): - if key == "loss": - continue - name = "{}_{}_{}".format(self.name, model_name, key) - loss_dict[name] = loss[key] - else: - loss_dict["{}_{}".format(self.name, model_name)] = loss - - loss_dict = _sum_loss(loss_dict) - return loss_dict - - -class DistillationDilaDBLoss(DBLoss): - def __init__(self, - model_name_pairs=[], - key=None, - balance_loss=True, - main_loss_type='DiceLoss', - alpha=5, - beta=10, - ohem_ratio=3, - eps=1e-6, - name="dila_dbloss"): - super().__init__() - self.model_name_pairs = model_name_pairs - self.name = name - self.key = key - - def forward(self, predicts, batch): - loss_dict = dict() - for idx, pair in enumerate(self.model_name_pairs): - stu_outs = predicts[pair[0]] - tch_outs = predicts[pair[1]] - if self.key is not None: - stu_preds = stu_outs[self.key] - tch_preds = tch_outs[self.key] - - stu_shrink_maps = stu_preds[:, 0, :, :] - stu_binary_maps = stu_preds[:, 2, :, :] - - # dilation to teacher prediction - dilation_w = np.array([[1, 1], [1, 1]]) - th_shrink_maps = tch_preds[:, 0, :, :] - th_shrink_maps = th_shrink_maps.numpy() > 0.3 # thresh = 0.3 - dilate_maps = np.zeros_like(th_shrink_maps).astype(np.float32) - for i in range(th_shrink_maps.shape[0]): - dilate_maps[i] = cv2.dilate( - th_shrink_maps[i, :, :].astype(np.uint8), dilation_w) - th_shrink_maps = paddle.to_tensor(dilate_maps) - - label_threshold_map, label_threshold_mask, label_shrink_map, label_shrink_mask = batch[ - 1:] - - # calculate the shrink map loss - bce_loss = self.alpha * self.bce_loss( - stu_shrink_maps, th_shrink_maps, label_shrink_mask) - loss_binary_maps = self.dice_loss(stu_binary_maps, th_shrink_maps, - label_shrink_mask) - - # k = f"{self.name}_{pair[0]}_{pair[1]}" - k = "{}_{}_{}".format(self.name, pair[0], pair[1]) - loss_dict[k] = bce_loss + loss_binary_maps - - loss_dict = _sum_loss(loss_dict) - return loss_dict - - -class DistillationDistanceLoss(DistanceLoss): - """ - """ - - def __init__(self, - mode="l2", - model_name_pairs=[], - key=None, - name="loss_distance", - **kargs): - super().__init__(mode=mode, **kargs) - assert isinstance(model_name_pairs, list) - self.key = key - self.model_name_pairs = model_name_pairs - self.name = name + "_l2" - - def forward(self, predicts, batch): - loss_dict = dict() - for idx, pair in enumerate(self.model_name_pairs): - out1 = predicts[pair[0]] - out2 = predicts[pair[1]] - if self.key is not None: - out1 = out1[self.key] - out2 = out2[self.key] - loss = super().forward(out1, out2) - if isinstance(loss, dict): - for key in loss: - loss_dict["{}_{}_{}".format(self.name, key, idx)] = loss[ - key] - else: - loss_dict["{}_{}_{}_{}".format(self.name, pair[0], pair[1], - idx)] = loss - return loss_dict diff --git a/backend/ppocr/losses/e2e_pg_loss.py b/backend/ppocr/losses/e2e_pg_loss.py deleted file mode 100644 index 10a8ed0..0000000 --- a/backend/ppocr/losses/e2e_pg_loss.py +++ /dev/null @@ -1,140 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import nn -import paddle - -from .det_basic_loss import DiceLoss -from ppocr.utils.e2e_utils.extract_batchsize import pre_process - - -class PGLoss(nn.Layer): - def __init__(self, - tcl_bs, - max_text_length, - max_text_nums, - pad_num, - eps=1e-6, - **kwargs): - super(PGLoss, self).__init__() - self.tcl_bs = tcl_bs - self.max_text_nums = max_text_nums - self.max_text_length = max_text_length - self.pad_num = pad_num - self.dice_loss = DiceLoss(eps=eps) - - def border_loss(self, f_border, l_border, l_score, l_mask): - l_border_split, l_border_norm = paddle.tensor.split( - l_border, num_or_sections=[4, 1], axis=1) - f_border_split = f_border - b, c, h, w = l_border_norm.shape - l_border_norm_split = paddle.expand( - x=l_border_norm, shape=[b, 4 * c, h, w]) - b, c, h, w = l_score.shape - l_border_score = paddle.expand(x=l_score, shape=[b, 4 * c, h, w]) - b, c, h, w = l_mask.shape - l_border_mask = paddle.expand(x=l_mask, shape=[b, 4 * c, h, w]) - border_diff = l_border_split - f_border_split - abs_border_diff = paddle.abs(border_diff) - border_sign = abs_border_diff < 1.0 - border_sign = paddle.cast(border_sign, dtype='float32') - border_sign.stop_gradient = True - border_in_loss = 0.5 * abs_border_diff * abs_border_diff * border_sign + \ - (abs_border_diff - 0.5) * (1.0 - border_sign) - border_out_loss = l_border_norm_split * border_in_loss - border_loss = paddle.sum(border_out_loss * l_border_score * l_border_mask) / \ - (paddle.sum(l_border_score * l_border_mask) + 1e-5) - return border_loss - - def direction_loss(self, f_direction, l_direction, l_score, l_mask): - l_direction_split, l_direction_norm = paddle.tensor.split( - l_direction, num_or_sections=[2, 1], axis=1) - f_direction_split = f_direction - b, c, h, w = l_direction_norm.shape - l_direction_norm_split = paddle.expand( - x=l_direction_norm, shape=[b, 2 * c, h, w]) - b, c, h, w = l_score.shape - l_direction_score = paddle.expand(x=l_score, shape=[b, 2 * c, h, w]) - b, c, h, w = l_mask.shape - l_direction_mask = paddle.expand(x=l_mask, shape=[b, 2 * c, h, w]) - direction_diff = l_direction_split - f_direction_split - abs_direction_diff = paddle.abs(direction_diff) - direction_sign = abs_direction_diff < 1.0 - direction_sign = paddle.cast(direction_sign, dtype='float32') - direction_sign.stop_gradient = True - direction_in_loss = 0.5 * abs_direction_diff * abs_direction_diff * direction_sign + \ - (abs_direction_diff - 0.5) * (1.0 - direction_sign) - direction_out_loss = l_direction_norm_split * direction_in_loss - direction_loss = paddle.sum(direction_out_loss * l_direction_score * l_direction_mask) / \ - (paddle.sum(l_direction_score * l_direction_mask) + 1e-5) - return direction_loss - - def ctcloss(self, f_char, tcl_pos, tcl_mask, tcl_label, label_t): - f_char = paddle.transpose(f_char, [0, 2, 3, 1]) - tcl_pos = paddle.reshape(tcl_pos, [-1, 3]) - tcl_pos = paddle.cast(tcl_pos, dtype=int) - f_tcl_char = paddle.gather_nd(f_char, tcl_pos) - f_tcl_char = paddle.reshape(f_tcl_char, - [-1, 64, 37]) # len(Lexicon_Table)+1 - f_tcl_char_fg, f_tcl_char_bg = paddle.split(f_tcl_char, [36, 1], axis=2) - f_tcl_char_bg = f_tcl_char_bg * tcl_mask + (1.0 - tcl_mask) * 20.0 - b, c, l = tcl_mask.shape - tcl_mask_fg = paddle.expand(x=tcl_mask, shape=[b, c, 36 * l]) - tcl_mask_fg.stop_gradient = True - f_tcl_char_fg = f_tcl_char_fg * tcl_mask_fg + (1.0 - tcl_mask_fg) * ( - -20.0) - f_tcl_char_mask = paddle.concat([f_tcl_char_fg, f_tcl_char_bg], axis=2) - f_tcl_char_ld = paddle.transpose(f_tcl_char_mask, (1, 0, 2)) - N, B, _ = f_tcl_char_ld.shape - input_lengths = paddle.to_tensor([N] * B, dtype='int64') - cost = paddle.nn.functional.ctc_loss( - log_probs=f_tcl_char_ld, - labels=tcl_label, - input_lengths=input_lengths, - label_lengths=label_t, - blank=self.pad_num, - reduction='none') - cost = cost.mean() - return cost - - def forward(self, predicts, labels): - images, tcl_maps, tcl_label_maps, border_maps \ - , direction_maps, training_masks, label_list, pos_list, pos_mask = labels - # for all the batch_size - pos_list, pos_mask, label_list, label_t = pre_process( - label_list, pos_list, pos_mask, self.max_text_length, - self.max_text_nums, self.pad_num, self.tcl_bs) - - f_score, f_border, f_direction, f_char = predicts['f_score'], predicts['f_border'], predicts['f_direction'], \ - predicts['f_char'] - score_loss = self.dice_loss(f_score, tcl_maps, training_masks) - border_loss = self.border_loss(f_border, border_maps, tcl_maps, - training_masks) - direction_loss = self.direction_loss(f_direction, direction_maps, - tcl_maps, training_masks) - ctc_loss = self.ctcloss(f_char, pos_list, pos_mask, label_list, label_t) - loss_all = score_loss + border_loss + direction_loss + 5 * ctc_loss - - losses = { - 'loss': loss_all, - "score_loss": score_loss, - "border_loss": border_loss, - "direction_loss": direction_loss, - "ctc_loss": ctc_loss - } - return losses diff --git a/backend/ppocr/losses/kie_sdmgr_loss.py b/backend/ppocr/losses/kie_sdmgr_loss.py deleted file mode 100644 index 745671f..0000000 --- a/backend/ppocr/losses/kie_sdmgr_loss.py +++ /dev/null @@ -1,115 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# reference from : https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/kie/losses/sdmgr_loss.py - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import nn -import paddle - - -class SDMGRLoss(nn.Layer): - def __init__(self, node_weight=1.0, edge_weight=1.0, ignore=0): - super().__init__() - self.loss_node = nn.CrossEntropyLoss(ignore_index=ignore) - self.loss_edge = nn.CrossEntropyLoss(ignore_index=-1) - self.node_weight = node_weight - self.edge_weight = edge_weight - self.ignore = ignore - - def pre_process(self, gts, tag): - gts, tag = gts.numpy(), tag.numpy().tolist() - temp_gts = [] - batch = len(tag) - for i in range(batch): - num, recoder_len = tag[i][0], tag[i][1] - temp_gts.append( - paddle.to_tensor( - gts[i, :num, :num + 1], dtype='int64')) - return temp_gts - - def accuracy(self, pred, target, topk=1, thresh=None): - """Calculate accuracy according to the prediction and target. - - Args: - pred (torch.Tensor): The model prediction, shape (N, num_class) - target (torch.Tensor): The target of each prediction, shape (N, ) - topk (int | tuple[int], optional): If the predictions in ``topk`` - matches the target, the predictions will be regarded as - correct ones. Defaults to 1. - thresh (float, optional): If not None, predictions with scores under - this threshold are considered incorrect. Default to None. - - Returns: - float | tuple[float]: If the input ``topk`` is a single integer, - the function will return a single float as accuracy. If - ``topk`` is a tuple containing multiple integers, the - function will return a tuple containing accuracies of - each ``topk`` number. - """ - assert isinstance(topk, (int, tuple)) - if isinstance(topk, int): - topk = (topk, ) - return_single = True - else: - return_single = False - - maxk = max(topk) - if pred.shape[0] == 0: - accu = [pred.new_tensor(0.) for i in range(len(topk))] - return accu[0] if return_single else accu - pred_value, pred_label = paddle.topk(pred, maxk, axis=1) - pred_label = pred_label.transpose( - [1, 0]) # transpose to shape (maxk, N) - correct = paddle.equal(pred_label, - (target.reshape([1, -1]).expand_as(pred_label))) - res = [] - for k in topk: - correct_k = paddle.sum(correct[:k].reshape([-1]).astype('float32'), - axis=0, - keepdim=True) - res.append( - paddle.multiply(correct_k, - paddle.to_tensor(100.0 / pred.shape[0]))) - return res[0] if return_single else res - - def forward(self, pred, batch): - node_preds, edge_preds = pred - gts, tag = batch[4], batch[5] - gts = self.pre_process(gts, tag) - node_gts, edge_gts = [], [] - for gt in gts: - node_gts.append(gt[:, 0]) - edge_gts.append(gt[:, 1:].reshape([-1])) - node_gts = paddle.concat(node_gts) - edge_gts = paddle.concat(edge_gts) - - node_valids = paddle.nonzero(node_gts != self.ignore).reshape([-1]) - edge_valids = paddle.nonzero(edge_gts != -1).reshape([-1]) - loss_node = self.loss_node(node_preds, node_gts) - loss_edge = self.loss_edge(edge_preds, edge_gts) - loss = self.node_weight * loss_node + self.edge_weight * loss_edge - return dict( - loss=loss, - loss_node=loss_node, - loss_edge=loss_edge, - acc_node=self.accuracy( - paddle.gather(node_preds, node_valids), - paddle.gather(node_gts, node_valids)), - acc_edge=self.accuracy( - paddle.gather(edge_preds, edge_valids), - paddle.gather(edge_gts, edge_valids))) diff --git a/backend/ppocr/losses/rec_aster_loss.py b/backend/ppocr/losses/rec_aster_loss.py deleted file mode 100644 index fbb99d2..0000000 --- a/backend/ppocr/losses/rec_aster_loss.py +++ /dev/null @@ -1,99 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn - - -class CosineEmbeddingLoss(nn.Layer): - def __init__(self, margin=0.): - super(CosineEmbeddingLoss, self).__init__() - self.margin = margin - self.epsilon = 1e-12 - - def forward(self, x1, x2, target): - similarity = paddle.fluid.layers.reduce_sum( - x1 * x2, dim=-1) / (paddle.norm( - x1, axis=-1) * paddle.norm( - x2, axis=-1) + self.epsilon) - one_list = paddle.full_like(target, fill_value=1) - out = paddle.fluid.layers.reduce_mean( - paddle.where( - paddle.equal(target, one_list), 1. - similarity, - paddle.maximum( - paddle.zeros_like(similarity), similarity - self.margin))) - - return out - - -class AsterLoss(nn.Layer): - def __init__(self, - weight=None, - size_average=True, - ignore_index=-100, - sequence_normalize=False, - sample_normalize=True, - **kwargs): - super(AsterLoss, self).__init__() - self.weight = weight - self.size_average = size_average - self.ignore_index = ignore_index - self.sequence_normalize = sequence_normalize - self.sample_normalize = sample_normalize - self.loss_sem = CosineEmbeddingLoss() - self.is_cosin_loss = True - self.loss_func_rec = nn.CrossEntropyLoss(weight=None, reduction='none') - - def forward(self, predicts, batch): - targets = batch[1].astype("int64") - label_lengths = batch[2].astype('int64') - sem_target = batch[3].astype('float32') - embedding_vectors = predicts['embedding_vectors'] - rec_pred = predicts['rec_pred'] - - if not self.is_cosin_loss: - sem_loss = paddle.sum(self.loss_sem(embedding_vectors, sem_target)) - else: - label_target = paddle.ones([embedding_vectors.shape[0]]) - sem_loss = paddle.sum( - self.loss_sem(embedding_vectors, sem_target, label_target)) - - # rec loss - batch_size, def_max_length = targets.shape[0], targets.shape[1] - - mask = paddle.zeros([batch_size, def_max_length]) - for i in range(batch_size): - mask[i, :label_lengths[i]] = 1 - mask = paddle.cast(mask, "float32") - max_length = max(label_lengths) - assert max_length == rec_pred.shape[1] - targets = targets[:, :max_length] - mask = mask[:, :max_length] - rec_pred = paddle.reshape(rec_pred, [-1, rec_pred.shape[2]]) - input = nn.functional.log_softmax(rec_pred, axis=1) - targets = paddle.reshape(targets, [-1, 1]) - mask = paddle.reshape(mask, [-1, 1]) - output = -paddle.index_sample(input, index=targets) * mask - output = paddle.sum(output) - if self.sequence_normalize: - output = output / paddle.sum(mask) - if self.sample_normalize: - output = output / batch_size - - loss = output + sem_loss * 0.1 - return {'loss': loss} diff --git a/backend/ppocr/losses/rec_att_loss.py b/backend/ppocr/losses/rec_att_loss.py deleted file mode 100644 index 6e2f674..0000000 --- a/backend/ppocr/losses/rec_att_loss.py +++ /dev/null @@ -1,39 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn - - -class AttentionLoss(nn.Layer): - def __init__(self, **kwargs): - super(AttentionLoss, self).__init__() - self.loss_func = nn.CrossEntropyLoss(weight=None, reduction='none') - - def forward(self, predicts, batch): - targets = batch[1].astype("int64") - label_lengths = batch[2].astype('int64') - batch_size, num_steps, num_classes = predicts.shape[0], predicts.shape[ - 1], predicts.shape[2] - assert len(targets.shape) == len(list(predicts.shape)) - 1, \ - "The target's shape and inputs's shape is [N, d] and [N, num_steps]" - - inputs = paddle.reshape(predicts, [-1, predicts.shape[-1]]) - targets = paddle.reshape(targets, [-1]) - - return {'loss': paddle.sum(self.loss_func(inputs, targets))} diff --git a/backend/ppocr/losses/rec_ctc_loss.py b/backend/ppocr/losses/rec_ctc_loss.py deleted file mode 100755 index 502fc8c..0000000 --- a/backend/ppocr/losses/rec_ctc_loss.py +++ /dev/null @@ -1,45 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn - - -class CTCLoss(nn.Layer): - def __init__(self, use_focal_loss=False, **kwargs): - super(CTCLoss, self).__init__() - self.loss_func = nn.CTCLoss(blank=0, reduction='none') - self.use_focal_loss = use_focal_loss - - def forward(self, predicts, batch): - if isinstance(predicts, (list, tuple)): - predicts = predicts[-1] - predicts = predicts.transpose((1, 0, 2)) - N, B, _ = predicts.shape - preds_lengths = paddle.to_tensor( - [N] * B, dtype='int64', place=paddle.CPUPlace()) - labels = batch[1].astype("int32") - label_lengths = batch[2].astype('int64') - loss = self.loss_func(predicts, labels, preds_lengths, label_lengths) - if self.use_focal_loss: - weight = paddle.exp(-loss) - weight = paddle.subtract(paddle.to_tensor([1.0]), weight) - weight = paddle.square(weight) - loss = paddle.multiply(loss, weight) - loss = loss.mean() - return {'loss': loss} diff --git a/backend/ppocr/losses/rec_enhanced_ctc_loss.py b/backend/ppocr/losses/rec_enhanced_ctc_loss.py deleted file mode 100644 index b57be64..0000000 --- a/backend/ppocr/losses/rec_enhanced_ctc_loss.py +++ /dev/null @@ -1,70 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -from .ace_loss import ACELoss -from .center_loss import CenterLoss -from .rec_ctc_loss import CTCLoss - - -class EnhancedCTCLoss(nn.Layer): - def __init__(self, - use_focal_loss=False, - use_ace_loss=False, - ace_loss_weight=0.1, - use_center_loss=False, - center_loss_weight=0.05, - num_classes=6625, - feat_dim=96, - init_center=False, - center_file_path=None, - **kwargs): - super(EnhancedCTCLoss, self).__init__() - self.ctc_loss_func = CTCLoss(use_focal_loss=use_focal_loss) - - self.use_ace_loss = False - if use_ace_loss: - self.use_ace_loss = use_ace_loss - self.ace_loss_func = ACELoss() - self.ace_loss_weight = ace_loss_weight - - self.use_center_loss = False - if use_center_loss: - self.use_center_loss = use_center_loss - self.center_loss_func = CenterLoss( - num_classes=num_classes, - feat_dim=feat_dim, - init_center=init_center, - center_file_path=center_file_path) - self.center_loss_weight = center_loss_weight - - def __call__(self, predicts, batch): - loss = self.ctc_loss_func(predicts, batch)["loss"] - - if self.use_center_loss: - center_loss = self.center_loss_func( - predicts, batch)["loss_center"] * self.center_loss_weight - loss = loss + center_loss - - if self.use_ace_loss: - ace_loss = self.ace_loss_func( - predicts, batch)["loss_ace"] * self.ace_loss_weight - loss = loss + ace_loss - - return {'enhanced_ctc_loss': loss} diff --git a/backend/ppocr/losses/rec_multi_loss.py b/backend/ppocr/losses/rec_multi_loss.py deleted file mode 100644 index 09f007a..0000000 --- a/backend/ppocr/losses/rec_multi_loss.py +++ /dev/null @@ -1,58 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn - -from .rec_ctc_loss import CTCLoss -from .rec_sar_loss import SARLoss - - -class MultiLoss(nn.Layer): - def __init__(self, **kwargs): - super().__init__() - self.loss_funcs = {} - self.loss_list = kwargs.pop('loss_config_list') - self.weight_1 = kwargs.get('weight_1', 1.0) - self.weight_2 = kwargs.get('weight_2', 1.0) - self.gtc_loss = kwargs.get('gtc_loss', 'sar') - for loss_info in self.loss_list: - for name, param in loss_info.items(): - if param is not None: - kwargs.update(param) - loss = eval(name)(**kwargs) - self.loss_funcs[name] = loss - - def forward(self, predicts, batch): - self.total_loss = {} - total_loss = 0.0 - # batch [image, label_ctc, label_sar, length, valid_ratio] - for name, loss_func in self.loss_funcs.items(): - if name == 'CTCLoss': - loss = loss_func(predicts['ctc'], - batch[:2] + batch[3:])['loss'] * self.weight_1 - elif name == 'SARLoss': - loss = loss_func(predicts['sar'], - batch[:1] + batch[2:])['loss'] * self.weight_2 - else: - raise NotImplementedError( - '{} is not supported in MultiLoss yet'.format(name)) - self.total_loss[name] = loss - total_loss += loss - self.total_loss['loss'] = total_loss - return self.total_loss diff --git a/backend/ppocr/losses/rec_nrtr_loss.py b/backend/ppocr/losses/rec_nrtr_loss.py deleted file mode 100644 index 200a6d0..0000000 --- a/backend/ppocr/losses/rec_nrtr_loss.py +++ /dev/null @@ -1,30 +0,0 @@ -import paddle -from paddle import nn -import paddle.nn.functional as F - - -class NRTRLoss(nn.Layer): - def __init__(self, smoothing=True, **kwargs): - super(NRTRLoss, self).__init__() - self.loss_func = nn.CrossEntropyLoss(reduction='mean', ignore_index=0) - self.smoothing = smoothing - - def forward(self, pred, batch): - pred = pred.reshape([-1, pred.shape[2]]) - max_len = batch[2].max() - tgt = batch[1][:, 1:2 + max_len] - tgt = tgt.reshape([-1]) - if self.smoothing: - eps = 0.1 - n_class = pred.shape[1] - one_hot = F.one_hot(tgt, pred.shape[1]) - one_hot = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1) - log_prb = F.log_softmax(pred, axis=1) - non_pad_mask = paddle.not_equal( - tgt, paddle.zeros( - tgt.shape, dtype=tgt.dtype)) - loss = -(one_hot * log_prb).sum(axis=1) - loss = loss.masked_select(non_pad_mask).mean() - else: - loss = self.loss_func(pred, tgt) - return {'loss': loss} diff --git a/backend/ppocr/losses/rec_pren_loss.py b/backend/ppocr/losses/rec_pren_loss.py deleted file mode 100644 index 7bc53d2..0000000 --- a/backend/ppocr/losses/rec_pren_loss.py +++ /dev/null @@ -1,30 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import nn - - -class PRENLoss(nn.Layer): - def __init__(self, **kwargs): - super(PRENLoss, self).__init__() - # note: 0 is padding idx - self.loss_func = nn.CrossEntropyLoss(reduction='mean', ignore_index=0) - - def forward(self, predicts, batch): - loss = self.loss_func(predicts, batch[1].astype('int64')) - return {'loss': loss} diff --git a/backend/ppocr/losses/rec_sar_loss.py b/backend/ppocr/losses/rec_sar_loss.py deleted file mode 100644 index a4f83f0..0000000 --- a/backend/ppocr/losses/rec_sar_loss.py +++ /dev/null @@ -1,29 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn - - -class SARLoss(nn.Layer): - def __init__(self, **kwargs): - super(SARLoss, self).__init__() - ignore_index = kwargs.get('ignore_index', 92) # 6626 - self.loss_func = paddle.nn.loss.CrossEntropyLoss( - reduction="mean", ignore_index=ignore_index) - - def forward(self, predicts, batch): - predict = predicts[:, : - -1, :] # ignore last index of outputs to be in same seq_len with targets - label = batch[1].astype( - "int64")[:, 1:] # ignore first index of target in loss calculation - batch_size, num_steps, num_classes = predict.shape[0], predict.shape[ - 1], predict.shape[2] - assert len(label.shape) == len(list(predict.shape)) - 1, \ - "The target's shape and inputs's shape is [N, d] and [N, num_steps]" - - inputs = paddle.reshape(predict, [-1, num_classes]) - targets = paddle.reshape(label, [-1]) - loss = self.loss_func(inputs, targets) - return {'loss': loss} diff --git a/backend/ppocr/losses/rec_srn_loss.py b/backend/ppocr/losses/rec_srn_loss.py deleted file mode 100644 index 7d5b65e..0000000 --- a/backend/ppocr/losses/rec_srn_loss.py +++ /dev/null @@ -1,47 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn - - -class SRNLoss(nn.Layer): - def __init__(self, **kwargs): - super(SRNLoss, self).__init__() - self.loss_func = paddle.nn.loss.CrossEntropyLoss(reduction="sum") - - def forward(self, predicts, batch): - predict = predicts['predict'] - word_predict = predicts['word_out'] - gsrm_predict = predicts['gsrm_out'] - label = batch[1] - - casted_label = paddle.cast(x=label, dtype='int64') - casted_label = paddle.reshape(x=casted_label, shape=[-1, 1]) - - cost_word = self.loss_func(word_predict, label=casted_label) - cost_gsrm = self.loss_func(gsrm_predict, label=casted_label) - cost_vsfd = self.loss_func(predict, label=casted_label) - - cost_word = paddle.reshape(x=paddle.sum(cost_word), shape=[1]) - cost_gsrm = paddle.reshape(x=paddle.sum(cost_gsrm), shape=[1]) - cost_vsfd = paddle.reshape(x=paddle.sum(cost_vsfd), shape=[1]) - - sum_cost = cost_word * 3.0 + cost_vsfd + cost_gsrm * 0.15 - - return {'loss': sum_cost, 'word_loss': cost_word, 'img_loss': cost_vsfd} diff --git a/backend/ppocr/losses/table_att_loss.py b/backend/ppocr/losses/table_att_loss.py deleted file mode 100644 index d7fd99e..0000000 --- a/backend/ppocr/losses/table_att_loss.py +++ /dev/null @@ -1,109 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -from paddle.nn import functional as F -from paddle import fluid - -class TableAttentionLoss(nn.Layer): - def __init__(self, structure_weight, loc_weight, use_giou=False, giou_weight=1.0, **kwargs): - super(TableAttentionLoss, self).__init__() - self.loss_func = nn.CrossEntropyLoss(weight=None, reduction='none') - self.structure_weight = structure_weight - self.loc_weight = loc_weight - self.use_giou = use_giou - self.giou_weight = giou_weight - - def giou_loss(self, preds, bbox, eps=1e-7, reduction='mean'): - ''' - :param preds:[[x1,y1,x2,y2], [x1,y1,x2,y2],,,] - :param bbox:[[x1,y1,x2,y2], [x1,y1,x2,y2],,,] - :return: loss - ''' - ix1 = fluid.layers.elementwise_max(preds[:, 0], bbox[:, 0]) - iy1 = fluid.layers.elementwise_max(preds[:, 1], bbox[:, 1]) - ix2 = fluid.layers.elementwise_min(preds[:, 2], bbox[:, 2]) - iy2 = fluid.layers.elementwise_min(preds[:, 3], bbox[:, 3]) - - iw = fluid.layers.clip(ix2 - ix1 + 1e-3, 0., 1e10) - ih = fluid.layers.clip(iy2 - iy1 + 1e-3, 0., 1e10) - - # overlap - inters = iw * ih - - # union - uni = (preds[:, 2] - preds[:, 0] + 1e-3) * (preds[:, 3] - preds[:, 1] + 1e-3 - ) + (bbox[:, 2] - bbox[:, 0] + 1e-3) * ( - bbox[:, 3] - bbox[:, 1] + 1e-3) - inters + eps - - # ious - ious = inters / uni - - ex1 = fluid.layers.elementwise_min(preds[:, 0], bbox[:, 0]) - ey1 = fluid.layers.elementwise_min(preds[:, 1], bbox[:, 1]) - ex2 = fluid.layers.elementwise_max(preds[:, 2], bbox[:, 2]) - ey2 = fluid.layers.elementwise_max(preds[:, 3], bbox[:, 3]) - ew = fluid.layers.clip(ex2 - ex1 + 1e-3, 0., 1e10) - eh = fluid.layers.clip(ey2 - ey1 + 1e-3, 0., 1e10) - - # enclose erea - enclose = ew * eh + eps - giou = ious - (enclose - uni) / enclose - - loss = 1 - giou - - if reduction == 'mean': - loss = paddle.mean(loss) - elif reduction == 'sum': - loss = paddle.sum(loss) - else: - raise NotImplementedError - return loss - - def forward(self, predicts, batch): - structure_probs = predicts['structure_probs'] - structure_targets = batch[1].astype("int64") - structure_targets = structure_targets[:, 1:] - if len(batch) == 6: - structure_mask = batch[5].astype("int64") - structure_mask = structure_mask[:, 1:] - structure_mask = paddle.reshape(structure_mask, [-1]) - structure_probs = paddle.reshape(structure_probs, [-1, structure_probs.shape[-1]]) - structure_targets = paddle.reshape(structure_targets, [-1]) - structure_loss = self.loss_func(structure_probs, structure_targets) - - if len(batch) == 6: - structure_loss = structure_loss * structure_mask - -# structure_loss = paddle.sum(structure_loss) * self.structure_weight - structure_loss = paddle.mean(structure_loss) * self.structure_weight - - loc_preds = predicts['loc_preds'] - loc_targets = batch[2].astype("float32") - loc_targets_mask = batch[4].astype("float32") - loc_targets = loc_targets[:, 1:, :] - loc_targets_mask = loc_targets_mask[:, 1:, :] - loc_loss = F.mse_loss(loc_preds * loc_targets_mask, loc_targets) * self.loc_weight - if self.use_giou: - loc_loss_giou = self.giou_loss(loc_preds * loc_targets_mask, loc_targets) * self.giou_weight - total_loss = structure_loss + loc_loss + loc_loss_giou - return {'loss':total_loss, "structure_loss":structure_loss, "loc_loss":loc_loss, "loc_loss_giou":loc_loss_giou} - else: - total_loss = structure_loss + loc_loss - return {'loss':total_loss, "structure_loss":structure_loss, "loc_loss":loc_loss} \ No newline at end of file diff --git a/backend/ppocr/losses/vqa_token_layoutlm_loss.py b/backend/ppocr/losses/vqa_token_layoutlm_loss.py deleted file mode 100755 index 244893d..0000000 --- a/backend/ppocr/losses/vqa_token_layoutlm_loss.py +++ /dev/null @@ -1,42 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import nn - - -class VQASerTokenLayoutLMLoss(nn.Layer): - def __init__(self, num_classes): - super().__init__() - self.loss_class = nn.CrossEntropyLoss() - self.num_classes = num_classes - self.ignore_index = self.loss_class.ignore_index - - def forward(self, predicts, batch): - labels = batch[1] - attention_mask = batch[4] - if attention_mask is not None: - active_loss = attention_mask.reshape([-1, ]) == 1 - active_outputs = predicts.reshape( - [-1, self.num_classes])[active_loss] - active_labels = labels.reshape([-1, ])[active_loss] - loss = self.loss_class(active_outputs, active_labels) - else: - loss = self.loss_class( - predicts.reshape([-1, self.num_classes]), - labels.reshape([-1, ])) - return {'loss': loss} diff --git a/backend/ppocr/metrics/__init__.py b/backend/ppocr/metrics/__init__.py deleted file mode 100644 index c244066..0000000 --- a/backend/ppocr/metrics/__init__.py +++ /dev/null @@ -1,47 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import copy - -__all__ = ["build_metric"] - -from .det_metric import DetMetric, DetFCEMetric -from .rec_metric import RecMetric -from .cls_metric import ClsMetric -from .e2e_metric import E2EMetric -from .distillation_metric import DistillationMetric -from .table_metric import TableMetric -from .kie_metric import KIEMetric -from .vqa_token_ser_metric import VQASerTokenMetric -from .vqa_token_re_metric import VQAReTokenMetric - - -def build_metric(config): - support_dict = [ - "DetMetric", "DetFCEMetric", "RecMetric", "ClsMetric", "E2EMetric", - "DistillationMetric", "TableMetric", 'KIEMetric', 'VQASerTokenMetric', - 'VQAReTokenMetric' - ] - - config = copy.deepcopy(config) - module_name = config.pop("name") - assert module_name in support_dict, Exception( - "metric only support {}".format(support_dict)) - module_class = eval(module_name)(**config) - return module_class diff --git a/backend/ppocr/metrics/cls_metric.py b/backend/ppocr/metrics/cls_metric.py deleted file mode 100644 index 6c07751..0000000 --- a/backend/ppocr/metrics/cls_metric.py +++ /dev/null @@ -1,46 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -class ClsMetric(object): - def __init__(self, main_indicator='acc', **kwargs): - self.main_indicator = main_indicator - self.eps = 1e-5 - self.reset() - - def __call__(self, pred_label, *args, **kwargs): - preds, labels = pred_label - correct_num = 0 - all_num = 0 - for (pred, pred_conf), (target, _) in zip(preds, labels): - if pred == target: - correct_num += 1 - all_num += 1 - self.correct_num += correct_num - self.all_num += all_num - return {'acc': correct_num / (all_num + self.eps), } - - def get_metric(self): - """ - return metrics { - 'acc': 0 - } - """ - acc = self.correct_num / (self.all_num + self.eps) - self.reset() - return {'acc': acc} - - def reset(self): - self.correct_num = 0 - self.all_num = 0 diff --git a/backend/ppocr/metrics/det_metric.py b/backend/ppocr/metrics/det_metric.py deleted file mode 100644 index dca94c0..0000000 --- a/backend/ppocr/metrics/det_metric.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -__all__ = ['DetMetric', 'DetFCEMetric'] - -from .eval_det_iou import DetectionIoUEvaluator - - -class DetMetric(object): - def __init__(self, main_indicator='hmean', **kwargs): - self.evaluator = DetectionIoUEvaluator() - self.main_indicator = main_indicator - self.reset() - - def __call__(self, preds, batch, **kwargs): - ''' - batch: a list produced by dataloaders. - image: np.ndarray of shape (N, C, H, W). - ratio_list: np.ndarray of shape(N,2) - polygons: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions. - ignore_tags: np.ndarray of shape (N, K), indicates whether a region is ignorable or not. - preds: a list of dict produced by post process - points: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions. - ''' - gt_polyons_batch = batch[2] - ignore_tags_batch = batch[3] - for pred, gt_polyons, ignore_tags in zip(preds, gt_polyons_batch, - ignore_tags_batch): - # prepare gt - gt_info_list = [{ - 'points': gt_polyon, - 'text': '', - 'ignore': ignore_tag - } for gt_polyon, ignore_tag in zip(gt_polyons, ignore_tags)] - # prepare det - det_info_list = [{ - 'points': det_polyon, - 'text': '' - } for det_polyon in pred['points']] - result = self.evaluator.evaluate_image(gt_info_list, det_info_list) - self.results.append(result) - - def get_metric(self): - """ - return metrics { - 'precision': 0, - 'recall': 0, - 'hmean': 0 - } - """ - - metrics = self.evaluator.combine_results(self.results) - self.reset() - return metrics - - def reset(self): - self.results = [] # clear results - - -class DetFCEMetric(object): - def __init__(self, main_indicator='hmean', **kwargs): - self.evaluator = DetectionIoUEvaluator() - self.main_indicator = main_indicator - self.reset() - - def __call__(self, preds, batch, **kwargs): - ''' - batch: a list produced by dataloaders. - image: np.ndarray of shape (N, C, H, W). - ratio_list: np.ndarray of shape(N,2) - polygons: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions. - ignore_tags: np.ndarray of shape (N, K), indicates whether a region is ignorable or not. - preds: a list of dict produced by post process - points: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions. - ''' - gt_polyons_batch = batch[2] - ignore_tags_batch = batch[3] - - for pred, gt_polyons, ignore_tags in zip(preds, gt_polyons_batch, - ignore_tags_batch): - # prepare gt - gt_info_list = [{ - 'points': gt_polyon, - 'text': '', - 'ignore': ignore_tag - } for gt_polyon, ignore_tag in zip(gt_polyons, ignore_tags)] - # prepare det - det_info_list = [{ - 'points': det_polyon, - 'text': '', - 'score': score - } for det_polyon, score in zip(pred['points'], pred['scores'])] - - for score_thr in self.results.keys(): - det_info_list_thr = [ - det_info for det_info in det_info_list - if det_info['score'] >= score_thr - ] - result = self.evaluator.evaluate_image(gt_info_list, - det_info_list_thr) - self.results[score_thr].append(result) - - def get_metric(self): - """ - return metrics {'heman':0, - 'thr 0.3':'precision: 0 recall: 0 hmean: 0', - 'thr 0.4':'precision: 0 recall: 0 hmean: 0', - 'thr 0.5':'precision: 0 recall: 0 hmean: 0', - 'thr 0.6':'precision: 0 recall: 0 hmean: 0', - 'thr 0.7':'precision: 0 recall: 0 hmean: 0', - 'thr 0.8':'precision: 0 recall: 0 hmean: 0', - 'thr 0.9':'precision: 0 recall: 0 hmean: 0', - } - """ - metrics = {} - hmean = 0 - for score_thr in self.results.keys(): - metric = self.evaluator.combine_results(self.results[score_thr]) - # for key, value in metric.items(): - # metrics['{}_{}'.format(key, score_thr)] = value - metric_str = 'precision:{:.5f} recall:{:.5f} hmean:{:.5f}'.format( - metric['precision'], metric['recall'], metric['hmean']) - metrics['thr {}'.format(score_thr)] = metric_str - hmean = max(hmean, metric['hmean']) - metrics['hmean'] = hmean - - self.reset() - return metrics - - def reset(self): - self.results = { - 0.3: [], - 0.4: [], - 0.5: [], - 0.6: [], - 0.7: [], - 0.8: [], - 0.9: [] - } # clear results diff --git a/backend/ppocr/metrics/distillation_metric.py b/backend/ppocr/metrics/distillation_metric.py deleted file mode 100644 index c440ceb..0000000 --- a/backend/ppocr/metrics/distillation_metric.py +++ /dev/null @@ -1,73 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import importlib -import copy - -from .rec_metric import RecMetric -from .det_metric import DetMetric -from .e2e_metric import E2EMetric -from .cls_metric import ClsMetric - - -class DistillationMetric(object): - def __init__(self, - key=None, - base_metric_name=None, - main_indicator=None, - **kwargs): - self.main_indicator = main_indicator - self.key = key - self.main_indicator = main_indicator - self.base_metric_name = base_metric_name - self.kwargs = kwargs - self.metrics = None - - def _init_metrcis(self, preds): - self.metrics = dict() - mod = importlib.import_module(__name__) - for key in preds: - self.metrics[key] = getattr(mod, self.base_metric_name)( - main_indicator=self.main_indicator, **self.kwargs) - self.metrics[key].reset() - - def __call__(self, preds, batch, **kwargs): - assert isinstance(preds, dict) - if self.metrics is None: - self._init_metrcis(preds) - output = dict() - for key in preds: - self.metrics[key].__call__(preds[key], batch, **kwargs) - - def get_metric(self): - """ - return metrics { - 'acc': 0, - 'norm_edit_dis': 0, - } - """ - output = dict() - for key in self.metrics: - metric = self.metrics[key].get_metric() - # main indicator - if key == self.key: - output.update(metric) - else: - for sub_key in metric: - output["{}_{}".format(key, sub_key)] = metric[sub_key] - return output - - def reset(self): - for key in self.metrics: - self.metrics[key].reset() diff --git a/backend/ppocr/metrics/e2e_metric.py b/backend/ppocr/metrics/e2e_metric.py deleted file mode 100644 index 2f8ba3b..0000000 --- a/backend/ppocr/metrics/e2e_metric.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -__all__ = ['E2EMetric'] - -from ppocr.utils.e2e_metric.Deteval import get_socre_A, get_socre_B, combine_results -from ppocr.utils.e2e_utils.extract_textpoint_slow import get_dict - - -class E2EMetric(object): - def __init__(self, - mode, - gt_mat_dir, - character_dict_path, - main_indicator='f_score_e2e', - **kwargs): - self.mode = mode - self.gt_mat_dir = gt_mat_dir - self.label_list = get_dict(character_dict_path) - self.max_index = len(self.label_list) - self.main_indicator = main_indicator - self.reset() - - def __call__(self, preds, batch, **kwargs): - if self.mode == 'A': - gt_polyons_batch = batch[2] - temp_gt_strs_batch = batch[3][0] - ignore_tags_batch = batch[4] - gt_strs_batch = [] - - for temp_list in temp_gt_strs_batch: - t = "" - for index in temp_list: - if index < self.max_index: - t += self.label_list[index] - gt_strs_batch.append(t) - - for pred, gt_polyons, gt_strs, ignore_tags in zip( - [preds], gt_polyons_batch, [gt_strs_batch], ignore_tags_batch): - # prepare gt - gt_info_list = [{ - 'points': gt_polyon, - 'text': gt_str, - 'ignore': ignore_tag - } for gt_polyon, gt_str, ignore_tag in - zip(gt_polyons, gt_strs, ignore_tags)] - # prepare det - e2e_info_list = [{ - 'points': det_polyon, - 'texts': pred_str - } for det_polyon, pred_str in - zip(pred['points'], pred['texts'])] - - result = get_socre_A(gt_info_list, e2e_info_list) - self.results.append(result) - else: - img_id = batch[5][0] - e2e_info_list = [{ - 'points': det_polyon, - 'texts': pred_str - } for det_polyon, pred_str in zip(preds['points'], preds['texts'])] - result = get_socre_B(self.gt_mat_dir, img_id, e2e_info_list) - self.results.append(result) - - def get_metric(self): - metrics = combine_results(self.results) - self.reset() - return metrics - - def reset(self): - self.results = [] # clear results diff --git a/backend/ppocr/metrics/eval_det_iou.py b/backend/ppocr/metrics/eval_det_iou.py deleted file mode 100644 index bc05e7d..0000000 --- a/backend/ppocr/metrics/eval_det_iou.py +++ /dev/null @@ -1,225 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -from collections import namedtuple -import numpy as np -from shapely.geometry import Polygon -""" -reference from : -https://github.com/MhLiao/DB/blob/3c32b808d4412680310d3d28eeb6a2d5bf1566c5/concern/icdar2015_eval/detection/iou.py#L8 -""" - - -class DetectionIoUEvaluator(object): - def __init__(self, iou_constraint=0.5, area_precision_constraint=0.5): - self.iou_constraint = iou_constraint - self.area_precision_constraint = area_precision_constraint - - def evaluate_image(self, gt, pred): - def get_union(pD, pG): - return Polygon(pD).union(Polygon(pG)).area - - def get_intersection_over_union(pD, pG): - return get_intersection(pD, pG) / get_union(pD, pG) - - def get_intersection(pD, pG): - return Polygon(pD).intersection(Polygon(pG)).area - - def compute_ap(confList, matchList, numGtCare): - correct = 0 - AP = 0 - if len(confList) > 0: - confList = np.array(confList) - matchList = np.array(matchList) - sorted_ind = np.argsort(-confList) - confList = confList[sorted_ind] - matchList = matchList[sorted_ind] - for n in range(len(confList)): - match = matchList[n] - if match: - correct += 1 - AP += float(correct) / (n + 1) - - if numGtCare > 0: - AP /= numGtCare - - return AP - - perSampleMetrics = {} - - matchedSum = 0 - - Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax') - - numGlobalCareGt = 0 - numGlobalCareDet = 0 - - arrGlobalConfidences = [] - arrGlobalMatches = [] - - recall = 0 - precision = 0 - hmean = 0 - - detMatched = 0 - - iouMat = np.empty([1, 1]) - - gtPols = [] - detPols = [] - - gtPolPoints = [] - detPolPoints = [] - - # Array of Ground Truth Polygons' keys marked as don't Care - gtDontCarePolsNum = [] - # Array of Detected Polygons' matched with a don't Care GT - detDontCarePolsNum = [] - - pairs = [] - detMatchedNums = [] - - arrSampleConfidences = [] - arrSampleMatch = [] - - evaluationLog = "" - - # print(len(gt)) - for n in range(len(gt)): - points = gt[n]['points'] - # transcription = gt[n]['text'] - dontCare = gt[n]['ignore'] - # points = Polygon(points) - # points = points.buffer(0) - if not Polygon(points).is_valid or not Polygon(points).is_simple: - continue - - gtPol = points - gtPols.append(gtPol) - gtPolPoints.append(points) - if dontCare: - gtDontCarePolsNum.append(len(gtPols) - 1) - - evaluationLog += "GT polygons: " + str(len(gtPols)) + ( - " (" + str(len(gtDontCarePolsNum)) + " don't care)\n" - if len(gtDontCarePolsNum) > 0 else "\n") - - for n in range(len(pred)): - points = pred[n]['points'] - # points = Polygon(points) - # points = points.buffer(0) - if not Polygon(points).is_valid or not Polygon(points).is_simple: - continue - - detPol = points - detPols.append(detPol) - detPolPoints.append(points) - if len(gtDontCarePolsNum) > 0: - for dontCarePol in gtDontCarePolsNum: - dontCarePol = gtPols[dontCarePol] - intersected_area = get_intersection(dontCarePol, detPol) - pdDimensions = Polygon(detPol).area - precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions - if (precision > self.area_precision_constraint): - detDontCarePolsNum.append(len(detPols) - 1) - break - - evaluationLog += "DET polygons: " + str(len(detPols)) + ( - " (" + str(len(detDontCarePolsNum)) + " don't care)\n" - if len(detDontCarePolsNum) > 0 else "\n") - - if len(gtPols) > 0 and len(detPols) > 0: - # Calculate IoU and precision matrixs - outputShape = [len(gtPols), len(detPols)] - iouMat = np.empty(outputShape) - gtRectMat = np.zeros(len(gtPols), np.int8) - detRectMat = np.zeros(len(detPols), np.int8) - for gtNum in range(len(gtPols)): - for detNum in range(len(detPols)): - pG = gtPols[gtNum] - pD = detPols[detNum] - iouMat[gtNum, detNum] = get_intersection_over_union(pD, pG) - - for gtNum in range(len(gtPols)): - for detNum in range(len(detPols)): - if gtRectMat[gtNum] == 0 and detRectMat[ - detNum] == 0 and gtNum not in gtDontCarePolsNum and detNum not in detDontCarePolsNum: - if iouMat[gtNum, detNum] > self.iou_constraint: - gtRectMat[gtNum] = 1 - detRectMat[detNum] = 1 - detMatched += 1 - pairs.append({'gt': gtNum, 'det': detNum}) - detMatchedNums.append(detNum) - evaluationLog += "Match GT #" + \ - str(gtNum) + " with Det #" + str(detNum) + "\n" - - numGtCare = (len(gtPols) - len(gtDontCarePolsNum)) - numDetCare = (len(detPols) - len(detDontCarePolsNum)) - if numGtCare == 0: - recall = float(1) - precision = float(0) if numDetCare > 0 else float(1) - else: - recall = float(detMatched) / numGtCare - precision = 0 if numDetCare == 0 else float(detMatched) / numDetCare - - hmean = 0 if (precision + recall) == 0 else 2.0 * \ - precision * recall / (precision + recall) - - matchedSum += detMatched - numGlobalCareGt += numGtCare - numGlobalCareDet += numDetCare - - perSampleMetrics = { - 'gtCare': numGtCare, - 'detCare': numDetCare, - 'detMatched': detMatched, - } - return perSampleMetrics - - def combine_results(self, results): - numGlobalCareGt = 0 - numGlobalCareDet = 0 - matchedSum = 0 - for result in results: - numGlobalCareGt += result['gtCare'] - numGlobalCareDet += result['detCare'] - matchedSum += result['detMatched'] - - methodRecall = 0 if numGlobalCareGt == 0 else float( - matchedSum) / numGlobalCareGt - methodPrecision = 0 if numGlobalCareDet == 0 else float( - matchedSum) / numGlobalCareDet - methodHmean = 0 if methodRecall + methodPrecision == 0 else 2 * \ - methodRecall * methodPrecision / ( - methodRecall + methodPrecision) - # print(methodRecall, methodPrecision, methodHmean) - # sys.exit(-1) - methodMetrics = { - 'precision': methodPrecision, - 'recall': methodRecall, - 'hmean': methodHmean - } - - return methodMetrics - - -if __name__ == '__main__': - evaluator = DetectionIoUEvaluator() - gts = [[{ - 'points': [(0, 0), (1, 0), (1, 1), (0, 1)], - 'text': 1234, - 'ignore': False, - }, { - 'points': [(2, 2), (3, 2), (3, 3), (2, 3)], - 'text': 5678, - 'ignore': False, - }]] - preds = [[{ - 'points': [(0.1, 0.1), (1, 0), (1, 1), (0, 1)], - 'text': 123, - 'ignore': False, - }]] - results = [] - for gt, pred in zip(gts, preds): - results.append(evaluator.evaluate_image(gt, pred)) - metrics = evaluator.combine_results(results) - print(metrics) diff --git a/backend/ppocr/metrics/kie_metric.py b/backend/ppocr/metrics/kie_metric.py deleted file mode 100644 index 28ab22b..0000000 --- a/backend/ppocr/metrics/kie_metric.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# The code is refer from: https://github.com/open-mmlab/mmocr/blob/main/mmocr/core/evaluation/kie_metric.py - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle - -__all__ = ['KIEMetric'] - - -class KIEMetric(object): - def __init__(self, main_indicator='hmean', **kwargs): - self.main_indicator = main_indicator - self.reset() - self.node = [] - self.gt = [] - - def __call__(self, preds, batch, **kwargs): - nodes, _ = preds - gts, tag = batch[4].squeeze(0), batch[5].tolist()[0] - gts = gts[:tag[0], :1].reshape([-1]) - self.node.append(nodes.numpy()) - self.gt.append(gts) - # result = self.compute_f1_score(nodes, gts) - # self.results.append(result) - - def compute_f1_score(self, preds, gts): - ignores = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 25] - C = preds.shape[1] - classes = np.array(sorted(set(range(C)) - set(ignores))) - hist = np.bincount( - (gts * C).astype('int64') + preds.argmax(1), minlength=C - **2).reshape([C, C]).astype('float32') - diag = np.diag(hist) - recalls = diag / hist.sum(1).clip(min=1) - precisions = diag / hist.sum(0).clip(min=1) - f1 = 2 * recalls * precisions / (recalls + precisions).clip(min=1e-8) - return f1[classes] - - def combine_results(self, results): - node = np.concatenate(self.node, 0) - gts = np.concatenate(self.gt, 0) - results = self.compute_f1_score(node, gts) - data = {'hmean': results.mean()} - return data - - def get_metric(self): - - metrics = self.combine_results(self.results) - self.reset() - return metrics - - def reset(self): - self.results = [] # clear results - self.node = [] - self.gt = [] diff --git a/backend/ppocr/metrics/rec_metric.py b/backend/ppocr/metrics/rec_metric.py deleted file mode 100644 index 515b937..0000000 --- a/backend/ppocr/metrics/rec_metric.py +++ /dev/null @@ -1,76 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import Levenshtein -import string - - -class RecMetric(object): - def __init__(self, - main_indicator='acc', - is_filter=False, - ignore_space=True, - **kwargs): - self.main_indicator = main_indicator - self.is_filter = is_filter - self.ignore_space = ignore_space - self.eps = 1e-5 - self.reset() - - def _normalize_text(self, text): - text = ''.join( - filter(lambda x: x in (string.digits + string.ascii_letters), text)) - return text.lower() - - def __call__(self, pred_label, *args, **kwargs): - preds, labels = pred_label - correct_num = 0 - all_num = 0 - norm_edit_dis = 0.0 - for (pred, pred_conf), (target, _) in zip(preds, labels): - if self.ignore_space: - pred = pred.replace(" ", "") - target = target.replace(" ", "") - if self.is_filter: - pred = self._normalize_text(pred) - target = self._normalize_text(target) - norm_edit_dis += Levenshtein.distance(pred, target) / max( - len(pred), len(target), 1) - if pred == target: - correct_num += 1 - all_num += 1 - self.correct_num += correct_num - self.all_num += all_num - self.norm_edit_dis += norm_edit_dis - return { - 'acc': correct_num / (all_num + self.eps), - 'norm_edit_dis': 1 - norm_edit_dis / (all_num + self.eps) - } - - def get_metric(self): - """ - return metrics { - 'acc': 0, - 'norm_edit_dis': 0, - } - """ - acc = 1.0 * self.correct_num / (self.all_num + self.eps) - norm_edit_dis = 1 - self.norm_edit_dis / (self.all_num + self.eps) - self.reset() - return {'acc': acc, 'norm_edit_dis': norm_edit_dis} - - def reset(self): - self.correct_num = 0 - self.all_num = 0 - self.norm_edit_dis = 0 diff --git a/backend/ppocr/metrics/table_metric.py b/backend/ppocr/metrics/table_metric.py deleted file mode 100644 index ca4d647..0000000 --- a/backend/ppocr/metrics/table_metric.py +++ /dev/null @@ -1,51 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np - - -class TableMetric(object): - def __init__(self, main_indicator='acc', **kwargs): - self.main_indicator = main_indicator - self.eps = 1e-5 - self.reset() - - def __call__(self, pred, batch, *args, **kwargs): - structure_probs = pred['structure_probs'].numpy() - structure_labels = batch[1] - correct_num = 0 - all_num = 0 - structure_probs = np.argmax(structure_probs, axis=2) - structure_labels = structure_labels[:, 1:] - batch_size = structure_probs.shape[0] - for bno in range(batch_size): - all_num += 1 - if (structure_probs[bno] == structure_labels[bno]).all(): - correct_num += 1 - self.correct_num += correct_num - self.all_num += all_num - return {'acc': correct_num * 1.0 / (all_num + self.eps), } - - def get_metric(self): - """ - return metrics { - 'acc': 0, - } - """ - acc = 1.0 * self.correct_num / (self.all_num + self.eps) - self.reset() - return {'acc': acc} - - def reset(self): - self.correct_num = 0 - self.all_num = 0 diff --git a/backend/ppocr/metrics/vqa_token_re_metric.py b/backend/ppocr/metrics/vqa_token_re_metric.py deleted file mode 100644 index 8a13bc0..0000000 --- a/backend/ppocr/metrics/vqa_token_re_metric.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle - -__all__ = ['KIEMetric'] - - -class VQAReTokenMetric(object): - def __init__(self, main_indicator='hmean', **kwargs): - self.main_indicator = main_indicator - self.reset() - - def __call__(self, preds, batch, **kwargs): - pred_relations, relations, entities = preds - self.pred_relations_list.extend(pred_relations) - self.relations_list.extend(relations) - self.entities_list.extend(entities) - - def get_metric(self): - gt_relations = [] - for b in range(len(self.relations_list)): - rel_sent = [] - for head, tail in zip(self.relations_list[b]["head"], - self.relations_list[b]["tail"]): - rel = {} - rel["head_id"] = head - rel["head"] = (self.entities_list[b]["start"][rel["head_id"]], - self.entities_list[b]["end"][rel["head_id"]]) - rel["head_type"] = self.entities_list[b]["label"][rel[ - "head_id"]] - - rel["tail_id"] = tail - rel["tail"] = (self.entities_list[b]["start"][rel["tail_id"]], - self.entities_list[b]["end"][rel["tail_id"]]) - rel["tail_type"] = self.entities_list[b]["label"][rel[ - "tail_id"]] - - rel["type"] = 1 - rel_sent.append(rel) - gt_relations.append(rel_sent) - re_metrics = self.re_score( - self.pred_relations_list, gt_relations, mode="boundaries") - metrics = { - "precision": re_metrics["ALL"]["p"], - "recall": re_metrics["ALL"]["r"], - "hmean": re_metrics["ALL"]["f1"], - } - self.reset() - return metrics - - def reset(self): - self.pred_relations_list = [] - self.relations_list = [] - self.entities_list = [] - - def re_score(self, pred_relations, gt_relations, mode="strict"): - """Evaluate RE predictions - - Args: - pred_relations (list) : list of list of predicted relations (several relations in each sentence) - gt_relations (list) : list of list of ground truth relations - - rel = { "head": (start_idx (inclusive), end_idx (exclusive)), - "tail": (start_idx (inclusive), end_idx (exclusive)), - "head_type": ent_type, - "tail_type": ent_type, - "type": rel_type} - - vocab (Vocab) : dataset vocabulary - mode (str) : in 'strict' or 'boundaries'""" - - assert mode in ["strict", "boundaries"] - - relation_types = [v for v in [0, 1] if not v == 0] - scores = { - rel: { - "tp": 0, - "fp": 0, - "fn": 0 - } - for rel in relation_types + ["ALL"] - } - - # Count GT relations and Predicted relations - n_sents = len(gt_relations) - n_rels = sum([len([rel for rel in sent]) for sent in gt_relations]) - n_found = sum([len([rel for rel in sent]) for sent in pred_relations]) - - # Count TP, FP and FN per type - for pred_sent, gt_sent in zip(pred_relations, gt_relations): - for rel_type in relation_types: - # strict mode takes argument types into account - if mode == "strict": - pred_rels = {(rel["head"], rel["head_type"], rel["tail"], - rel["tail_type"]) - for rel in pred_sent - if rel["type"] == rel_type} - gt_rels = {(rel["head"], rel["head_type"], rel["tail"], - rel["tail_type"]) - for rel in gt_sent if rel["type"] == rel_type} - - # boundaries mode only takes argument spans into account - elif mode == "boundaries": - pred_rels = {(rel["head"], rel["tail"]) - for rel in pred_sent - if rel["type"] == rel_type} - gt_rels = {(rel["head"], rel["tail"]) - for rel in gt_sent if rel["type"] == rel_type} - - scores[rel_type]["tp"] += len(pred_rels & gt_rels) - scores[rel_type]["fp"] += len(pred_rels - gt_rels) - scores[rel_type]["fn"] += len(gt_rels - pred_rels) - - # Compute per entity Precision / Recall / F1 - for rel_type in scores.keys(): - if scores[rel_type]["tp"]: - scores[rel_type]["p"] = scores[rel_type]["tp"] / ( - scores[rel_type]["fp"] + scores[rel_type]["tp"]) - scores[rel_type]["r"] = scores[rel_type]["tp"] / ( - scores[rel_type]["fn"] + scores[rel_type]["tp"]) - else: - scores[rel_type]["p"], scores[rel_type]["r"] = 0, 0 - - if not scores[rel_type]["p"] + scores[rel_type]["r"] == 0: - scores[rel_type]["f1"] = ( - 2 * scores[rel_type]["p"] * scores[rel_type]["r"] / - (scores[rel_type]["p"] + scores[rel_type]["r"])) - else: - scores[rel_type]["f1"] = 0 - - # Compute micro F1 Scores - tp = sum([scores[rel_type]["tp"] for rel_type in relation_types]) - fp = sum([scores[rel_type]["fp"] for rel_type in relation_types]) - fn = sum([scores[rel_type]["fn"] for rel_type in relation_types]) - - if tp: - precision = tp / (tp + fp) - recall = tp / (tp + fn) - f1 = 2 * precision * recall / (precision + recall) - - else: - precision, recall, f1 = 0, 0, 0 - - scores["ALL"]["p"] = precision - scores["ALL"]["r"] = recall - scores["ALL"]["f1"] = f1 - scores["ALL"]["tp"] = tp - scores["ALL"]["fp"] = fp - scores["ALL"]["fn"] = fn - - # Compute Macro F1 Scores - scores["ALL"]["Macro_f1"] = np.mean( - [scores[ent_type]["f1"] for ent_type in relation_types]) - scores["ALL"]["Macro_p"] = np.mean( - [scores[ent_type]["p"] for ent_type in relation_types]) - scores["ALL"]["Macro_r"] = np.mean( - [scores[ent_type]["r"] for ent_type in relation_types]) - - return scores diff --git a/backend/ppocr/metrics/vqa_token_ser_metric.py b/backend/ppocr/metrics/vqa_token_ser_metric.py deleted file mode 100644 index 286d8ad..0000000 --- a/backend/ppocr/metrics/vqa_token_ser_metric.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle - -__all__ = ['KIEMetric'] - - -class VQASerTokenMetric(object): - def __init__(self, main_indicator='hmean', **kwargs): - self.main_indicator = main_indicator - self.reset() - - def __call__(self, preds, batch, **kwargs): - preds, labels = preds - self.pred_list.extend(preds) - self.gt_list.extend(labels) - - def get_metric(self): - from seqeval.metrics import f1_score, precision_score, recall_score - metrics = { - "precision": precision_score(self.gt_list, self.pred_list), - "recall": recall_score(self.gt_list, self.pred_list), - "hmean": f1_score(self.gt_list, self.pred_list), - } - self.reset() - return metrics - - def reset(self): - self.pred_list = [] - self.gt_list = [] diff --git a/backend/ppocr/modeling/architectures/__init__.py b/backend/ppocr/modeling/architectures/__init__.py deleted file mode 100755 index e9a01cf..0000000 --- a/backend/ppocr/modeling/architectures/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import copy -import importlib - -from .base_model import BaseModel -from .distillation_model import DistillationModel - -__all__ = ['build_model'] - - -def build_model(config): - config = copy.deepcopy(config) - if not "name" in config: - arch = BaseModel(config) - else: - name = config.pop("name") - mod = importlib.import_module(__name__) - arch = getattr(mod, name)(config) - return arch diff --git a/backend/ppocr/modeling/architectures/base_model.py b/backend/ppocr/modeling/architectures/base_model.py deleted file mode 100644 index c6b50d4..0000000 --- a/backend/ppocr/modeling/architectures/base_model.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from paddle import nn -from ppocr.modeling.transforms import build_transform -from ppocr.modeling.backbones import build_backbone -from ppocr.modeling.necks import build_neck -from ppocr.modeling.heads import build_head - -__all__ = ['BaseModel'] - - -class BaseModel(nn.Layer): - def __init__(self, config): - """ - the module for OCR. - args: - config (dict): the super parameters for module. - """ - super(BaseModel, self).__init__() - in_channels = config.get('in_channels', 3) - model_type = config['model_type'] - # build transfrom, - # for rec, transfrom can be TPS,None - # for det and cls, transfrom shoule to be None, - # if you make model differently, you can use transfrom in det and cls - if 'Transform' not in config or config['Transform'] is None: - self.use_transform = False - else: - self.use_transform = True - config['Transform']['in_channels'] = in_channels - self.transform = build_transform(config['Transform']) - in_channels = self.transform.out_channels - - # build backbone, backbone is need for del, rec and cls - config["Backbone"]['in_channels'] = in_channels - self.backbone = build_backbone(config["Backbone"], model_type) - in_channels = self.backbone.out_channels - - # build neck - # for rec, neck can be cnn,rnn or reshape(None) - # for det, neck can be FPN, BIFPN and so on. - # for cls, neck should be none - if 'Neck' not in config or config['Neck'] is None: - self.use_neck = False - else: - self.use_neck = True - config['Neck']['in_channels'] = in_channels - self.neck = build_neck(config['Neck']) - in_channels = self.neck.out_channels - - # # build head, head is need for det, rec and cls - if 'Head' not in config or config['Head'] is None: - self.use_head = False - else: - self.use_head = True - config["Head"]['in_channels'] = in_channels - self.head = build_head(config["Head"]) - - self.return_all_feats = config.get("return_all_feats", False) - - def forward(self, x, data=None): - y = dict() - if self.use_transform: - x = self.transform(x) - x = self.backbone(x) - y["backbone_out"] = x - if self.use_neck: - x = self.neck(x) - y["neck_out"] = x - if self.use_head: - x = self.head(x, targets=data) - # for multi head, save ctc neck out for udml - if isinstance(x, dict) and 'ctc_neck' in x.keys(): - y["neck_out"] = x["ctc_neck"] - y["head_out"] = x - elif isinstance(x, dict): - y.update(x) - else: - y["head_out"] = x - if self.return_all_feats: - if self.training: - return y - else: - return {"head_out": y["head_out"]} - else: - return x diff --git a/backend/ppocr/modeling/architectures/distillation_model.py b/backend/ppocr/modeling/architectures/distillation_model.py deleted file mode 100644 index cce8fd3..0000000 --- a/backend/ppocr/modeling/architectures/distillation_model.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import nn -from ppocr.modeling.transforms import build_transform -from ppocr.modeling.backbones import build_backbone -from ppocr.modeling.necks import build_neck -from ppocr.modeling.heads import build_head -from .base_model import BaseModel -from ppocr.utils.save_load import load_pretrained_params - -__all__ = ['DistillationModel'] - - -class DistillationModel(nn.Layer): - def __init__(self, config): - """ - the module for OCR distillation. - args: - config (dict): the super parameters for module. - """ - super().__init__() - self.model_list = [] - self.model_name_list = [] - for key in config["Models"]: - model_config = config["Models"][key] - freeze_params = False - pretrained = None - if "freeze_params" in model_config: - freeze_params = model_config.pop("freeze_params") - if "pretrained" in model_config: - pretrained = model_config.pop("pretrained") - model = BaseModel(model_config) - if pretrained is not None: - load_pretrained_params(model, pretrained) - if freeze_params: - for param in model.parameters(): - param.trainable = False - self.model_list.append(self.add_sublayer(key, model)) - self.model_name_list.append(key) - - def forward(self, x, data=None): - result_dict = dict() - for idx, model_name in enumerate(self.model_name_list): - result_dict[model_name] = self.model_list[idx](x, data) - return result_dict diff --git a/backend/ppocr/modeling/backbones/__init__.py b/backend/ppocr/modeling/backbones/__init__.py deleted file mode 100755 index 072d6e0..0000000 --- a/backend/ppocr/modeling/backbones/__init__.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -__all__ = ["build_backbone"] - - -def build_backbone(config, model_type): - if model_type == "det" or model_type == "table": - from .det_mobilenet_v3 import MobileNetV3 - from .det_resnet_vd import ResNet - from .det_resnet_vd_sast import ResNet_SAST - support_dict = ["MobileNetV3", "ResNet", "ResNet_SAST"] - elif model_type == "rec" or model_type == "cls": - from .rec_mobilenet_v3 import MobileNetV3 - from .rec_resnet_vd import ResNet - from .rec_resnet_fpn import ResNetFPN - from .rec_mv1_enhance import MobileNetV1Enhance - from .rec_nrtr_mtb import MTB - from .rec_resnet_31 import ResNet31 - from .rec_resnet_aster import ResNet_ASTER - from .rec_micronet import MicroNet - from .rec_efficientb3_pren import EfficientNetb3_PREN - from .rec_svtrnet import SVTRNet - support_dict = [ - 'MobileNetV1Enhance', 'MobileNetV3', 'ResNet', 'ResNetFPN', 'MTB', - "ResNet31", "ResNet_ASTER", 'MicroNet', 'EfficientNetb3_PREN', - 'SVTRNet' - ] - elif model_type == "e2e": - from .e2e_resnet_vd_pg import ResNet - support_dict = ['ResNet'] - elif model_type == 'kie': - from .kie_unet_sdmgr import Kie_backbone - support_dict = ['Kie_backbone'] - elif model_type == "table": - from .table_resnet_vd import ResNet - from .table_mobilenet_v3 import MobileNetV3 - support_dict = ["ResNet", "MobileNetV3"] - elif model_type == 'vqa': - from .vqa_layoutlm import LayoutLMForSer, LayoutLMv2ForSer, LayoutLMv2ForRe, LayoutXLMForSer, LayoutXLMForRe - support_dict = [ - "LayoutLMForSer", "LayoutLMv2ForSer", 'LayoutLMv2ForRe', - "LayoutXLMForSer", 'LayoutXLMForRe' - ] - else: - raise NotImplementedError - - module_name = config.pop("name") - assert module_name in support_dict, Exception( - "when model typs is {}, backbone only support {}".format(model_type, - support_dict)) - module_class = eval(module_name)(**config) - return module_class diff --git a/backend/ppocr/modeling/backbones/det_mobilenet_v3.py b/backend/ppocr/modeling/backbones/det_mobilenet_v3.py deleted file mode 100755 index 05113ea..0000000 --- a/backend/ppocr/modeling/backbones/det_mobilenet_v3.py +++ /dev/null @@ -1,268 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - -__all__ = ['MobileNetV3'] - - -def make_divisible(v, divisor=8, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -class MobileNetV3(nn.Layer): - def __init__(self, - in_channels=3, - model_name='large', - scale=0.5, - disable_se=False, - **kwargs): - """ - the MobilenetV3 backbone network for detection module. - Args: - params(dict): the super parameters for build network - """ - super(MobileNetV3, self).__init__() - - self.disable_se = disable_se - - if model_name == "large": - cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, False, 'relu', 1], - [3, 64, 24, False, 'relu', 2], - [3, 72, 24, False, 'relu', 1], - [5, 72, 40, True, 'relu', 2], - [5, 120, 40, True, 'relu', 1], - [5, 120, 40, True, 'relu', 1], - [3, 240, 80, False, 'hardswish', 2], - [3, 200, 80, False, 'hardswish', 1], - [3, 184, 80, False, 'hardswish', 1], - [3, 184, 80, False, 'hardswish', 1], - [3, 480, 112, True, 'hardswish', 1], - [3, 672, 112, True, 'hardswish', 1], - [5, 672, 160, True, 'hardswish', 2], - [5, 960, 160, True, 'hardswish', 1], - [5, 960, 160, True, 'hardswish', 1], - ] - cls_ch_squeeze = 960 - elif model_name == "small": - cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, True, 'relu', 2], - [3, 72, 24, False, 'relu', 2], - [3, 88, 24, False, 'relu', 1], - [5, 96, 40, True, 'hardswish', 2], - [5, 240, 40, True, 'hardswish', 1], - [5, 240, 40, True, 'hardswish', 1], - [5, 120, 48, True, 'hardswish', 1], - [5, 144, 48, True, 'hardswish', 1], - [5, 288, 96, True, 'hardswish', 2], - [5, 576, 96, True, 'hardswish', 1], - [5, 576, 96, True, 'hardswish', 1], - ] - cls_ch_squeeze = 576 - else: - raise NotImplementedError("mode[" + model_name + - "_model] is not implemented!") - - supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25] - assert scale in supported_scale, \ - "supported scale are {} but input scale is {}".format(supported_scale, scale) - inplanes = 16 - # conv1 - self.conv = ConvBNLayer( - in_channels=in_channels, - out_channels=make_divisible(inplanes * scale), - kernel_size=3, - stride=2, - padding=1, - groups=1, - if_act=True, - act='hardswish') - - self.stages = [] - self.out_channels = [] - block_list = [] - i = 0 - inplanes = make_divisible(inplanes * scale) - for (k, exp, c, se, nl, s) in cfg: - se = se and not self.disable_se - start_idx = 2 if model_name == 'large' else 0 - if s == 2 and i > start_idx: - self.out_channels.append(inplanes) - self.stages.append(nn.Sequential(*block_list)) - block_list = [] - block_list.append( - ResidualUnit( - in_channels=inplanes, - mid_channels=make_divisible(scale * exp), - out_channels=make_divisible(scale * c), - kernel_size=k, - stride=s, - use_se=se, - act=nl)) - inplanes = make_divisible(scale * c) - i += 1 - block_list.append( - ConvBNLayer( - in_channels=inplanes, - out_channels=make_divisible(scale * cls_ch_squeeze), - kernel_size=1, - stride=1, - padding=0, - groups=1, - if_act=True, - act='hardswish')) - self.stages.append(nn.Sequential(*block_list)) - self.out_channels.append(make_divisible(scale * cls_ch_squeeze)) - for i, stage in enumerate(self.stages): - self.add_sublayer(sublayer=stage, name="stage{}".format(i)) - - def forward(self, x): - x = self.conv(x) - out_list = [] - for stage in self.stages: - x = stage(x) - out_list.append(x) - return out_list - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - padding, - groups=1, - if_act=True, - act=None): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - bias_attr=False) - - self.bn = nn.BatchNorm(num_channels=out_channels, act=None) - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - if self.if_act: - if self.act == "relu": - x = F.relu(x) - elif self.act == "hardswish": - x = F.hardswish(x) - else: - print("The activation function({}) is selected incorrectly.". - format(self.act)) - exit() - return x - - -class ResidualUnit(nn.Layer): - def __init__(self, - in_channels, - mid_channels, - out_channels, - kernel_size, - stride, - use_se, - act=None): - super(ResidualUnit, self).__init__() - self.if_shortcut = stride == 1 and in_channels == out_channels - self.if_se = use_se - - self.expand_conv = ConvBNLayer( - in_channels=in_channels, - out_channels=mid_channels, - kernel_size=1, - stride=1, - padding=0, - if_act=True, - act=act) - self.bottleneck_conv = ConvBNLayer( - in_channels=mid_channels, - out_channels=mid_channels, - kernel_size=kernel_size, - stride=stride, - padding=int((kernel_size - 1) // 2), - groups=mid_channels, - if_act=True, - act=act) - if self.if_se: - self.mid_se = SEModule(mid_channels) - self.linear_conv = ConvBNLayer( - in_channels=mid_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - padding=0, - if_act=False, - act=None) - - def forward(self, inputs): - x = self.expand_conv(inputs) - x = self.bottleneck_conv(x) - if self.if_se: - x = self.mid_se(x) - x = self.linear_conv(x) - if self.if_shortcut: - x = paddle.add(inputs, x) - return x - - -class SEModule(nn.Layer): - def __init__(self, in_channels, reduction=4): - super(SEModule, self).__init__() - self.avg_pool = nn.AdaptiveAvgPool2D(1) - self.conv1 = nn.Conv2D( - in_channels=in_channels, - out_channels=in_channels // reduction, - kernel_size=1, - stride=1, - padding=0) - self.conv2 = nn.Conv2D( - in_channels=in_channels // reduction, - out_channels=in_channels, - kernel_size=1, - stride=1, - padding=0) - - def forward(self, inputs): - outputs = self.avg_pool(inputs) - outputs = self.conv1(outputs) - outputs = F.relu(outputs) - outputs = self.conv2(outputs) - outputs = F.hardsigmoid(outputs, slope=0.2, offset=0.5) - return inputs * outputs diff --git a/backend/ppocr/modeling/backbones/det_resnet_vd.py b/backend/ppocr/modeling/backbones/det_resnet_vd.py deleted file mode 100644 index 8c955a4..0000000 --- a/backend/ppocr/modeling/backbones/det_resnet_vd.py +++ /dev/null @@ -1,351 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F - -from paddle.vision.ops import DeformConv2D -from paddle.regularizer import L2Decay -from paddle.nn.initializer import Normal, Constant, XavierUniform - -__all__ = ["ResNet"] - - -class DeformableConvV2(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - weight_attr=None, - bias_attr=None, - lr_scale=1, - regularizer=None, - skip_quant=False, - dcn_bias_regularizer=L2Decay(0.), - dcn_bias_lr_scale=2.): - super(DeformableConvV2, self).__init__() - self.offset_channel = 2 * kernel_size**2 * groups - self.mask_channel = kernel_size**2 * groups - - if bias_attr: - # in FCOS-DCN head, specifically need learning_rate and regularizer - dcn_bias_attr = ParamAttr( - initializer=Constant(value=0), - regularizer=dcn_bias_regularizer, - learning_rate=dcn_bias_lr_scale) - else: - # in ResNet backbone, do not need bias - dcn_bias_attr = False - self.conv_dcn = DeformConv2D( - in_channels, - out_channels, - kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2 * dilation, - dilation=dilation, - deformable_groups=groups, - weight_attr=weight_attr, - bias_attr=dcn_bias_attr) - - if lr_scale == 1 and regularizer is None: - offset_bias_attr = ParamAttr(initializer=Constant(0.)) - else: - offset_bias_attr = ParamAttr( - initializer=Constant(0.), - learning_rate=lr_scale, - regularizer=regularizer) - self.conv_offset = nn.Conv2D( - in_channels, - groups * 3 * kernel_size**2, - kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - weight_attr=ParamAttr(initializer=Constant(0.0)), - bias_attr=offset_bias_attr) - if skip_quant: - self.conv_offset.skip_quant = True - - def forward(self, x): - offset_mask = self.conv_offset(x) - offset, mask = paddle.split( - offset_mask, - num_or_sections=[self.offset_channel, self.mask_channel], - axis=1) - mask = F.sigmoid(mask) - y = self.conv_dcn(x, offset, mask=mask) - return y - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - groups=1, - is_vd_mode=False, - act=None, - is_dcn=False): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = nn.AvgPool2D( - kernel_size=2, stride=2, padding=0, ceil_mode=True) - if not is_dcn: - self._conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - bias_attr=False) - else: - self._conv = DeformableConvV2( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=2, #groups, - bias_attr=False) - self._batch_norm = nn.BatchNorm(out_channels, act=act) - - def forward(self, inputs): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__( - self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, - is_dcn=False, ): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - act='relu') - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - is_dcn=is_dcn) - self.conv2 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels * 4, - kernel_size=1, - act=None) - - if not shortcut: - self.short = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels * 4, - kernel_size=1, - stride=1, - is_vd_mode=False if if_first else True) - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv2) - y = F.relu(y) - return y - - -class BasicBlock(nn.Layer): - def __init__( - self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, ): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu') - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - act=None) - - if not shortcut: - self.short = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - is_vd_mode=False if if_first else True) - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv1) - y = F.relu(y) - return y - - -class ResNet(nn.Layer): - def __init__(self, - in_channels=3, - layers=50, - dcn_stage=None, - out_indices=None, - **kwargs): - super(ResNet, self).__init__() - - self.layers = layers - supported_layers = [18, 34, 50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format( - supported_layers, layers) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_channels = [64, 256, 512, - 1024] if layers >= 50 else [64, 64, 128, 256] - num_filters = [64, 128, 256, 512] - - self.dcn_stage = dcn_stage if dcn_stage is not None else [ - False, False, False, False - ] - self.out_indices = out_indices if out_indices is not None else [ - 0, 1, 2, 3 - ] - - self.conv1_1 = ConvBNLayer( - in_channels=in_channels, - out_channels=32, - kernel_size=3, - stride=2, - act='relu') - self.conv1_2 = ConvBNLayer( - in_channels=32, - out_channels=32, - kernel_size=3, - stride=1, - act='relu') - self.conv1_3 = ConvBNLayer( - in_channels=32, - out_channels=64, - kernel_size=3, - stride=1, - act='relu') - self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) - - self.stages = [] - self.out_channels = [] - if layers >= 50: - for block in range(len(depth)): - block_list = [] - shortcut = False - is_dcn = self.dcn_stage[block] - for i in range(depth[block]): - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block] * 4, - out_channels=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - is_dcn=is_dcn)) - shortcut = True - block_list.append(bottleneck_block) - if block in self.out_indices: - self.out_channels.append(num_filters[block] * 4) - self.stages.append(nn.Sequential(*block_list)) - else: - for block in range(len(depth)): - block_list = [] - shortcut = False - # is_dcn = self.dcn_stage[block] - for i in range(depth[block]): - basic_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BasicBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block], - out_channels=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0)) - shortcut = True - block_list.append(basic_block) - if block in self.out_indices: - self.out_channels.append(num_filters[block]) - self.stages.append(nn.Sequential(*block_list)) - - def forward(self, inputs): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - out = [] - for i, block in enumerate(self.stages): - y = block(y) - if i in self.out_indices: - out.append(y) - return out diff --git a/backend/ppocr/modeling/backbones/det_resnet_vd_sast.py b/backend/ppocr/modeling/backbones/det_resnet_vd_sast.py deleted file mode 100644 index c9376a8..0000000 --- a/backend/ppocr/modeling/backbones/det_resnet_vd_sast.py +++ /dev/null @@ -1,285 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F - -__all__ = ["ResNet_SAST"] - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - groups=1, - is_vd_mode=False, - act=None, - name=None, ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = nn.AvgPool2D( - kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = nn.BatchNorm( - out_channels, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__(self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, - name=None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels * 4, - kernel_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels * 4, - kernel_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv2) - y = F.relu(y) - return y - - -class BasicBlock(nn.Layer): - def __init__(self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, - name=None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv1) - y = F.relu(y) - return y - - -class ResNet_SAST(nn.Layer): - def __init__(self, in_channels=3, layers=50, **kwargs): - super(ResNet_SAST, self).__init__() - - self.layers = layers - supported_layers = [18, 34, 50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format( - supported_layers, layers) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - # depth = [3, 4, 6, 3] - depth = [3, 4, 6, 3, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - # num_channels = [64, 256, 512, - # 1024] if layers >= 50 else [64, 64, 128, 256] - # num_filters = [64, 128, 256, 512] - num_channels = [64, 256, 512, - 1024, 2048] if layers >= 50 else [64, 64, 128, 256] - num_filters = [64, 128, 256, 512, 512] - - self.conv1_1 = ConvBNLayer( - in_channels=in_channels, - out_channels=32, - kernel_size=3, - stride=2, - act='relu', - name="conv1_1") - self.conv1_2 = ConvBNLayer( - in_channels=32, - out_channels=32, - kernel_size=3, - stride=1, - act='relu', - name="conv1_2") - self.conv1_3 = ConvBNLayer( - in_channels=32, - out_channels=64, - kernel_size=3, - stride=1, - act='relu', - name="conv1_3") - self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) - - self.stages = [] - self.out_channels = [3, 64] - if layers >= 50: - for block in range(len(depth)): - block_list = [] - shortcut = False - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block] * 4, - out_channels=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - shortcut = True - block_list.append(bottleneck_block) - self.out_channels.append(num_filters[block] * 4) - self.stages.append(nn.Sequential(*block_list)) - else: - for block in range(len(depth)): - block_list = [] - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - basic_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BasicBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block], - out_channels=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - shortcut = True - block_list.append(basic_block) - self.out_channels.append(num_filters[block]) - self.stages.append(nn.Sequential(*block_list)) - - def forward(self, inputs): - out = [inputs] - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - out.append(y) - y = self.pool2d_max(y) - for block in self.stages: - y = block(y) - out.append(y) - return out \ No newline at end of file diff --git a/backend/ppocr/modeling/backbones/e2e_resnet_vd_pg.py b/backend/ppocr/modeling/backbones/e2e_resnet_vd_pg.py deleted file mode 100644 index 97afd34..0000000 --- a/backend/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +++ /dev/null @@ -1,265 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F - -__all__ = ["ResNet"] - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - groups=1, - is_vd_mode=False, - act=None, - name=None, ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = nn.AvgPool2D( - kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = nn.BatchNorm( - out_channels, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__(self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, - name=None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels * 4, - kernel_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels * 4, - kernel_size=1, - stride=stride, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv2) - y = F.relu(y) - return y - - -class BasicBlock(nn.Layer): - def __init__(self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, - name=None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv1) - y = F.relu(y) - return y - - -class ResNet(nn.Layer): - def __init__(self, in_channels=3, layers=50, **kwargs): - super(ResNet, self).__init__() - - self.layers = layers - supported_layers = [18, 34, 50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format( - supported_layers, layers) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - # depth = [3, 4, 6, 3] - depth = [3, 4, 6, 3, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_channels = [64, 256, 512, 1024, - 2048] if layers >= 50 else [64, 64, 128, 256] - num_filters = [64, 128, 256, 512, 512] - - self.conv1_1 = ConvBNLayer( - in_channels=in_channels, - out_channels=64, - kernel_size=7, - stride=2, - act='relu', - name="conv1_1") - self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) - - self.stages = [] - self.out_channels = [3, 64] - # num_filters = [64, 128, 256, 512, 512] - if layers >= 50: - for block in range(len(depth)): - block_list = [] - shortcut = False - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block] * 4, - out_channels=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - shortcut = True - block_list.append(bottleneck_block) - self.out_channels.append(num_filters[block] * 4) - self.stages.append(nn.Sequential(*block_list)) - else: - for block in range(len(depth)): - block_list = [] - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - basic_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BasicBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block], - out_channels=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - shortcut = True - block_list.append(basic_block) - self.out_channels.append(num_filters[block]) - self.stages.append(nn.Sequential(*block_list)) - - def forward(self, inputs): - out = [inputs] - y = self.conv1_1(inputs) - out.append(y) - y = self.pool2d_max(y) - for block in self.stages: - y = block(y) - out.append(y) - return out diff --git a/backend/ppocr/modeling/backbones/kie_unet_sdmgr.py b/backend/ppocr/modeling/backbones/kie_unet_sdmgr.py deleted file mode 100644 index 545e4e7..0000000 --- a/backend/ppocr/modeling/backbones/kie_unet_sdmgr.py +++ /dev/null @@ -1,186 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -import numpy as np -import cv2 - -__all__ = ["Kie_backbone"] - - -class Encoder(nn.Layer): - def __init__(self, num_channels, num_filters): - super(Encoder, self).__init__() - self.conv1 = nn.Conv2D( - num_channels, - num_filters, - kernel_size=3, - stride=1, - padding=1, - bias_attr=False) - self.bn1 = nn.BatchNorm(num_filters, act='relu') - - self.conv2 = nn.Conv2D( - num_filters, - num_filters, - kernel_size=3, - stride=1, - padding=1, - bias_attr=False) - self.bn2 = nn.BatchNorm(num_filters, act='relu') - - self.pool = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) - - def forward(self, inputs): - x = self.conv1(inputs) - x = self.bn1(x) - x = self.conv2(x) - x = self.bn2(x) - x_pooled = self.pool(x) - return x, x_pooled - - -class Decoder(nn.Layer): - def __init__(self, num_channels, num_filters): - super(Decoder, self).__init__() - - self.conv1 = nn.Conv2D( - num_channels, - num_filters, - kernel_size=3, - stride=1, - padding=1, - bias_attr=False) - self.bn1 = nn.BatchNorm(num_filters, act='relu') - - self.conv2 = nn.Conv2D( - num_filters, - num_filters, - kernel_size=3, - stride=1, - padding=1, - bias_attr=False) - self.bn2 = nn.BatchNorm(num_filters, act='relu') - - self.conv0 = nn.Conv2D( - num_channels, - num_filters, - kernel_size=1, - stride=1, - padding=0, - bias_attr=False) - self.bn0 = nn.BatchNorm(num_filters, act='relu') - - def forward(self, inputs_prev, inputs): - x = self.conv0(inputs) - x = self.bn0(x) - x = paddle.nn.functional.interpolate( - x, scale_factor=2, mode='bilinear', align_corners=False) - x = paddle.concat([inputs_prev, x], axis=1) - x = self.conv1(x) - x = self.bn1(x) - x = self.conv2(x) - x = self.bn2(x) - return x - - -class UNet(nn.Layer): - def __init__(self): - super(UNet, self).__init__() - self.down1 = Encoder(num_channels=3, num_filters=16) - self.down2 = Encoder(num_channels=16, num_filters=32) - self.down3 = Encoder(num_channels=32, num_filters=64) - self.down4 = Encoder(num_channels=64, num_filters=128) - self.down5 = Encoder(num_channels=128, num_filters=256) - - self.up1 = Decoder(32, 16) - self.up2 = Decoder(64, 32) - self.up3 = Decoder(128, 64) - self.up4 = Decoder(256, 128) - self.out_channels = 16 - - def forward(self, inputs): - x1, _ = self.down1(inputs) - _, x2 = self.down2(x1) - _, x3 = self.down3(x2) - _, x4 = self.down4(x3) - _, x5 = self.down5(x4) - - x = self.up4(x4, x5) - x = self.up3(x3, x) - x = self.up2(x2, x) - x = self.up1(x1, x) - return x - - -class Kie_backbone(nn.Layer): - def __init__(self, in_channels, **kwargs): - super(Kie_backbone, self).__init__() - self.out_channels = 16 - self.img_feat = UNet() - self.maxpool = nn.MaxPool2D(kernel_size=7) - - def bbox2roi(self, bbox_list): - rois_list = [] - rois_num = [] - for img_id, bboxes in enumerate(bbox_list): - rois_num.append(bboxes.shape[0]) - rois_list.append(bboxes) - rois = paddle.concat(rois_list, 0) - rois_num = paddle.to_tensor(rois_num, dtype='int32') - return rois, rois_num - - def pre_process(self, img, relations, texts, gt_bboxes, tag, img_size): - img, relations, texts, gt_bboxes, tag, img_size = img.numpy( - ), relations.numpy(), texts.numpy(), gt_bboxes.numpy(), tag.numpy( - ).tolist(), img_size.numpy() - temp_relations, temp_texts, temp_gt_bboxes = [], [], [] - h, w = int(np.max(img_size[:, 0])), int(np.max(img_size[:, 1])) - img = paddle.to_tensor(img[:, :, :h, :w]) - batch = len(tag) - for i in range(batch): - num, recoder_len = tag[i][0], tag[i][1] - temp_relations.append( - paddle.to_tensor( - relations[i, :num, :num, :], dtype='float32')) - temp_texts.append( - paddle.to_tensor( - texts[i, :num, :recoder_len], dtype='float32')) - temp_gt_bboxes.append( - paddle.to_tensor( - gt_bboxes[i, :num, ...], dtype='float32')) - return img, temp_relations, temp_texts, temp_gt_bboxes - - def forward(self, inputs): - img = inputs[0] - relations, texts, gt_bboxes, tag, img_size = inputs[1], inputs[ - 2], inputs[3], inputs[5], inputs[-1] - img, relations, texts, gt_bboxes = self.pre_process( - img, relations, texts, gt_bboxes, tag, img_size) - x = self.img_feat(img) - boxes, rois_num = self.bbox2roi(gt_bboxes) - feats = paddle.fluid.layers.roi_align( - x, - boxes, - spatial_scale=1.0, - pooled_height=7, - pooled_width=7, - rois_num=rois_num) - feats = self.maxpool(feats).squeeze(-1).squeeze(-1) - return [relations, texts, feats] diff --git a/backend/ppocr/modeling/backbones/rec_efficientb3_pren.py b/backend/ppocr/modeling/backbones/rec_efficientb3_pren.py deleted file mode 100644 index 57eef17..0000000 --- a/backend/ppocr/modeling/backbones/rec_efficientb3_pren.py +++ /dev/null @@ -1,228 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Code is refer from: -https://github.com/RuijieJ/pren/blob/main/Nets/EfficientNet.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -from collections import namedtuple -import paddle -import paddle.nn as nn -import paddle.nn.functional as F - -__all__ = ['EfficientNetb3'] - - -class EffB3Params: - @staticmethod - def get_global_params(): - """ - The fllowing are efficientnetb3's arch superparams, but to fit for scene - text recognition task, the resolution(image_size) here is changed - from 300 to 64. - """ - GlobalParams = namedtuple('GlobalParams', [ - 'drop_connect_rate', 'width_coefficient', 'depth_coefficient', - 'depth_divisor', 'image_size' - ]) - global_params = GlobalParams( - drop_connect_rate=0.3, - width_coefficient=1.2, - depth_coefficient=1.4, - depth_divisor=8, - image_size=64) - return global_params - - @staticmethod - def get_block_params(): - BlockParams = namedtuple('BlockParams', [ - 'kernel_size', 'num_repeat', 'input_filters', 'output_filters', - 'expand_ratio', 'id_skip', 'se_ratio', 'stride' - ]) - block_params = [ - BlockParams(3, 1, 32, 16, 1, True, 0.25, 1), - BlockParams(3, 2, 16, 24, 6, True, 0.25, 2), - BlockParams(5, 2, 24, 40, 6, True, 0.25, 2), - BlockParams(3, 3, 40, 80, 6, True, 0.25, 2), - BlockParams(5, 3, 80, 112, 6, True, 0.25, 1), - BlockParams(5, 4, 112, 192, 6, True, 0.25, 2), - BlockParams(3, 1, 192, 320, 6, True, 0.25, 1) - ] - return block_params - - -class EffUtils: - @staticmethod - def round_filters(filters, global_params): - """Calculate and round number of filters based on depth multiplier.""" - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - filters *= multiplier - new_filters = int(filters + divisor / 2) // divisor * divisor - if new_filters < 0.9 * filters: - new_filters += divisor - return int(new_filters) - - @staticmethod - def round_repeats(repeats, global_params): - """Round number of filters based on depth multiplier.""" - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) - - -class ConvBlock(nn.Layer): - def __init__(self, block_params): - super(ConvBlock, self).__init__() - self.block_args = block_params - self.has_se = (self.block_args.se_ratio is not None) and \ - (0 < self.block_args.se_ratio <= 1) - self.id_skip = block_params.id_skip - - # expansion phase - self.input_filters = self.block_args.input_filters - output_filters = \ - self.block_args.input_filters * self.block_args.expand_ratio - if self.block_args.expand_ratio != 1: - self.expand_conv = nn.Conv2D( - self.input_filters, output_filters, 1, bias_attr=False) - self.bn0 = nn.BatchNorm(output_filters) - - # depthwise conv phase - k = self.block_args.kernel_size - s = self.block_args.stride - self.depthwise_conv = nn.Conv2D( - output_filters, - output_filters, - groups=output_filters, - kernel_size=k, - stride=s, - padding='same', - bias_attr=False) - self.bn1 = nn.BatchNorm(output_filters) - - # squeeze and excitation layer, if desired - if self.has_se: - num_squeezed_channels = max(1, - int(self.block_args.input_filters * - self.block_args.se_ratio)) - self.se_reduce = nn.Conv2D(output_filters, num_squeezed_channels, 1) - self.se_expand = nn.Conv2D(num_squeezed_channels, output_filters, 1) - - # output phase - self.final_oup = self.block_args.output_filters - self.project_conv = nn.Conv2D( - output_filters, self.final_oup, 1, bias_attr=False) - self.bn2 = nn.BatchNorm(self.final_oup) - self.swish = nn.Swish() - - def drop_connect(self, inputs, p, training): - if not training: - return inputs - - batch_size = inputs.shape[0] - keep_prob = 1 - p - random_tensor = keep_prob - random_tensor += paddle.rand([batch_size, 1, 1, 1], dtype=inputs.dtype) - random_tensor = paddle.to_tensor(random_tensor, place=inputs.place) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - def forward(self, inputs, drop_connect_rate=None): - # expansion and depthwise conv - x = inputs - if self.block_args.expand_ratio != 1: - x = self.swish(self.bn0(self.expand_conv(inputs))) - x = self.swish(self.bn1(self.depthwise_conv(x))) - - # squeeze and excitation - if self.has_se: - x_squeezed = F.adaptive_avg_pool2d(x, 1) - x_squeezed = self.se_expand(self.swish(self.se_reduce(x_squeezed))) - x = F.sigmoid(x_squeezed) * x - x = self.bn2(self.project_conv(x)) - - # skip conntection and drop connect - if self.id_skip and self.block_args.stride == 1 and \ - self.input_filters == self.final_oup: - if drop_connect_rate: - x = self.drop_connect( - x, p=drop_connect_rate, training=self.training) - x = x + inputs - return x - - -class EfficientNetb3_PREN(nn.Layer): - def __init__(self, in_channels): - super(EfficientNetb3_PREN, self).__init__() - self.blocks_params = EffB3Params.get_block_params() - self.global_params = EffB3Params.get_global_params() - self.out_channels = [] - # stem - stem_channels = EffUtils.round_filters(32, self.global_params) - self.conv_stem = nn.Conv2D( - in_channels, stem_channels, 3, 2, padding='same', bias_attr=False) - self.bn0 = nn.BatchNorm(stem_channels) - - self.blocks = [] - # to extract three feature maps for fpn based on efficientnetb3 backbone - self.concerned_block_idxes = [7, 17, 25] - concerned_idx = 0 - for i, block_params in enumerate(self.blocks_params): - block_params = block_params._replace( - input_filters=EffUtils.round_filters(block_params.input_filters, - self.global_params), - output_filters=EffUtils.round_filters( - block_params.output_filters, self.global_params), - num_repeat=EffUtils.round_repeats(block_params.num_repeat, - self.global_params)) - self.blocks.append( - self.add_sublayer("{}-0".format(i), ConvBlock(block_params))) - concerned_idx += 1 - if concerned_idx in self.concerned_block_idxes: - self.out_channels.append(block_params.output_filters) - if block_params.num_repeat > 1: - block_params = block_params._replace( - input_filters=block_params.output_filters, stride=1) - for j in range(block_params.num_repeat - 1): - self.blocks.append( - self.add_sublayer('{}-{}'.format(i, j + 1), - ConvBlock(block_params))) - concerned_idx += 1 - if concerned_idx in self.concerned_block_idxes: - self.out_channels.append(block_params.output_filters) - - self.swish = nn.Swish() - - def forward(self, inputs): - outs = [] - - x = self.swish(self.bn0(self.conv_stem(inputs))) - for idx, block in enumerate(self.blocks): - drop_connect_rate = self.global_params.drop_connect_rate - if drop_connect_rate: - drop_connect_rate *= float(idx) / len(self.blocks) - x = block(x, drop_connect_rate=drop_connect_rate) - if idx in self.concerned_block_idxes: - outs.append(x) - return outs diff --git a/backend/ppocr/modeling/backbones/rec_micronet.py b/backend/ppocr/modeling/backbones/rec_micronet.py deleted file mode 100644 index b0ae5a1..0000000 --- a/backend/ppocr/modeling/backbones/rec_micronet.py +++ /dev/null @@ -1,528 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/liyunsheng13/micronet/blob/main/backbone/micronet.py -https://github.com/liyunsheng13/micronet/blob/main/backbone/activation.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -import paddle.nn as nn - -from ppocr.modeling.backbones.det_mobilenet_v3 import make_divisible - -M0_cfgs = [ - # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r - [2, 1, 8, 3, 2, 2, 0, 4, 8, 2, 2, 2, 0, 1, 1], - [2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 2, 1, 1], - [2, 1, 16, 5, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1], - [1, 1, 32, 5, 1, 4, 4, 4, 32, 4, 4, 2, 2, 1, 1], - [2, 1, 64, 5, 1, 4, 8, 8, 64, 8, 8, 2, 2, 1, 1], - [1, 1, 96, 3, 1, 4, 8, 8, 96, 8, 8, 2, 2, 1, 2], - [1, 1, 384, 3, 1, 4, 12, 12, 0, 0, 0, 2, 2, 1, 2], -] -M1_cfgs = [ - # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4 - [2, 1, 8, 3, 2, 2, 0, 6, 8, 2, 2, 2, 0, 1, 1], - [2, 1, 16, 3, 2, 2, 0, 8, 16, 4, 4, 2, 2, 1, 1], - [2, 1, 16, 5, 2, 2, 0, 16, 16, 4, 4, 2, 2, 1, 1], - [1, 1, 32, 5, 1, 6, 4, 4, 32, 4, 4, 2, 2, 1, 1], - [2, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 1], - [1, 1, 96, 3, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2], - [1, 1, 576, 3, 1, 6, 12, 12, 0, 0, 0, 2, 2, 1, 2], -] -M2_cfgs = [ - # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4 - [2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 0, 1, 1], - [2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1], - [1, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 2, 2, 1, 1], - [2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 2, 2, 1, 1], - [1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 2, 2, 1, 2], - [1, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 2], - [2, 1, 96, 5, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2], - [1, 1, 128, 3, 1, 6, 12, 12, 128, 8, 8, 2, 2, 1, 2], - [1, 1, 768, 3, 1, 6, 16, 16, 0, 0, 0, 2, 2, 1, 2], -] -M3_cfgs = [ - # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4 - [2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 0, 2, 0, 1], - [2, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 0, 2, 0, 1], - [1, 1, 24, 3, 2, 2, 0, 24, 24, 4, 4, 0, 2, 0, 1], - [2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 0, 2, 0, 1], - [1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 0, 2, 0, 2], - [1, 1, 64, 5, 1, 6, 8, 8, 48, 8, 8, 0, 2, 0, 2], - [1, 1, 80, 5, 1, 6, 8, 8, 80, 8, 8, 0, 2, 0, 2], - [1, 1, 80, 5, 1, 6, 10, 10, 80, 8, 8, 0, 2, 0, 2], - [1, 1, 120, 5, 1, 6, 10, 10, 120, 10, 10, 0, 2, 0, 2], - [1, 1, 120, 5, 1, 6, 12, 12, 120, 10, 10, 0, 2, 0, 2], - [1, 1, 144, 3, 1, 6, 12, 12, 144, 12, 12, 0, 2, 0, 2], - [1, 1, 432, 3, 1, 3, 12, 12, 0, 0, 0, 0, 2, 0, 2], -] - - -def get_micronet_config(mode): - return eval(mode + '_cfgs') - - -class MaxGroupPooling(nn.Layer): - def __init__(self, channel_per_group=2): - super(MaxGroupPooling, self).__init__() - self.channel_per_group = channel_per_group - - def forward(self, x): - if self.channel_per_group == 1: - return x - # max op - b, c, h, w = x.shape - - # reshape - y = paddle.reshape(x, [b, c // self.channel_per_group, -1, h, w]) - out = paddle.max(y, axis=2) - return out - - -class SpatialSepConvSF(nn.Layer): - def __init__(self, inp, oups, kernel_size, stride): - super(SpatialSepConvSF, self).__init__() - - oup1, oup2 = oups - self.conv = nn.Sequential( - nn.Conv2D( - inp, - oup1, (kernel_size, 1), (stride, 1), (kernel_size // 2, 0), - bias_attr=False, - groups=1), - nn.BatchNorm2D(oup1), - nn.Conv2D( - oup1, - oup1 * oup2, (1, kernel_size), (1, stride), - (0, kernel_size // 2), - bias_attr=False, - groups=oup1), - nn.BatchNorm2D(oup1 * oup2), - ChannelShuffle(oup1), ) - - def forward(self, x): - out = self.conv(x) - return out - - -class ChannelShuffle(nn.Layer): - def __init__(self, groups): - super(ChannelShuffle, self).__init__() - self.groups = groups - - def forward(self, x): - b, c, h, w = x.shape - - channels_per_group = c // self.groups - - # reshape - x = paddle.reshape(x, [b, self.groups, channels_per_group, h, w]) - - x = paddle.transpose(x, (0, 2, 1, 3, 4)) - out = paddle.reshape(x, [b, -1, h, w]) - - return out - - -class StemLayer(nn.Layer): - def __init__(self, inp, oup, stride, groups=(4, 4)): - super(StemLayer, self).__init__() - - g1, g2 = groups - self.stem = nn.Sequential( - SpatialSepConvSF(inp, groups, 3, stride), - MaxGroupPooling(2) if g1 * g2 == 2 * oup else nn.ReLU6()) - - def forward(self, x): - out = self.stem(x) - return out - - -class DepthSpatialSepConv(nn.Layer): - def __init__(self, inp, expand, kernel_size, stride): - super(DepthSpatialSepConv, self).__init__() - - exp1, exp2 = expand - - hidden_dim = inp * exp1 - oup = inp * exp1 * exp2 - - self.conv = nn.Sequential( - nn.Conv2D( - inp, - inp * exp1, (kernel_size, 1), (stride, 1), - (kernel_size // 2, 0), - bias_attr=False, - groups=inp), - nn.BatchNorm2D(inp * exp1), - nn.Conv2D( - hidden_dim, - oup, (1, kernel_size), - 1, (0, kernel_size // 2), - bias_attr=False, - groups=hidden_dim), - nn.BatchNorm2D(oup)) - - def forward(self, x): - x = self.conv(x) - return x - - -class GroupConv(nn.Layer): - def __init__(self, inp, oup, groups=2): - super(GroupConv, self).__init__() - self.inp = inp - self.oup = oup - self.groups = groups - self.conv = nn.Sequential( - nn.Conv2D( - inp, oup, 1, 1, 0, bias_attr=False, groups=self.groups[0]), - nn.BatchNorm2D(oup)) - - def forward(self, x): - x = self.conv(x) - return x - - -class DepthConv(nn.Layer): - def __init__(self, inp, oup, kernel_size, stride): - super(DepthConv, self).__init__() - self.conv = nn.Sequential( - nn.Conv2D( - inp, - oup, - kernel_size, - stride, - kernel_size // 2, - bias_attr=False, - groups=inp), - nn.BatchNorm2D(oup)) - - def forward(self, x): - out = self.conv(x) - return out - - -class DYShiftMax(nn.Layer): - def __init__(self, - inp, - oup, - reduction=4, - act_max=1.0, - act_relu=True, - init_a=[0.0, 0.0], - init_b=[0.0, 0.0], - relu_before_pool=False, - g=None, - expansion=False): - super(DYShiftMax, self).__init__() - self.oup = oup - self.act_max = act_max * 2 - self.act_relu = act_relu - self.avg_pool = nn.Sequential(nn.ReLU() if relu_before_pool == True else - nn.Sequential(), nn.AdaptiveAvgPool2D(1)) - - self.exp = 4 if act_relu else 2 - self.init_a = init_a - self.init_b = init_b - - # determine squeeze - squeeze = make_divisible(inp // reduction, 4) - if squeeze < 4: - squeeze = 4 - - self.fc = nn.Sequential( - nn.Linear(inp, squeeze), - nn.ReLU(), nn.Linear(squeeze, oup * self.exp), nn.Hardsigmoid()) - - if g is None: - g = 1 - self.g = g[1] - if self.g != 1 and expansion: - self.g = inp // self.g - - self.gc = inp // self.g - index = paddle.to_tensor([range(inp)]) - index = paddle.reshape(index, [1, inp, 1, 1]) - index = paddle.reshape(index, [1, self.g, self.gc, 1, 1]) - indexgs = paddle.split(index, [1, self.g - 1], axis=1) - indexgs = paddle.concat((indexgs[1], indexgs[0]), axis=1) - indexs = paddle.split(indexgs, [1, self.gc - 1], axis=2) - indexs = paddle.concat((indexs[1], indexs[0]), axis=2) - self.index = paddle.reshape(indexs, [inp]) - self.expansion = expansion - - def forward(self, x): - x_in = x - x_out = x - - b, c, _, _ = x_in.shape - y = self.avg_pool(x_in) - y = paddle.reshape(y, [b, c]) - y = self.fc(y) - y = paddle.reshape(y, [b, self.oup * self.exp, 1, 1]) - y = (y - 0.5) * self.act_max - - n2, c2, h2, w2 = x_out.shape - x2 = paddle.to_tensor(x_out.numpy()[:, self.index.numpy(), :, :]) - - if self.exp == 4: - temp = y.shape - a1, b1, a2, b2 = paddle.split(y, temp[1] // self.oup, axis=1) - - a1 = a1 + self.init_a[0] - a2 = a2 + self.init_a[1] - - b1 = b1 + self.init_b[0] - b2 = b2 + self.init_b[1] - - z1 = x_out * a1 + x2 * b1 - z2 = x_out * a2 + x2 * b2 - - out = paddle.maximum(z1, z2) - - elif self.exp == 2: - temp = y.shape - a1, b1 = paddle.split(y, temp[1] // self.oup, axis=1) - a1 = a1 + self.init_a[0] - b1 = b1 + self.init_b[0] - out = x_out * a1 + x2 * b1 - - return out - - -class DYMicroBlock(nn.Layer): - def __init__(self, - inp, - oup, - kernel_size=3, - stride=1, - ch_exp=(2, 2), - ch_per_group=4, - groups_1x1=(1, 1), - depthsep=True, - shuffle=False, - activation_cfg=None): - super(DYMicroBlock, self).__init__() - - self.identity = stride == 1 and inp == oup - - y1, y2, y3 = activation_cfg['dy'] - act_reduction = 8 * activation_cfg['ratio'] - init_a = activation_cfg['init_a'] - init_b = activation_cfg['init_b'] - - t1 = ch_exp - gs1 = ch_per_group - hidden_fft, g1, g2 = groups_1x1 - hidden_dim2 = inp * t1[0] * t1[1] - - if gs1[0] == 0: - self.layers = nn.Sequential( - DepthSpatialSepConv(inp, t1, kernel_size, stride), - DYShiftMax( - hidden_dim2, - hidden_dim2, - act_max=2.0, - act_relu=True if y2 == 2 else False, - init_a=init_a, - reduction=act_reduction, - init_b=init_b, - g=gs1, - expansion=False) if y2 > 0 else nn.ReLU6(), - ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(), - ChannelShuffle(hidden_dim2 // 2) - if shuffle and y2 != 0 else nn.Sequential(), - GroupConv(hidden_dim2, oup, (g1, g2)), - DYShiftMax( - oup, - oup, - act_max=2.0, - act_relu=False, - init_a=[1.0, 0.0], - reduction=act_reduction // 2, - init_b=[0.0, 0.0], - g=(g1, g2), - expansion=False) if y3 > 0 else nn.Sequential(), - ChannelShuffle(g2) if shuffle else nn.Sequential(), - ChannelShuffle(oup // 2) - if shuffle and oup % 2 == 0 and y3 != 0 else nn.Sequential(), ) - elif g2 == 0: - self.layers = nn.Sequential( - GroupConv(inp, hidden_dim2, gs1), - DYShiftMax( - hidden_dim2, - hidden_dim2, - act_max=2.0, - act_relu=False, - init_a=[1.0, 0.0], - reduction=act_reduction, - init_b=[0.0, 0.0], - g=gs1, - expansion=False) if y3 > 0 else nn.Sequential(), ) - else: - self.layers = nn.Sequential( - GroupConv(inp, hidden_dim2, gs1), - DYShiftMax( - hidden_dim2, - hidden_dim2, - act_max=2.0, - act_relu=True if y1 == 2 else False, - init_a=init_a, - reduction=act_reduction, - init_b=init_b, - g=gs1, - expansion=False) if y1 > 0 else nn.ReLU6(), - ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(), - DepthSpatialSepConv(hidden_dim2, (1, 1), kernel_size, stride) - if depthsep else - DepthConv(hidden_dim2, hidden_dim2, kernel_size, stride), - nn.Sequential(), - DYShiftMax( - hidden_dim2, - hidden_dim2, - act_max=2.0, - act_relu=True if y2 == 2 else False, - init_a=init_a, - reduction=act_reduction, - init_b=init_b, - g=gs1, - expansion=True) if y2 > 0 else nn.ReLU6(), - ChannelShuffle(hidden_dim2 // 4) - if shuffle and y1 != 0 and y2 != 0 else nn.Sequential() - if y1 == 0 and y2 == 0 else ChannelShuffle(hidden_dim2 // 2), - GroupConv(hidden_dim2, oup, (g1, g2)), - DYShiftMax( - oup, - oup, - act_max=2.0, - act_relu=False, - init_a=[1.0, 0.0], - reduction=act_reduction // 2 - if oup < hidden_dim2 else act_reduction, - init_b=[0.0, 0.0], - g=(g1, g2), - expansion=False) if y3 > 0 else nn.Sequential(), - ChannelShuffle(g2) if shuffle else nn.Sequential(), - ChannelShuffle(oup // 2) - if shuffle and y3 != 0 else nn.Sequential(), ) - - def forward(self, x): - identity = x - out = self.layers(x) - - if self.identity: - out = out + identity - - return out - - -class MicroNet(nn.Layer): - """ - the MicroNet backbone network for recognition module. - Args: - mode(str): {'M0', 'M1', 'M2', 'M3'} - Four models are proposed based on four different computational costs (4M, 6M, 12M, 21M MAdds) - Default: 'M3'. - """ - - def __init__(self, mode='M3', **kwargs): - super(MicroNet, self).__init__() - - self.cfgs = get_micronet_config(mode) - - activation_cfg = {} - if mode == 'M0': - input_channel = 4 - stem_groups = 2, 2 - out_ch = 384 - activation_cfg['init_a'] = 1.0, 1.0 - activation_cfg['init_b'] = 0.0, 0.0 - elif mode == 'M1': - input_channel = 6 - stem_groups = 3, 2 - out_ch = 576 - activation_cfg['init_a'] = 1.0, 1.0 - activation_cfg['init_b'] = 0.0, 0.0 - elif mode == 'M2': - input_channel = 8 - stem_groups = 4, 2 - out_ch = 768 - activation_cfg['init_a'] = 1.0, 1.0 - activation_cfg['init_b'] = 0.0, 0.0 - elif mode == 'M3': - input_channel = 12 - stem_groups = 4, 3 - out_ch = 432 - activation_cfg['init_a'] = 1.0, 0.5 - activation_cfg['init_b'] = 0.0, 0.5 - else: - raise NotImplementedError("mode[" + mode + - "_model] is not implemented!") - - layers = [StemLayer(3, input_channel, stride=2, groups=stem_groups)] - - for idx, val in enumerate(self.cfgs): - s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r = val - - t1 = (c1, c2) - gs1 = (g1, g2) - gs2 = (c3, g3, g4) - activation_cfg['dy'] = [y1, y2, y3] - activation_cfg['ratio'] = r - - output_channel = c - layers.append( - DYMicroBlock( - input_channel, - output_channel, - kernel_size=ks, - stride=s, - ch_exp=t1, - ch_per_group=gs1, - groups_1x1=gs2, - depthsep=True, - shuffle=True, - activation_cfg=activation_cfg, )) - input_channel = output_channel - for i in range(1, n): - layers.append( - DYMicroBlock( - input_channel, - output_channel, - kernel_size=ks, - stride=1, - ch_exp=t1, - ch_per_group=gs1, - groups_1x1=gs2, - depthsep=True, - shuffle=True, - activation_cfg=activation_cfg, )) - input_channel = output_channel - self.features = nn.Sequential(*layers) - - self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0) - - self.out_channels = make_divisible(out_ch) - - def forward(self, x): - x = self.features(x) - x = self.pool(x) - return x diff --git a/backend/ppocr/modeling/backbones/rec_mobilenet_v3.py b/backend/ppocr/modeling/backbones/rec_mobilenet_v3.py deleted file mode 100644 index 917e000..0000000 --- a/backend/ppocr/modeling/backbones/rec_mobilenet_v3.py +++ /dev/null @@ -1,138 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle import nn - -from ppocr.modeling.backbones.det_mobilenet_v3 import ResidualUnit, ConvBNLayer, make_divisible - -__all__ = ['MobileNetV3'] - - -class MobileNetV3(nn.Layer): - def __init__(self, - in_channels=3, - model_name='small', - scale=0.5, - large_stride=None, - small_stride=None, - disable_se=False, - **kwargs): - super(MobileNetV3, self).__init__() - self.disable_se = disable_se - if small_stride is None: - small_stride = [2, 2, 2, 2] - if large_stride is None: - large_stride = [1, 2, 2, 2] - - assert isinstance(large_stride, list), "large_stride type must " \ - "be list but got {}".format(type(large_stride)) - assert isinstance(small_stride, list), "small_stride type must " \ - "be list but got {}".format(type(small_stride)) - assert len(large_stride) == 4, "large_stride length must be " \ - "4 but got {}".format(len(large_stride)) - assert len(small_stride) == 4, "small_stride length must be " \ - "4 but got {}".format(len(small_stride)) - - if model_name == "large": - cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, False, 'relu', large_stride[0]], - [3, 64, 24, False, 'relu', (large_stride[1], 1)], - [3, 72, 24, False, 'relu', 1], - [5, 72, 40, True, 'relu', (large_stride[2], 1)], - [5, 120, 40, True, 'relu', 1], - [5, 120, 40, True, 'relu', 1], - [3, 240, 80, False, 'hardswish', 1], - [3, 200, 80, False, 'hardswish', 1], - [3, 184, 80, False, 'hardswish', 1], - [3, 184, 80, False, 'hardswish', 1], - [3, 480, 112, True, 'hardswish', 1], - [3, 672, 112, True, 'hardswish', 1], - [5, 672, 160, True, 'hardswish', (large_stride[3], 1)], - [5, 960, 160, True, 'hardswish', 1], - [5, 960, 160, True, 'hardswish', 1], - ] - cls_ch_squeeze = 960 - elif model_name == "small": - cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, True, 'relu', (small_stride[0], 1)], - [3, 72, 24, False, 'relu', (small_stride[1], 1)], - [3, 88, 24, False, 'relu', 1], - [5, 96, 40, True, 'hardswish', (small_stride[2], 1)], - [5, 240, 40, True, 'hardswish', 1], - [5, 240, 40, True, 'hardswish', 1], - [5, 120, 48, True, 'hardswish', 1], - [5, 144, 48, True, 'hardswish', 1], - [5, 288, 96, True, 'hardswish', (small_stride[3], 1)], - [5, 576, 96, True, 'hardswish', 1], - [5, 576, 96, True, 'hardswish', 1], - ] - cls_ch_squeeze = 576 - else: - raise NotImplementedError("mode[" + model_name + - "_model] is not implemented!") - - supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25] - assert scale in supported_scale, \ - "supported scales are {} but input scale is {}".format(supported_scale, scale) - - inplanes = 16 - # conv1 - self.conv1 = ConvBNLayer( - in_channels=in_channels, - out_channels=make_divisible(inplanes * scale), - kernel_size=3, - stride=2, - padding=1, - groups=1, - if_act=True, - act='hardswish') - i = 0 - block_list = [] - inplanes = make_divisible(inplanes * scale) - for (k, exp, c, se, nl, s) in cfg: - se = se and not self.disable_se - block_list.append( - ResidualUnit( - in_channels=inplanes, - mid_channels=make_divisible(scale * exp), - out_channels=make_divisible(scale * c), - kernel_size=k, - stride=s, - use_se=se, - act=nl)) - inplanes = make_divisible(scale * c) - i += 1 - self.blocks = nn.Sequential(*block_list) - - self.conv2 = ConvBNLayer( - in_channels=inplanes, - out_channels=make_divisible(scale * cls_ch_squeeze), - kernel_size=1, - stride=1, - padding=0, - groups=1, - if_act=True, - act='hardswish') - - self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0) - self.out_channels = make_divisible(scale * cls_ch_squeeze) - - def forward(self, x): - x = self.conv1(x) - x = self.blocks(x) - x = self.conv2(x) - x = self.pool(x) - return x diff --git a/backend/ppocr/modeling/backbones/rec_mv1_enhance.py b/backend/ppocr/modeling/backbones/rec_mv1_enhance.py deleted file mode 100644 index bb6af5e..0000000 --- a/backend/ppocr/modeling/backbones/rec_mv1_enhance.py +++ /dev/null @@ -1,256 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This code is refer from: https://github.com/PaddlePaddle/PaddleClas/blob/develop/ppcls/arch/backbone/legendary_models/pp_lcnet.py - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import numpy as np -import paddle -from paddle import ParamAttr, reshape, transpose -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import KaimingNormal -from paddle.regularizer import L2Decay -from paddle.nn.functional import hardswish, hardsigmoid - - -class ConvBNLayer(nn.Layer): - def __init__(self, - num_channels, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - act='hard_swish'): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - weight_attr=ParamAttr(initializer=KaimingNormal()), - bias_attr=False) - - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(regularizer=L2Decay(0.0)), - bias_attr=ParamAttr(regularizer=L2Decay(0.0))) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class DepthwiseSeparable(nn.Layer): - def __init__(self, - num_channels, - num_filters1, - num_filters2, - num_groups, - stride, - scale, - dw_size=3, - padding=1, - use_se=False): - super(DepthwiseSeparable, self).__init__() - self.use_se = use_se - self._depthwise_conv = ConvBNLayer( - num_channels=num_channels, - num_filters=int(num_filters1 * scale), - filter_size=dw_size, - stride=stride, - padding=padding, - num_groups=int(num_groups * scale)) - if use_se: - self._se = SEModule(int(num_filters1 * scale)) - self._pointwise_conv = ConvBNLayer( - num_channels=int(num_filters1 * scale), - filter_size=1, - num_filters=int(num_filters2 * scale), - stride=1, - padding=0) - - def forward(self, inputs): - y = self._depthwise_conv(inputs) - if self.use_se: - y = self._se(y) - y = self._pointwise_conv(y) - return y - - -class MobileNetV1Enhance(nn.Layer): - def __init__(self, - in_channels=3, - scale=0.5, - last_conv_stride=1, - last_pool_type='max', - **kwargs): - super().__init__() - self.scale = scale - self.block_list = [] - - self.conv1 = ConvBNLayer( - num_channels=3, - filter_size=3, - channels=3, - num_filters=int(32 * scale), - stride=2, - padding=1) - - conv2_1 = DepthwiseSeparable( - num_channels=int(32 * scale), - num_filters1=32, - num_filters2=64, - num_groups=32, - stride=1, - scale=scale) - self.block_list.append(conv2_1) - - conv2_2 = DepthwiseSeparable( - num_channels=int(64 * scale), - num_filters1=64, - num_filters2=128, - num_groups=64, - stride=1, - scale=scale) - self.block_list.append(conv2_2) - - conv3_1 = DepthwiseSeparable( - num_channels=int(128 * scale), - num_filters1=128, - num_filters2=128, - num_groups=128, - stride=1, - scale=scale) - self.block_list.append(conv3_1) - - conv3_2 = DepthwiseSeparable( - num_channels=int(128 * scale), - num_filters1=128, - num_filters2=256, - num_groups=128, - stride=(2, 1), - scale=scale) - self.block_list.append(conv3_2) - - conv4_1 = DepthwiseSeparable( - num_channels=int(256 * scale), - num_filters1=256, - num_filters2=256, - num_groups=256, - stride=1, - scale=scale) - self.block_list.append(conv4_1) - - conv4_2 = DepthwiseSeparable( - num_channels=int(256 * scale), - num_filters1=256, - num_filters2=512, - num_groups=256, - stride=(2, 1), - scale=scale) - self.block_list.append(conv4_2) - - for _ in range(5): - conv5 = DepthwiseSeparable( - num_channels=int(512 * scale), - num_filters1=512, - num_filters2=512, - num_groups=512, - stride=1, - dw_size=5, - padding=2, - scale=scale, - use_se=False) - self.block_list.append(conv5) - - conv5_6 = DepthwiseSeparable( - num_channels=int(512 * scale), - num_filters1=512, - num_filters2=1024, - num_groups=512, - stride=(2, 1), - dw_size=5, - padding=2, - scale=scale, - use_se=True) - self.block_list.append(conv5_6) - - conv6 = DepthwiseSeparable( - num_channels=int(1024 * scale), - num_filters1=1024, - num_filters2=1024, - num_groups=1024, - stride=last_conv_stride, - dw_size=5, - padding=2, - use_se=True, - scale=scale) - self.block_list.append(conv6) - - self.block_list = nn.Sequential(*self.block_list) - if last_pool_type == 'avg': - self.pool = nn.AvgPool2D(kernel_size=2, stride=2, padding=0) - else: - self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0) - self.out_channels = int(1024 * scale) - - def forward(self, inputs): - y = self.conv1(inputs) - y = self.block_list(y) - y = self.pool(y) - return y - - -class SEModule(nn.Layer): - def __init__(self, channel, reduction=4): - super(SEModule, self).__init__() - self.avg_pool = AdaptiveAvgPool2D(1) - self.conv1 = Conv2D( - in_channels=channel, - out_channels=channel // reduction, - kernel_size=1, - stride=1, - padding=0, - weight_attr=ParamAttr(), - bias_attr=ParamAttr()) - self.conv2 = Conv2D( - in_channels=channel // reduction, - out_channels=channel, - kernel_size=1, - stride=1, - padding=0, - weight_attr=ParamAttr(), - bias_attr=ParamAttr()) - - def forward(self, inputs): - outputs = self.avg_pool(inputs) - outputs = self.conv1(outputs) - outputs = F.relu(outputs) - outputs = self.conv2(outputs) - outputs = hardsigmoid(outputs) - return paddle.multiply(x=inputs, y=outputs) diff --git a/backend/ppocr/modeling/backbones/rec_nrtr_mtb.py b/backend/ppocr/modeling/backbones/rec_nrtr_mtb.py deleted file mode 100644 index 22e02a6..0000000 --- a/backend/ppocr/modeling/backbones/rec_nrtr_mtb.py +++ /dev/null @@ -1,48 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle import nn -import paddle - - -class MTB(nn.Layer): - def __init__(self, cnn_num, in_channels): - super(MTB, self).__init__() - self.block = nn.Sequential() - self.out_channels = in_channels - self.cnn_num = cnn_num - if self.cnn_num == 2: - for i in range(self.cnn_num): - self.block.add_sublayer( - 'conv_{}'.format(i), - nn.Conv2D( - in_channels=in_channels - if i == 0 else 32 * (2**(i - 1)), - out_channels=32 * (2**i), - kernel_size=3, - stride=2, - padding=1)) - self.block.add_sublayer('relu_{}'.format(i), nn.ReLU()) - self.block.add_sublayer('bn_{}'.format(i), - nn.BatchNorm2D(32 * (2**i))) - - def forward(self, images): - x = self.block(images) - if self.cnn_num == 2: - # (b, w, h, c) - x = paddle.transpose(x, [0, 3, 2, 1]) - x_shape = paddle.shape(x) - x = paddle.reshape( - x, [x_shape[0], x_shape[1], x_shape[2] * x_shape[3]]) - return x diff --git a/backend/ppocr/modeling/backbones/rec_resnet_31.py b/backend/ppocr/modeling/backbones/rec_resnet_31.py deleted file mode 100644 index 9651701..0000000 --- a/backend/ppocr/modeling/backbones/rec_resnet_31.py +++ /dev/null @@ -1,210 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/layers/conv_layer.py -https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/backbones/resnet31_ocr.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -import numpy as np - -__all__ = ["ResNet31"] - - -def conv3x3(in_channel, out_channel, stride=1): - return nn.Conv2D( - in_channel, - out_channel, - kernel_size=3, - stride=stride, - padding=1, - bias_attr=False) - - -class BasicBlock(nn.Layer): - expansion = 1 - - def __init__(self, in_channels, channels, stride=1, downsample=False): - super().__init__() - self.conv1 = conv3x3(in_channels, channels, stride) - self.bn1 = nn.BatchNorm2D(channels) - self.relu = nn.ReLU() - self.conv2 = conv3x3(channels, channels) - self.bn2 = nn.BatchNorm2D(channels) - self.downsample = downsample - if downsample: - self.downsample = nn.Sequential( - nn.Conv2D( - in_channels, - channels * self.expansion, - 1, - stride, - bias_attr=False), - nn.BatchNorm2D(channels * self.expansion), ) - else: - self.downsample = nn.Sequential() - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - - if self.downsample: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class ResNet31(nn.Layer): - ''' - Args: - in_channels (int): Number of channels of input image tensor. - layers (list[int]): List of BasicBlock number for each stage. - channels (list[int]): List of out_channels of Conv2d layer. - out_indices (None | Sequence[int]): Indices of output stages. - last_stage_pool (bool): If True, add `MaxPool2d` layer to last stage. - ''' - - def __init__(self, - in_channels=3, - layers=[1, 2, 5, 3], - channels=[64, 128, 256, 256, 512, 512, 512], - out_indices=None, - last_stage_pool=False): - super(ResNet31, self).__init__() - assert isinstance(in_channels, int) - assert isinstance(last_stage_pool, bool) - - self.out_indices = out_indices - self.last_stage_pool = last_stage_pool - - # conv 1 (Conv Conv) - self.conv1_1 = nn.Conv2D( - in_channels, channels[0], kernel_size=3, stride=1, padding=1) - self.bn1_1 = nn.BatchNorm2D(channels[0]) - self.relu1_1 = nn.ReLU() - - self.conv1_2 = nn.Conv2D( - channels[0], channels[1], kernel_size=3, stride=1, padding=1) - self.bn1_2 = nn.BatchNorm2D(channels[1]) - self.relu1_2 = nn.ReLU() - - # conv 2 (Max-pooling, Residual block, Conv) - self.pool2 = nn.MaxPool2D( - kernel_size=2, stride=2, padding=0, ceil_mode=True) - self.block2 = self._make_layer(channels[1], channels[2], layers[0]) - self.conv2 = nn.Conv2D( - channels[2], channels[2], kernel_size=3, stride=1, padding=1) - self.bn2 = nn.BatchNorm2D(channels[2]) - self.relu2 = nn.ReLU() - - # conv 3 (Max-pooling, Residual block, Conv) - self.pool3 = nn.MaxPool2D( - kernel_size=2, stride=2, padding=0, ceil_mode=True) - self.block3 = self._make_layer(channels[2], channels[3], layers[1]) - self.conv3 = nn.Conv2D( - channels[3], channels[3], kernel_size=3, stride=1, padding=1) - self.bn3 = nn.BatchNorm2D(channels[3]) - self.relu3 = nn.ReLU() - - # conv 4 (Max-pooling, Residual block, Conv) - self.pool4 = nn.MaxPool2D( - kernel_size=(2, 1), stride=(2, 1), padding=0, ceil_mode=True) - self.block4 = self._make_layer(channels[3], channels[4], layers[2]) - self.conv4 = nn.Conv2D( - channels[4], channels[4], kernel_size=3, stride=1, padding=1) - self.bn4 = nn.BatchNorm2D(channels[4]) - self.relu4 = nn.ReLU() - - # conv 5 ((Max-pooling), Residual block, Conv) - self.pool5 = None - if self.last_stage_pool: - self.pool5 = nn.MaxPool2D( - kernel_size=2, stride=2, padding=0, ceil_mode=True) - self.block5 = self._make_layer(channels[4], channels[5], layers[3]) - self.conv5 = nn.Conv2D( - channels[5], channels[5], kernel_size=3, stride=1, padding=1) - self.bn5 = nn.BatchNorm2D(channels[5]) - self.relu5 = nn.ReLU() - - self.out_channels = channels[-1] - - def _make_layer(self, input_channels, output_channels, blocks): - layers = [] - for _ in range(blocks): - downsample = None - if input_channels != output_channels: - downsample = nn.Sequential( - nn.Conv2D( - input_channels, - output_channels, - kernel_size=1, - stride=1, - bias_attr=False), - nn.BatchNorm2D(output_channels), ) - - layers.append( - BasicBlock( - input_channels, output_channels, downsample=downsample)) - input_channels = output_channels - return nn.Sequential(*layers) - - def forward(self, x): - x = self.conv1_1(x) - x = self.bn1_1(x) - x = self.relu1_1(x) - - x = self.conv1_2(x) - x = self.bn1_2(x) - x = self.relu1_2(x) - - outs = [] - for i in range(4): - layer_index = i + 2 - pool_layer = getattr(self, f'pool{layer_index}') - block_layer = getattr(self, f'block{layer_index}') - conv_layer = getattr(self, f'conv{layer_index}') - bn_layer = getattr(self, f'bn{layer_index}') - relu_layer = getattr(self, f'relu{layer_index}') - - if pool_layer is not None: - x = pool_layer(x) - x = block_layer(x) - x = conv_layer(x) - x = bn_layer(x) - x = relu_layer(x) - - outs.append(x) - - if self.out_indices is not None: - return tuple([outs[i] for i in self.out_indices]) - - return x diff --git a/backend/ppocr/modeling/backbones/rec_resnet_aster.py b/backend/ppocr/modeling/backbones/rec_resnet_aster.py deleted file mode 100644 index 6a2710d..0000000 --- a/backend/ppocr/modeling/backbones/rec_resnet_aster.py +++ /dev/null @@ -1,143 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/ayumiymk/aster.pytorch/blob/master/lib/models/resnet_aster.py -""" -import paddle -import paddle.nn as nn - -import sys -import math - - -def conv3x3(in_planes, out_planes, stride=1): - """3x3 convolution with padding""" - return nn.Conv2D( - in_planes, - out_planes, - kernel_size=3, - stride=stride, - padding=1, - bias_attr=False) - - -def conv1x1(in_planes, out_planes, stride=1): - """1x1 convolution""" - return nn.Conv2D( - in_planes, out_planes, kernel_size=1, stride=stride, bias_attr=False) - - -def get_sinusoid_encoding(n_position, feat_dim, wave_length=10000): - # [n_position] - positions = paddle.arange(0, n_position) - # [feat_dim] - dim_range = paddle.arange(0, feat_dim) - dim_range = paddle.pow(wave_length, 2 * (dim_range // 2) / feat_dim) - # [n_position, feat_dim] - angles = paddle.unsqueeze( - positions, axis=1) / paddle.unsqueeze( - dim_range, axis=0) - angles = paddle.cast(angles, "float32") - angles[:, 0::2] = paddle.sin(angles[:, 0::2]) - angles[:, 1::2] = paddle.cos(angles[:, 1::2]) - return angles - - -class AsterBlock(nn.Layer): - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(AsterBlock, self).__init__() - self.conv1 = conv1x1(inplanes, planes, stride) - self.bn1 = nn.BatchNorm2D(planes) - self.relu = nn.ReLU() - self.conv2 = conv3x3(planes, planes) - self.bn2 = nn.BatchNorm2D(planes) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - out = self.conv2(out) - out = self.bn2(out) - - if self.downsample is not None: - residual = self.downsample(x) - out += residual - out = self.relu(out) - return out - - -class ResNet_ASTER(nn.Layer): - """For aster or crnn""" - - def __init__(self, with_lstm=True, n_group=1, in_channels=3): - super(ResNet_ASTER, self).__init__() - self.with_lstm = with_lstm - self.n_group = n_group - - self.layer0 = nn.Sequential( - nn.Conv2D( - in_channels, - 32, - kernel_size=(3, 3), - stride=1, - padding=1, - bias_attr=False), - nn.BatchNorm2D(32), - nn.ReLU()) - - self.inplanes = 32 - self.layer1 = self._make_layer(32, 3, [2, 2]) # [16, 50] - self.layer2 = self._make_layer(64, 4, [2, 2]) # [8, 25] - self.layer3 = self._make_layer(128, 6, [2, 1]) # [4, 25] - self.layer4 = self._make_layer(256, 6, [2, 1]) # [2, 25] - self.layer5 = self._make_layer(512, 3, [2, 1]) # [1, 25] - - if with_lstm: - self.rnn = nn.LSTM(512, 256, direction="bidirect", num_layers=2) - self.out_channels = 2 * 256 - else: - self.out_channels = 512 - - def _make_layer(self, planes, blocks, stride): - downsample = None - if stride != [1, 1] or self.inplanes != planes: - downsample = nn.Sequential( - conv1x1(self.inplanes, planes, stride), nn.BatchNorm2D(planes)) - - layers = [] - layers.append(AsterBlock(self.inplanes, planes, stride, downsample)) - self.inplanes = planes - for _ in range(1, blocks): - layers.append(AsterBlock(self.inplanes, planes)) - return nn.Sequential(*layers) - - def forward(self, x): - x0 = self.layer0(x) - x1 = self.layer1(x0) - x2 = self.layer2(x1) - x3 = self.layer3(x2) - x4 = self.layer4(x3) - x5 = self.layer5(x4) - - cnn_feat = x5.squeeze(2) # [N, c, w] - cnn_feat = paddle.transpose(cnn_feat, perm=[0, 2, 1]) - if self.with_lstm: - rnn_feat, _ = self.rnn(cnn_feat) - return rnn_feat - else: - return cnn_feat diff --git a/backend/ppocr/modeling/backbones/rec_resnet_fpn.py b/backend/ppocr/modeling/backbones/rec_resnet_fpn.py deleted file mode 100644 index a7e876a..0000000 --- a/backend/ppocr/modeling/backbones/rec_resnet_fpn.py +++ /dev/null @@ -1,307 +0,0 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import nn, ParamAttr -from paddle.nn import functional as F -import paddle.fluid as fluid -import paddle -import numpy as np - -__all__ = ["ResNetFPN"] - - -class ResNetFPN(nn.Layer): - def __init__(self, in_channels=1, layers=50, **kwargs): - super(ResNetFPN, self).__init__() - supported_layers = { - 18: { - 'depth': [2, 2, 2, 2], - 'block_class': BasicBlock - }, - 34: { - 'depth': [3, 4, 6, 3], - 'block_class': BasicBlock - }, - 50: { - 'depth': [3, 4, 6, 3], - 'block_class': BottleneckBlock - }, - 101: { - 'depth': [3, 4, 23, 3], - 'block_class': BottleneckBlock - }, - 152: { - 'depth': [3, 8, 36, 3], - 'block_class': BottleneckBlock - } - } - stride_list = [(2, 2), (2, 2), (1, 1), (1, 1)] - num_filters = [64, 128, 256, 512] - self.depth = supported_layers[layers]['depth'] - self.F = [] - self.conv = ConvBNLayer( - in_channels=in_channels, - out_channels=64, - kernel_size=7, - stride=2, - act="relu", - name="conv1") - self.block_list = [] - in_ch = 64 - if layers >= 50: - for block in range(len(self.depth)): - for i in range(self.depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - block_list = self.add_sublayer( - "bottleneckBlock_{}_{}".format(block, i), - BottleneckBlock( - in_channels=in_ch, - out_channels=num_filters[block], - stride=stride_list[block] if i == 0 else 1, - name=conv_name)) - in_ch = num_filters[block] * 4 - self.block_list.append(block_list) - self.F.append(block_list) - else: - for block in range(len(self.depth)): - for i in range(self.depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - if i == 0 and block != 0: - stride = (2, 1) - else: - stride = (1, 1) - basic_block = self.add_sublayer( - conv_name, - BasicBlock( - in_channels=in_ch, - out_channels=num_filters[block], - stride=stride_list[block] if i == 0 else 1, - is_first=block == i == 0, - name=conv_name)) - in_ch = basic_block.out_channels - self.block_list.append(basic_block) - out_ch_list = [in_ch // 4, in_ch // 2, in_ch] - self.base_block = [] - self.conv_trans = [] - self.bn_block = [] - for i in [-2, -3]: - in_channels = out_ch_list[i + 1] + out_ch_list[i] - - self.base_block.append( - self.add_sublayer( - "F_{}_base_block_0".format(i), - nn.Conv2D( - in_channels=in_channels, - out_channels=out_ch_list[i], - kernel_size=1, - weight_attr=ParamAttr(trainable=True), - bias_attr=ParamAttr(trainable=True)))) - self.base_block.append( - self.add_sublayer( - "F_{}_base_block_1".format(i), - nn.Conv2D( - in_channels=out_ch_list[i], - out_channels=out_ch_list[i], - kernel_size=3, - padding=1, - weight_attr=ParamAttr(trainable=True), - bias_attr=ParamAttr(trainable=True)))) - self.base_block.append( - self.add_sublayer( - "F_{}_base_block_2".format(i), - nn.BatchNorm( - num_channels=out_ch_list[i], - act="relu", - param_attr=ParamAttr(trainable=True), - bias_attr=ParamAttr(trainable=True)))) - self.base_block.append( - self.add_sublayer( - "F_{}_base_block_3".format(i), - nn.Conv2D( - in_channels=out_ch_list[i], - out_channels=512, - kernel_size=1, - bias_attr=ParamAttr(trainable=True), - weight_attr=ParamAttr(trainable=True)))) - self.out_channels = 512 - - def __call__(self, x): - x = self.conv(x) - fpn_list = [] - F = [] - for i in range(len(self.depth)): - fpn_list.append(np.sum(self.depth[:i + 1])) - - for i, block in enumerate(self.block_list): - x = block(x) - for number in fpn_list: - if i + 1 == number: - F.append(x) - base = F[-1] - - j = 0 - for i, block in enumerate(self.base_block): - if i % 3 == 0 and i < 6: - j = j + 1 - b, c, w, h = F[-j - 1].shape - if [w, h] == list(base.shape[2:]): - base = base - else: - base = self.conv_trans[j - 1](base) - base = self.bn_block[j - 1](base) - base = paddle.concat([base, F[-j - 1]], axis=1) - base = block(base) - return base - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - groups=1, - act=None, - name=None): - super(ConvBNLayer, self).__init__() - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=2 if stride == (1, 1) else kernel_size, - dilation=2 if stride == (1, 1) else 1, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + '.conv2d.output.1.w_0'), - bias_attr=False, ) - - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name=name + '.output.1.w_0'), - bias_attr=ParamAttr(name=name + '.output.1.b_0'), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance") - - def __call__(self, x): - x = self.conv(x) - x = self.bn(x) - return x - - -class ShortCut(nn.Layer): - def __init__(self, in_channels, out_channels, stride, name, is_first=False): - super(ShortCut, self).__init__() - self.use_conv = True - - if in_channels != out_channels or stride != 1 or is_first == True: - if stride == (1, 1): - self.conv = ConvBNLayer( - in_channels, out_channels, 1, 1, name=name) - else: # stride==(2,2) - self.conv = ConvBNLayer( - in_channels, out_channels, 1, stride, name=name) - else: - self.use_conv = False - - def forward(self, x): - if self.use_conv: - x = self.conv(x) - return x - - -class BottleneckBlock(nn.Layer): - def __init__(self, in_channels, out_channels, stride, name): - super(BottleneckBlock, self).__init__() - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - - self.conv2 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels * 4, - kernel_size=1, - act=None, - name=name + "_branch2c") - - self.short = ShortCut( - in_channels=in_channels, - out_channels=out_channels * 4, - stride=stride, - is_first=False, - name=name + "_branch1") - self.out_channels = out_channels * 4 - - def forward(self, x): - y = self.conv0(x) - y = self.conv1(y) - y = self.conv2(y) - y = y + self.short(x) - y = F.relu(y) - return y - - -class BasicBlock(nn.Layer): - def __init__(self, in_channels, out_channels, stride, name, is_first): - super(BasicBlock, self).__init__() - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - act=None, - name=name + "_branch2b") - self.short = ShortCut( - in_channels=in_channels, - out_channels=out_channels, - stride=stride, - is_first=is_first, - name=name + "_branch1") - self.out_channels = out_channels - - def forward(self, x): - y = self.conv0(x) - y = self.conv1(y) - y = y + self.short(x) - return F.relu(y) diff --git a/backend/ppocr/modeling/backbones/rec_resnet_vd.py b/backend/ppocr/modeling/backbones/rec_resnet_vd.py deleted file mode 100644 index 0187deb..0000000 --- a/backend/ppocr/modeling/backbones/rec_resnet_vd.py +++ /dev/null @@ -1,286 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F - -__all__ = ["ResNet"] - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - groups=1, - is_vd_mode=False, - act=None, - name=None, ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = nn.AvgPool2D( - kernel_size=stride, stride=stride, padding=0, ceil_mode=True) - self._conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=1 if is_vd_mode else stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = nn.BatchNorm( - out_channels, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__(self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, - name=None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels * 4, - kernel_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels * 4, - kernel_size=1, - stride=stride, - is_vd_mode=not if_first and stride[0] != 1, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv2) - y = F.relu(y) - return y - - -class BasicBlock(nn.Layer): - def __init__(self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, - name=None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=stride, - is_vd_mode=not if_first and stride[0] != 1, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv1) - y = F.relu(y) - return y - - -class ResNet(nn.Layer): - def __init__(self, in_channels=3, layers=50, **kwargs): - super(ResNet, self).__init__() - - self.layers = layers - supported_layers = [18, 34, 50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format( - supported_layers, layers) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_channels = [64, 256, 512, - 1024] if layers >= 50 else [64, 64, 128, 256] - num_filters = [64, 128, 256, 512] - - self.conv1_1 = ConvBNLayer( - in_channels=in_channels, - out_channels=32, - kernel_size=3, - stride=1, - act='relu', - name="conv1_1") - self.conv1_2 = ConvBNLayer( - in_channels=32, - out_channels=32, - kernel_size=3, - stride=1, - act='relu', - name="conv1_2") - self.conv1_3 = ConvBNLayer( - in_channels=32, - out_channels=64, - kernel_size=3, - stride=1, - act='relu', - name="conv1_3") - self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - if layers >= 50: - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if layers in [101, 152, 200] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - - if i == 0 and block != 0: - stride = (2, 1) - else: - stride = (1, 1) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block] * 4, - out_channels=num_filters[block], - stride=stride, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - shortcut = True - self.block_list.append(bottleneck_block) - self.out_channels = num_filters[block] * 4 - else: - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - if i == 0 and block != 0: - stride = (2, 1) - else: - stride = (1, 1) - - basic_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BasicBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block], - out_channels=num_filters[block], - stride=stride, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - shortcut = True - self.block_list.append(basic_block) - self.out_channels = num_filters[block] - self.out_pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0) - - def forward(self, inputs): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.out_pool(y) - return y diff --git a/backend/ppocr/modeling/backbones/rec_svtrnet.py b/backend/ppocr/modeling/backbones/rec_svtrnet.py deleted file mode 100644 index c57bf46..0000000 --- a/backend/ppocr/modeling/backbones/rec_svtrnet.py +++ /dev/null @@ -1,584 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle import ParamAttr -from paddle.nn.initializer import KaimingNormal -import numpy as np -import paddle -import paddle.nn as nn -from paddle.nn.initializer import TruncatedNormal, Constant, Normal - -trunc_normal_ = TruncatedNormal(std=.02) -normal_ = Normal -zeros_ = Constant(value=0.) -ones_ = Constant(value=1.) - - -def drop_path(x, drop_prob=0., training=False): - """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). - the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... - See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... - """ - if drop_prob == 0. or not training: - return x - keep_prob = paddle.to_tensor(1 - drop_prob) - shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1) - random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype) - random_tensor = paddle.floor(random_tensor) # binarize - output = x.divide(keep_prob) * random_tensor - return output - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size=3, - stride=1, - padding=0, - bias_attr=False, - groups=1, - act=nn.GELU): - super().__init__() - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=paddle.ParamAttr( - initializer=nn.initializer.KaimingUniform()), - bias_attr=bias_attr) - self.norm = nn.BatchNorm2D(out_channels) - self.act = act() - - def forward(self, inputs): - out = self.conv(inputs) - out = self.norm(out) - out = self.act(out) - return out - - -class DropPath(nn.Layer): - """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). - """ - - def __init__(self, drop_prob=None): - super(DropPath, self).__init__() - self.drop_prob = drop_prob - - def forward(self, x): - return drop_path(x, self.drop_prob, self.training) - - -class Identity(nn.Layer): - def __init__(self): - super(Identity, self).__init__() - - def forward(self, input): - return input - - -class Mlp(nn.Layer): - def __init__(self, - in_features, - hidden_features=None, - out_features=None, - act_layer=nn.GELU, - drop=0.): - super().__init__() - out_features = out_features or in_features - hidden_features = hidden_features or in_features - self.fc1 = nn.Linear(in_features, hidden_features) - self.act = act_layer() - self.fc2 = nn.Linear(hidden_features, out_features) - self.drop = nn.Dropout(drop) - - def forward(self, x): - x = self.fc1(x) - x = self.act(x) - x = self.drop(x) - x = self.fc2(x) - x = self.drop(x) - return x - - -class ConvMixer(nn.Layer): - def __init__( - self, - dim, - num_heads=8, - HW=[8, 25], - local_k=[3, 3], ): - super().__init__() - self.HW = HW - self.dim = dim - self.local_mixer = nn.Conv2D( - dim, - dim, - local_k, - 1, [local_k[0] // 2, local_k[1] // 2], - groups=num_heads, - weight_attr=ParamAttr(initializer=KaimingNormal())) - - def forward(self, x): - h = self.HW[0] - w = self.HW[1] - x = x.transpose([0, 2, 1]).reshape([0, self.dim, h, w]) - x = self.local_mixer(x) - x = x.flatten(2).transpose([0, 2, 1]) - return x - - -class Attention(nn.Layer): - def __init__(self, - dim, - num_heads=8, - mixer='Global', - HW=[8, 25], - local_k=[7, 11], - qkv_bias=False, - qk_scale=None, - attn_drop=0., - proj_drop=0.): - super().__init__() - self.num_heads = num_heads - head_dim = dim // num_heads - self.scale = qk_scale or head_dim**-0.5 - - self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias) - self.attn_drop = nn.Dropout(attn_drop) - self.proj = nn.Linear(dim, dim) - self.proj_drop = nn.Dropout(proj_drop) - self.HW = HW - if HW is not None: - H = HW[0] - W = HW[1] - self.N = H * W - self.C = dim - if mixer == 'Local' and HW is not None: - hk = local_k[0] - wk = local_k[1] - mask = paddle.ones([H * W, H + hk - 1, W + wk - 1], dtype='float32') - for h in range(0, H): - for w in range(0, W): - mask[h * W + w, h:h + hk, w:w + wk] = 0. - mask_paddle = mask[:, hk // 2:H + hk // 2, wk // 2:W + wk // - 2].flatten(1) - mask_inf = paddle.full([H * W, H * W], '-inf', dtype='float32') - mask = paddle.where(mask_paddle < 1, mask_paddle, mask_inf) - self.mask = mask.unsqueeze([0, 1]) - self.mixer = mixer - - def forward(self, x): - if self.HW is not None: - N = self.N - C = self.C - else: - _, N, C = x.shape - qkv = self.qkv(x).reshape((0, N, 3, self.num_heads, C // - self.num_heads)).transpose((2, 0, 3, 1, 4)) - q, k, v = qkv[0] * self.scale, qkv[1], qkv[2] - - attn = (q.matmul(k.transpose((0, 1, 3, 2)))) - if self.mixer == 'Local': - attn += self.mask - attn = nn.functional.softmax(attn, axis=-1) - attn = self.attn_drop(attn) - - x = (attn.matmul(v)).transpose((0, 2, 1, 3)).reshape((0, N, C)) - x = self.proj(x) - x = self.proj_drop(x) - return x - - -class Block(nn.Layer): - def __init__(self, - dim, - num_heads, - mixer='Global', - local_mixer=[7, 11], - HW=[8, 25], - mlp_ratio=4., - qkv_bias=False, - qk_scale=None, - drop=0., - attn_drop=0., - drop_path=0., - act_layer=nn.GELU, - norm_layer='nn.LayerNorm', - epsilon=1e-6, - prenorm=True): - super().__init__() - if isinstance(norm_layer, str): - self.norm1 = eval(norm_layer)(dim, epsilon=epsilon) - else: - self.norm1 = norm_layer(dim) - if mixer == 'Global' or mixer == 'Local': - self.mixer = Attention( - dim, - num_heads=num_heads, - mixer=mixer, - HW=HW, - local_k=local_mixer, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - attn_drop=attn_drop, - proj_drop=drop) - elif mixer == 'Conv': - self.mixer = ConvMixer( - dim, num_heads=num_heads, HW=HW, local_k=local_mixer) - else: - raise TypeError("The mixer must be one of [Global, Local, Conv]") - - self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity() - if isinstance(norm_layer, str): - self.norm2 = eval(norm_layer)(dim, epsilon=epsilon) - else: - self.norm2 = norm_layer(dim) - mlp_hidden_dim = int(dim * mlp_ratio) - self.mlp_ratio = mlp_ratio - self.mlp = Mlp(in_features=dim, - hidden_features=mlp_hidden_dim, - act_layer=act_layer, - drop=drop) - self.prenorm = prenorm - - def forward(self, x): - if self.prenorm: - x = self.norm1(x + self.drop_path(self.mixer(x))) - x = self.norm2(x + self.drop_path(self.mlp(x))) - else: - x = x + self.drop_path(self.mixer(self.norm1(x))) - x = x + self.drop_path(self.mlp(self.norm2(x))) - return x - - -class PatchEmbed(nn.Layer): - """ Image to Patch Embedding - """ - - def __init__(self, - img_size=[32, 100], - in_channels=3, - embed_dim=768, - sub_num=2): - super().__init__() - num_patches = (img_size[1] // (2 ** sub_num)) * \ - (img_size[0] // (2 ** sub_num)) - self.img_size = img_size - self.num_patches = num_patches - self.embed_dim = embed_dim - self.norm = None - if sub_num == 2: - self.proj = nn.Sequential( - ConvBNLayer( - in_channels=in_channels, - out_channels=embed_dim // 2, - kernel_size=3, - stride=2, - padding=1, - act=nn.GELU, - bias_attr=None), - ConvBNLayer( - in_channels=embed_dim // 2, - out_channels=embed_dim, - kernel_size=3, - stride=2, - padding=1, - act=nn.GELU, - bias_attr=None)) - if sub_num == 3: - self.proj = nn.Sequential( - ConvBNLayer( - in_channels=in_channels, - out_channels=embed_dim // 4, - kernel_size=3, - stride=2, - padding=1, - act=nn.GELU, - bias_attr=None), - ConvBNLayer( - in_channels=embed_dim // 4, - out_channels=embed_dim // 2, - kernel_size=3, - stride=2, - padding=1, - act=nn.GELU, - bias_attr=None), - ConvBNLayer( - in_channels=embed_dim // 2, - out_channels=embed_dim, - kernel_size=3, - stride=2, - padding=1, - act=nn.GELU, - bias_attr=None)) - - def forward(self, x): - B, C, H, W = x.shape - assert H == self.img_size[0] and W == self.img_size[1], \ - f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." - x = self.proj(x).flatten(2).transpose((0, 2, 1)) - return x - - -class SubSample(nn.Layer): - def __init__(self, - in_channels, - out_channels, - types='Pool', - stride=[2, 1], - sub_norm='nn.LayerNorm', - act=None): - super().__init__() - self.types = types - if types == 'Pool': - self.avgpool = nn.AvgPool2D( - kernel_size=[3, 5], stride=stride, padding=[1, 2]) - self.maxpool = nn.MaxPool2D( - kernel_size=[3, 5], stride=stride, padding=[1, 2]) - self.proj = nn.Linear(in_channels, out_channels) - else: - self.conv = nn.Conv2D( - in_channels, - out_channels, - kernel_size=3, - stride=stride, - padding=1, - weight_attr=ParamAttr(initializer=KaimingNormal())) - self.norm = eval(sub_norm)(out_channels) - if act is not None: - self.act = act() - else: - self.act = None - - def forward(self, x): - - if self.types == 'Pool': - x1 = self.avgpool(x) - x2 = self.maxpool(x) - x = (x1 + x2) * 0.5 - out = self.proj(x.flatten(2).transpose((0, 2, 1))) - else: - x = self.conv(x) - out = x.flatten(2).transpose((0, 2, 1)) - out = self.norm(out) - if self.act is not None: - out = self.act(out) - - return out - - -class SVTRNet(nn.Layer): - def __init__( - self, - img_size=[32, 100], - in_channels=3, - embed_dim=[64, 128, 256], - depth=[3, 6, 3], - num_heads=[2, 4, 8], - mixer=['Local'] * 6 + ['Global'] * - 6, # Local atten, Global atten, Conv - local_mixer=[[7, 11], [7, 11], [7, 11]], - patch_merging='Conv', # Conv, Pool, None - mlp_ratio=4, - qkv_bias=True, - qk_scale=None, - drop_rate=0., - last_drop=0.1, - attn_drop_rate=0., - drop_path_rate=0.1, - norm_layer='nn.LayerNorm', - sub_norm='nn.LayerNorm', - epsilon=1e-6, - out_channels=192, - out_char_num=25, - block_unit='Block', - act='nn.GELU', - last_stage=True, - sub_num=2, - prenorm=True, - use_lenhead=False, - **kwargs): - super().__init__() - self.img_size = img_size - self.embed_dim = embed_dim - self.out_channels = out_channels - self.prenorm = prenorm - patch_merging = None if patch_merging != 'Conv' and patch_merging != 'Pool' else patch_merging - self.patch_embed = PatchEmbed( - img_size=img_size, - in_channels=in_channels, - embed_dim=embed_dim[0], - sub_num=sub_num) - num_patches = self.patch_embed.num_patches - self.HW = [img_size[0] // (2**sub_num), img_size[1] // (2**sub_num)] - self.pos_embed = self.create_parameter( - shape=[1, num_patches, embed_dim[0]], default_initializer=zeros_) - self.add_parameter("pos_embed", self.pos_embed) - self.pos_drop = nn.Dropout(p=drop_rate) - Block_unit = eval(block_unit) - - dpr = np.linspace(0, drop_path_rate, sum(depth)) - self.blocks1 = nn.LayerList([ - Block_unit( - dim=embed_dim[0], - num_heads=num_heads[0], - mixer=mixer[0:depth[0]][i], - HW=self.HW, - local_mixer=local_mixer[0], - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - act_layer=eval(act), - attn_drop=attn_drop_rate, - drop_path=dpr[0:depth[0]][i], - norm_layer=norm_layer, - epsilon=epsilon, - prenorm=prenorm) for i in range(depth[0]) - ]) - if patch_merging is not None: - self.sub_sample1 = SubSample( - embed_dim[0], - embed_dim[1], - sub_norm=sub_norm, - stride=[2, 1], - types=patch_merging) - HW = [self.HW[0] // 2, self.HW[1]] - else: - HW = self.HW - self.patch_merging = patch_merging - self.blocks2 = nn.LayerList([ - Block_unit( - dim=embed_dim[1], - num_heads=num_heads[1], - mixer=mixer[depth[0]:depth[0] + depth[1]][i], - HW=HW, - local_mixer=local_mixer[1], - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - act_layer=eval(act), - attn_drop=attn_drop_rate, - drop_path=dpr[depth[0]:depth[0] + depth[1]][i], - norm_layer=norm_layer, - epsilon=epsilon, - prenorm=prenorm) for i in range(depth[1]) - ]) - if patch_merging is not None: - self.sub_sample2 = SubSample( - embed_dim[1], - embed_dim[2], - sub_norm=sub_norm, - stride=[2, 1], - types=patch_merging) - HW = [self.HW[0] // 4, self.HW[1]] - else: - HW = self.HW - self.blocks3 = nn.LayerList([ - Block_unit( - dim=embed_dim[2], - num_heads=num_heads[2], - mixer=mixer[depth[0] + depth[1]:][i], - HW=HW, - local_mixer=local_mixer[2], - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - act_layer=eval(act), - attn_drop=attn_drop_rate, - drop_path=dpr[depth[0] + depth[1]:][i], - norm_layer=norm_layer, - epsilon=epsilon, - prenorm=prenorm) for i in range(depth[2]) - ]) - self.last_stage = last_stage - if last_stage: - self.avg_pool = nn.AdaptiveAvgPool2D([1, out_char_num]) - self.last_conv = nn.Conv2D( - in_channels=embed_dim[2], - out_channels=self.out_channels, - kernel_size=1, - stride=1, - padding=0, - bias_attr=False) - self.hardswish = nn.Hardswish() - self.dropout = nn.Dropout(p=last_drop, mode="downscale_in_infer") - if not prenorm: - self.norm = eval(norm_layer)(embed_dim[-1], epsilon=epsilon) - self.use_lenhead = use_lenhead - if use_lenhead: - self.len_conv = nn.Linear(embed_dim[2], self.out_channels) - self.hardswish_len = nn.Hardswish() - self.dropout_len = nn.Dropout( - p=last_drop, mode="downscale_in_infer") - - trunc_normal_(self.pos_embed) - self.apply(self._init_weights) - - def _init_weights(self, m): - if isinstance(m, nn.Linear): - trunc_normal_(m.weight) - if isinstance(m, nn.Linear) and m.bias is not None: - zeros_(m.bias) - elif isinstance(m, nn.LayerNorm): - zeros_(m.bias) - ones_(m.weight) - - def forward_features(self, x): - x = self.patch_embed(x) - x = x + self.pos_embed - x = self.pos_drop(x) - for blk in self.blocks1: - x = blk(x) - if self.patch_merging is not None: - x = self.sub_sample1( - x.transpose([0, 2, 1]).reshape( - [0, self.embed_dim[0], self.HW[0], self.HW[1]])) - for blk in self.blocks2: - x = blk(x) - if self.patch_merging is not None: - x = self.sub_sample2( - x.transpose([0, 2, 1]).reshape( - [0, self.embed_dim[1], self.HW[0] // 2, self.HW[1]])) - for blk in self.blocks3: - x = blk(x) - if not self.prenorm: - x = self.norm(x) - return x - - def forward(self, x): - x = self.forward_features(x) - if self.use_lenhead: - len_x = self.len_conv(x.mean(1)) - len_x = self.dropout_len(self.hardswish_len(len_x)) - if self.last_stage: - if self.patch_merging is not None: - h = self.HW[0] // 4 - else: - h = self.HW[0] - x = self.avg_pool( - x.transpose([0, 2, 1]).reshape( - [0, self.embed_dim[2], h, self.HW[1]])) - x = self.last_conv(x) - x = self.hardswish(x) - x = self.dropout(x) - if self.use_lenhead: - return x, len_x - return x diff --git a/backend/ppocr/modeling/backbones/vqa_layoutlm.py b/backend/ppocr/modeling/backbones/vqa_layoutlm.py deleted file mode 100644 index ede5b7a..0000000 --- a/backend/ppocr/modeling/backbones/vqa_layoutlm.py +++ /dev/null @@ -1,172 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -from paddle import nn - -from paddlenlp.transformers import LayoutXLMModel, LayoutXLMForTokenClassification, LayoutXLMForRelationExtraction -from paddlenlp.transformers import LayoutLMModel, LayoutLMForTokenClassification -from paddlenlp.transformers import LayoutLMv2Model, LayoutLMv2ForTokenClassification, LayoutLMv2ForRelationExtraction - -__all__ = ["LayoutXLMForSer", 'LayoutLMForSer'] - -pretrained_model_dict = { - LayoutXLMModel: 'layoutxlm-base-uncased', - LayoutLMModel: 'layoutlm-base-uncased', - LayoutLMv2Model: 'layoutlmv2-base-uncased' -} - - -class NLPBaseModel(nn.Layer): - def __init__(self, - base_model_class, - model_class, - type='ser', - pretrained=True, - checkpoints=None, - **kwargs): - super(NLPBaseModel, self).__init__() - if checkpoints is not None: - self.model = model_class.from_pretrained(checkpoints) - else: - pretrained_model_name = pretrained_model_dict[base_model_class] - if pretrained: - base_model = base_model_class.from_pretrained( - pretrained_model_name) - else: - base_model = base_model_class( - **base_model_class.pretrained_init_configuration[ - pretrained_model_name]) - if type == 'ser': - self.model = model_class( - base_model, num_classes=kwargs['num_classes'], dropout=None) - else: - self.model = model_class(base_model, dropout=None) - self.out_channels = 1 - - -class LayoutLMForSer(NLPBaseModel): - def __init__(self, num_classes, pretrained=True, checkpoints=None, - **kwargs): - super(LayoutLMForSer, self).__init__( - LayoutLMModel, - LayoutLMForTokenClassification, - 'ser', - pretrained, - checkpoints, - num_classes=num_classes) - - def forward(self, x): - x = self.model( - input_ids=x[0], - bbox=x[2], - attention_mask=x[4], - token_type_ids=x[5], - position_ids=None, - output_hidden_states=False) - return x - - -class LayoutLMv2ForSer(NLPBaseModel): - def __init__(self, num_classes, pretrained=True, checkpoints=None, - **kwargs): - super(LayoutLMv2ForSer, self).__init__( - LayoutLMv2Model, - LayoutLMv2ForTokenClassification, - 'ser', - pretrained, - checkpoints, - num_classes=num_classes) - - def forward(self, x): - x = self.model( - input_ids=x[0], - bbox=x[2], - image=x[3], - attention_mask=x[4], - token_type_ids=x[5], - position_ids=None, - head_mask=None, - labels=None) - return x[0] - - -class LayoutXLMForSer(NLPBaseModel): - def __init__(self, num_classes, pretrained=True, checkpoints=None, - **kwargs): - super(LayoutXLMForSer, self).__init__( - LayoutXLMModel, - LayoutXLMForTokenClassification, - 'ser', - pretrained, - checkpoints, - num_classes=num_classes) - - def forward(self, x): - x = self.model( - input_ids=x[0], - bbox=x[2], - image=x[3], - attention_mask=x[4], - token_type_ids=x[5], - position_ids=None, - head_mask=None, - labels=None) - return x[0] - - -class LayoutLMv2ForRe(NLPBaseModel): - def __init__(self, pretrained=True, checkpoints=None, **kwargs): - super(LayoutLMv2ForRe, self).__init__(LayoutLMv2Model, - LayoutLMv2ForRelationExtraction, - 're', pretrained, checkpoints) - - def forward(self, x): - x = self.model( - input_ids=x[0], - bbox=x[1], - labels=None, - image=x[2], - attention_mask=x[3], - token_type_ids=x[4], - position_ids=None, - head_mask=None, - entities=x[5], - relations=x[6]) - return x - - -class LayoutXLMForRe(NLPBaseModel): - def __init__(self, pretrained=True, checkpoints=None, **kwargs): - super(LayoutXLMForRe, self).__init__(LayoutXLMModel, - LayoutXLMForRelationExtraction, - 're', pretrained, checkpoints) - - def forward(self, x): - x = self.model( - input_ids=x[0], - bbox=x[1], - labels=None, - image=x[2], - attention_mask=x[3], - token_type_ids=x[4], - position_ids=None, - head_mask=None, - entities=x[5], - relations=x[6]) - return x diff --git a/backend/ppocr/modeling/heads/__init__.py b/backend/ppocr/modeling/heads/__init__.py deleted file mode 100755 index 1670ea3..0000000 --- a/backend/ppocr/modeling/heads/__init__.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -__all__ = ['build_head'] - - -def build_head(config): - # det head - from .det_db_head import DBHead - from .det_east_head import EASTHead - from .det_sast_head import SASTHead - from .det_pse_head import PSEHead - from .det_fce_head import FCEHead - from .e2e_pg_head import PGHead - - # rec head - from .rec_ctc_head import CTCHead - from .rec_att_head import AttentionHead - from .rec_srn_head import SRNHead - from .rec_nrtr_head import Transformer - from .rec_sar_head import SARHead - from .rec_aster_head import AsterHead - from .rec_pren_head import PRENHead - from .rec_multi_head import MultiHead - - # cls head - from .cls_head import ClsHead - - #kie head - from .kie_sdmgr_head import SDMGRHead - - from .table_att_head import TableAttentionHead - - support_dict = [ - 'DBHead', 'PSEHead', 'FCEHead', 'EASTHead', 'SASTHead', 'CTCHead', - 'ClsHead', 'AttentionHead', 'SRNHead', 'PGHead', 'Transformer', - 'TableAttentionHead', 'SARHead', 'AsterHead', 'SDMGRHead', 'PRENHead', - 'MultiHead' - ] - - #table head - - module_name = config.pop('name') - assert module_name in support_dict, Exception('head only support {}'.format( - support_dict)) - module_class = eval(module_name)(**config) - return module_class diff --git a/backend/ppocr/modeling/heads/cls_head.py b/backend/ppocr/modeling/heads/cls_head.py deleted file mode 100644 index 91bfa61..0000000 --- a/backend/ppocr/modeling/heads/cls_head.py +++ /dev/null @@ -1,52 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn, ParamAttr -import paddle.nn.functional as F - - -class ClsHead(nn.Layer): - """ - Class orientation - - Args: - - params(dict): super parameters for build Class network - """ - - def __init__(self, in_channels, class_dim, **kwargs): - super(ClsHead, self).__init__() - self.pool = nn.AdaptiveAvgPool2D(1) - stdv = 1.0 / math.sqrt(in_channels * 1.0) - self.fc = nn.Linear( - in_channels, - class_dim, - weight_attr=ParamAttr( - name="fc_0.w_0", - initializer=nn.initializer.Uniform(-stdv, stdv)), - bias_attr=ParamAttr(name="fc_0.b_0"), ) - - def forward(self, x, targets=None): - x = self.pool(x) - x = paddle.reshape(x, shape=[x.shape[0], x.shape[1]]) - x = self.fc(x) - if not self.training: - x = F.softmax(x, axis=1) - return x diff --git a/backend/ppocr/modeling/heads/det_db_head.py b/backend/ppocr/modeling/heads/det_db_head.py deleted file mode 100644 index a686ae5..0000000 --- a/backend/ppocr/modeling/heads/det_db_head.py +++ /dev/null @@ -1,118 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - - -def get_bias_attr(k): - stdv = 1.0 / math.sqrt(k * 1.0) - initializer = paddle.nn.initializer.Uniform(-stdv, stdv) - bias_attr = ParamAttr(initializer=initializer) - return bias_attr - - -class Head(nn.Layer): - def __init__(self, in_channels, name_list, kernel_list=[3, 2, 2], **kwargs): - super(Head, self).__init__() - - self.conv1 = nn.Conv2D( - in_channels=in_channels, - out_channels=in_channels // 4, - kernel_size=kernel_list[0], - padding=int(kernel_list[0] // 2), - weight_attr=ParamAttr(), - bias_attr=False) - self.conv_bn1 = nn.BatchNorm( - num_channels=in_channels // 4, - param_attr=ParamAttr( - initializer=paddle.nn.initializer.Constant(value=1.0)), - bias_attr=ParamAttr( - initializer=paddle.nn.initializer.Constant(value=1e-4)), - act='relu') - self.conv2 = nn.Conv2DTranspose( - in_channels=in_channels // 4, - out_channels=in_channels // 4, - kernel_size=kernel_list[1], - stride=2, - weight_attr=ParamAttr( - initializer=paddle.nn.initializer.KaimingUniform()), - bias_attr=get_bias_attr(in_channels // 4)) - self.conv_bn2 = nn.BatchNorm( - num_channels=in_channels // 4, - param_attr=ParamAttr( - initializer=paddle.nn.initializer.Constant(value=1.0)), - bias_attr=ParamAttr( - initializer=paddle.nn.initializer.Constant(value=1e-4)), - act="relu") - self.conv3 = nn.Conv2DTranspose( - in_channels=in_channels // 4, - out_channels=1, - kernel_size=kernel_list[2], - stride=2, - weight_attr=ParamAttr( - initializer=paddle.nn.initializer.KaimingUniform()), - bias_attr=get_bias_attr(in_channels // 4), ) - - def forward(self, x): - x = self.conv1(x) - x = self.conv_bn1(x) - x = self.conv2(x) - x = self.conv_bn2(x) - x = self.conv3(x) - x = F.sigmoid(x) - return x - - -class DBHead(nn.Layer): - """ - Differentiable Binarization (DB) for text detection: - see https://arxiv.org/abs/1911.08947 - args: - params(dict): super parameters for build DB network - """ - - def __init__(self, in_channels, k=50, **kwargs): - super(DBHead, self).__init__() - self.k = k - binarize_name_list = [ - 'conv2d_56', 'batch_norm_47', 'conv2d_transpose_0', 'batch_norm_48', - 'conv2d_transpose_1', 'binarize' - ] - thresh_name_list = [ - 'conv2d_57', 'batch_norm_49', 'conv2d_transpose_2', 'batch_norm_50', - 'conv2d_transpose_3', 'thresh' - ] - self.binarize = Head(in_channels, binarize_name_list, **kwargs) - self.thresh = Head(in_channels, thresh_name_list, **kwargs) - - def step_function(self, x, y): - return paddle.reciprocal(1 + paddle.exp(-self.k * (x - y))) - - def forward(self, x, targets=None): - shrink_maps = self.binarize(x) - if not self.training: - return {'maps': shrink_maps} - - threshold_maps = self.thresh(x) - binary_maps = self.step_function(shrink_maps, threshold_maps) - y = paddle.concat([shrink_maps, threshold_maps, binary_maps], axis=1) - return {'maps': y} diff --git a/backend/ppocr/modeling/heads/det_east_head.py b/backend/ppocr/modeling/heads/det_east_head.py deleted file mode 100644 index 004eb5d..0000000 --- a/backend/ppocr/modeling/heads/det_east_head.py +++ /dev/null @@ -1,121 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - padding, - groups=1, - if_act=True, - act=None, - name=None): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name="bn_" + name + "_scale"), - bias_attr=ParamAttr(name="bn_" + name + "_offset"), - moving_mean_name="bn_" + name + "_mean", - moving_variance_name="bn_" + name + "_variance") - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - return x - - -class EASTHead(nn.Layer): - """ - """ - def __init__(self, in_channels, model_name, **kwargs): - super(EASTHead, self).__init__() - self.model_name = model_name - if self.model_name == "large": - num_outputs = [128, 64, 1, 8] - else: - num_outputs = [64, 32, 1, 8] - - self.det_conv1 = ConvBNLayer( - in_channels=in_channels, - out_channels=num_outputs[0], - kernel_size=3, - stride=1, - padding=1, - if_act=True, - act='relu', - name="det_head1") - self.det_conv2 = ConvBNLayer( - in_channels=num_outputs[0], - out_channels=num_outputs[1], - kernel_size=3, - stride=1, - padding=1, - if_act=True, - act='relu', - name="det_head2") - self.score_conv = ConvBNLayer( - in_channels=num_outputs[1], - out_channels=num_outputs[2], - kernel_size=1, - stride=1, - padding=0, - if_act=False, - act=None, - name="f_score") - self.geo_conv = ConvBNLayer( - in_channels=num_outputs[1], - out_channels=num_outputs[3], - kernel_size=1, - stride=1, - padding=0, - if_act=False, - act=None, - name="f_geo") - - def forward(self, x, targets=None): - f_det = self.det_conv1(x) - f_det = self.det_conv2(f_det) - f_score = self.score_conv(f_det) - f_score = F.sigmoid(f_score) - f_geo = self.geo_conv(f_det) - f_geo = (F.sigmoid(f_geo) - 0.5) * 2 * 800 - - pred = {'f_score': f_score, 'f_geo': f_geo} - return pred diff --git a/backend/ppocr/modeling/heads/det_fce_head.py b/backend/ppocr/modeling/heads/det_fce_head.py deleted file mode 100644 index 9503989..0000000 --- a/backend/ppocr/modeling/heads/det_fce_head.py +++ /dev/null @@ -1,99 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textdet/dense_heads/fce_head.py -""" - -from paddle import nn -from paddle import ParamAttr -import paddle.nn.functional as F -from paddle.nn.initializer import Normal -import paddle -from functools import partial - - -def multi_apply(func, *args, **kwargs): - pfunc = partial(func, **kwargs) if kwargs else func - map_results = map(pfunc, *args) - return tuple(map(list, zip(*map_results))) - - -class FCEHead(nn.Layer): - """The class for implementing FCENet head. - FCENet(CVPR2021): Fourier Contour Embedding for Arbitrary-shaped Text - Detection. - - [https://arxiv.org/abs/2104.10442] - - Args: - in_channels (int): The number of input channels. - scales (list[int]) : The scale of each layer. - fourier_degree (int) : The maximum Fourier transform degree k. - """ - - def __init__(self, in_channels, fourier_degree=5): - super().__init__() - assert isinstance(in_channels, int) - - self.downsample_ratio = 1.0 - self.in_channels = in_channels - self.fourier_degree = fourier_degree - self.out_channels_cls = 4 - self.out_channels_reg = (2 * self.fourier_degree + 1) * 2 - - self.out_conv_cls = nn.Conv2D( - in_channels=self.in_channels, - out_channels=self.out_channels_cls, - kernel_size=3, - stride=1, - padding=1, - groups=1, - weight_attr=ParamAttr( - name='cls_weights', - initializer=Normal( - mean=0., std=0.01)), - bias_attr=True) - self.out_conv_reg = nn.Conv2D( - in_channels=self.in_channels, - out_channels=self.out_channels_reg, - kernel_size=3, - stride=1, - padding=1, - groups=1, - weight_attr=ParamAttr( - name='reg_weights', - initializer=Normal( - mean=0., std=0.01)), - bias_attr=True) - - def forward(self, feats, targets=None): - cls_res, reg_res = multi_apply(self.forward_single, feats) - level_num = len(cls_res) - outs = {} - if not self.training: - for i in range(level_num): - tr_pred = F.softmax(cls_res[i][:, 0:2, :, :], axis=1) - tcl_pred = F.softmax(cls_res[i][:, 2:, :, :], axis=1) - outs['level_{}'.format(i)] = paddle.concat( - [tr_pred, tcl_pred, reg_res[i]], axis=1) - else: - preds = [[cls_res[i], reg_res[i]] for i in range(level_num)] - outs['levels'] = preds - return outs - - def forward_single(self, x): - cls_predict = self.out_conv_cls(x) - reg_predict = self.out_conv_reg(x) - return cls_predict, reg_predict diff --git a/backend/ppocr/modeling/heads/det_pse_head.py b/backend/ppocr/modeling/heads/det_pse_head.py deleted file mode 100644 index 32a5b48..0000000 --- a/backend/ppocr/modeling/heads/det_pse_head.py +++ /dev/null @@ -1,37 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/whai362/PSENet/blob/python3/models/head/psenet_head.py -""" - -from paddle import nn - - -class PSEHead(nn.Layer): - def __init__(self, in_channels, hidden_dim=256, out_channels=7, **kwargs): - super(PSEHead, self).__init__() - self.conv1 = nn.Conv2D( - in_channels, hidden_dim, kernel_size=3, stride=1, padding=1) - self.bn1 = nn.BatchNorm2D(hidden_dim) - self.relu1 = nn.ReLU() - - self.conv2 = nn.Conv2D( - hidden_dim, out_channels, kernel_size=1, stride=1, padding=0) - - def forward(self, x, **kwargs): - out = self.conv1(x) - out = self.relu1(self.bn1(out)) - out = self.conv2(out) - return {'maps': out} diff --git a/backend/ppocr/modeling/heads/det_sast_head.py b/backend/ppocr/modeling/heads/det_sast_head.py deleted file mode 100644 index 7a88a2d..0000000 --- a/backend/ppocr/modeling/heads/det_sast_head.py +++ /dev/null @@ -1,128 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - groups=1, - if_act=True, - act=None, - name=None): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name="bn_" + name + "_scale"), - bias_attr=ParamAttr(name="bn_" + name + "_offset"), - moving_mean_name="bn_" + name + "_mean", - moving_variance_name="bn_" + name + "_variance") - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - return x - - -class SAST_Header1(nn.Layer): - def __init__(self, in_channels, **kwargs): - super(SAST_Header1, self).__init__() - out_channels = [64, 64, 128] - self.score_conv = nn.Sequential( - ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_score1'), - ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_score2'), - ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_score3'), - ConvBNLayer(out_channels[2], 1, 3, 1, act=None, name='f_score4') - ) - self.border_conv = nn.Sequential( - ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_border1'), - ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_border2'), - ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_border3'), - ConvBNLayer(out_channels[2], 4, 3, 1, act=None, name='f_border4') - ) - - def forward(self, x): - f_score = self.score_conv(x) - f_score = F.sigmoid(f_score) - f_border = self.border_conv(x) - return f_score, f_border - - -class SAST_Header2(nn.Layer): - def __init__(self, in_channels, **kwargs): - super(SAST_Header2, self).__init__() - out_channels = [64, 64, 128] - self.tvo_conv = nn.Sequential( - ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_tvo1'), - ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_tvo2'), - ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_tvo3'), - ConvBNLayer(out_channels[2], 8, 3, 1, act=None, name='f_tvo4') - ) - self.tco_conv = nn.Sequential( - ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_tco1'), - ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_tco2'), - ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_tco3'), - ConvBNLayer(out_channels[2], 2, 3, 1, act=None, name='f_tco4') - ) - - def forward(self, x): - f_tvo = self.tvo_conv(x) - f_tco = self.tco_conv(x) - return f_tvo, f_tco - - -class SASTHead(nn.Layer): - """ - """ - def __init__(self, in_channels, **kwargs): - super(SASTHead, self).__init__() - - self.head1 = SAST_Header1(in_channels) - self.head2 = SAST_Header2(in_channels) - - def forward(self, x, targets=None): - f_score, f_border = self.head1(x) - f_tvo, f_tco = self.head2(x) - - predicts = {} - predicts['f_score'] = f_score - predicts['f_border'] = f_border - predicts['f_tvo'] = f_tvo - predicts['f_tco'] = f_tco - return predicts \ No newline at end of file diff --git a/backend/ppocr/modeling/heads/e2e_pg_head.py b/backend/ppocr/modeling/heads/e2e_pg_head.py deleted file mode 100644 index 274e1cd..0000000 --- a/backend/ppocr/modeling/heads/e2e_pg_head.py +++ /dev/null @@ -1,253 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - padding, - groups=1, - if_act=True, - act=None, - name=None): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name="bn_" + name + "_scale"), - bias_attr=ParamAttr(name="bn_" + name + "_offset"), - moving_mean_name="bn_" + name + "_mean", - moving_variance_name="bn_" + name + "_variance", - use_global_stats=False) - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - return x - - -class PGHead(nn.Layer): - """ - """ - - def __init__(self, in_channels, **kwargs): - super(PGHead, self).__init__() - self.conv_f_score1 = ConvBNLayer( - in_channels=in_channels, - out_channels=64, - kernel_size=1, - stride=1, - padding=0, - act='relu', - name="conv_f_score{}".format(1)) - self.conv_f_score2 = ConvBNLayer( - in_channels=64, - out_channels=64, - kernel_size=3, - stride=1, - padding=1, - act='relu', - name="conv_f_score{}".format(2)) - self.conv_f_score3 = ConvBNLayer( - in_channels=64, - out_channels=128, - kernel_size=1, - stride=1, - padding=0, - act='relu', - name="conv_f_score{}".format(3)) - - self.conv1 = nn.Conv2D( - in_channels=128, - out_channels=1, - kernel_size=3, - stride=1, - padding=1, - groups=1, - weight_attr=ParamAttr(name="conv_f_score{}".format(4)), - bias_attr=False) - - self.conv_f_boder1 = ConvBNLayer( - in_channels=in_channels, - out_channels=64, - kernel_size=1, - stride=1, - padding=0, - act='relu', - name="conv_f_boder{}".format(1)) - self.conv_f_boder2 = ConvBNLayer( - in_channels=64, - out_channels=64, - kernel_size=3, - stride=1, - padding=1, - act='relu', - name="conv_f_boder{}".format(2)) - self.conv_f_boder3 = ConvBNLayer( - in_channels=64, - out_channels=128, - kernel_size=1, - stride=1, - padding=0, - act='relu', - name="conv_f_boder{}".format(3)) - self.conv2 = nn.Conv2D( - in_channels=128, - out_channels=4, - kernel_size=3, - stride=1, - padding=1, - groups=1, - weight_attr=ParamAttr(name="conv_f_boder{}".format(4)), - bias_attr=False) - self.conv_f_char1 = ConvBNLayer( - in_channels=in_channels, - out_channels=128, - kernel_size=1, - stride=1, - padding=0, - act='relu', - name="conv_f_char{}".format(1)) - self.conv_f_char2 = ConvBNLayer( - in_channels=128, - out_channels=128, - kernel_size=3, - stride=1, - padding=1, - act='relu', - name="conv_f_char{}".format(2)) - self.conv_f_char3 = ConvBNLayer( - in_channels=128, - out_channels=256, - kernel_size=1, - stride=1, - padding=0, - act='relu', - name="conv_f_char{}".format(3)) - self.conv_f_char4 = ConvBNLayer( - in_channels=256, - out_channels=256, - kernel_size=3, - stride=1, - padding=1, - act='relu', - name="conv_f_char{}".format(4)) - self.conv_f_char5 = ConvBNLayer( - in_channels=256, - out_channels=256, - kernel_size=1, - stride=1, - padding=0, - act='relu', - name="conv_f_char{}".format(5)) - self.conv3 = nn.Conv2D( - in_channels=256, - out_channels=37, - kernel_size=3, - stride=1, - padding=1, - groups=1, - weight_attr=ParamAttr(name="conv_f_char{}".format(6)), - bias_attr=False) - - self.conv_f_direc1 = ConvBNLayer( - in_channels=in_channels, - out_channels=64, - kernel_size=1, - stride=1, - padding=0, - act='relu', - name="conv_f_direc{}".format(1)) - self.conv_f_direc2 = ConvBNLayer( - in_channels=64, - out_channels=64, - kernel_size=3, - stride=1, - padding=1, - act='relu', - name="conv_f_direc{}".format(2)) - self.conv_f_direc3 = ConvBNLayer( - in_channels=64, - out_channels=128, - kernel_size=1, - stride=1, - padding=0, - act='relu', - name="conv_f_direc{}".format(3)) - self.conv4 = nn.Conv2D( - in_channels=128, - out_channels=2, - kernel_size=3, - stride=1, - padding=1, - groups=1, - weight_attr=ParamAttr(name="conv_f_direc{}".format(4)), - bias_attr=False) - - def forward(self, x, targets=None): - f_score = self.conv_f_score1(x) - f_score = self.conv_f_score2(f_score) - f_score = self.conv_f_score3(f_score) - f_score = self.conv1(f_score) - f_score = F.sigmoid(f_score) - - # f_border - f_border = self.conv_f_boder1(x) - f_border = self.conv_f_boder2(f_border) - f_border = self.conv_f_boder3(f_border) - f_border = self.conv2(f_border) - - f_char = self.conv_f_char1(x) - f_char = self.conv_f_char2(f_char) - f_char = self.conv_f_char3(f_char) - f_char = self.conv_f_char4(f_char) - f_char = self.conv_f_char5(f_char) - f_char = self.conv3(f_char) - - f_direction = self.conv_f_direc1(x) - f_direction = self.conv_f_direc2(f_direction) - f_direction = self.conv_f_direc3(f_direction) - f_direction = self.conv4(f_direction) - - predicts = {} - predicts['f_score'] = f_score - predicts['f_border'] = f_border - predicts['f_char'] = f_char - predicts['f_direction'] = f_direction - return predicts diff --git a/backend/ppocr/modeling/heads/kie_sdmgr_head.py b/backend/ppocr/modeling/heads/kie_sdmgr_head.py deleted file mode 100644 index ac5f73f..0000000 --- a/backend/ppocr/modeling/heads/kie_sdmgr_head.py +++ /dev/null @@ -1,207 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# reference from : https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/kie/heads/sdmgr_head.py - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - - -class SDMGRHead(nn.Layer): - def __init__(self, - in_channels, - num_chars=92, - visual_dim=16, - fusion_dim=1024, - node_input=32, - node_embed=256, - edge_input=5, - edge_embed=256, - num_gnn=2, - num_classes=26, - bidirectional=False): - super().__init__() - - self.fusion = Block([visual_dim, node_embed], node_embed, fusion_dim) - self.node_embed = nn.Embedding(num_chars, node_input, 0) - hidden = node_embed // 2 if bidirectional else node_embed - self.rnn = nn.LSTM( - input_size=node_input, hidden_size=hidden, num_layers=1) - self.edge_embed = nn.Linear(edge_input, edge_embed) - self.gnn_layers = nn.LayerList( - [GNNLayer(node_embed, edge_embed) for _ in range(num_gnn)]) - self.node_cls = nn.Linear(node_embed, num_classes) - self.edge_cls = nn.Linear(edge_embed, 2) - - def forward(self, input, targets): - relations, texts, x = input - node_nums, char_nums = [], [] - for text in texts: - node_nums.append(text.shape[0]) - char_nums.append(paddle.sum((text > -1).astype(int), axis=-1)) - - max_num = max([char_num.max() for char_num in char_nums]) - all_nodes = paddle.concat([ - paddle.concat( - [text, paddle.zeros( - (text.shape[0], max_num - text.shape[1]))], -1) - for text in texts - ]) - temp = paddle.clip(all_nodes, min=0).astype(int) - embed_nodes = self.node_embed(temp) - rnn_nodes, _ = self.rnn(embed_nodes) - - b, h, w = rnn_nodes.shape - nodes = paddle.zeros([b, w]) - all_nums = paddle.concat(char_nums) - valid = paddle.nonzero((all_nums > 0).astype(int)) - temp_all_nums = ( - paddle.gather(all_nums, valid) - 1).unsqueeze(-1).unsqueeze(-1) - temp_all_nums = paddle.expand(temp_all_nums, [ - temp_all_nums.shape[0], temp_all_nums.shape[1], rnn_nodes.shape[-1] - ]) - temp_all_nodes = paddle.gather(rnn_nodes, valid) - N, C, A = temp_all_nodes.shape - one_hot = F.one_hot( - temp_all_nums[:, 0, :], num_classes=C).transpose([0, 2, 1]) - one_hot = paddle.multiply( - temp_all_nodes, one_hot.astype("float32")).sum(axis=1, keepdim=True) - t = one_hot.expand([N, 1, A]).squeeze(1) - nodes = paddle.scatter(nodes, valid.squeeze(1), t) - - if x is not None: - nodes = self.fusion([x, nodes]) - - all_edges = paddle.concat( - [rel.reshape([-1, rel.shape[-1]]) for rel in relations]) - embed_edges = self.edge_embed(all_edges.astype('float32')) - embed_edges = F.normalize(embed_edges) - - for gnn_layer in self.gnn_layers: - nodes, cat_nodes = gnn_layer(nodes, embed_edges, node_nums) - - node_cls, edge_cls = self.node_cls(nodes), self.edge_cls(cat_nodes) - return node_cls, edge_cls - - -class GNNLayer(nn.Layer): - def __init__(self, node_dim=256, edge_dim=256): - super().__init__() - self.in_fc = nn.Linear(node_dim * 2 + edge_dim, node_dim) - self.coef_fc = nn.Linear(node_dim, 1) - self.out_fc = nn.Linear(node_dim, node_dim) - self.relu = nn.ReLU() - - def forward(self, nodes, edges, nums): - start, cat_nodes = 0, [] - for num in nums: - sample_nodes = nodes[start:start + num] - cat_nodes.append( - paddle.concat([ - paddle.expand(sample_nodes.unsqueeze(1), [-1, num, -1]), - paddle.expand(sample_nodes.unsqueeze(0), [num, -1, -1]) - ], -1).reshape([num**2, -1])) - start += num - cat_nodes = paddle.concat([paddle.concat(cat_nodes), edges], -1) - cat_nodes = self.relu(self.in_fc(cat_nodes)) - coefs = self.coef_fc(cat_nodes) - - start, residuals = 0, [] - for num in nums: - residual = F.softmax( - -paddle.eye(num).unsqueeze(-1) * 1e9 + - coefs[start:start + num**2].reshape([num, num, -1]), 1) - residuals.append((residual * cat_nodes[start:start + num**2] - .reshape([num, num, -1])).sum(1)) - start += num**2 - - nodes += self.relu(self.out_fc(paddle.concat(residuals))) - return [nodes, cat_nodes] - - -class Block(nn.Layer): - def __init__(self, - input_dims, - output_dim, - mm_dim=1600, - chunks=20, - rank=15, - shared=False, - dropout_input=0., - dropout_pre_lin=0., - dropout_output=0., - pos_norm='before_cat'): - super().__init__() - self.rank = rank - self.dropout_input = dropout_input - self.dropout_pre_lin = dropout_pre_lin - self.dropout_output = dropout_output - assert (pos_norm in ['before_cat', 'after_cat']) - self.pos_norm = pos_norm - # Modules - self.linear0 = nn.Linear(input_dims[0], mm_dim) - self.linear1 = (self.linear0 - if shared else nn.Linear(input_dims[1], mm_dim)) - self.merge_linears0 = nn.LayerList() - self.merge_linears1 = nn.LayerList() - self.chunks = self.chunk_sizes(mm_dim, chunks) - for size in self.chunks: - ml0 = nn.Linear(size, size * rank) - self.merge_linears0.append(ml0) - ml1 = ml0 if shared else nn.Linear(size, size * rank) - self.merge_linears1.append(ml1) - self.linear_out = nn.Linear(mm_dim, output_dim) - - def forward(self, x): - x0 = self.linear0(x[0]) - x1 = self.linear1(x[1]) - bs = x1.shape[0] - if self.dropout_input > 0: - x0 = F.dropout(x0, p=self.dropout_input, training=self.training) - x1 = F.dropout(x1, p=self.dropout_input, training=self.training) - x0_chunks = paddle.split(x0, self.chunks, -1) - x1_chunks = paddle.split(x1, self.chunks, -1) - zs = [] - for x0_c, x1_c, m0, m1 in zip(x0_chunks, x1_chunks, self.merge_linears0, - self.merge_linears1): - m = m0(x0_c) * m1(x1_c) # bs x split_size*rank - m = m.reshape([bs, self.rank, -1]) - z = paddle.sum(m, 1) - if self.pos_norm == 'before_cat': - z = paddle.sqrt(F.relu(z)) - paddle.sqrt(F.relu(-z)) - z = F.normalize(z) - zs.append(z) - z = paddle.concat(zs, 1) - if self.pos_norm == 'after_cat': - z = paddle.sqrt(F.relu(z)) - paddle.sqrt(F.relu(-z)) - z = F.normalize(z) - - if self.dropout_pre_lin > 0: - z = F.dropout(z, p=self.dropout_pre_lin, training=self.training) - z = self.linear_out(z) - if self.dropout_output > 0: - z = F.dropout(z, p=self.dropout_output, training=self.training) - return z - - def chunk_sizes(self, dim, chunks): - split_size = (dim + chunks - 1) // chunks - sizes_list = [split_size] * chunks - sizes_list[-1] = sizes_list[-1] - (sum(sizes_list) - dim) - return sizes_list diff --git a/backend/ppocr/modeling/heads/multiheadAttention.py b/backend/ppocr/modeling/heads/multiheadAttention.py deleted file mode 100755 index 900865b..0000000 --- a/backend/ppocr/modeling/heads/multiheadAttention.py +++ /dev/null @@ -1,163 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle.nn import Linear -from paddle.nn.initializer import XavierUniform as xavier_uniform_ -from paddle.nn.initializer import Constant as constant_ -from paddle.nn.initializer import XavierNormal as xavier_normal_ - -zeros_ = constant_(value=0.) -ones_ = constant_(value=1.) - - -class MultiheadAttention(nn.Layer): - """Allows the model to jointly attend to information - from different representation subspaces. - See reference: Attention Is All You Need - - .. math:: - \text{MultiHead}(Q, K, V) = \text{Concat}(head_1,\dots,head_h)W^O - \text{where} head_i = \text{Attention}(QW_i^Q, KW_i^K, VW_i^V) - - Args: - embed_dim: total dimension of the model - num_heads: parallel attention layers, or heads - - """ - - def __init__(self, - embed_dim, - num_heads, - dropout=0., - bias=True, - add_bias_kv=False, - add_zero_attn=False): - super(MultiheadAttention, self).__init__() - self.embed_dim = embed_dim - self.num_heads = num_heads - self.dropout = dropout - self.head_dim = embed_dim // num_heads - assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads" - self.scaling = self.head_dim**-0.5 - self.out_proj = Linear(embed_dim, embed_dim, bias_attr=bias) - self._reset_parameters() - self.conv1 = paddle.nn.Conv2D( - in_channels=embed_dim, out_channels=embed_dim, kernel_size=(1, 1)) - self.conv2 = paddle.nn.Conv2D( - in_channels=embed_dim, out_channels=embed_dim, kernel_size=(1, 1)) - self.conv3 = paddle.nn.Conv2D( - in_channels=embed_dim, out_channels=embed_dim, kernel_size=(1, 1)) - - def _reset_parameters(self): - xavier_uniform_(self.out_proj.weight) - - def forward(self, - query, - key, - value, - key_padding_mask=None, - incremental_state=None, - attn_mask=None): - """ - Inputs of forward function - query: [target length, batch size, embed dim] - key: [sequence length, batch size, embed dim] - value: [sequence length, batch size, embed dim] - key_padding_mask: if True, mask padding based on batch size - incremental_state: if provided, previous time steps are cashed - need_weights: output attn_output_weights - static_kv: key and value are static - - Outputs of forward function - attn_output: [target length, batch size, embed dim] - attn_output_weights: [batch size, target length, sequence length] - """ - q_shape = paddle.shape(query) - src_shape = paddle.shape(key) - q = self._in_proj_q(query) - k = self._in_proj_k(key) - v = self._in_proj_v(value) - q *= self.scaling - q = paddle.transpose( - paddle.reshape( - q, [q_shape[0], q_shape[1], self.num_heads, self.head_dim]), - [1, 2, 0, 3]) - k = paddle.transpose( - paddle.reshape( - k, [src_shape[0], q_shape[1], self.num_heads, self.head_dim]), - [1, 2, 0, 3]) - v = paddle.transpose( - paddle.reshape( - v, [src_shape[0], q_shape[1], self.num_heads, self.head_dim]), - [1, 2, 0, 3]) - if key_padding_mask is not None: - assert key_padding_mask.shape[0] == q_shape[1] - assert key_padding_mask.shape[1] == src_shape[0] - attn_output_weights = paddle.matmul(q, - paddle.transpose(k, [0, 1, 3, 2])) - if attn_mask is not None: - attn_mask = paddle.unsqueeze(paddle.unsqueeze(attn_mask, 0), 0) - attn_output_weights += attn_mask - if key_padding_mask is not None: - attn_output_weights = paddle.reshape( - attn_output_weights, - [q_shape[1], self.num_heads, q_shape[0], src_shape[0]]) - key = paddle.unsqueeze(paddle.unsqueeze(key_padding_mask, 1), 2) - key = paddle.cast(key, 'float32') - y = paddle.full( - shape=paddle.shape(key), dtype='float32', fill_value='-inf') - y = paddle.where(key == 0., key, y) - attn_output_weights += y - attn_output_weights = F.softmax( - attn_output_weights.astype('float32'), - axis=-1, - dtype=paddle.float32 if attn_output_weights.dtype == paddle.float16 - else attn_output_weights.dtype) - attn_output_weights = F.dropout( - attn_output_weights, p=self.dropout, training=self.training) - - attn_output = paddle.matmul(attn_output_weights, v) - attn_output = paddle.reshape( - paddle.transpose(attn_output, [2, 0, 1, 3]), - [q_shape[0], q_shape[1], self.embed_dim]) - attn_output = self.out_proj(attn_output) - - return attn_output - - def _in_proj_q(self, query): - query = paddle.transpose(query, [1, 2, 0]) - query = paddle.unsqueeze(query, axis=2) - res = self.conv1(query) - res = paddle.squeeze(res, axis=2) - res = paddle.transpose(res, [2, 0, 1]) - return res - - def _in_proj_k(self, key): - key = paddle.transpose(key, [1, 2, 0]) - key = paddle.unsqueeze(key, axis=2) - res = self.conv2(key) - res = paddle.squeeze(res, axis=2) - res = paddle.transpose(res, [2, 0, 1]) - return res - - def _in_proj_v(self, value): - value = paddle.transpose(value, [1, 2, 0]) #(1, 2, 0) - value = paddle.unsqueeze(value, axis=2) - res = self.conv3(value) - res = paddle.squeeze(res, axis=2) - res = paddle.transpose(res, [2, 0, 1]) - return res diff --git a/backend/ppocr/modeling/heads/rec_aster_head.py b/backend/ppocr/modeling/heads/rec_aster_head.py deleted file mode 100644 index c95e8fd..0000000 --- a/backend/ppocr/modeling/heads/rec_aster_head.py +++ /dev/null @@ -1,393 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/ayumiymk/aster.pytorch/blob/master/lib/models/attention_recognition_head.py -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import sys - -import paddle -from paddle import nn -from paddle.nn import functional as F - - -class AsterHead(nn.Layer): - def __init__(self, - in_channels, - out_channels, - sDim, - attDim, - max_len_labels, - time_step=25, - beam_width=5, - **kwargs): - super(AsterHead, self).__init__() - self.num_classes = out_channels - self.in_planes = in_channels - self.sDim = sDim - self.attDim = attDim - self.max_len_labels = max_len_labels - self.decoder = AttentionRecognitionHead(in_channels, out_channels, sDim, - attDim, max_len_labels) - self.time_step = time_step - self.embeder = Embedding(self.time_step, in_channels) - self.beam_width = beam_width - self.eos = self.num_classes - 3 - - def forward(self, x, targets=None, embed=None): - return_dict = {} - embedding_vectors = self.embeder(x) - - if self.training: - rec_targets, rec_lengths, _ = targets - rec_pred = self.decoder([x, rec_targets, rec_lengths], - embedding_vectors) - return_dict['rec_pred'] = rec_pred - return_dict['embedding_vectors'] = embedding_vectors - else: - rec_pred, rec_pred_scores = self.decoder.beam_search( - x, self.beam_width, self.eos, embedding_vectors) - return_dict['rec_pred'] = rec_pred - return_dict['rec_pred_scores'] = rec_pred_scores - return_dict['embedding_vectors'] = embedding_vectors - - return return_dict - - -class Embedding(nn.Layer): - def __init__(self, in_timestep, in_planes, mid_dim=4096, embed_dim=300): - super(Embedding, self).__init__() - self.in_timestep = in_timestep - self.in_planes = in_planes - self.embed_dim = embed_dim - self.mid_dim = mid_dim - self.eEmbed = nn.Linear( - in_timestep * in_planes, - self.embed_dim) # Embed encoder output to a word-embedding like - - def forward(self, x): - x = paddle.reshape(x, [paddle.shape(x)[0], -1]) - x = self.eEmbed(x) - return x - - -class AttentionRecognitionHead(nn.Layer): - """ - input: [b x 16 x 64 x in_planes] - output: probability sequence: [b x T x num_classes] - """ - - def __init__(self, in_channels, out_channels, sDim, attDim, max_len_labels): - super(AttentionRecognitionHead, self).__init__() - self.num_classes = out_channels # this is the output classes. So it includes the . - self.in_planes = in_channels - self.sDim = sDim - self.attDim = attDim - self.max_len_labels = max_len_labels - - self.decoder = DecoderUnit( - sDim=sDim, xDim=in_channels, yDim=self.num_classes, attDim=attDim) - - def forward(self, x, embed): - x, targets, lengths = x - batch_size = paddle.shape(x)[0] - # Decoder - state = self.decoder.get_initial_state(embed) - outputs = [] - for i in range(max(lengths)): - if i == 0: - y_prev = paddle.full( - shape=[batch_size], fill_value=self.num_classes) - else: - y_prev = targets[:, i - 1] - output, state = self.decoder(x, state, y_prev) - outputs.append(output) - outputs = paddle.concat([_.unsqueeze(1) for _ in outputs], 1) - return outputs - - # inference stage. - def sample(self, x): - x, _, _ = x - batch_size = x.size(0) - # Decoder - state = paddle.zeros([1, batch_size, self.sDim]) - - predicted_ids, predicted_scores = [], [] - for i in range(self.max_len_labels): - if i == 0: - y_prev = paddle.full( - shape=[batch_size], fill_value=self.num_classes) - else: - y_prev = predicted - - output, state = self.decoder(x, state, y_prev) - output = F.softmax(output, axis=1) - score, predicted = output.max(1) - predicted_ids.append(predicted.unsqueeze(1)) - predicted_scores.append(score.unsqueeze(1)) - predicted_ids = paddle.concat([predicted_ids, 1]) - predicted_scores = paddle.concat([predicted_scores, 1]) - # return predicted_ids.squeeze(), predicted_scores.squeeze() - return predicted_ids, predicted_scores - - def beam_search(self, x, beam_width, eos, embed): - def _inflate(tensor, times, dim): - repeat_dims = [1] * tensor.dim() - repeat_dims[dim] = times - output = paddle.tile(tensor, repeat_dims) - return output - - # https://github.com/IBM/pytorch-seq2seq/blob/fede87655ddce6c94b38886089e05321dc9802af/seq2seq/models/TopKDecoder.py - batch_size, l, d = x.shape - x = paddle.tile( - paddle.transpose( - x.unsqueeze(1), perm=[1, 0, 2, 3]), [beam_width, 1, 1, 1]) - inflated_encoder_feats = paddle.reshape( - paddle.transpose( - x, perm=[1, 0, 2, 3]), [-1, l, d]) - - # Initialize the decoder - state = self.decoder.get_initial_state(embed, tile_times=beam_width) - - pos_index = paddle.reshape( - paddle.arange(batch_size) * beam_width, shape=[-1, 1]) - - # Initialize the scores - sequence_scores = paddle.full( - shape=[batch_size * beam_width, 1], fill_value=-float('Inf')) - index = [i * beam_width for i in range(0, batch_size)] - sequence_scores[index] = 0.0 - - # Initialize the input vector - y_prev = paddle.full( - shape=[batch_size * beam_width], fill_value=self.num_classes) - - # Store decisions for backtracking - stored_scores = list() - stored_predecessors = list() - stored_emitted_symbols = list() - - for i in range(self.max_len_labels): - output, state = self.decoder(inflated_encoder_feats, state, y_prev) - state = paddle.unsqueeze(state, axis=0) - log_softmax_output = paddle.nn.functional.log_softmax( - output, axis=1) - - sequence_scores = _inflate(sequence_scores, self.num_classes, 1) - sequence_scores += log_softmax_output - scores, candidates = paddle.topk( - paddle.reshape(sequence_scores, [batch_size, -1]), - beam_width, - axis=1) - - # Reshape input = (bk, 1) and sequence_scores = (bk, 1) - y_prev = paddle.reshape( - candidates % self.num_classes, shape=[batch_size * beam_width]) - sequence_scores = paddle.reshape( - scores, shape=[batch_size * beam_width, 1]) - - # Update fields for next timestep - pos_index = paddle.expand_as(pos_index, candidates) - predecessors = paddle.cast( - candidates / self.num_classes + pos_index, dtype='int64') - predecessors = paddle.reshape( - predecessors, shape=[batch_size * beam_width, 1]) - state = paddle.index_select( - state, index=predecessors.squeeze(), axis=1) - - # Update sequence socres and erase scores for symbol so that they aren't expanded - stored_scores.append(sequence_scores.clone()) - y_prev = paddle.reshape(y_prev, shape=[-1, 1]) - eos_prev = paddle.full_like(y_prev, fill_value=eos) - mask = eos_prev == y_prev - mask = paddle.nonzero(mask) - if mask.dim() > 0: - sequence_scores = sequence_scores.numpy() - mask = mask.numpy() - sequence_scores[mask] = -float('inf') - sequence_scores = paddle.to_tensor(sequence_scores) - - # Cache results for backtracking - stored_predecessors.append(predecessors) - y_prev = paddle.squeeze(y_prev) - stored_emitted_symbols.append(y_prev) - - # Do backtracking to return the optimal values - #====== backtrak ======# - # Initialize return variables given different types - p = list() - l = [[self.max_len_labels] * beam_width for _ in range(batch_size) - ] # Placeholder for lengths of top-k sequences - - # the last step output of the beams are not sorted - # thus they are sorted here - sorted_score, sorted_idx = paddle.topk( - paddle.reshape( - stored_scores[-1], shape=[batch_size, beam_width]), - beam_width) - - # initialize the sequence scores with the sorted last step beam scores - s = sorted_score.clone() - - batch_eos_found = [0] * batch_size # the number of EOS found - # in the backward loop below for each batch - t = self.max_len_labels - 1 - # initialize the back pointer with the sorted order of the last step beams. - # add pos_index for indexing variable with b*k as the first dimension. - t_predecessors = paddle.reshape( - sorted_idx + pos_index.expand_as(sorted_idx), - shape=[batch_size * beam_width]) - while t >= 0: - # Re-order the variables with the back pointer - current_symbol = paddle.index_select( - stored_emitted_symbols[t], index=t_predecessors, axis=0) - t_predecessors = paddle.index_select( - stored_predecessors[t].squeeze(), index=t_predecessors, axis=0) - eos_indices = stored_emitted_symbols[t] == eos - eos_indices = paddle.nonzero(eos_indices) - - if eos_indices.dim() > 0: - for i in range(eos_indices.shape[0] - 1, -1, -1): - # Indices of the EOS symbol for both variables - # with b*k as the first dimension, and b, k for - # the first two dimensions - idx = eos_indices[i] - b_idx = int(idx[0] / beam_width) - # The indices of the replacing position - # according to the replacement strategy noted above - res_k_idx = beam_width - (batch_eos_found[b_idx] % - beam_width) - 1 - batch_eos_found[b_idx] += 1 - res_idx = b_idx * beam_width + res_k_idx - - # Replace the old information in return variables - # with the new ended sequence information - t_predecessors[res_idx] = stored_predecessors[t][idx[0]] - current_symbol[res_idx] = stored_emitted_symbols[t][idx[0]] - s[b_idx, res_k_idx] = stored_scores[t][idx[0], 0] - l[b_idx][res_k_idx] = t + 1 - - # record the back tracked results - p.append(current_symbol) - t -= 1 - - # Sort and re-order again as the added ended sequences may change - # the order (very unlikely) - s, re_sorted_idx = s.topk(beam_width) - for b_idx in range(batch_size): - l[b_idx] = [ - l[b_idx][k_idx.item()] for k_idx in re_sorted_idx[b_idx, :] - ] - - re_sorted_idx = paddle.reshape( - re_sorted_idx + pos_index.expand_as(re_sorted_idx), - [batch_size * beam_width]) - - # Reverse the sequences and re-order at the same time - # It is reversed because the backtracking happens in reverse time order - p = [ - paddle.reshape( - paddle.index_select(step, re_sorted_idx, 0), - shape=[batch_size, beam_width, -1]) for step in reversed(p) - ] - p = paddle.concat(p, -1)[:, 0, :] - return p, paddle.ones_like(p) - - -class AttentionUnit(nn.Layer): - def __init__(self, sDim, xDim, attDim): - super(AttentionUnit, self).__init__() - - self.sDim = sDim - self.xDim = xDim - self.attDim = attDim - - self.sEmbed = nn.Linear(sDim, attDim) - self.xEmbed = nn.Linear(xDim, attDim) - self.wEmbed = nn.Linear(attDim, 1) - - def forward(self, x, sPrev): - batch_size, T, _ = x.shape # [b x T x xDim] - x = paddle.reshape(x, [-1, self.xDim]) # [(b x T) x xDim] - xProj = self.xEmbed(x) # [(b x T) x attDim] - xProj = paddle.reshape(xProj, [batch_size, T, -1]) # [b x T x attDim] - - sPrev = sPrev.squeeze(0) - sProj = self.sEmbed(sPrev) # [b x attDim] - sProj = paddle.unsqueeze(sProj, 1) # [b x 1 x attDim] - sProj = paddle.expand(sProj, - [batch_size, T, self.attDim]) # [b x T x attDim] - - sumTanh = paddle.tanh(sProj + xProj) - sumTanh = paddle.reshape(sumTanh, [-1, self.attDim]) - - vProj = self.wEmbed(sumTanh) # [(b x T) x 1] - vProj = paddle.reshape(vProj, [batch_size, T]) - alpha = F.softmax( - vProj, axis=1) # attention weights for each sample in the minibatch - return alpha - - -class DecoderUnit(nn.Layer): - def __init__(self, sDim, xDim, yDim, attDim): - super(DecoderUnit, self).__init__() - self.sDim = sDim - self.xDim = xDim - self.yDim = yDim - self.attDim = attDim - self.emdDim = attDim - - self.attention_unit = AttentionUnit(sDim, xDim, attDim) - self.tgt_embedding = nn.Embedding( - yDim + 1, self.emdDim, weight_attr=nn.initializer.Normal( - std=0.01)) # the last is used for - self.gru = nn.GRUCell(input_size=xDim + self.emdDim, hidden_size=sDim) - self.fc = nn.Linear( - sDim, - yDim, - weight_attr=nn.initializer.Normal(std=0.01), - bias_attr=nn.initializer.Constant(value=0)) - self.embed_fc = nn.Linear(300, self.sDim) - - def get_initial_state(self, embed, tile_times=1): - assert embed.shape[1] == 300 - state = self.embed_fc(embed) # N * sDim - if tile_times != 1: - state = state.unsqueeze(1) - trans_state = paddle.transpose(state, perm=[1, 0, 2]) - state = paddle.tile(trans_state, repeat_times=[tile_times, 1, 1]) - trans_state = paddle.transpose(state, perm=[1, 0, 2]) - state = paddle.reshape(trans_state, shape=[-1, self.sDim]) - state = state.unsqueeze(0) # 1 * N * sDim - return state - - def forward(self, x, sPrev, yPrev): - # x: feature sequence from the image decoder. - batch_size, T, _ = x.shape - alpha = self.attention_unit(x, sPrev) - context = paddle.squeeze(paddle.matmul(alpha.unsqueeze(1), x), axis=1) - yPrev = paddle.cast(yPrev, dtype="int64") - yProj = self.tgt_embedding(yPrev) - - concat_context = paddle.concat([yProj, context], 1) - concat_context = paddle.squeeze(concat_context, 1) - sPrev = paddle.squeeze(sPrev, 0) - output, state = self.gru(concat_context, sPrev) - output = paddle.squeeze(output, axis=1) - output = self.fc(output) - return output, state \ No newline at end of file diff --git a/backend/ppocr/modeling/heads/rec_att_head.py b/backend/ppocr/modeling/heads/rec_att_head.py deleted file mode 100644 index ab8b119..0000000 --- a/backend/ppocr/modeling/heads/rec_att_head.py +++ /dev/null @@ -1,202 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -import paddle.nn as nn -import paddle.nn.functional as F -import numpy as np - - -class AttentionHead(nn.Layer): - def __init__(self, in_channels, out_channels, hidden_size, **kwargs): - super(AttentionHead, self).__init__() - self.input_size = in_channels - self.hidden_size = hidden_size - self.num_classes = out_channels - - self.attention_cell = AttentionGRUCell( - in_channels, hidden_size, out_channels, use_gru=False) - self.generator = nn.Linear(hidden_size, out_channels) - - def _char_to_onehot(self, input_char, onehot_dim): - input_ont_hot = F.one_hot(input_char, onehot_dim) - return input_ont_hot - - def forward(self, inputs, targets=None, batch_max_length=25): - batch_size = paddle.shape(inputs)[0] - num_steps = batch_max_length - - hidden = paddle.zeros((batch_size, self.hidden_size)) - output_hiddens = [] - - if targets is not None: - for i in range(num_steps): - char_onehots = self._char_to_onehot( - targets[:, i], onehot_dim=self.num_classes) - (outputs, hidden), alpha = self.attention_cell(hidden, inputs, - char_onehots) - output_hiddens.append(paddle.unsqueeze(outputs, axis=1)) - output = paddle.concat(output_hiddens, axis=1) - probs = self.generator(output) - else: - targets = paddle.zeros(shape=[batch_size], dtype="int32") - probs = None - char_onehots = None - outputs = None - alpha = None - - for i in range(num_steps): - char_onehots = self._char_to_onehot( - targets, onehot_dim=self.num_classes) - (outputs, hidden), alpha = self.attention_cell(hidden, inputs, - char_onehots) - probs_step = self.generator(outputs) - if probs is None: - probs = paddle.unsqueeze(probs_step, axis=1) - else: - probs = paddle.concat( - [probs, paddle.unsqueeze( - probs_step, axis=1)], axis=1) - next_input = probs_step.argmax(axis=1) - targets = next_input - if not self.training: - probs = paddle.nn.functional.softmax(probs, axis=2) - return probs - - -class AttentionGRUCell(nn.Layer): - def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False): - super(AttentionGRUCell, self).__init__() - self.i2h = nn.Linear(input_size, hidden_size, bias_attr=False) - self.h2h = nn.Linear(hidden_size, hidden_size) - self.score = nn.Linear(hidden_size, 1, bias_attr=False) - - self.rnn = nn.GRUCell( - input_size=input_size + num_embeddings, hidden_size=hidden_size) - - self.hidden_size = hidden_size - - def forward(self, prev_hidden, batch_H, char_onehots): - - batch_H_proj = self.i2h(batch_H) - prev_hidden_proj = paddle.unsqueeze(self.h2h(prev_hidden), axis=1) - - res = paddle.add(batch_H_proj, prev_hidden_proj) - res = paddle.tanh(res) - e = self.score(res) - - alpha = F.softmax(e, axis=1) - alpha = paddle.transpose(alpha, [0, 2, 1]) - context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1) - concat_context = paddle.concat([context, char_onehots], 1) - - cur_hidden = self.rnn(concat_context, prev_hidden) - - return cur_hidden, alpha - - -class AttentionLSTM(nn.Layer): - def __init__(self, in_channels, out_channels, hidden_size, **kwargs): - super(AttentionLSTM, self).__init__() - self.input_size = in_channels - self.hidden_size = hidden_size - self.num_classes = out_channels - - self.attention_cell = AttentionLSTMCell( - in_channels, hidden_size, out_channels, use_gru=False) - self.generator = nn.Linear(hidden_size, out_channels) - - def _char_to_onehot(self, input_char, onehot_dim): - input_ont_hot = F.one_hot(input_char, onehot_dim) - return input_ont_hot - - def forward(self, inputs, targets=None, batch_max_length=25): - batch_size = inputs.shape[0] - num_steps = batch_max_length - - hidden = (paddle.zeros((batch_size, self.hidden_size)), paddle.zeros( - (batch_size, self.hidden_size))) - output_hiddens = [] - - if targets is not None: - for i in range(num_steps): - # one-hot vectors for a i-th char - char_onehots = self._char_to_onehot( - targets[:, i], onehot_dim=self.num_classes) - hidden, alpha = self.attention_cell(hidden, inputs, - char_onehots) - - hidden = (hidden[1][0], hidden[1][1]) - output_hiddens.append(paddle.unsqueeze(hidden[0], axis=1)) - output = paddle.concat(output_hiddens, axis=1) - probs = self.generator(output) - - else: - targets = paddle.zeros(shape=[batch_size], dtype="int32") - probs = None - - for i in range(num_steps): - char_onehots = self._char_to_onehot( - targets, onehot_dim=self.num_classes) - hidden, alpha = self.attention_cell(hidden, inputs, - char_onehots) - probs_step = self.generator(hidden[0]) - hidden = (hidden[1][0], hidden[1][1]) - if probs is None: - probs = paddle.unsqueeze(probs_step, axis=1) - else: - probs = paddle.concat( - [probs, paddle.unsqueeze( - probs_step, axis=1)], axis=1) - - next_input = probs_step.argmax(axis=1) - - targets = next_input - - return probs - - -class AttentionLSTMCell(nn.Layer): - def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False): - super(AttentionLSTMCell, self).__init__() - self.i2h = nn.Linear(input_size, hidden_size, bias_attr=False) - self.h2h = nn.Linear(hidden_size, hidden_size) - self.score = nn.Linear(hidden_size, 1, bias_attr=False) - if not use_gru: - self.rnn = nn.LSTMCell( - input_size=input_size + num_embeddings, hidden_size=hidden_size) - else: - self.rnn = nn.GRUCell( - input_size=input_size + num_embeddings, hidden_size=hidden_size) - - self.hidden_size = hidden_size - - def forward(self, prev_hidden, batch_H, char_onehots): - batch_H_proj = self.i2h(batch_H) - prev_hidden_proj = paddle.unsqueeze(self.h2h(prev_hidden[0]), axis=1) - res = paddle.add(batch_H_proj, prev_hidden_proj) - res = paddle.tanh(res) - e = self.score(res) - - alpha = F.softmax(e, axis=1) - alpha = paddle.transpose(alpha, [0, 2, 1]) - context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1) - concat_context = paddle.concat([context, char_onehots], 1) - cur_hidden = self.rnn(concat_context, prev_hidden) - - return cur_hidden, alpha diff --git a/backend/ppocr/modeling/heads/rec_ctc_head.py b/backend/ppocr/modeling/heads/rec_ctc_head.py deleted file mode 100755 index 6c1cf06..0000000 --- a/backend/ppocr/modeling/heads/rec_ctc_head.py +++ /dev/null @@ -1,87 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -import paddle -from paddle import ParamAttr, nn -from paddle.nn import functional as F - - -def get_para_bias_attr(l2_decay, k): - regularizer = paddle.regularizer.L2Decay(l2_decay) - stdv = 1.0 / math.sqrt(k * 1.0) - initializer = nn.initializer.Uniform(-stdv, stdv) - weight_attr = ParamAttr(regularizer=regularizer, initializer=initializer) - bias_attr = ParamAttr(regularizer=regularizer, initializer=initializer) - return [weight_attr, bias_attr] - - -class CTCHead(nn.Layer): - def __init__(self, - in_channels, - out_channels, - fc_decay=0.0004, - mid_channels=None, - return_feats=False, - **kwargs): - super(CTCHead, self).__init__() - if mid_channels is None: - weight_attr, bias_attr = get_para_bias_attr( - l2_decay=fc_decay, k=in_channels) - self.fc = nn.Linear( - in_channels, - out_channels, - weight_attr=weight_attr, - bias_attr=bias_attr) - else: - weight_attr1, bias_attr1 = get_para_bias_attr( - l2_decay=fc_decay, k=in_channels) - self.fc1 = nn.Linear( - in_channels, - mid_channels, - weight_attr=weight_attr1, - bias_attr=bias_attr1) - - weight_attr2, bias_attr2 = get_para_bias_attr( - l2_decay=fc_decay, k=mid_channels) - self.fc2 = nn.Linear( - mid_channels, - out_channels, - weight_attr=weight_attr2, - bias_attr=bias_attr2) - self.out_channels = out_channels - self.mid_channels = mid_channels - self.return_feats = return_feats - - def forward(self, x, targets=None): - if self.mid_channels is None: - predicts = self.fc(x) - else: - x = self.fc1(x) - predicts = self.fc2(x) - - if self.return_feats: - result = (x, predicts) - else: - result = predicts - if not self.training: - predicts = F.softmax(predicts, axis=2) - result = predicts - - return result diff --git a/backend/ppocr/modeling/heads/rec_multi_head.py b/backend/ppocr/modeling/heads/rec_multi_head.py deleted file mode 100644 index ef78bf9..0000000 --- a/backend/ppocr/modeling/heads/rec_multi_head.py +++ /dev/null @@ -1,73 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F - -from ppocr.modeling.necks.rnn import Im2Seq, EncoderWithRNN, EncoderWithFC, SequenceEncoder, EncoderWithSVTR -from .rec_ctc_head import CTCHead -from .rec_sar_head import SARHead - - -class MultiHead(nn.Layer): - def __init__(self, in_channels, out_channels_list, **kwargs): - super().__init__() - self.head_list = kwargs.pop('head_list') - self.gtc_head = 'sar' - assert len(self.head_list) >= 2 - for idx, head_name in enumerate(self.head_list): - name = list(head_name)[0] - if name == 'SARHead': - # sar head - sar_args = self.head_list[idx][name] - self.sar_head = eval(name)(in_channels=in_channels, \ - out_channels=out_channels_list['SARLabelDecode'], **sar_args) - elif name == 'CTCHead': - # ctc neck - self.encoder_reshape = Im2Seq(in_channels) - neck_args = self.head_list[idx][name]['Neck'] - encoder_type = neck_args.pop('name') - self.encoder = encoder_type - self.ctc_encoder = SequenceEncoder(in_channels=in_channels, \ - encoder_type=encoder_type, **neck_args) - # ctc head - head_args = self.head_list[idx][name]['Head'] - self.ctc_head = eval(name)(in_channels=self.ctc_encoder.out_channels, \ - out_channels=out_channels_list['CTCLabelDecode'], **head_args) - else: - raise NotImplementedError( - '{} is not supported in MultiHead yet'.format(name)) - - def forward(self, x, targets=None): - ctc_encoder = self.ctc_encoder(x) - ctc_out = self.ctc_head(ctc_encoder, targets) - head_out = dict() - head_out['ctc'] = ctc_out - head_out['ctc_neck'] = ctc_encoder - # eval mode - if not self.training: - return ctc_out - if self.gtc_head == 'sar': - sar_out = self.sar_head(x, targets[1:]) - head_out['sar'] = sar_out - return head_out - else: - return head_out diff --git a/backend/ppocr/modeling/heads/rec_nrtr_head.py b/backend/ppocr/modeling/heads/rec_nrtr_head.py deleted file mode 100644 index 38ba0c9..0000000 --- a/backend/ppocr/modeling/heads/rec_nrtr_head.py +++ /dev/null @@ -1,826 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math -import paddle -import copy -from paddle import nn -import paddle.nn.functional as F -from paddle.nn import LayerList -from paddle.nn.initializer import XavierNormal as xavier_uniform_ -from paddle.nn import Dropout, Linear, LayerNorm, Conv2D -import numpy as np -from ppocr.modeling.heads.multiheadAttention import MultiheadAttention -from paddle.nn.initializer import Constant as constant_ -from paddle.nn.initializer import XavierNormal as xavier_normal_ - -zeros_ = constant_(value=0.) -ones_ = constant_(value=1.) - - -class Transformer(nn.Layer): - """A transformer model. User is able to modify the attributes as needed. The architechture - is based on the paper "Attention Is All You Need". Ashish Vaswani, Noam Shazeer, - Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and - Illia Polosukhin. 2017. Attention is all you need. In Advances in Neural Information - Processing Systems, pages 6000-6010. - - Args: - d_model: the number of expected features in the encoder/decoder inputs (default=512). - nhead: the number of heads in the multiheadattention models (default=8). - num_encoder_layers: the number of sub-encoder-layers in the encoder (default=6). - num_decoder_layers: the number of sub-decoder-layers in the decoder (default=6). - dim_feedforward: the dimension of the feedforward network model (default=2048). - dropout: the dropout value (default=0.1). - custom_encoder: custom encoder (default=None). - custom_decoder: custom decoder (default=None). - - """ - - def __init__(self, - d_model=512, - nhead=8, - num_encoder_layers=6, - beam_size=0, - num_decoder_layers=6, - dim_feedforward=1024, - attention_dropout_rate=0.0, - residual_dropout_rate=0.1, - custom_encoder=None, - custom_decoder=None, - in_channels=0, - out_channels=0, - scale_embedding=True): - super(Transformer, self).__init__() - self.out_channels = out_channels + 1 - self.embedding = Embeddings( - d_model=d_model, - vocab=self.out_channels, - padding_idx=0, - scale_embedding=scale_embedding) - self.positional_encoding = PositionalEncoding( - dropout=residual_dropout_rate, - dim=d_model, ) - if custom_encoder is not None: - self.encoder = custom_encoder - else: - if num_encoder_layers > 0: - encoder_layer = TransformerEncoderLayer( - d_model, nhead, dim_feedforward, attention_dropout_rate, - residual_dropout_rate) - self.encoder = TransformerEncoder(encoder_layer, - num_encoder_layers) - else: - self.encoder = None - - if custom_decoder is not None: - self.decoder = custom_decoder - else: - decoder_layer = TransformerDecoderLayer( - d_model, nhead, dim_feedforward, attention_dropout_rate, - residual_dropout_rate) - self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers) - - self._reset_parameters() - self.beam_size = beam_size - self.d_model = d_model - self.nhead = nhead - self.tgt_word_prj = nn.Linear( - d_model, self.out_channels, bias_attr=False) - w0 = np.random.normal(0.0, d_model**-0.5, - (d_model, self.out_channels)).astype(np.float32) - self.tgt_word_prj.weight.set_value(w0) - self.apply(self._init_weights) - - def _init_weights(self, m): - - if isinstance(m, nn.Conv2D): - xavier_normal_(m.weight) - if m.bias is not None: - zeros_(m.bias) - - def forward_train(self, src, tgt): - tgt = tgt[:, :-1] - - tgt_key_padding_mask = self.generate_padding_mask(tgt) - tgt = self.embedding(tgt).transpose([1, 0, 2]) - tgt = self.positional_encoding(tgt) - tgt_mask = self.generate_square_subsequent_mask(tgt.shape[0]) - - if self.encoder is not None: - src = self.positional_encoding(src.transpose([1, 0, 2])) - memory = self.encoder(src) - else: - memory = src.squeeze(2).transpose([2, 0, 1]) - output = self.decoder( - tgt, - memory, - tgt_mask=tgt_mask, - memory_mask=None, - tgt_key_padding_mask=tgt_key_padding_mask, - memory_key_padding_mask=None) - output = output.transpose([1, 0, 2]) - logit = self.tgt_word_prj(output) - return logit - - def forward(self, src, targets=None): - """Take in and process masked source/target sequences. - Args: - src: the sequence to the encoder (required). - tgt: the sequence to the decoder (required). - Shape: - - src: :math:`(S, N, E)`. - - tgt: :math:`(T, N, E)`. - Examples: - >>> output = transformer_model(src, tgt) - """ - - if self.training: - max_len = targets[1].max() - tgt = targets[0][:, :2 + max_len] - return self.forward_train(src, tgt) - else: - if self.beam_size > 0: - return self.forward_beam(src) - else: - return self.forward_test(src) - - def forward_test(self, src): - bs = paddle.shape(src)[0] - if self.encoder is not None: - src = self.positional_encoding(paddle.transpose(src, [1, 0, 2])) - memory = self.encoder(src) - else: - memory = paddle.transpose(paddle.squeeze(src, 2), [2, 0, 1]) - dec_seq = paddle.full((bs, 1), 2, dtype=paddle.int64) - dec_prob = paddle.full((bs, 1), 1., dtype=paddle.float32) - for len_dec_seq in range(1, 25): - dec_seq_embed = paddle.transpose(self.embedding(dec_seq), [1, 0, 2]) - dec_seq_embed = self.positional_encoding(dec_seq_embed) - tgt_mask = self.generate_square_subsequent_mask( - paddle.shape(dec_seq_embed)[0]) - output = self.decoder( - dec_seq_embed, - memory, - tgt_mask=tgt_mask, - memory_mask=None, - tgt_key_padding_mask=None, - memory_key_padding_mask=None) - dec_output = paddle.transpose(output, [1, 0, 2]) - dec_output = dec_output[:, -1, :] - word_prob = F.softmax(self.tgt_word_prj(dec_output), axis=1) - preds_idx = paddle.argmax(word_prob, axis=1) - if paddle.equal_all( - preds_idx, - paddle.full( - paddle.shape(preds_idx), 3, dtype='int64')): - break - preds_prob = paddle.max(word_prob, axis=1) - dec_seq = paddle.concat( - [dec_seq, paddle.reshape(preds_idx, [-1, 1])], axis=1) - dec_prob = paddle.concat( - [dec_prob, paddle.reshape(preds_prob, [-1, 1])], axis=1) - return [dec_seq, dec_prob] - - def forward_beam(self, images): - ''' Translation work in one batch ''' - - def get_inst_idx_to_tensor_position_map(inst_idx_list): - ''' Indicate the position of an instance in a tensor. ''' - return { - inst_idx: tensor_position - for tensor_position, inst_idx in enumerate(inst_idx_list) - } - - def collect_active_part(beamed_tensor, curr_active_inst_idx, - n_prev_active_inst, n_bm): - ''' Collect tensor parts associated to active instances. ''' - - beamed_tensor_shape = paddle.shape(beamed_tensor) - n_curr_active_inst = len(curr_active_inst_idx) - new_shape = (n_curr_active_inst * n_bm, beamed_tensor_shape[1], - beamed_tensor_shape[2]) - - beamed_tensor = beamed_tensor.reshape([n_prev_active_inst, -1]) - beamed_tensor = beamed_tensor.index_select( - curr_active_inst_idx, axis=0) - beamed_tensor = beamed_tensor.reshape(new_shape) - - return beamed_tensor - - def collate_active_info(src_enc, inst_idx_to_position_map, - active_inst_idx_list): - # Sentences which are still active are collected, - # so the decoder will not run on completed sentences. - - n_prev_active_inst = len(inst_idx_to_position_map) - active_inst_idx = [ - inst_idx_to_position_map[k] for k in active_inst_idx_list - ] - active_inst_idx = paddle.to_tensor(active_inst_idx, dtype='int64') - active_src_enc = collect_active_part( - src_enc.transpose([1, 0, 2]), active_inst_idx, - n_prev_active_inst, n_bm).transpose([1, 0, 2]) - active_inst_idx_to_position_map = get_inst_idx_to_tensor_position_map( - active_inst_idx_list) - return active_src_enc, active_inst_idx_to_position_map - - def beam_decode_step(inst_dec_beams, len_dec_seq, enc_output, - inst_idx_to_position_map, n_bm, - memory_key_padding_mask): - ''' Decode and update beam status, and then return active beam idx ''' - - def prepare_beam_dec_seq(inst_dec_beams, len_dec_seq): - dec_partial_seq = [ - b.get_current_state() for b in inst_dec_beams if not b.done - ] - dec_partial_seq = paddle.stack(dec_partial_seq) - dec_partial_seq = dec_partial_seq.reshape([-1, len_dec_seq]) - return dec_partial_seq - - def predict_word(dec_seq, enc_output, n_active_inst, n_bm, - memory_key_padding_mask): - dec_seq = paddle.transpose(self.embedding(dec_seq), [1, 0, 2]) - dec_seq = self.positional_encoding(dec_seq) - tgt_mask = self.generate_square_subsequent_mask( - paddle.shape(dec_seq)[0]) - dec_output = self.decoder( - dec_seq, - enc_output, - tgt_mask=tgt_mask, - tgt_key_padding_mask=None, - memory_key_padding_mask=memory_key_padding_mask, ) - dec_output = paddle.transpose(dec_output, [1, 0, 2]) - dec_output = dec_output[:, - -1, :] # Pick the last step: (bh * bm) * d_h - word_prob = F.softmax(self.tgt_word_prj(dec_output), axis=1) - word_prob = paddle.reshape(word_prob, [n_active_inst, n_bm, -1]) - return word_prob - - def collect_active_inst_idx_list(inst_beams, word_prob, - inst_idx_to_position_map): - active_inst_idx_list = [] - for inst_idx, inst_position in inst_idx_to_position_map.items(): - is_inst_complete = inst_beams[inst_idx].advance(word_prob[ - inst_position]) - if not is_inst_complete: - active_inst_idx_list += [inst_idx] - - return active_inst_idx_list - - n_active_inst = len(inst_idx_to_position_map) - dec_seq = prepare_beam_dec_seq(inst_dec_beams, len_dec_seq) - word_prob = predict_word(dec_seq, enc_output, n_active_inst, n_bm, - None) - # Update the beam with predicted word prob information and collect incomplete instances - active_inst_idx_list = collect_active_inst_idx_list( - inst_dec_beams, word_prob, inst_idx_to_position_map) - return active_inst_idx_list - - def collect_hypothesis_and_scores(inst_dec_beams, n_best): - all_hyp, all_scores = [], [] - for inst_idx in range(len(inst_dec_beams)): - scores, tail_idxs = inst_dec_beams[inst_idx].sort_scores() - all_scores += [scores[:n_best]] - hyps = [ - inst_dec_beams[inst_idx].get_hypothesis(i) - for i in tail_idxs[:n_best] - ] - all_hyp += [hyps] - return all_hyp, all_scores - - with paddle.no_grad(): - #-- Encode - if self.encoder is not None: - src = self.positional_encoding(images.transpose([1, 0, 2])) - src_enc = self.encoder(src) - else: - src_enc = images.squeeze(2).transpose([0, 2, 1]) - - n_bm = self.beam_size - src_shape = paddle.shape(src_enc) - inst_dec_beams = [Beam(n_bm) for _ in range(1)] - active_inst_idx_list = list(range(1)) - # Repeat data for beam search - src_enc = paddle.tile(src_enc, [1, n_bm, 1]) - inst_idx_to_position_map = get_inst_idx_to_tensor_position_map( - active_inst_idx_list) - # Decode - for len_dec_seq in range(1, 25): - src_enc_copy = src_enc.clone() - active_inst_idx_list = beam_decode_step( - inst_dec_beams, len_dec_seq, src_enc_copy, - inst_idx_to_position_map, n_bm, None) - if not active_inst_idx_list: - break # all instances have finished their path to - src_enc, inst_idx_to_position_map = collate_active_info( - src_enc_copy, inst_idx_to_position_map, - active_inst_idx_list) - batch_hyp, batch_scores = collect_hypothesis_and_scores(inst_dec_beams, - 1) - result_hyp = [] - hyp_scores = [] - for bs_hyp, score in zip(batch_hyp, batch_scores): - l = len(bs_hyp[0]) - bs_hyp_pad = bs_hyp[0] + [3] * (25 - l) - result_hyp.append(bs_hyp_pad) - score = float(score) / l - hyp_score = [score for _ in range(25)] - hyp_scores.append(hyp_score) - return [ - paddle.to_tensor( - np.array(result_hyp), dtype=paddle.int64), - paddle.to_tensor(hyp_scores) - ] - - def generate_square_subsequent_mask(self, sz): - """Generate a square mask for the sequence. The masked positions are filled with float('-inf'). - Unmasked positions are filled with float(0.0). - """ - mask = paddle.zeros([sz, sz], dtype='float32') - mask_inf = paddle.triu( - paddle.full( - shape=[sz, sz], dtype='float32', fill_value='-inf'), - diagonal=1) - mask = mask + mask_inf - return mask - - def generate_padding_mask(self, x): - padding_mask = paddle.equal(x, paddle.to_tensor(0, dtype=x.dtype)) - return padding_mask - - def _reset_parameters(self): - """Initiate parameters in the transformer model.""" - - for p in self.parameters(): - if p.dim() > 1: - xavier_uniform_(p) - - -class TransformerEncoder(nn.Layer): - """TransformerEncoder is a stack of N encoder layers - Args: - encoder_layer: an instance of the TransformerEncoderLayer() class (required). - num_layers: the number of sub-encoder-layers in the encoder (required). - norm: the layer normalization component (optional). - """ - - def __init__(self, encoder_layer, num_layers): - super(TransformerEncoder, self).__init__() - self.layers = _get_clones(encoder_layer, num_layers) - self.num_layers = num_layers - - def forward(self, src): - """Pass the input through the endocder layers in turn. - Args: - src: the sequnce to the encoder (required). - mask: the mask for the src sequence (optional). - src_key_padding_mask: the mask for the src keys per batch (optional). - """ - output = src - - for i in range(self.num_layers): - output = self.layers[i](output, - src_mask=None, - src_key_padding_mask=None) - - return output - - -class TransformerDecoder(nn.Layer): - """TransformerDecoder is a stack of N decoder layers - - Args: - decoder_layer: an instance of the TransformerDecoderLayer() class (required). - num_layers: the number of sub-decoder-layers in the decoder (required). - norm: the layer normalization component (optional). - - """ - - def __init__(self, decoder_layer, num_layers): - super(TransformerDecoder, self).__init__() - self.layers = _get_clones(decoder_layer, num_layers) - self.num_layers = num_layers - - def forward(self, - tgt, - memory, - tgt_mask=None, - memory_mask=None, - tgt_key_padding_mask=None, - memory_key_padding_mask=None): - """Pass the inputs (and mask) through the decoder layer in turn. - - Args: - tgt: the sequence to the decoder (required). - memory: the sequnce from the last layer of the encoder (required). - tgt_mask: the mask for the tgt sequence (optional). - memory_mask: the mask for the memory sequence (optional). - tgt_key_padding_mask: the mask for the tgt keys per batch (optional). - memory_key_padding_mask: the mask for the memory keys per batch (optional). - """ - output = tgt - for i in range(self.num_layers): - output = self.layers[i]( - output, - memory, - tgt_mask=tgt_mask, - memory_mask=memory_mask, - tgt_key_padding_mask=tgt_key_padding_mask, - memory_key_padding_mask=memory_key_padding_mask) - - return output - - -class TransformerEncoderLayer(nn.Layer): - """TransformerEncoderLayer is made up of self-attn and feedforward network. - This standard encoder layer is based on the paper "Attention Is All You Need". - Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, - Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in - Neural Information Processing Systems, pages 6000-6010. Users may modify or implement - in a different way during application. - - Args: - d_model: the number of expected features in the input (required). - nhead: the number of heads in the multiheadattention models (required). - dim_feedforward: the dimension of the feedforward network model (default=2048). - dropout: the dropout value (default=0.1). - - """ - - def __init__(self, - d_model, - nhead, - dim_feedforward=2048, - attention_dropout_rate=0.0, - residual_dropout_rate=0.1): - super(TransformerEncoderLayer, self).__init__() - self.self_attn = MultiheadAttention( - d_model, nhead, dropout=attention_dropout_rate) - - self.conv1 = Conv2D( - in_channels=d_model, - out_channels=dim_feedforward, - kernel_size=(1, 1)) - self.conv2 = Conv2D( - in_channels=dim_feedforward, - out_channels=d_model, - kernel_size=(1, 1)) - - self.norm1 = LayerNorm(d_model) - self.norm2 = LayerNorm(d_model) - self.dropout1 = Dropout(residual_dropout_rate) - self.dropout2 = Dropout(residual_dropout_rate) - - def forward(self, src, src_mask=None, src_key_padding_mask=None): - """Pass the input through the endocder layer. - Args: - src: the sequnce to the encoder layer (required). - src_mask: the mask for the src sequence (optional). - src_key_padding_mask: the mask for the src keys per batch (optional). - """ - src2 = self.self_attn( - src, - src, - src, - attn_mask=src_mask, - key_padding_mask=src_key_padding_mask) - src = src + self.dropout1(src2) - src = self.norm1(src) - - src = paddle.transpose(src, [1, 2, 0]) - src = paddle.unsqueeze(src, 2) - src2 = self.conv2(F.relu(self.conv1(src))) - src2 = paddle.squeeze(src2, 2) - src2 = paddle.transpose(src2, [2, 0, 1]) - src = paddle.squeeze(src, 2) - src = paddle.transpose(src, [2, 0, 1]) - - src = src + self.dropout2(src2) - src = self.norm2(src) - return src - - -class TransformerDecoderLayer(nn.Layer): - """TransformerDecoderLayer is made up of self-attn, multi-head-attn and feedforward network. - This standard decoder layer is based on the paper "Attention Is All You Need". - Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, - Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in - Neural Information Processing Systems, pages 6000-6010. Users may modify or implement - in a different way during application. - - Args: - d_model: the number of expected features in the input (required). - nhead: the number of heads in the multiheadattention models (required). - dim_feedforward: the dimension of the feedforward network model (default=2048). - dropout: the dropout value (default=0.1). - - """ - - def __init__(self, - d_model, - nhead, - dim_feedforward=2048, - attention_dropout_rate=0.0, - residual_dropout_rate=0.1): - super(TransformerDecoderLayer, self).__init__() - self.self_attn = MultiheadAttention( - d_model, nhead, dropout=attention_dropout_rate) - self.multihead_attn = MultiheadAttention( - d_model, nhead, dropout=attention_dropout_rate) - - self.conv1 = Conv2D( - in_channels=d_model, - out_channels=dim_feedforward, - kernel_size=(1, 1)) - self.conv2 = Conv2D( - in_channels=dim_feedforward, - out_channels=d_model, - kernel_size=(1, 1)) - - self.norm1 = LayerNorm(d_model) - self.norm2 = LayerNorm(d_model) - self.norm3 = LayerNorm(d_model) - self.dropout1 = Dropout(residual_dropout_rate) - self.dropout2 = Dropout(residual_dropout_rate) - self.dropout3 = Dropout(residual_dropout_rate) - - def forward(self, - tgt, - memory, - tgt_mask=None, - memory_mask=None, - tgt_key_padding_mask=None, - memory_key_padding_mask=None): - """Pass the inputs (and mask) through the decoder layer. - - Args: - tgt: the sequence to the decoder layer (required). - memory: the sequnce from the last layer of the encoder (required). - tgt_mask: the mask for the tgt sequence (optional). - memory_mask: the mask for the memory sequence (optional). - tgt_key_padding_mask: the mask for the tgt keys per batch (optional). - memory_key_padding_mask: the mask for the memory keys per batch (optional). - - """ - tgt2 = self.self_attn( - tgt, - tgt, - tgt, - attn_mask=tgt_mask, - key_padding_mask=tgt_key_padding_mask) - tgt = tgt + self.dropout1(tgt2) - tgt = self.norm1(tgt) - tgt2 = self.multihead_attn( - tgt, - memory, - memory, - attn_mask=memory_mask, - key_padding_mask=memory_key_padding_mask) - tgt = tgt + self.dropout2(tgt2) - tgt = self.norm2(tgt) - - # default - tgt = paddle.transpose(tgt, [1, 2, 0]) - tgt = paddle.unsqueeze(tgt, 2) - tgt2 = self.conv2(F.relu(self.conv1(tgt))) - tgt2 = paddle.squeeze(tgt2, 2) - tgt2 = paddle.transpose(tgt2, [2, 0, 1]) - tgt = paddle.squeeze(tgt, 2) - tgt = paddle.transpose(tgt, [2, 0, 1]) - - tgt = tgt + self.dropout3(tgt2) - tgt = self.norm3(tgt) - return tgt - - -def _get_clones(module, N): - return LayerList([copy.deepcopy(module) for i in range(N)]) - - -class PositionalEncoding(nn.Layer): - """Inject some information about the relative or absolute position of the tokens - in the sequence. The positional encodings have the same dimension as - the embeddings, so that the two can be summed. Here, we use sine and cosine - functions of different frequencies. - .. math:: - \text{PosEncoder}(pos, 2i) = sin(pos/10000^(2i/d_model)) - \text{PosEncoder}(pos, 2i+1) = cos(pos/10000^(2i/d_model)) - \text{where pos is the word position and i is the embed idx) - Args: - d_model: the embed dim (required). - dropout: the dropout value (default=0.1). - max_len: the max. length of the incoming sequence (default=5000). - Examples: - >>> pos_encoder = PositionalEncoding(d_model) - """ - - def __init__(self, dropout, dim, max_len=5000): - super(PositionalEncoding, self).__init__() - self.dropout = nn.Dropout(p=dropout) - - pe = paddle.zeros([max_len, dim]) - position = paddle.arange(0, max_len, dtype=paddle.float32).unsqueeze(1) - div_term = paddle.exp( - paddle.arange(0, dim, 2).astype('float32') * - (-math.log(10000.0) / dim)) - pe[:, 0::2] = paddle.sin(position * div_term) - pe[:, 1::2] = paddle.cos(position * div_term) - pe = paddle.unsqueeze(pe, 0) - pe = paddle.transpose(pe, [1, 0, 2]) - self.register_buffer('pe', pe) - - def forward(self, x): - """Inputs of forward function - Args: - x: the sequence fed to the positional encoder model (required). - Shape: - x: [sequence length, batch size, embed dim] - output: [sequence length, batch size, embed dim] - Examples: - >>> output = pos_encoder(x) - """ - x = x + self.pe[:paddle.shape(x)[0], :] - return self.dropout(x) - - -class PositionalEncoding_2d(nn.Layer): - """Inject some information about the relative or absolute position of the tokens - in the sequence. The positional encodings have the same dimension as - the embeddings, so that the two can be summed. Here, we use sine and cosine - functions of different frequencies. - .. math:: - \text{PosEncoder}(pos, 2i) = sin(pos/10000^(2i/d_model)) - \text{PosEncoder}(pos, 2i+1) = cos(pos/10000^(2i/d_model)) - \text{where pos is the word position and i is the embed idx) - Args: - d_model: the embed dim (required). - dropout: the dropout value (default=0.1). - max_len: the max. length of the incoming sequence (default=5000). - Examples: - >>> pos_encoder = PositionalEncoding(d_model) - """ - - def __init__(self, dropout, dim, max_len=5000): - super(PositionalEncoding_2d, self).__init__() - self.dropout = nn.Dropout(p=dropout) - - pe = paddle.zeros([max_len, dim]) - position = paddle.arange(0, max_len, dtype=paddle.float32).unsqueeze(1) - div_term = paddle.exp( - paddle.arange(0, dim, 2).astype('float32') * - (-math.log(10000.0) / dim)) - pe[:, 0::2] = paddle.sin(position * div_term) - pe[:, 1::2] = paddle.cos(position * div_term) - pe = paddle.transpose(paddle.unsqueeze(pe, 0), [1, 0, 2]) - self.register_buffer('pe', pe) - - self.avg_pool_1 = nn.AdaptiveAvgPool2D((1, 1)) - self.linear1 = nn.Linear(dim, dim) - self.linear1.weight.data.fill_(1.) - self.avg_pool_2 = nn.AdaptiveAvgPool2D((1, 1)) - self.linear2 = nn.Linear(dim, dim) - self.linear2.weight.data.fill_(1.) - - def forward(self, x): - """Inputs of forward function - Args: - x: the sequence fed to the positional encoder model (required). - Shape: - x: [sequence length, batch size, embed dim] - output: [sequence length, batch size, embed dim] - Examples: - >>> output = pos_encoder(x) - """ - w_pe = self.pe[:paddle.shape(x)[-1], :] - w1 = self.linear1(self.avg_pool_1(x).squeeze()).unsqueeze(0) - w_pe = w_pe * w1 - w_pe = paddle.transpose(w_pe, [1, 2, 0]) - w_pe = paddle.unsqueeze(w_pe, 2) - - h_pe = self.pe[:paddle.shape(x).shape[-2], :] - w2 = self.linear2(self.avg_pool_2(x).squeeze()).unsqueeze(0) - h_pe = h_pe * w2 - h_pe = paddle.transpose(h_pe, [1, 2, 0]) - h_pe = paddle.unsqueeze(h_pe, 3) - - x = x + w_pe + h_pe - x = paddle.transpose( - paddle.reshape(x, - [x.shape[0], x.shape[1], x.shape[2] * x.shape[3]]), - [2, 0, 1]) - - return self.dropout(x) - - -class Embeddings(nn.Layer): - def __init__(self, d_model, vocab, padding_idx, scale_embedding): - super(Embeddings, self).__init__() - self.embedding = nn.Embedding(vocab, d_model, padding_idx=padding_idx) - w0 = np.random.normal(0.0, d_model**-0.5, - (vocab, d_model)).astype(np.float32) - self.embedding.weight.set_value(w0) - self.d_model = d_model - self.scale_embedding = scale_embedding - - def forward(self, x): - if self.scale_embedding: - x = self.embedding(x) - return x * math.sqrt(self.d_model) - return self.embedding(x) - - -class Beam(): - ''' Beam search ''' - - def __init__(self, size, device=False): - - self.size = size - self._done = False - # The score for each translation on the beam. - self.scores = paddle.zeros((size, ), dtype=paddle.float32) - self.all_scores = [] - # The backpointers at each time-step. - self.prev_ks = [] - # The outputs at each time-step. - self.next_ys = [paddle.full((size, ), 0, dtype=paddle.int64)] - self.next_ys[0][0] = 2 - - def get_current_state(self): - "Get the outputs for the current timestep." - return self.get_tentative_hypothesis() - - def get_current_origin(self): - "Get the backpointers for the current timestep." - return self.prev_ks[-1] - - @property - def done(self): - return self._done - - def advance(self, word_prob): - "Update beam status and check if finished or not." - num_words = word_prob.shape[1] - - # Sum the previous scores. - if len(self.prev_ks) > 0: - beam_lk = word_prob + self.scores.unsqueeze(1).expand_as(word_prob) - else: - beam_lk = word_prob[0] - - flat_beam_lk = beam_lk.reshape([-1]) - best_scores, best_scores_id = flat_beam_lk.topk(self.size, 0, True, - True) # 1st sort - self.all_scores.append(self.scores) - self.scores = best_scores - # bestScoresId is flattened as a (beam x word) array, - # so we need to calculate which word and beam each score came from - prev_k = best_scores_id // num_words - self.prev_ks.append(prev_k) - self.next_ys.append(best_scores_id - prev_k * num_words) - # End condition is when top-of-beam is EOS. - if self.next_ys[-1][0] == 3: - self._done = True - self.all_scores.append(self.scores) - - return self._done - - def sort_scores(self): - "Sort the scores." - return self.scores, paddle.to_tensor( - [i for i in range(int(self.scores.shape[0]))], dtype='int32') - - def get_the_best_score_and_idx(self): - "Get the score of the best in the beam." - scores, ids = self.sort_scores() - return scores[1], ids[1] - - def get_tentative_hypothesis(self): - "Get the decoded sequence for the current timestep." - if len(self.next_ys) == 1: - dec_seq = self.next_ys[0].unsqueeze(1) - else: - _, keys = self.sort_scores() - hyps = [self.get_hypothesis(k) for k in keys] - hyps = [[2] + h for h in hyps] - dec_seq = paddle.to_tensor(hyps, dtype='int64') - return dec_seq - - def get_hypothesis(self, k): - """ Walk back to construct the full hypothesis. """ - hyp = [] - for j in range(len(self.prev_ks) - 1, -1, -1): - hyp.append(self.next_ys[j + 1][k]) - k = self.prev_ks[j][k] - return list(map(lambda x: x.item(), hyp[::-1])) diff --git a/backend/ppocr/modeling/heads/rec_pren_head.py b/backend/ppocr/modeling/heads/rec_pren_head.py deleted file mode 100644 index c9e4b3e..0000000 --- a/backend/ppocr/modeling/heads/rec_pren_head.py +++ /dev/null @@ -1,34 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import nn -from paddle.nn import functional as F - - -class PRENHead(nn.Layer): - def __init__(self, in_channels, out_channels, **kwargs): - super(PRENHead, self).__init__() - self.linear = nn.Linear(in_channels, out_channels) - - def forward(self, x, targets=None): - predicts = self.linear(x) - - if not self.training: - predicts = F.softmax(predicts, axis=2) - - return predicts diff --git a/backend/ppocr/modeling/heads/rec_sar_head.py b/backend/ppocr/modeling/heads/rec_sar_head.py deleted file mode 100644 index 0e6b344..0000000 --- a/backend/ppocr/modeling/heads/rec_sar_head.py +++ /dev/null @@ -1,410 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/encoders/sar_encoder.py -https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/decoders/sar_decoder.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F - - -class SAREncoder(nn.Layer): - """ - Args: - enc_bi_rnn (bool): If True, use bidirectional RNN in encoder. - enc_drop_rnn (float): Dropout probability of RNN layer in encoder. - enc_gru (bool): If True, use GRU, else LSTM in encoder. - d_model (int): Dim of channels from backbone. - d_enc (int): Dim of encoder RNN layer. - mask (bool): If True, mask padding in RNN sequence. - """ - - def __init__(self, - enc_bi_rnn=False, - enc_drop_rnn=0.1, - enc_gru=False, - d_model=512, - d_enc=512, - mask=True, - **kwargs): - super().__init__() - assert isinstance(enc_bi_rnn, bool) - assert isinstance(enc_drop_rnn, (int, float)) - assert 0 <= enc_drop_rnn < 1.0 - assert isinstance(enc_gru, bool) - assert isinstance(d_model, int) - assert isinstance(d_enc, int) - assert isinstance(mask, bool) - - self.enc_bi_rnn = enc_bi_rnn - self.enc_drop_rnn = enc_drop_rnn - self.mask = mask - - # LSTM Encoder - if enc_bi_rnn: - direction = 'bidirectional' - else: - direction = 'forward' - kwargs = dict( - input_size=d_model, - hidden_size=d_enc, - num_layers=2, - time_major=False, - dropout=enc_drop_rnn, - direction=direction) - if enc_gru: - self.rnn_encoder = nn.GRU(**kwargs) - else: - self.rnn_encoder = nn.LSTM(**kwargs) - - # global feature transformation - encoder_rnn_out_size = d_enc * (int(enc_bi_rnn) + 1) - self.linear = nn.Linear(encoder_rnn_out_size, encoder_rnn_out_size) - - def forward(self, feat, img_metas=None): - if img_metas is not None: - assert len(img_metas[0]) == feat.shape[0] - - valid_ratios = None - if img_metas is not None and self.mask: - valid_ratios = img_metas[-1] - - h_feat = feat.shape[2] # bsz c h w - feat_v = F.max_pool2d( - feat, kernel_size=(h_feat, 1), stride=1, padding=0) - feat_v = feat_v.squeeze(2) # bsz * C * W - feat_v = paddle.transpose(feat_v, perm=[0, 2, 1]) # bsz * W * C - holistic_feat = self.rnn_encoder(feat_v)[0] # bsz * T * C - - if valid_ratios is not None: - valid_hf = [] - T = holistic_feat.shape[1] - for i in range(len(valid_ratios)): - valid_step = min(T, math.ceil(T * valid_ratios[i])) - 1 - valid_hf.append(holistic_feat[i, valid_step, :]) - valid_hf = paddle.stack(valid_hf, axis=0) - else: - valid_hf = holistic_feat[:, -1, :] # bsz * C - holistic_feat = self.linear(valid_hf) # bsz * C - - return holistic_feat - - -class BaseDecoder(nn.Layer): - def __init__(self, **kwargs): - super().__init__() - - def forward_train(self, feat, out_enc, targets, img_metas): - raise NotImplementedError - - def forward_test(self, feat, out_enc, img_metas): - raise NotImplementedError - - def forward(self, - feat, - out_enc, - label=None, - img_metas=None, - train_mode=True): - self.train_mode = train_mode - - if train_mode: - return self.forward_train(feat, out_enc, label, img_metas) - return self.forward_test(feat, out_enc, img_metas) - - -class ParallelSARDecoder(BaseDecoder): - """ - Args: - out_channels (int): Output class number. - enc_bi_rnn (bool): If True, use bidirectional RNN in encoder. - dec_bi_rnn (bool): If True, use bidirectional RNN in decoder. - dec_drop_rnn (float): Dropout of RNN layer in decoder. - dec_gru (bool): If True, use GRU, else LSTM in decoder. - d_model (int): Dim of channels from backbone. - d_enc (int): Dim of encoder RNN layer. - d_k (int): Dim of channels of attention module. - pred_dropout (float): Dropout probability of prediction layer. - max_seq_len (int): Maximum sequence length for decoding. - mask (bool): If True, mask padding in feature map. - start_idx (int): Index of start token. - padding_idx (int): Index of padding token. - pred_concat (bool): If True, concat glimpse feature from - attention with holistic feature and hidden state. - """ - - def __init__( - self, - out_channels, # 90 + unknown + start + padding - enc_bi_rnn=False, - dec_bi_rnn=False, - dec_drop_rnn=0.0, - dec_gru=False, - d_model=512, - d_enc=512, - d_k=64, - pred_dropout=0.1, - max_text_length=30, - mask=True, - pred_concat=True, - **kwargs): - super().__init__() - - self.num_classes = out_channels - self.enc_bi_rnn = enc_bi_rnn - self.d_k = d_k - self.start_idx = out_channels - 2 - self.padding_idx = out_channels - 1 - self.max_seq_len = max_text_length - self.mask = mask - self.pred_concat = pred_concat - - encoder_rnn_out_size = d_enc * (int(enc_bi_rnn) + 1) - decoder_rnn_out_size = encoder_rnn_out_size * (int(dec_bi_rnn) + 1) - - # 2D attention layer - self.conv1x1_1 = nn.Linear(decoder_rnn_out_size, d_k) - self.conv3x3_1 = nn.Conv2D( - d_model, d_k, kernel_size=3, stride=1, padding=1) - self.conv1x1_2 = nn.Linear(d_k, 1) - - # Decoder RNN layer - if dec_bi_rnn: - direction = 'bidirectional' - else: - direction = 'forward' - - kwargs = dict( - input_size=encoder_rnn_out_size, - hidden_size=encoder_rnn_out_size, - num_layers=2, - time_major=False, - dropout=dec_drop_rnn, - direction=direction) - if dec_gru: - self.rnn_decoder = nn.GRU(**kwargs) - else: - self.rnn_decoder = nn.LSTM(**kwargs) - - # Decoder input embedding - self.embedding = nn.Embedding( - self.num_classes, - encoder_rnn_out_size, - padding_idx=self.padding_idx) - - # Prediction layer - self.pred_dropout = nn.Dropout(pred_dropout) - pred_num_classes = self.num_classes - 1 - if pred_concat: - fc_in_channel = decoder_rnn_out_size + d_model + encoder_rnn_out_size - else: - fc_in_channel = d_model - self.prediction = nn.Linear(fc_in_channel, pred_num_classes) - - def _2d_attention(self, - decoder_input, - feat, - holistic_feat, - valid_ratios=None): - - y = self.rnn_decoder(decoder_input)[0] - # y: bsz * (seq_len + 1) * hidden_size - - attn_query = self.conv1x1_1(y) # bsz * (seq_len + 1) * attn_size - bsz, seq_len, attn_size = attn_query.shape - attn_query = paddle.unsqueeze(attn_query, axis=[3, 4]) - # (bsz, seq_len + 1, attn_size, 1, 1) - - attn_key = self.conv3x3_1(feat) - # bsz * attn_size * h * w - attn_key = attn_key.unsqueeze(1) - # bsz * 1 * attn_size * h * w - - attn_weight = paddle.tanh(paddle.add(attn_key, attn_query)) - - # bsz * (seq_len + 1) * attn_size * h * w - attn_weight = paddle.transpose(attn_weight, perm=[0, 1, 3, 4, 2]) - # bsz * (seq_len + 1) * h * w * attn_size - attn_weight = self.conv1x1_2(attn_weight) - # bsz * (seq_len + 1) * h * w * 1 - bsz, T, h, w, c = attn_weight.shape - assert c == 1 - - if valid_ratios is not None: - # cal mask of attention weight - for i in range(len(valid_ratios)): - valid_width = min(w, math.ceil(w * valid_ratios[i])) - if valid_width < w: - attn_weight[i, :, :, valid_width:, :] = float('-inf') - - attn_weight = paddle.reshape(attn_weight, [bsz, T, -1]) - attn_weight = F.softmax(attn_weight, axis=-1) - - attn_weight = paddle.reshape(attn_weight, [bsz, T, h, w, c]) - attn_weight = paddle.transpose(attn_weight, perm=[0, 1, 4, 2, 3]) - # attn_weight: bsz * T * c * h * w - # feat: bsz * c * h * w - attn_feat = paddle.sum(paddle.multiply(feat.unsqueeze(1), attn_weight), - (3, 4), - keepdim=False) - # bsz * (seq_len + 1) * C - - # Linear transformation - if self.pred_concat: - hf_c = holistic_feat.shape[-1] - holistic_feat = paddle.expand( - holistic_feat, shape=[bsz, seq_len, hf_c]) - y = self.prediction(paddle.concat((y, attn_feat, holistic_feat), 2)) - else: - y = self.prediction(attn_feat) - # bsz * (seq_len + 1) * num_classes - if self.train_mode: - y = self.pred_dropout(y) - - return y - - def forward_train(self, feat, out_enc, label, img_metas): - ''' - img_metas: [label, valid_ratio] - ''' - if img_metas is not None: - assert len(img_metas[0]) == feat.shape[0] - - valid_ratios = None - if img_metas is not None and self.mask: - valid_ratios = img_metas[-1] - - lab_embedding = self.embedding(label) - # bsz * seq_len * emb_dim - out_enc = out_enc.unsqueeze(1) - # bsz * 1 * emb_dim - in_dec = paddle.concat((out_enc, lab_embedding), axis=1) - # bsz * (seq_len + 1) * C - out_dec = self._2d_attention( - in_dec, feat, out_enc, valid_ratios=valid_ratios) - # bsz * (seq_len + 1) * num_classes - - return out_dec[:, 1:, :] # bsz * seq_len * num_classes - - def forward_test(self, feat, out_enc, img_metas): - if img_metas is not None: - assert len(img_metas[0]) == feat.shape[0] - - valid_ratios = None - if img_metas is not None and self.mask: - valid_ratios = img_metas[-1] - - seq_len = self.max_seq_len - bsz = feat.shape[0] - start_token = paddle.full( - (bsz, ), fill_value=self.start_idx, dtype='int64') - # bsz - start_token = self.embedding(start_token) - # bsz * emb_dim - emb_dim = start_token.shape[1] - start_token = start_token.unsqueeze(1) - start_token = paddle.expand(start_token, shape=[bsz, seq_len, emb_dim]) - # bsz * seq_len * emb_dim - out_enc = out_enc.unsqueeze(1) - # bsz * 1 * emb_dim - decoder_input = paddle.concat((out_enc, start_token), axis=1) - # bsz * (seq_len + 1) * emb_dim - - outputs = [] - for i in range(1, seq_len + 1): - decoder_output = self._2d_attention( - decoder_input, feat, out_enc, valid_ratios=valid_ratios) - char_output = decoder_output[:, i, :] # bsz * num_classes - char_output = F.softmax(char_output, -1) - outputs.append(char_output) - max_idx = paddle.argmax(char_output, axis=1, keepdim=False) - char_embedding = self.embedding(max_idx) # bsz * emb_dim - if i < seq_len: - decoder_input[:, i + 1, :] = char_embedding - - outputs = paddle.stack(outputs, 1) # bsz * seq_len * num_classes - - return outputs - - -class SARHead(nn.Layer): - def __init__(self, - in_channels, - out_channels, - enc_dim=512, - max_text_length=30, - enc_bi_rnn=False, - enc_drop_rnn=0.1, - enc_gru=False, - dec_bi_rnn=False, - dec_drop_rnn=0.0, - dec_gru=False, - d_k=512, - pred_dropout=0.1, - pred_concat=True, - **kwargs): - super(SARHead, self).__init__() - - # encoder module - self.encoder = SAREncoder( - enc_bi_rnn=enc_bi_rnn, - enc_drop_rnn=enc_drop_rnn, - enc_gru=enc_gru, - d_model=in_channels, - d_enc=enc_dim) - - # decoder module - self.decoder = ParallelSARDecoder( - out_channels=out_channels, - enc_bi_rnn=enc_bi_rnn, - dec_bi_rnn=dec_bi_rnn, - dec_drop_rnn=dec_drop_rnn, - dec_gru=dec_gru, - d_model=in_channels, - d_enc=enc_dim, - d_k=d_k, - pred_dropout=pred_dropout, - max_text_length=max_text_length, - pred_concat=pred_concat) - - def forward(self, feat, targets=None): - ''' - img_metas: [label, valid_ratio] - ''' - holistic_feat = self.encoder(feat, targets) # bsz c - - if self.training: - label = targets[0] # label - label = paddle.to_tensor(label, dtype='int64') - final_out = self.decoder( - feat, holistic_feat, label, img_metas=targets) - else: - final_out = self.decoder( - feat, - holistic_feat, - label=None, - img_metas=targets, - train_mode=False) - # (bsz, seq_len, num_classes) - - return final_out diff --git a/backend/ppocr/modeling/heads/rec_srn_head.py b/backend/ppocr/modeling/heads/rec_srn_head.py deleted file mode 100644 index 8d59e47..0000000 --- a/backend/ppocr/modeling/heads/rec_srn_head.py +++ /dev/null @@ -1,280 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn, ParamAttr -from paddle.nn import functional as F -import paddle.fluid as fluid -import numpy as np -from .self_attention import WrapEncoderForFeature -from .self_attention import WrapEncoder -from paddle.static import Program -from ppocr.modeling.backbones.rec_resnet_fpn import ResNetFPN -import paddle.fluid.framework as framework - -from collections import OrderedDict -gradient_clip = 10 - - -class PVAM(nn.Layer): - def __init__(self, in_channels, char_num, max_text_length, num_heads, - num_encoder_tus, hidden_dims): - super(PVAM, self).__init__() - self.char_num = char_num - self.max_length = max_text_length - self.num_heads = num_heads - self.num_encoder_TUs = num_encoder_tus - self.hidden_dims = hidden_dims - # Transformer encoder - t = 256 - c = 512 - self.wrap_encoder_for_feature = WrapEncoderForFeature( - src_vocab_size=1, - max_length=t, - n_layer=self.num_encoder_TUs, - n_head=self.num_heads, - d_key=int(self.hidden_dims / self.num_heads), - d_value=int(self.hidden_dims / self.num_heads), - d_model=self.hidden_dims, - d_inner_hid=self.hidden_dims, - prepostprocess_dropout=0.1, - attention_dropout=0.1, - relu_dropout=0.1, - preprocess_cmd="n", - postprocess_cmd="da", - weight_sharing=True) - - # PVAM - self.flatten0 = paddle.nn.Flatten(start_axis=0, stop_axis=1) - self.fc0 = paddle.nn.Linear( - in_features=in_channels, - out_features=in_channels, ) - self.emb = paddle.nn.Embedding( - num_embeddings=self.max_length, embedding_dim=in_channels) - self.flatten1 = paddle.nn.Flatten(start_axis=0, stop_axis=2) - self.fc1 = paddle.nn.Linear( - in_features=in_channels, out_features=1, bias_attr=False) - - def forward(self, inputs, encoder_word_pos, gsrm_word_pos): - b, c, h, w = inputs.shape - conv_features = paddle.reshape(inputs, shape=[-1, c, h * w]) - conv_features = paddle.transpose(conv_features, perm=[0, 2, 1]) - # transformer encoder - b, t, c = conv_features.shape - - enc_inputs = [conv_features, encoder_word_pos, None] - word_features = self.wrap_encoder_for_feature(enc_inputs) - - # pvam - b, t, c = word_features.shape - word_features = self.fc0(word_features) - word_features_ = paddle.reshape(word_features, [-1, 1, t, c]) - word_features_ = paddle.tile(word_features_, [1, self.max_length, 1, 1]) - word_pos_feature = self.emb(gsrm_word_pos) - word_pos_feature_ = paddle.reshape(word_pos_feature, - [-1, self.max_length, 1, c]) - word_pos_feature_ = paddle.tile(word_pos_feature_, [1, 1, t, 1]) - y = word_pos_feature_ + word_features_ - y = F.tanh(y) - attention_weight = self.fc1(y) - attention_weight = paddle.reshape( - attention_weight, shape=[-1, self.max_length, t]) - attention_weight = F.softmax(attention_weight, axis=-1) - pvam_features = paddle.matmul(attention_weight, - word_features) #[b, max_length, c] - return pvam_features - - -class GSRM(nn.Layer): - def __init__(self, in_channels, char_num, max_text_length, num_heads, - num_encoder_tus, num_decoder_tus, hidden_dims): - super(GSRM, self).__init__() - self.char_num = char_num - self.max_length = max_text_length - self.num_heads = num_heads - self.num_encoder_TUs = num_encoder_tus - self.num_decoder_TUs = num_decoder_tus - self.hidden_dims = hidden_dims - - self.fc0 = paddle.nn.Linear( - in_features=in_channels, out_features=self.char_num) - self.wrap_encoder0 = WrapEncoder( - src_vocab_size=self.char_num + 1, - max_length=self.max_length, - n_layer=self.num_decoder_TUs, - n_head=self.num_heads, - d_key=int(self.hidden_dims / self.num_heads), - d_value=int(self.hidden_dims / self.num_heads), - d_model=self.hidden_dims, - d_inner_hid=self.hidden_dims, - prepostprocess_dropout=0.1, - attention_dropout=0.1, - relu_dropout=0.1, - preprocess_cmd="n", - postprocess_cmd="da", - weight_sharing=True) - - self.wrap_encoder1 = WrapEncoder( - src_vocab_size=self.char_num + 1, - max_length=self.max_length, - n_layer=self.num_decoder_TUs, - n_head=self.num_heads, - d_key=int(self.hidden_dims / self.num_heads), - d_value=int(self.hidden_dims / self.num_heads), - d_model=self.hidden_dims, - d_inner_hid=self.hidden_dims, - prepostprocess_dropout=0.1, - attention_dropout=0.1, - relu_dropout=0.1, - preprocess_cmd="n", - postprocess_cmd="da", - weight_sharing=True) - - self.mul = lambda x: paddle.matmul(x=x, - y=self.wrap_encoder0.prepare_decoder.emb0.weight, - transpose_y=True) - - def forward(self, inputs, gsrm_word_pos, gsrm_slf_attn_bias1, - gsrm_slf_attn_bias2): - # ===== GSRM Visual-to-semantic embedding block ===== - b, t, c = inputs.shape - pvam_features = paddle.reshape(inputs, [-1, c]) - word_out = self.fc0(pvam_features) - word_ids = paddle.argmax(F.softmax(word_out), axis=1) - word_ids = paddle.reshape(x=word_ids, shape=[-1, t, 1]) - - #===== GSRM Semantic reasoning block ===== - """ - This module is achieved through bi-transformers, - ngram_feature1 is the froward one, ngram_fetaure2 is the backward one - """ - pad_idx = self.char_num - - word1 = paddle.cast(word_ids, "float32") - word1 = F.pad(word1, [1, 0], value=1.0 * pad_idx, data_format="NLC") - word1 = paddle.cast(word1, "int64") - word1 = word1[:, :-1, :] - word2 = word_ids - - enc_inputs_1 = [word1, gsrm_word_pos, gsrm_slf_attn_bias1] - enc_inputs_2 = [word2, gsrm_word_pos, gsrm_slf_attn_bias2] - - gsrm_feature1 = self.wrap_encoder0(enc_inputs_1) - gsrm_feature2 = self.wrap_encoder1(enc_inputs_2) - - gsrm_feature2 = F.pad(gsrm_feature2, [0, 1], - value=0., - data_format="NLC") - gsrm_feature2 = gsrm_feature2[:, 1:, ] - gsrm_features = gsrm_feature1 + gsrm_feature2 - - gsrm_out = self.mul(gsrm_features) - - b, t, c = gsrm_out.shape - gsrm_out = paddle.reshape(gsrm_out, [-1, c]) - - return gsrm_features, word_out, gsrm_out - - -class VSFD(nn.Layer): - def __init__(self, in_channels=512, pvam_ch=512, char_num=38): - super(VSFD, self).__init__() - self.char_num = char_num - self.fc0 = paddle.nn.Linear( - in_features=in_channels * 2, out_features=pvam_ch) - self.fc1 = paddle.nn.Linear( - in_features=pvam_ch, out_features=self.char_num) - - def forward(self, pvam_feature, gsrm_feature): - b, t, c1 = pvam_feature.shape - b, t, c2 = gsrm_feature.shape - combine_feature_ = paddle.concat([pvam_feature, gsrm_feature], axis=2) - img_comb_feature_ = paddle.reshape( - combine_feature_, shape=[-1, c1 + c2]) - img_comb_feature_map = self.fc0(img_comb_feature_) - img_comb_feature_map = F.sigmoid(img_comb_feature_map) - img_comb_feature_map = paddle.reshape( - img_comb_feature_map, shape=[-1, t, c1]) - combine_feature = img_comb_feature_map * pvam_feature + ( - 1.0 - img_comb_feature_map) * gsrm_feature - img_comb_feature = paddle.reshape(combine_feature, shape=[-1, c1]) - - out = self.fc1(img_comb_feature) - return out - - -class SRNHead(nn.Layer): - def __init__(self, in_channels, out_channels, max_text_length, num_heads, - num_encoder_TUs, num_decoder_TUs, hidden_dims, **kwargs): - super(SRNHead, self).__init__() - self.char_num = out_channels - self.max_length = max_text_length - self.num_heads = num_heads - self.num_encoder_TUs = num_encoder_TUs - self.num_decoder_TUs = num_decoder_TUs - self.hidden_dims = hidden_dims - - self.pvam = PVAM( - in_channels=in_channels, - char_num=self.char_num, - max_text_length=self.max_length, - num_heads=self.num_heads, - num_encoder_tus=self.num_encoder_TUs, - hidden_dims=self.hidden_dims) - - self.gsrm = GSRM( - in_channels=in_channels, - char_num=self.char_num, - max_text_length=self.max_length, - num_heads=self.num_heads, - num_encoder_tus=self.num_encoder_TUs, - num_decoder_tus=self.num_decoder_TUs, - hidden_dims=self.hidden_dims) - self.vsfd = VSFD(in_channels=in_channels, char_num=self.char_num) - - self.gsrm.wrap_encoder1.prepare_decoder.emb0 = self.gsrm.wrap_encoder0.prepare_decoder.emb0 - - def forward(self, inputs, targets=None): - others = targets[-4:] - encoder_word_pos = others[0] - gsrm_word_pos = others[1] - gsrm_slf_attn_bias1 = others[2] - gsrm_slf_attn_bias2 = others[3] - - pvam_feature = self.pvam(inputs, encoder_word_pos, gsrm_word_pos) - - gsrm_feature, word_out, gsrm_out = self.gsrm( - pvam_feature, gsrm_word_pos, gsrm_slf_attn_bias1, - gsrm_slf_attn_bias2) - - final_out = self.vsfd(pvam_feature, gsrm_feature) - if not self.training: - final_out = F.softmax(final_out, axis=1) - - _, decoded_out = paddle.topk(final_out, k=1) - - predicts = OrderedDict([ - ('predict', final_out), - ('pvam_feature', pvam_feature), - ('decoded_out', decoded_out), - ('word_out', word_out), - ('gsrm_out', gsrm_out), - ]) - - return predicts diff --git a/backend/ppocr/modeling/heads/self_attention.py b/backend/ppocr/modeling/heads/self_attention.py deleted file mode 100644 index 6c27fdb..0000000 --- a/backend/ppocr/modeling/heads/self_attention.py +++ /dev/null @@ -1,406 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -import paddle -from paddle import ParamAttr, nn -from paddle import nn, ParamAttr -from paddle.nn import functional as F -import paddle.fluid as fluid -import numpy as np -gradient_clip = 10 - - -class WrapEncoderForFeature(nn.Layer): - def __init__(self, - src_vocab_size, - max_length, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - preprocess_cmd, - postprocess_cmd, - weight_sharing, - bos_idx=0): - super(WrapEncoderForFeature, self).__init__() - - self.prepare_encoder = PrepareEncoder( - src_vocab_size, - d_model, - max_length, - prepostprocess_dropout, - bos_idx=bos_idx, - word_emb_param_name="src_word_emb_table") - self.encoder = Encoder(n_layer, n_head, d_key, d_value, d_model, - d_inner_hid, prepostprocess_dropout, - attention_dropout, relu_dropout, preprocess_cmd, - postprocess_cmd) - - def forward(self, enc_inputs): - conv_features, src_pos, src_slf_attn_bias = enc_inputs - enc_input = self.prepare_encoder(conv_features, src_pos) - enc_output = self.encoder(enc_input, src_slf_attn_bias) - return enc_output - - -class WrapEncoder(nn.Layer): - """ - embedder + encoder - """ - - def __init__(self, - src_vocab_size, - max_length, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - preprocess_cmd, - postprocess_cmd, - weight_sharing, - bos_idx=0): - super(WrapEncoder, self).__init__() - - self.prepare_decoder = PrepareDecoder( - src_vocab_size, - d_model, - max_length, - prepostprocess_dropout, - bos_idx=bos_idx) - self.encoder = Encoder(n_layer, n_head, d_key, d_value, d_model, - d_inner_hid, prepostprocess_dropout, - attention_dropout, relu_dropout, preprocess_cmd, - postprocess_cmd) - - def forward(self, enc_inputs): - src_word, src_pos, src_slf_attn_bias = enc_inputs - enc_input = self.prepare_decoder(src_word, src_pos) - enc_output = self.encoder(enc_input, src_slf_attn_bias) - return enc_output - - -class Encoder(nn.Layer): - """ - encoder - """ - - def __init__(self, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - preprocess_cmd="n", - postprocess_cmd="da"): - - super(Encoder, self).__init__() - - self.encoder_layers = list() - for i in range(n_layer): - self.encoder_layers.append( - self.add_sublayer( - "layer_%d" % i, - EncoderLayer(n_head, d_key, d_value, d_model, d_inner_hid, - prepostprocess_dropout, attention_dropout, - relu_dropout, preprocess_cmd, - postprocess_cmd))) - self.processer = PrePostProcessLayer(preprocess_cmd, d_model, - prepostprocess_dropout) - - def forward(self, enc_input, attn_bias): - for encoder_layer in self.encoder_layers: - enc_output = encoder_layer(enc_input, attn_bias) - enc_input = enc_output - enc_output = self.processer(enc_output) - return enc_output - - -class EncoderLayer(nn.Layer): - """ - EncoderLayer - """ - - def __init__(self, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - preprocess_cmd="n", - postprocess_cmd="da"): - - super(EncoderLayer, self).__init__() - self.preprocesser1 = PrePostProcessLayer(preprocess_cmd, d_model, - prepostprocess_dropout) - self.self_attn = MultiHeadAttention(d_key, d_value, d_model, n_head, - attention_dropout) - self.postprocesser1 = PrePostProcessLayer(postprocess_cmd, d_model, - prepostprocess_dropout) - - self.preprocesser2 = PrePostProcessLayer(preprocess_cmd, d_model, - prepostprocess_dropout) - self.ffn = FFN(d_inner_hid, d_model, relu_dropout) - self.postprocesser2 = PrePostProcessLayer(postprocess_cmd, d_model, - prepostprocess_dropout) - - def forward(self, enc_input, attn_bias): - attn_output = self.self_attn( - self.preprocesser1(enc_input), None, None, attn_bias) - attn_output = self.postprocesser1(attn_output, enc_input) - ffn_output = self.ffn(self.preprocesser2(attn_output)) - ffn_output = self.postprocesser2(ffn_output, attn_output) - return ffn_output - - -class MultiHeadAttention(nn.Layer): - """ - Multi-Head Attention - """ - - def __init__(self, d_key, d_value, d_model, n_head=1, dropout_rate=0.): - super(MultiHeadAttention, self).__init__() - self.n_head = n_head - self.d_key = d_key - self.d_value = d_value - self.d_model = d_model - self.dropout_rate = dropout_rate - self.q_fc = paddle.nn.Linear( - in_features=d_model, out_features=d_key * n_head, bias_attr=False) - self.k_fc = paddle.nn.Linear( - in_features=d_model, out_features=d_key * n_head, bias_attr=False) - self.v_fc = paddle.nn.Linear( - in_features=d_model, out_features=d_value * n_head, bias_attr=False) - self.proj_fc = paddle.nn.Linear( - in_features=d_value * n_head, out_features=d_model, bias_attr=False) - - def _prepare_qkv(self, queries, keys, values, cache=None): - if keys is None: # self-attention - keys, values = queries, queries - static_kv = False - else: # cross-attention - static_kv = True - - q = self.q_fc(queries) - q = paddle.reshape(x=q, shape=[0, 0, self.n_head, self.d_key]) - q = paddle.transpose(x=q, perm=[0, 2, 1, 3]) - - if cache is not None and static_kv and "static_k" in cache: - # for encoder-decoder attention in inference and has cached - k = cache["static_k"] - v = cache["static_v"] - else: - k = self.k_fc(keys) - v = self.v_fc(values) - k = paddle.reshape(x=k, shape=[0, 0, self.n_head, self.d_key]) - k = paddle.transpose(x=k, perm=[0, 2, 1, 3]) - v = paddle.reshape(x=v, shape=[0, 0, self.n_head, self.d_value]) - v = paddle.transpose(x=v, perm=[0, 2, 1, 3]) - - if cache is not None: - if static_kv and not "static_k" in cache: - # for encoder-decoder attention in inference and has not cached - cache["static_k"], cache["static_v"] = k, v - elif not static_kv: - # for decoder self-attention in inference - cache_k, cache_v = cache["k"], cache["v"] - k = paddle.concat([cache_k, k], axis=2) - v = paddle.concat([cache_v, v], axis=2) - cache["k"], cache["v"] = k, v - - return q, k, v - - def forward(self, queries, keys, values, attn_bias, cache=None): - # compute q ,k ,v - keys = queries if keys is None else keys - values = keys if values is None else values - q, k, v = self._prepare_qkv(queries, keys, values, cache) - - # scale dot product attention - product = paddle.matmul(x=q, y=k, transpose_y=True) - product = product * self.d_model**-0.5 - if attn_bias is not None: - product += attn_bias - weights = F.softmax(product) - if self.dropout_rate: - weights = F.dropout( - weights, p=self.dropout_rate, mode="downscale_in_infer") - out = paddle.matmul(weights, v) - - # combine heads - out = paddle.transpose(out, perm=[0, 2, 1, 3]) - out = paddle.reshape(x=out, shape=[0, 0, out.shape[2] * out.shape[3]]) - - # project to output - out = self.proj_fc(out) - - return out - - -class PrePostProcessLayer(nn.Layer): - """ - PrePostProcessLayer - """ - - def __init__(self, process_cmd, d_model, dropout_rate): - super(PrePostProcessLayer, self).__init__() - self.process_cmd = process_cmd - self.functors = [] - for cmd in self.process_cmd: - if cmd == "a": # add residual connection - self.functors.append(lambda x, y: x + y if y is not None else x) - elif cmd == "n": # add layer normalization - self.functors.append( - self.add_sublayer( - "layer_norm_%d" % len(self.sublayers()), - paddle.nn.LayerNorm( - normalized_shape=d_model, - weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.))))) - elif cmd == "d": # add dropout - self.functors.append(lambda x: F.dropout( - x, p=dropout_rate, mode="downscale_in_infer") - if dropout_rate else x) - - def forward(self, x, residual=None): - for i, cmd in enumerate(self.process_cmd): - if cmd == "a": - x = self.functors[i](x, residual) - else: - x = self.functors[i](x) - return x - - -class PrepareEncoder(nn.Layer): - def __init__(self, - src_vocab_size, - src_emb_dim, - src_max_len, - dropout_rate=0, - bos_idx=0, - word_emb_param_name=None, - pos_enc_param_name=None): - super(PrepareEncoder, self).__init__() - self.src_emb_dim = src_emb_dim - self.src_max_len = src_max_len - self.emb = paddle.nn.Embedding( - num_embeddings=self.src_max_len, embedding_dim=self.src_emb_dim) - self.dropout_rate = dropout_rate - - def forward(self, src_word, src_pos): - src_word_emb = src_word - src_word_emb = fluid.layers.cast(src_word_emb, 'float32') - src_word_emb = paddle.scale(x=src_word_emb, scale=self.src_emb_dim**0.5) - src_pos = paddle.squeeze(src_pos, axis=-1) - src_pos_enc = self.emb(src_pos) - src_pos_enc.stop_gradient = True - enc_input = src_word_emb + src_pos_enc - if self.dropout_rate: - out = F.dropout( - x=enc_input, p=self.dropout_rate, mode="downscale_in_infer") - else: - out = enc_input - return out - - -class PrepareDecoder(nn.Layer): - def __init__(self, - src_vocab_size, - src_emb_dim, - src_max_len, - dropout_rate=0, - bos_idx=0, - word_emb_param_name=None, - pos_enc_param_name=None): - super(PrepareDecoder, self).__init__() - self.src_emb_dim = src_emb_dim - """ - self.emb0 = Embedding(num_embeddings=src_vocab_size, - embedding_dim=src_emb_dim) - """ - self.emb0 = paddle.nn.Embedding( - num_embeddings=src_vocab_size, - embedding_dim=self.src_emb_dim, - padding_idx=bos_idx, - weight_attr=paddle.ParamAttr( - name=word_emb_param_name, - initializer=nn.initializer.Normal(0., src_emb_dim**-0.5))) - self.emb1 = paddle.nn.Embedding( - num_embeddings=src_max_len, - embedding_dim=self.src_emb_dim, - weight_attr=paddle.ParamAttr(name=pos_enc_param_name)) - self.dropout_rate = dropout_rate - - def forward(self, src_word, src_pos): - src_word = fluid.layers.cast(src_word, 'int64') - src_word = paddle.squeeze(src_word, axis=-1) - src_word_emb = self.emb0(src_word) - src_word_emb = paddle.scale(x=src_word_emb, scale=self.src_emb_dim**0.5) - src_pos = paddle.squeeze(src_pos, axis=-1) - src_pos_enc = self.emb1(src_pos) - src_pos_enc.stop_gradient = True - enc_input = src_word_emb + src_pos_enc - if self.dropout_rate: - out = F.dropout( - x=enc_input, p=self.dropout_rate, mode="downscale_in_infer") - else: - out = enc_input - return out - - -class FFN(nn.Layer): - """ - Feed-Forward Network - """ - - def __init__(self, d_inner_hid, d_model, dropout_rate): - super(FFN, self).__init__() - self.dropout_rate = dropout_rate - self.fc1 = paddle.nn.Linear( - in_features=d_model, out_features=d_inner_hid) - self.fc2 = paddle.nn.Linear( - in_features=d_inner_hid, out_features=d_model) - - def forward(self, x): - hidden = self.fc1(x) - hidden = F.relu(hidden) - if self.dropout_rate: - hidden = F.dropout( - hidden, p=self.dropout_rate, mode="downscale_in_infer") - out = self.fc2(hidden) - return out diff --git a/backend/ppocr/modeling/heads/table_att_head.py b/backend/ppocr/modeling/heads/table_att_head.py deleted file mode 100644 index e354f40..0000000 --- a/backend/ppocr/modeling/heads/table_att_head.py +++ /dev/null @@ -1,246 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -import paddle.nn as nn -import paddle.nn.functional as F -import numpy as np - - -class TableAttentionHead(nn.Layer): - def __init__(self, - in_channels, - hidden_size, - loc_type, - in_max_len=488, - max_text_length=100, - max_elem_length=800, - max_cell_num=500, - **kwargs): - super(TableAttentionHead, self).__init__() - self.input_size = in_channels[-1] - self.hidden_size = hidden_size - self.elem_num = 30 - self.max_text_length = max_text_length - self.max_elem_length = max_elem_length - self.max_cell_num = max_cell_num - - self.structure_attention_cell = AttentionGRUCell( - self.input_size, hidden_size, self.elem_num, use_gru=False) - self.structure_generator = nn.Linear(hidden_size, self.elem_num) - self.loc_type = loc_type - self.in_max_len = in_max_len - - if self.loc_type == 1: - self.loc_generator = nn.Linear(hidden_size, 4) - else: - if self.in_max_len == 640: - self.loc_fea_trans = nn.Linear(400, self.max_elem_length + 1) - elif self.in_max_len == 800: - self.loc_fea_trans = nn.Linear(625, self.max_elem_length + 1) - else: - self.loc_fea_trans = nn.Linear(256, self.max_elem_length + 1) - self.loc_generator = nn.Linear(self.input_size + hidden_size, 4) - - def _char_to_onehot(self, input_char, onehot_dim): - input_ont_hot = F.one_hot(input_char, onehot_dim) - return input_ont_hot - - def forward(self, inputs, targets=None): - # if and else branch are both needed when you want to assign a variable - # if you modify the var in just one branch, then the modification will not work. - fea = inputs[-1] - if len(fea.shape) == 3: - pass - else: - last_shape = int(np.prod(fea.shape[2:])) # gry added - fea = paddle.reshape(fea, [fea.shape[0], fea.shape[1], last_shape]) - fea = fea.transpose([0, 2, 1]) # (NTC)(batch, width, channels) - batch_size = fea.shape[0] - - hidden = paddle.zeros((batch_size, self.hidden_size)) - output_hiddens = [] - if self.training and targets is not None: - structure = targets[0] - for i in range(self.max_elem_length + 1): - elem_onehots = self._char_to_onehot( - structure[:, i], onehot_dim=self.elem_num) - (outputs, hidden), alpha = self.structure_attention_cell( - hidden, fea, elem_onehots) - output_hiddens.append(paddle.unsqueeze(outputs, axis=1)) - output = paddle.concat(output_hiddens, axis=1) - structure_probs = self.structure_generator(output) - if self.loc_type == 1: - loc_preds = self.loc_generator(output) - loc_preds = F.sigmoid(loc_preds) - else: - loc_fea = fea.transpose([0, 2, 1]) - loc_fea = self.loc_fea_trans(loc_fea) - loc_fea = loc_fea.transpose([0, 2, 1]) - loc_concat = paddle.concat([output, loc_fea], axis=2) - loc_preds = self.loc_generator(loc_concat) - loc_preds = F.sigmoid(loc_preds) - else: - temp_elem = paddle.zeros(shape=[batch_size], dtype="int32") - structure_probs = None - loc_preds = None - elem_onehots = None - outputs = None - alpha = None - max_elem_length = paddle.to_tensor(self.max_elem_length) - i = 0 - while i < max_elem_length + 1: - elem_onehots = self._char_to_onehot( - temp_elem, onehot_dim=self.elem_num) - (outputs, hidden), alpha = self.structure_attention_cell( - hidden, fea, elem_onehots) - output_hiddens.append(paddle.unsqueeze(outputs, axis=1)) - structure_probs_step = self.structure_generator(outputs) - temp_elem = structure_probs_step.argmax(axis=1, dtype="int32") - i += 1 - - output = paddle.concat(output_hiddens, axis=1) - structure_probs = self.structure_generator(output) - structure_probs = F.softmax(structure_probs) - if self.loc_type == 1: - loc_preds = self.loc_generator(output) - loc_preds = F.sigmoid(loc_preds) - else: - loc_fea = fea.transpose([0, 2, 1]) - loc_fea = self.loc_fea_trans(loc_fea) - loc_fea = loc_fea.transpose([0, 2, 1]) - loc_concat = paddle.concat([output, loc_fea], axis=2) - loc_preds = self.loc_generator(loc_concat) - loc_preds = F.sigmoid(loc_preds) - return {'structure_probs': structure_probs, 'loc_preds': loc_preds} - - -class AttentionGRUCell(nn.Layer): - def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False): - super(AttentionGRUCell, self).__init__() - self.i2h = nn.Linear(input_size, hidden_size, bias_attr=False) - self.h2h = nn.Linear(hidden_size, hidden_size) - self.score = nn.Linear(hidden_size, 1, bias_attr=False) - self.rnn = nn.GRUCell( - input_size=input_size + num_embeddings, hidden_size=hidden_size) - self.hidden_size = hidden_size - - def forward(self, prev_hidden, batch_H, char_onehots): - batch_H_proj = self.i2h(batch_H) - prev_hidden_proj = paddle.unsqueeze(self.h2h(prev_hidden), axis=1) - res = paddle.add(batch_H_proj, prev_hidden_proj) - res = paddle.tanh(res) - e = self.score(res) - alpha = F.softmax(e, axis=1) - alpha = paddle.transpose(alpha, [0, 2, 1]) - context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1) - concat_context = paddle.concat([context, char_onehots], 1) - cur_hidden = self.rnn(concat_context, prev_hidden) - return cur_hidden, alpha - - -class AttentionLSTM(nn.Layer): - def __init__(self, in_channels, out_channels, hidden_size, **kwargs): - super(AttentionLSTM, self).__init__() - self.input_size = in_channels - self.hidden_size = hidden_size - self.num_classes = out_channels - - self.attention_cell = AttentionLSTMCell( - in_channels, hidden_size, out_channels, use_gru=False) - self.generator = nn.Linear(hidden_size, out_channels) - - def _char_to_onehot(self, input_char, onehot_dim): - input_ont_hot = F.one_hot(input_char, onehot_dim) - return input_ont_hot - - def forward(self, inputs, targets=None, batch_max_length=25): - batch_size = inputs.shape[0] - num_steps = batch_max_length - - hidden = (paddle.zeros((batch_size, self.hidden_size)), paddle.zeros( - (batch_size, self.hidden_size))) - output_hiddens = [] - - if targets is not None: - for i in range(num_steps): - # one-hot vectors for a i-th char - char_onehots = self._char_to_onehot( - targets[:, i], onehot_dim=self.num_classes) - hidden, alpha = self.attention_cell(hidden, inputs, - char_onehots) - - hidden = (hidden[1][0], hidden[1][1]) - output_hiddens.append(paddle.unsqueeze(hidden[0], axis=1)) - output = paddle.concat(output_hiddens, axis=1) - probs = self.generator(output) - - else: - targets = paddle.zeros(shape=[batch_size], dtype="int32") - probs = None - - for i in range(num_steps): - char_onehots = self._char_to_onehot( - targets, onehot_dim=self.num_classes) - hidden, alpha = self.attention_cell(hidden, inputs, - char_onehots) - probs_step = self.generator(hidden[0]) - hidden = (hidden[1][0], hidden[1][1]) - if probs is None: - probs = paddle.unsqueeze(probs_step, axis=1) - else: - probs = paddle.concat( - [probs, paddle.unsqueeze( - probs_step, axis=1)], axis=1) - - next_input = probs_step.argmax(axis=1) - - targets = next_input - - return probs - - -class AttentionLSTMCell(nn.Layer): - def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False): - super(AttentionLSTMCell, self).__init__() - self.i2h = nn.Linear(input_size, hidden_size, bias_attr=False) - self.h2h = nn.Linear(hidden_size, hidden_size) - self.score = nn.Linear(hidden_size, 1, bias_attr=False) - if not use_gru: - self.rnn = nn.LSTMCell( - input_size=input_size + num_embeddings, hidden_size=hidden_size) - else: - self.rnn = nn.GRUCell( - input_size=input_size + num_embeddings, hidden_size=hidden_size) - - self.hidden_size = hidden_size - - def forward(self, prev_hidden, batch_H, char_onehots): - batch_H_proj = self.i2h(batch_H) - prev_hidden_proj = paddle.unsqueeze(self.h2h(prev_hidden[0]), axis=1) - res = paddle.add(batch_H_proj, prev_hidden_proj) - res = paddle.tanh(res) - e = self.score(res) - - alpha = F.softmax(e, axis=1) - alpha = paddle.transpose(alpha, [0, 2, 1]) - context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1) - concat_context = paddle.concat([context, char_onehots], 1) - cur_hidden = self.rnn(concat_context, prev_hidden) - - return cur_hidden, alpha diff --git a/backend/ppocr/modeling/necks/__init__.py b/backend/ppocr/modeling/necks/__init__.py deleted file mode 100644 index e10b082..0000000 --- a/backend/ppocr/modeling/necks/__init__.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -__all__ = ['build_neck'] - - -def build_neck(config): - from .db_fpn import DBFPN, RSEFPN, LKPAN - from .east_fpn import EASTFPN - from .sast_fpn import SASTFPN - from .rnn import SequenceEncoder - from .pg_fpn import PGFPN - from .table_fpn import TableFPN - from .fpn import FPN - from .fce_fpn import FCEFPN - from .pren_fpn import PRENFPN - support_dict = [ - 'FPN', 'FCEFPN', 'LKPAN', 'DBFPN', 'RSEFPN', 'EASTFPN', 'SASTFPN', - 'SequenceEncoder', 'PGFPN', 'TableFPN', 'PRENFPN' - ] - - module_name = config.pop('name') - assert module_name in support_dict, Exception('neck only support {}'.format( - support_dict)) - module_class = eval(module_name)(**config) - return module_class diff --git a/backend/ppocr/modeling/necks/db_fpn.py b/backend/ppocr/modeling/necks/db_fpn.py deleted file mode 100644 index 93ed2db..0000000 --- a/backend/ppocr/modeling/necks/db_fpn.py +++ /dev/null @@ -1,358 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../../..'))) - -from ppocr.modeling.backbones.det_mobilenet_v3 import SEModule - - -class DSConv(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - padding, - stride=1, - groups=None, - if_act=True, - act="relu", - **kwargs): - super(DSConv, self).__init__() - if groups == None: - groups = in_channels - self.if_act = if_act - self.act = act - self.conv1 = nn.Conv2D( - in_channels=in_channels, - out_channels=in_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - bias_attr=False) - - self.bn1 = nn.BatchNorm(num_channels=in_channels, act=None) - - self.conv2 = nn.Conv2D( - in_channels=in_channels, - out_channels=int(in_channels * 4), - kernel_size=1, - stride=1, - bias_attr=False) - - self.bn2 = nn.BatchNorm(num_channels=int(in_channels * 4), act=None) - - self.conv3 = nn.Conv2D( - in_channels=int(in_channels * 4), - out_channels=out_channels, - kernel_size=1, - stride=1, - bias_attr=False) - self._c = [in_channels, out_channels] - if in_channels != out_channels: - self.conv_end = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - bias_attr=False) - - def forward(self, inputs): - - x = self.conv1(inputs) - x = self.bn1(x) - - x = self.conv2(x) - x = self.bn2(x) - if self.if_act: - if self.act == "relu": - x = F.relu(x) - elif self.act == "hardswish": - x = F.hardswish(x) - else: - print("The activation function({}) is selected incorrectly.". - format(self.act)) - exit() - - x = self.conv3(x) - if self._c[0] != self._c[1]: - x = x + self.conv_end(inputs) - return x - - -class DBFPN(nn.Layer): - def __init__(self, in_channels, out_channels, **kwargs): - super(DBFPN, self).__init__() - self.out_channels = out_channels - weight_attr = paddle.nn.initializer.KaimingUniform() - - self.in2_conv = nn.Conv2D( - in_channels=in_channels[0], - out_channels=self.out_channels, - kernel_size=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.in3_conv = nn.Conv2D( - in_channels=in_channels[1], - out_channels=self.out_channels, - kernel_size=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.in4_conv = nn.Conv2D( - in_channels=in_channels[2], - out_channels=self.out_channels, - kernel_size=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.in5_conv = nn.Conv2D( - in_channels=in_channels[3], - out_channels=self.out_channels, - kernel_size=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.p5_conv = nn.Conv2D( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.p4_conv = nn.Conv2D( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.p3_conv = nn.Conv2D( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.p2_conv = nn.Conv2D( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - - def forward(self, x): - c2, c3, c4, c5 = x - - in5 = self.in5_conv(c5) - in4 = self.in4_conv(c4) - in3 = self.in3_conv(c3) - in2 = self.in2_conv(c2) - - out4 = in4 + F.upsample( - in5, scale_factor=2, mode="nearest", align_mode=1) # 1/16 - out3 = in3 + F.upsample( - out4, scale_factor=2, mode="nearest", align_mode=1) # 1/8 - out2 = in2 + F.upsample( - out3, scale_factor=2, mode="nearest", align_mode=1) # 1/4 - - p5 = self.p5_conv(in5) - p4 = self.p4_conv(out4) - p3 = self.p3_conv(out3) - p2 = self.p2_conv(out2) - p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1) - p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1) - p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1) - - fuse = paddle.concat([p5, p4, p3, p2], axis=1) - return fuse - - -class RSELayer(nn.Layer): - def __init__(self, in_channels, out_channels, kernel_size, shortcut=True): - super(RSELayer, self).__init__() - weight_attr = paddle.nn.initializer.KaimingUniform() - self.out_channels = out_channels - self.in_conv = nn.Conv2D( - in_channels=in_channels, - out_channels=self.out_channels, - kernel_size=kernel_size, - padding=int(kernel_size // 2), - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.se_block = SEModule(self.out_channels) - self.shortcut = shortcut - - def forward(self, ins): - x = self.in_conv(ins) - if self.shortcut: - out = x + self.se_block(x) - else: - out = self.se_block(x) - return out - - -class RSEFPN(nn.Layer): - def __init__(self, in_channels, out_channels, shortcut=True, **kwargs): - super(RSEFPN, self).__init__() - self.out_channels = out_channels - self.ins_conv = nn.LayerList() - self.inp_conv = nn.LayerList() - - for i in range(len(in_channels)): - self.ins_conv.append( - RSELayer( - in_channels[i], - out_channels, - kernel_size=1, - shortcut=shortcut)) - self.inp_conv.append( - RSELayer( - out_channels, - out_channels // 4, - kernel_size=3, - shortcut=shortcut)) - - def forward(self, x): - c2, c3, c4, c5 = x - - in5 = self.ins_conv[3](c5) - in4 = self.ins_conv[2](c4) - in3 = self.ins_conv[1](c3) - in2 = self.ins_conv[0](c2) - - out4 = in4 + F.upsample( - in5, scale_factor=2, mode="nearest", align_mode=1) # 1/16 - out3 = in3 + F.upsample( - out4, scale_factor=2, mode="nearest", align_mode=1) # 1/8 - out2 = in2 + F.upsample( - out3, scale_factor=2, mode="nearest", align_mode=1) # 1/4 - - p5 = self.inp_conv[3](in5) - p4 = self.inp_conv[2](out4) - p3 = self.inp_conv[1](out3) - p2 = self.inp_conv[0](out2) - - p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1) - p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1) - p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1) - - fuse = paddle.concat([p5, p4, p3, p2], axis=1) - return fuse - - -class LKPAN(nn.Layer): - def __init__(self, in_channels, out_channels, mode='large', **kwargs): - super(LKPAN, self).__init__() - self.out_channels = out_channels - weight_attr = paddle.nn.initializer.KaimingUniform() - - self.ins_conv = nn.LayerList() - self.inp_conv = nn.LayerList() - # pan head - self.pan_head_conv = nn.LayerList() - self.pan_lat_conv = nn.LayerList() - - if mode.lower() == 'lite': - p_layer = DSConv - elif mode.lower() == 'large': - p_layer = nn.Conv2D - else: - raise ValueError( - "mode can only be one of ['lite', 'large'], but received {}". - format(mode)) - - for i in range(len(in_channels)): - self.ins_conv.append( - nn.Conv2D( - in_channels=in_channels[i], - out_channels=self.out_channels, - kernel_size=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False)) - - self.inp_conv.append( - p_layer( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=9, - padding=4, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False)) - - if i > 0: - self.pan_head_conv.append( - nn.Conv2D( - in_channels=self.out_channels // 4, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - stride=2, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False)) - self.pan_lat_conv.append( - p_layer( - in_channels=self.out_channels // 4, - out_channels=self.out_channels // 4, - kernel_size=9, - padding=4, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False)) - - def forward(self, x): - c2, c3, c4, c5 = x - - in5 = self.ins_conv[3](c5) - in4 = self.ins_conv[2](c4) - in3 = self.ins_conv[1](c3) - in2 = self.ins_conv[0](c2) - - out4 = in4 + F.upsample( - in5, scale_factor=2, mode="nearest", align_mode=1) # 1/16 - out3 = in3 + F.upsample( - out4, scale_factor=2, mode="nearest", align_mode=1) # 1/8 - out2 = in2 + F.upsample( - out3, scale_factor=2, mode="nearest", align_mode=1) # 1/4 - - f5 = self.inp_conv[3](in5) - f4 = self.inp_conv[2](out4) - f3 = self.inp_conv[1](out3) - f2 = self.inp_conv[0](out2) - - pan3 = f3 + self.pan_head_conv[0](f2) - pan4 = f4 + self.pan_head_conv[1](pan3) - pan5 = f5 + self.pan_head_conv[2](pan4) - - p2 = self.pan_lat_conv[0](f2) - p3 = self.pan_lat_conv[1](pan3) - p4 = self.pan_lat_conv[2](pan4) - p5 = self.pan_lat_conv[3](pan5) - - p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1) - p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1) - p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1) - - fuse = paddle.concat([p5, p4, p3, p2], axis=1) - return fuse diff --git a/backend/ppocr/modeling/necks/east_fpn.py b/backend/ppocr/modeling/necks/east_fpn.py deleted file mode 100644 index 120ff15..0000000 --- a/backend/ppocr/modeling/necks/east_fpn.py +++ /dev/null @@ -1,188 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - padding, - groups=1, - if_act=True, - act=None, - name=None): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name="bn_" + name + "_scale"), - bias_attr=ParamAttr(name="bn_" + name + "_offset"), - moving_mean_name="bn_" + name + "_mean", - moving_variance_name="bn_" + name + "_variance") - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - return x - - -class DeConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - padding, - groups=1, - if_act=True, - act=None, - name=None): - super(DeConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.deconv = nn.Conv2DTranspose( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name="bn_" + name + "_scale"), - bias_attr=ParamAttr(name="bn_" + name + "_offset"), - moving_mean_name="bn_" + name + "_mean", - moving_variance_name="bn_" + name + "_variance") - - def forward(self, x): - x = self.deconv(x) - x = self.bn(x) - return x - - -class EASTFPN(nn.Layer): - def __init__(self, in_channels, model_name, **kwargs): - super(EASTFPN, self).__init__() - self.model_name = model_name - if self.model_name == "large": - self.out_channels = 128 - else: - self.out_channels = 64 - self.in_channels = in_channels[::-1] - self.h1_conv = ConvBNLayer( - in_channels=self.out_channels+self.in_channels[1], - out_channels=self.out_channels, - kernel_size=3, - stride=1, - padding=1, - if_act=True, - act='relu', - name="unet_h_1") - self.h2_conv = ConvBNLayer( - in_channels=self.out_channels+self.in_channels[2], - out_channels=self.out_channels, - kernel_size=3, - stride=1, - padding=1, - if_act=True, - act='relu', - name="unet_h_2") - self.h3_conv = ConvBNLayer( - in_channels=self.out_channels+self.in_channels[3], - out_channels=self.out_channels, - kernel_size=3, - stride=1, - padding=1, - if_act=True, - act='relu', - name="unet_h_3") - self.g0_deconv = DeConvBNLayer( - in_channels=self.in_channels[0], - out_channels=self.out_channels, - kernel_size=4, - stride=2, - padding=1, - if_act=True, - act='relu', - name="unet_g_0") - self.g1_deconv = DeConvBNLayer( - in_channels=self.out_channels, - out_channels=self.out_channels, - kernel_size=4, - stride=2, - padding=1, - if_act=True, - act='relu', - name="unet_g_1") - self.g2_deconv = DeConvBNLayer( - in_channels=self.out_channels, - out_channels=self.out_channels, - kernel_size=4, - stride=2, - padding=1, - if_act=True, - act='relu', - name="unet_g_2") - self.g3_conv = ConvBNLayer( - in_channels=self.out_channels, - out_channels=self.out_channels, - kernel_size=3, - stride=1, - padding=1, - if_act=True, - act='relu', - name="unet_g_3") - - def forward(self, x): - f = x[::-1] - - h = f[0] - g = self.g0_deconv(h) - h = paddle.concat([g, f[1]], axis=1) - h = self.h1_conv(h) - g = self.g1_deconv(h) - h = paddle.concat([g, f[2]], axis=1) - h = self.h2_conv(h) - g = self.g2_deconv(h) - h = paddle.concat([g, f[3]], axis=1) - h = self.h3_conv(h) - g = self.g3_conv(h) - - return g \ No newline at end of file diff --git a/backend/ppocr/modeling/necks/fce_fpn.py b/backend/ppocr/modeling/necks/fce_fpn.py deleted file mode 100644 index 954e964..0000000 --- a/backend/ppocr/modeling/necks/fce_fpn.py +++ /dev/null @@ -1,280 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.3/ppdet/modeling/necks/fpn.py -""" - -import paddle.nn as nn -import paddle.nn.functional as F -from paddle import ParamAttr -from paddle.nn.initializer import XavierUniform -from paddle.nn.initializer import Normal -from paddle.regularizer import L2Decay - -__all__ = ['FCEFPN'] - - -class ConvNormLayer(nn.Layer): - def __init__(self, - ch_in, - ch_out, - filter_size, - stride, - groups=1, - norm_type='bn', - norm_decay=0., - norm_groups=32, - lr_scale=1., - freeze_norm=False, - initializer=Normal( - mean=0., std=0.01)): - super(ConvNormLayer, self).__init__() - assert norm_type in ['bn', 'sync_bn', 'gn'] - - bias_attr = False - - self.conv = nn.Conv2D( - in_channels=ch_in, - out_channels=ch_out, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr( - initializer=initializer, learning_rate=1.), - bias_attr=bias_attr) - - norm_lr = 0. if freeze_norm else 1. - param_attr = ParamAttr( - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay) if norm_decay is not None else None) - bias_attr = ParamAttr( - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay) if norm_decay is not None else None) - if norm_type == 'bn': - self.norm = nn.BatchNorm2D( - ch_out, weight_attr=param_attr, bias_attr=bias_attr) - elif norm_type == 'sync_bn': - self.norm = nn.SyncBatchNorm( - ch_out, weight_attr=param_attr, bias_attr=bias_attr) - elif norm_type == 'gn': - self.norm = nn.GroupNorm( - num_groups=norm_groups, - num_channels=ch_out, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs): - out = self.conv(inputs) - out = self.norm(out) - return out - - -class FCEFPN(nn.Layer): - """ - Feature Pyramid Network, see https://arxiv.org/abs/1612.03144 - Args: - in_channels (list[int]): input channels of each level which can be - derived from the output shape of backbone by from_config - out_channels (list[int]): output channel of each level - spatial_scales (list[float]): the spatial scales between input feature - maps and original input image which can be derived from the output - shape of backbone by from_config - has_extra_convs (bool): whether to add extra conv to the last level. - default False - extra_stage (int): the number of extra stages added to the last level. - default 1 - use_c5 (bool): Whether to use c5 as the input of extra stage, - otherwise p5 is used. default True - norm_type (string|None): The normalization type in FPN module. If - norm_type is None, norm will not be used after conv and if - norm_type is string, bn, gn, sync_bn are available. default None - norm_decay (float): weight decay for normalization layer weights. - default 0. - freeze_norm (bool): whether to freeze normalization layer. - default False - relu_before_extra_convs (bool): whether to add relu before extra convs. - default False - - """ - - def __init__(self, - in_channels, - out_channels, - spatial_scales=[0.25, 0.125, 0.0625, 0.03125], - has_extra_convs=False, - extra_stage=1, - use_c5=True, - norm_type=None, - norm_decay=0., - freeze_norm=False, - relu_before_extra_convs=True): - super(FCEFPN, self).__init__() - self.out_channels = out_channels - for s in range(extra_stage): - spatial_scales = spatial_scales + [spatial_scales[-1] / 2.] - self.spatial_scales = spatial_scales - self.has_extra_convs = has_extra_convs - self.extra_stage = extra_stage - self.use_c5 = use_c5 - self.relu_before_extra_convs = relu_before_extra_convs - self.norm_type = norm_type - self.norm_decay = norm_decay - self.freeze_norm = freeze_norm - - self.lateral_convs = [] - self.fpn_convs = [] - fan = out_channels * 3 * 3 - - # stage index 0,1,2,3 stands for res2,res3,res4,res5 on ResNet Backbone - # 0 <= st_stage < ed_stage <= 3 - st_stage = 4 - len(in_channels) - ed_stage = st_stage + len(in_channels) - 1 - for i in range(st_stage, ed_stage + 1): - if i == 3: - lateral_name = 'fpn_inner_res5_sum' - else: - lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2) - in_c = in_channels[i - st_stage] - if self.norm_type is not None: - lateral = self.add_sublayer( - lateral_name, - ConvNormLayer( - ch_in=in_c, - ch_out=out_channels, - filter_size=1, - stride=1, - norm_type=self.norm_type, - norm_decay=self.norm_decay, - freeze_norm=self.freeze_norm, - initializer=XavierUniform(fan_out=in_c))) - else: - lateral = self.add_sublayer( - lateral_name, - nn.Conv2D( - in_channels=in_c, - out_channels=out_channels, - kernel_size=1, - weight_attr=ParamAttr( - initializer=XavierUniform(fan_out=in_c)))) - self.lateral_convs.append(lateral) - - for i in range(st_stage, ed_stage + 1): - fpn_name = 'fpn_res{}_sum'.format(i + 2) - if self.norm_type is not None: - fpn_conv = self.add_sublayer( - fpn_name, - ConvNormLayer( - ch_in=out_channels, - ch_out=out_channels, - filter_size=3, - stride=1, - norm_type=self.norm_type, - norm_decay=self.norm_decay, - freeze_norm=self.freeze_norm, - initializer=XavierUniform(fan_out=fan))) - else: - fpn_conv = self.add_sublayer( - fpn_name, - nn.Conv2D( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - padding=1, - weight_attr=ParamAttr( - initializer=XavierUniform(fan_out=fan)))) - self.fpn_convs.append(fpn_conv) - - # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5) - if self.has_extra_convs: - for i in range(self.extra_stage): - lvl = ed_stage + 1 + i - if i == 0 and self.use_c5: - in_c = in_channels[-1] - else: - in_c = out_channels - extra_fpn_name = 'fpn_{}'.format(lvl + 2) - if self.norm_type is not None: - extra_fpn_conv = self.add_sublayer( - extra_fpn_name, - ConvNormLayer( - ch_in=in_c, - ch_out=out_channels, - filter_size=3, - stride=2, - norm_type=self.norm_type, - norm_decay=self.norm_decay, - freeze_norm=self.freeze_norm, - initializer=XavierUniform(fan_out=fan))) - else: - extra_fpn_conv = self.add_sublayer( - extra_fpn_name, - nn.Conv2D( - in_channels=in_c, - out_channels=out_channels, - kernel_size=3, - stride=2, - padding=1, - weight_attr=ParamAttr( - initializer=XavierUniform(fan_out=fan)))) - self.fpn_convs.append(extra_fpn_conv) - - @classmethod - def from_config(cls, cfg, input_shape): - return { - 'in_channels': [i.channels for i in input_shape], - 'spatial_scales': [1.0 / i.stride for i in input_shape], - } - - def forward(self, body_feats): - laterals = [] - num_levels = len(body_feats) - - for i in range(num_levels): - laterals.append(self.lateral_convs[i](body_feats[i])) - - for i in range(1, num_levels): - lvl = num_levels - i - upsample = F.interpolate( - laterals[lvl], - scale_factor=2., - mode='nearest', ) - laterals[lvl - 1] += upsample - - fpn_output = [] - for lvl in range(num_levels): - fpn_output.append(self.fpn_convs[lvl](laterals[lvl])) - - if self.extra_stage > 0: - # use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN) - if not self.has_extra_convs: - assert self.extra_stage == 1, 'extra_stage should be 1 if FPN has not extra convs' - fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2)) - # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5) - else: - if self.use_c5: - extra_source = body_feats[-1] - else: - extra_source = fpn_output[-1] - fpn_output.append(self.fpn_convs[num_levels](extra_source)) - - for i in range(1, self.extra_stage): - if self.relu_before_extra_convs: - fpn_output.append(self.fpn_convs[num_levels + i](F.relu( - fpn_output[-1]))) - else: - fpn_output.append(self.fpn_convs[num_levels + i]( - fpn_output[-1])) - return fpn_output diff --git a/backend/ppocr/modeling/necks/fpn.py b/backend/ppocr/modeling/necks/fpn.py deleted file mode 100644 index 48c85b1..0000000 --- a/backend/ppocr/modeling/necks/fpn.py +++ /dev/null @@ -1,138 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/whai362/PSENet/blob/python3/models/neck/fpn.py -""" - -import paddle.nn as nn -import paddle -import math -import paddle.nn.functional as F - - -class Conv_BN_ReLU(nn.Layer): - def __init__(self, - in_planes, - out_planes, - kernel_size=1, - stride=1, - padding=0): - super(Conv_BN_ReLU, self).__init__() - self.conv = nn.Conv2D( - in_planes, - out_planes, - kernel_size=kernel_size, - stride=stride, - padding=padding, - bias_attr=False) - self.bn = nn.BatchNorm2D(out_planes, momentum=0.1) - self.relu = nn.ReLU() - - for m in self.sublayers(): - if isinstance(m, nn.Conv2D): - n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels - m.weight = paddle.create_parameter( - shape=m.weight.shape, - dtype='float32', - default_initializer=paddle.nn.initializer.Normal( - 0, math.sqrt(2. / n))) - elif isinstance(m, nn.BatchNorm2D): - m.weight = paddle.create_parameter( - shape=m.weight.shape, - dtype='float32', - default_initializer=paddle.nn.initializer.Constant(1.0)) - m.bias = paddle.create_parameter( - shape=m.bias.shape, - dtype='float32', - default_initializer=paddle.nn.initializer.Constant(0.0)) - - def forward(self, x): - return self.relu(self.bn(self.conv(x))) - - -class FPN(nn.Layer): - def __init__(self, in_channels, out_channels): - super(FPN, self).__init__() - - # Top layer - self.toplayer_ = Conv_BN_ReLU( - in_channels[3], out_channels, kernel_size=1, stride=1, padding=0) - # Lateral layers - self.latlayer1_ = Conv_BN_ReLU( - in_channels[2], out_channels, kernel_size=1, stride=1, padding=0) - - self.latlayer2_ = Conv_BN_ReLU( - in_channels[1], out_channels, kernel_size=1, stride=1, padding=0) - - self.latlayer3_ = Conv_BN_ReLU( - in_channels[0], out_channels, kernel_size=1, stride=1, padding=0) - - # Smooth layers - self.smooth1_ = Conv_BN_ReLU( - out_channels, out_channels, kernel_size=3, stride=1, padding=1) - - self.smooth2_ = Conv_BN_ReLU( - out_channels, out_channels, kernel_size=3, stride=1, padding=1) - - self.smooth3_ = Conv_BN_ReLU( - out_channels, out_channels, kernel_size=3, stride=1, padding=1) - - self.out_channels = out_channels * 4 - for m in self.sublayers(): - if isinstance(m, nn.Conv2D): - n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels - m.weight = paddle.create_parameter( - shape=m.weight.shape, - dtype='float32', - default_initializer=paddle.nn.initializer.Normal( - 0, math.sqrt(2. / n))) - elif isinstance(m, nn.BatchNorm2D): - m.weight = paddle.create_parameter( - shape=m.weight.shape, - dtype='float32', - default_initializer=paddle.nn.initializer.Constant(1.0)) - m.bias = paddle.create_parameter( - shape=m.bias.shape, - dtype='float32', - default_initializer=paddle.nn.initializer.Constant(0.0)) - - def _upsample(self, x, scale=1): - return F.upsample(x, scale_factor=scale, mode='bilinear') - - def _upsample_add(self, x, y, scale=1): - return F.upsample(x, scale_factor=scale, mode='bilinear') + y - - def forward(self, x): - f2, f3, f4, f5 = x - p5 = self.toplayer_(f5) - - f4 = self.latlayer1_(f4) - p4 = self._upsample_add(p5, f4, 2) - p4 = self.smooth1_(p4) - - f3 = self.latlayer2_(f3) - p3 = self._upsample_add(p4, f3, 2) - p3 = self.smooth2_(p3) - - f2 = self.latlayer3_(f2) - p2 = self._upsample_add(p3, f2, 2) - p2 = self.smooth3_(p2) - - p3 = self._upsample(p3, 2) - p4 = self._upsample(p4, 4) - p5 = self._upsample(p5, 8) - - fuse = paddle.concat([p2, p3, p4, p5], axis=1) - return fuse diff --git a/backend/ppocr/modeling/necks/pg_fpn.py b/backend/ppocr/modeling/necks/pg_fpn.py deleted file mode 100644 index 3f64539..0000000 --- a/backend/ppocr/modeling/necks/pg_fpn.py +++ /dev/null @@ -1,314 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - groups=1, - is_vd_mode=False, - act=None, - name=None): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = nn.AvgPool2D( - kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = nn.BatchNorm( - out_channels, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - use_global_stats=False) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class DeConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size=4, - stride=2, - padding=1, - groups=1, - if_act=True, - act=None, - name=None): - super(DeConvBNLayer, self).__init__() - - self.if_act = if_act - self.act = act - self.deconv = nn.Conv2DTranspose( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name="bn_" + name + "_scale"), - bias_attr=ParamAttr(name="bn_" + name + "_offset"), - moving_mean_name="bn_" + name + "_mean", - moving_variance_name="bn_" + name + "_variance", - use_global_stats=False) - - def forward(self, x): - x = self.deconv(x) - x = self.bn(x) - return x - - -class PGFPN(nn.Layer): - def __init__(self, in_channels, **kwargs): - super(PGFPN, self).__init__() - num_inputs = [2048, 2048, 1024, 512, 256] - num_outputs = [256, 256, 192, 192, 128] - self.out_channels = 128 - self.conv_bn_layer_1 = ConvBNLayer( - in_channels=3, - out_channels=32, - kernel_size=3, - stride=1, - act=None, - name='FPN_d1') - self.conv_bn_layer_2 = ConvBNLayer( - in_channels=64, - out_channels=64, - kernel_size=3, - stride=1, - act=None, - name='FPN_d2') - self.conv_bn_layer_3 = ConvBNLayer( - in_channels=256, - out_channels=128, - kernel_size=3, - stride=1, - act=None, - name='FPN_d3') - self.conv_bn_layer_4 = ConvBNLayer( - in_channels=32, - out_channels=64, - kernel_size=3, - stride=2, - act=None, - name='FPN_d4') - self.conv_bn_layer_5 = ConvBNLayer( - in_channels=64, - out_channels=64, - kernel_size=3, - stride=1, - act='relu', - name='FPN_d5') - self.conv_bn_layer_6 = ConvBNLayer( - in_channels=64, - out_channels=128, - kernel_size=3, - stride=2, - act=None, - name='FPN_d6') - self.conv_bn_layer_7 = ConvBNLayer( - in_channels=128, - out_channels=128, - kernel_size=3, - stride=1, - act='relu', - name='FPN_d7') - self.conv_bn_layer_8 = ConvBNLayer( - in_channels=128, - out_channels=128, - kernel_size=1, - stride=1, - act=None, - name='FPN_d8') - - self.conv_h0 = ConvBNLayer( - in_channels=num_inputs[0], - out_channels=num_outputs[0], - kernel_size=1, - stride=1, - act=None, - name="conv_h{}".format(0)) - self.conv_h1 = ConvBNLayer( - in_channels=num_inputs[1], - out_channels=num_outputs[1], - kernel_size=1, - stride=1, - act=None, - name="conv_h{}".format(1)) - self.conv_h2 = ConvBNLayer( - in_channels=num_inputs[2], - out_channels=num_outputs[2], - kernel_size=1, - stride=1, - act=None, - name="conv_h{}".format(2)) - self.conv_h3 = ConvBNLayer( - in_channels=num_inputs[3], - out_channels=num_outputs[3], - kernel_size=1, - stride=1, - act=None, - name="conv_h{}".format(3)) - self.conv_h4 = ConvBNLayer( - in_channels=num_inputs[4], - out_channels=num_outputs[4], - kernel_size=1, - stride=1, - act=None, - name="conv_h{}".format(4)) - - self.dconv0 = DeConvBNLayer( - in_channels=num_outputs[0], - out_channels=num_outputs[0 + 1], - name="dconv_{}".format(0)) - self.dconv1 = DeConvBNLayer( - in_channels=num_outputs[1], - out_channels=num_outputs[1 + 1], - act=None, - name="dconv_{}".format(1)) - self.dconv2 = DeConvBNLayer( - in_channels=num_outputs[2], - out_channels=num_outputs[2 + 1], - act=None, - name="dconv_{}".format(2)) - self.dconv3 = DeConvBNLayer( - in_channels=num_outputs[3], - out_channels=num_outputs[3 + 1], - act=None, - name="dconv_{}".format(3)) - self.conv_g1 = ConvBNLayer( - in_channels=num_outputs[1], - out_channels=num_outputs[1], - kernel_size=3, - stride=1, - act='relu', - name="conv_g{}".format(1)) - self.conv_g2 = ConvBNLayer( - in_channels=num_outputs[2], - out_channels=num_outputs[2], - kernel_size=3, - stride=1, - act='relu', - name="conv_g{}".format(2)) - self.conv_g3 = ConvBNLayer( - in_channels=num_outputs[3], - out_channels=num_outputs[3], - kernel_size=3, - stride=1, - act='relu', - name="conv_g{}".format(3)) - self.conv_g4 = ConvBNLayer( - in_channels=num_outputs[4], - out_channels=num_outputs[4], - kernel_size=3, - stride=1, - act='relu', - name="conv_g{}".format(4)) - self.convf = ConvBNLayer( - in_channels=num_outputs[4], - out_channels=num_outputs[4], - kernel_size=1, - stride=1, - act=None, - name="conv_f{}".format(4)) - - def forward(self, x): - c0, c1, c2, c3, c4, c5, c6 = x - # FPN_Down_Fusion - f = [c0, c1, c2] - g = [None, None, None] - h = [None, None, None] - h[0] = self.conv_bn_layer_1(f[0]) - h[1] = self.conv_bn_layer_2(f[1]) - h[2] = self.conv_bn_layer_3(f[2]) - - g[0] = self.conv_bn_layer_4(h[0]) - g[1] = paddle.add(g[0], h[1]) - g[1] = F.relu(g[1]) - g[1] = self.conv_bn_layer_5(g[1]) - g[1] = self.conv_bn_layer_6(g[1]) - - g[2] = paddle.add(g[1], h[2]) - g[2] = F.relu(g[2]) - g[2] = self.conv_bn_layer_7(g[2]) - f_down = self.conv_bn_layer_8(g[2]) - - # FPN UP Fusion - f1 = [c6, c5, c4, c3, c2] - g = [None, None, None, None, None] - h = [None, None, None, None, None] - h[0] = self.conv_h0(f1[0]) - h[1] = self.conv_h1(f1[1]) - h[2] = self.conv_h2(f1[2]) - h[3] = self.conv_h3(f1[3]) - h[4] = self.conv_h4(f1[4]) - - g[0] = self.dconv0(h[0]) - g[1] = paddle.add(g[0], h[1]) - g[1] = F.relu(g[1]) - g[1] = self.conv_g1(g[1]) - g[1] = self.dconv1(g[1]) - - g[2] = paddle.add(g[1], h[2]) - g[2] = F.relu(g[2]) - g[2] = self.conv_g2(g[2]) - g[2] = self.dconv2(g[2]) - - g[3] = paddle.add(g[2], h[3]) - g[3] = F.relu(g[3]) - g[3] = self.conv_g3(g[3]) - g[3] = self.dconv3(g[3]) - - g[4] = paddle.add(x=g[3], y=h[4]) - g[4] = F.relu(g[4]) - g[4] = self.conv_g4(g[4]) - f_up = self.convf(g[4]) - f_common = paddle.add(f_down, f_up) - f_common = F.relu(f_common) - return f_common diff --git a/backend/ppocr/modeling/necks/pren_fpn.py b/backend/ppocr/modeling/necks/pren_fpn.py deleted file mode 100644 index afbdcea..0000000 --- a/backend/ppocr/modeling/necks/pren_fpn.py +++ /dev/null @@ -1,163 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Code is refer from: -https://github.com/RuijieJ/pren/blob/main/Nets/Aggregation.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -import paddle.nn.functional as F - - -class PoolAggregate(nn.Layer): - def __init__(self, n_r, d_in, d_middle=None, d_out=None): - super(PoolAggregate, self).__init__() - if not d_middle: - d_middle = d_in - if not d_out: - d_out = d_in - - self.d_in = d_in - self.d_middle = d_middle - self.d_out = d_out - self.act = nn.Swish() - - self.n_r = n_r - self.aggs = self._build_aggs() - - def _build_aggs(self): - aggs = [] - for i in range(self.n_r): - aggs.append( - self.add_sublayer( - '{}'.format(i), - nn.Sequential( - ('conv1', nn.Conv2D( - self.d_in, self.d_middle, 3, 2, 1, bias_attr=False) - ), ('bn1', nn.BatchNorm(self.d_middle)), - ('act', self.act), ('conv2', nn.Conv2D( - self.d_middle, self.d_out, 3, 2, 1, bias_attr=False - )), ('bn2', nn.BatchNorm(self.d_out))))) - return aggs - - def forward(self, x): - b = x.shape[0] - outs = [] - for agg in self.aggs: - y = agg(x) - p = F.adaptive_avg_pool2d(y, 1) - outs.append(p.reshape((b, 1, self.d_out))) - out = paddle.concat(outs, 1) - return out - - -class WeightAggregate(nn.Layer): - def __init__(self, n_r, d_in, d_middle=None, d_out=None): - super(WeightAggregate, self).__init__() - if not d_middle: - d_middle = d_in - if not d_out: - d_out = d_in - - self.n_r = n_r - self.d_out = d_out - self.act = nn.Swish() - - self.conv_n = nn.Sequential( - ('conv1', nn.Conv2D( - d_in, d_in, 3, 1, 1, - bias_attr=False)), ('bn1', nn.BatchNorm(d_in)), - ('act1', self.act), ('conv2', nn.Conv2D( - d_in, n_r, 1, bias_attr=False)), ('bn2', nn.BatchNorm(n_r)), - ('act2', nn.Sigmoid())) - self.conv_d = nn.Sequential( - ('conv1', nn.Conv2D( - d_in, d_middle, 3, 1, 1, - bias_attr=False)), ('bn1', nn.BatchNorm(d_middle)), - ('act1', self.act), ('conv2', nn.Conv2D( - d_middle, d_out, 1, - bias_attr=False)), ('bn2', nn.BatchNorm(d_out))) - - def forward(self, x): - b, _, h, w = x.shape - - hmaps = self.conv_n(x) - fmaps = self.conv_d(x) - r = paddle.bmm( - hmaps.reshape((b, self.n_r, h * w)), - fmaps.reshape((b, self.d_out, h * w)).transpose((0, 2, 1))) - return r - - -class GCN(nn.Layer): - def __init__(self, d_in, n_in, d_out=None, n_out=None, dropout=0.1): - super(GCN, self).__init__() - if not d_out: - d_out = d_in - if not n_out: - n_out = d_in - - self.conv_n = nn.Conv1D(n_in, n_out, 1) - self.linear = nn.Linear(d_in, d_out) - self.dropout = nn.Dropout(dropout) - self.act = nn.Swish() - - def forward(self, x): - x = self.conv_n(x) - x = self.dropout(self.linear(x)) - return self.act(x) - - -class PRENFPN(nn.Layer): - def __init__(self, in_channels, n_r, d_model, max_len, dropout): - super(PRENFPN, self).__init__() - assert len(in_channels) == 3, "in_channels' length must be 3." - c1, c2, c3 = in_channels # the depths are from big to small - # build fpn - assert d_model % 3 == 0, "{} can't be divided by 3.".format(d_model) - self.agg_p1 = PoolAggregate(n_r, c1, d_out=d_model // 3) - self.agg_p2 = PoolAggregate(n_r, c2, d_out=d_model // 3) - self.agg_p3 = PoolAggregate(n_r, c3, d_out=d_model // 3) - - self.agg_w1 = WeightAggregate(n_r, c1, 4 * c1, d_model // 3) - self.agg_w2 = WeightAggregate(n_r, c2, 4 * c2, d_model // 3) - self.agg_w3 = WeightAggregate(n_r, c3, 4 * c3, d_model // 3) - - self.gcn_pool = GCN(d_model, n_r, d_model, max_len, dropout) - self.gcn_weight = GCN(d_model, n_r, d_model, max_len, dropout) - - self.out_channels = d_model - - def forward(self, inputs): - f3, f5, f7 = inputs - - rp1 = self.agg_p1(f3) - rp2 = self.agg_p2(f5) - rp3 = self.agg_p3(f7) - rp = paddle.concat([rp1, rp2, rp3], 2) # [b,nr,d] - - rw1 = self.agg_w1(f3) - rw2 = self.agg_w2(f5) - rw3 = self.agg_w3(f7) - rw = paddle.concat([rw1, rw2, rw3], 2) # [b,nr,d] - - y1 = self.gcn_pool(rp) - y2 = self.gcn_weight(rw) - y = 0.5 * (y1 + y2) - return y # [b,max_len,d] diff --git a/backend/ppocr/modeling/necks/rnn.py b/backend/ppocr/modeling/necks/rnn.py deleted file mode 100644 index c8a774b..0000000 --- a/backend/ppocr/modeling/necks/rnn.py +++ /dev/null @@ -1,191 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn - -from ppocr.modeling.heads.rec_ctc_head import get_para_bias_attr -from ppocr.modeling.backbones.rec_svtrnet import Block, ConvBNLayer, trunc_normal_, zeros_, ones_ - - -class Im2Seq(nn.Layer): - def __init__(self, in_channels, **kwargs): - super().__init__() - self.out_channels = in_channels - - def forward(self, x): - B, C, H, W = x.shape - assert H == 1 - x = x.squeeze(axis=2) - x = x.transpose([0, 2, 1]) # (NTC)(batch, width, channels) - return x - - -class EncoderWithRNN(nn.Layer): - def __init__(self, in_channels, hidden_size): - super(EncoderWithRNN, self).__init__() - self.out_channels = hidden_size * 2 - self.lstm = nn.LSTM( - in_channels, hidden_size, direction='bidirectional', num_layers=2) - - def forward(self, x): - x, _ = self.lstm(x) - return x - - -class EncoderWithFC(nn.Layer): - def __init__(self, in_channels, hidden_size): - super(EncoderWithFC, self).__init__() - self.out_channels = hidden_size - weight_attr, bias_attr = get_para_bias_attr( - l2_decay=0.00001, k=in_channels) - self.fc = nn.Linear( - in_channels, - hidden_size, - weight_attr=weight_attr, - bias_attr=bias_attr, - name='reduce_encoder_fea') - - def forward(self, x): - x = self.fc(x) - return x - - -class EncoderWithSVTR(nn.Layer): - def __init__( - self, - in_channels, - dims=64, # XS - depth=2, - hidden_dims=120, - use_guide=False, - num_heads=8, - qkv_bias=True, - mlp_ratio=2.0, - drop_rate=0.1, - attn_drop_rate=0.1, - drop_path=0., - qk_scale=None): - super(EncoderWithSVTR, self).__init__() - self.depth = depth - self.use_guide = use_guide - self.conv1 = ConvBNLayer( - in_channels, in_channels // 8, padding=1, act=nn.Swish) - self.conv2 = ConvBNLayer( - in_channels // 8, hidden_dims, kernel_size=1, act=nn.Swish) - - self.svtr_block = nn.LayerList([ - Block( - dim=hidden_dims, - num_heads=num_heads, - mixer='Global', - HW=None, - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - act_layer=nn.Swish, - attn_drop=attn_drop_rate, - drop_path=drop_path, - norm_layer='nn.LayerNorm', - epsilon=1e-05, - prenorm=False) for i in range(depth) - ]) - self.norm = nn.LayerNorm(hidden_dims, epsilon=1e-6) - self.conv3 = ConvBNLayer( - hidden_dims, in_channels, kernel_size=1, act=nn.Swish) - # last conv-nxn, the input is concat of input tensor and conv3 output tensor - self.conv4 = ConvBNLayer( - 2 * in_channels, in_channels // 8, padding=1, act=nn.Swish) - - self.conv1x1 = ConvBNLayer( - in_channels // 8, dims, kernel_size=1, act=nn.Swish) - self.out_channels = dims - self.apply(self._init_weights) - - def _init_weights(self, m): - if isinstance(m, nn.Linear): - trunc_normal_(m.weight) - if isinstance(m, nn.Linear) and m.bias is not None: - zeros_(m.bias) - elif isinstance(m, nn.LayerNorm): - zeros_(m.bias) - ones_(m.weight) - - def forward(self, x): - # for use guide - if self.use_guide: - z = x.clone() - z.stop_gradient = True - else: - z = x - # for short cut - h = z - # reduce dim - z = self.conv1(z) - z = self.conv2(z) - # SVTR global block - B, C, H, W = z.shape - z = z.flatten(2).transpose([0, 2, 1]) - for blk in self.svtr_block: - z = blk(z) - z = self.norm(z) - # last stage - z = z.reshape([0, H, W, C]).transpose([0, 3, 1, 2]) - z = self.conv3(z) - z = paddle.concat((h, z), axis=1) - z = self.conv1x1(self.conv4(z)) - return z - - -class SequenceEncoder(nn.Layer): - def __init__(self, in_channels, encoder_type, hidden_size=48, **kwargs): - super(SequenceEncoder, self).__init__() - self.encoder_reshape = Im2Seq(in_channels) - self.out_channels = self.encoder_reshape.out_channels - self.encoder_type = encoder_type - if encoder_type == 'reshape': - self.only_reshape = True - else: - support_encoder_dict = { - 'reshape': Im2Seq, - 'fc': EncoderWithFC, - 'rnn': EncoderWithRNN, - 'svtr': EncoderWithSVTR - } - assert encoder_type in support_encoder_dict, '{} must in {}'.format( - encoder_type, support_encoder_dict.keys()) - if encoder_type == "svtr": - self.encoder = support_encoder_dict[encoder_type]( - self.encoder_reshape.out_channels, **kwargs) - else: - self.encoder = support_encoder_dict[encoder_type]( - self.encoder_reshape.out_channels, hidden_size) - self.out_channels = self.encoder.out_channels - self.only_reshape = False - - def forward(self, x): - if self.encoder_type != 'svtr': - x = self.encoder_reshape(x) - if not self.only_reshape: - x = self.encoder(x) - return x - else: - x = self.encoder(x) - x = self.encoder_reshape(x) - return x diff --git a/backend/ppocr/modeling/necks/sast_fpn.py b/backend/ppocr/modeling/necks/sast_fpn.py deleted file mode 100644 index 9b60245..0000000 --- a/backend/ppocr/modeling/necks/sast_fpn.py +++ /dev/null @@ -1,284 +0,0 @@ -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - groups=1, - if_act=True, - act=None, - name=None): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name="bn_" + name + "_scale"), - bias_attr=ParamAttr(name="bn_" + name + "_offset"), - moving_mean_name="bn_" + name + "_mean", - moving_variance_name="bn_" + name + "_variance") - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - return x - - -class DeConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - groups=1, - if_act=True, - act=None, - name=None): - super(DeConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.deconv = nn.Conv2DTranspose( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name="bn_" + name + "_scale"), - bias_attr=ParamAttr(name="bn_" + name + "_offset"), - moving_mean_name="bn_" + name + "_mean", - moving_variance_name="bn_" + name + "_variance") - - def forward(self, x): - x = self.deconv(x) - x = self.bn(x) - return x - - -class FPN_Up_Fusion(nn.Layer): - def __init__(self, in_channels): - super(FPN_Up_Fusion, self).__init__() - in_channels = in_channels[::-1] - out_channels = [256, 256, 192, 192, 128] - - self.h0_conv = ConvBNLayer(in_channels[0], out_channels[0], 1, 1, act=None, name='fpn_up_h0') - self.h1_conv = ConvBNLayer(in_channels[1], out_channels[1], 1, 1, act=None, name='fpn_up_h1') - self.h2_conv = ConvBNLayer(in_channels[2], out_channels[2], 1, 1, act=None, name='fpn_up_h2') - self.h3_conv = ConvBNLayer(in_channels[3], out_channels[3], 1, 1, act=None, name='fpn_up_h3') - self.h4_conv = ConvBNLayer(in_channels[4], out_channels[4], 1, 1, act=None, name='fpn_up_h4') - - self.g0_conv = DeConvBNLayer(out_channels[0], out_channels[1], 4, 2, act=None, name='fpn_up_g0') - - self.g1_conv = nn.Sequential( - ConvBNLayer(out_channels[1], out_channels[1], 3, 1, act='relu', name='fpn_up_g1_1'), - DeConvBNLayer(out_channels[1], out_channels[2], 4, 2, act=None, name='fpn_up_g1_2') - ) - self.g2_conv = nn.Sequential( - ConvBNLayer(out_channels[2], out_channels[2], 3, 1, act='relu', name='fpn_up_g2_1'), - DeConvBNLayer(out_channels[2], out_channels[3], 4, 2, act=None, name='fpn_up_g2_2') - ) - self.g3_conv = nn.Sequential( - ConvBNLayer(out_channels[3], out_channels[3], 3, 1, act='relu', name='fpn_up_g3_1'), - DeConvBNLayer(out_channels[3], out_channels[4], 4, 2, act=None, name='fpn_up_g3_2') - ) - - self.g4_conv = nn.Sequential( - ConvBNLayer(out_channels[4], out_channels[4], 3, 1, act='relu', name='fpn_up_fusion_1'), - ConvBNLayer(out_channels[4], out_channels[4], 1, 1, act=None, name='fpn_up_fusion_2') - ) - - def _add_relu(self, x1, x2): - x = paddle.add(x=x1, y=x2) - x = F.relu(x) - return x - - def forward(self, x): - f = x[2:][::-1] - h0 = self.h0_conv(f[0]) - h1 = self.h1_conv(f[1]) - h2 = self.h2_conv(f[2]) - h3 = self.h3_conv(f[3]) - h4 = self.h4_conv(f[4]) - - g0 = self.g0_conv(h0) - g1 = self._add_relu(g0, h1) - g1 = self.g1_conv(g1) - g2 = self.g2_conv(self._add_relu(g1, h2)) - g3 = self.g3_conv(self._add_relu(g2, h3)) - g4 = self.g4_conv(self._add_relu(g3, h4)) - - return g4 - - -class FPN_Down_Fusion(nn.Layer): - def __init__(self, in_channels): - super(FPN_Down_Fusion, self).__init__() - out_channels = [32, 64, 128] - - self.h0_conv = ConvBNLayer(in_channels[0], out_channels[0], 3, 1, act=None, name='fpn_down_h0') - self.h1_conv = ConvBNLayer(in_channels[1], out_channels[1], 3, 1, act=None, name='fpn_down_h1') - self.h2_conv = ConvBNLayer(in_channels[2], out_channels[2], 3, 1, act=None, name='fpn_down_h2') - - self.g0_conv = ConvBNLayer(out_channels[0], out_channels[1], 3, 2, act=None, name='fpn_down_g0') - - self.g1_conv = nn.Sequential( - ConvBNLayer(out_channels[1], out_channels[1], 3, 1, act='relu', name='fpn_down_g1_1'), - ConvBNLayer(out_channels[1], out_channels[2], 3, 2, act=None, name='fpn_down_g1_2') - ) - - self.g2_conv = nn.Sequential( - ConvBNLayer(out_channels[2], out_channels[2], 3, 1, act='relu', name='fpn_down_fusion_1'), - ConvBNLayer(out_channels[2], out_channels[2], 1, 1, act=None, name='fpn_down_fusion_2') - ) - - def forward(self, x): - f = x[:3] - h0 = self.h0_conv(f[0]) - h1 = self.h1_conv(f[1]) - h2 = self.h2_conv(f[2]) - g0 = self.g0_conv(h0) - g1 = paddle.add(x=g0, y=h1) - g1 = F.relu(g1) - g1 = self.g1_conv(g1) - g2 = paddle.add(x=g1, y=h2) - g2 = F.relu(g2) - g2 = self.g2_conv(g2) - return g2 - - -class Cross_Attention(nn.Layer): - def __init__(self, in_channels): - super(Cross_Attention, self).__init__() - self.theta_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_theta') - self.phi_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_phi') - self.g_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_g') - - self.fh_weight_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fh_weight') - self.fh_sc_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fh_sc') - - self.fv_weight_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fv_weight') - self.fv_sc_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fv_sc') - - self.f_attn_conv = ConvBNLayer(in_channels * 2, in_channels, 1, 1, act='relu', name='f_attn') - - def _cal_fweight(self, f, shape): - f_theta, f_phi, f_g = f - #flatten - f_theta = paddle.transpose(f_theta, [0, 2, 3, 1]) - f_theta = paddle.reshape(f_theta, [shape[0] * shape[1], shape[2], 128]) - f_phi = paddle.transpose(f_phi, [0, 2, 3, 1]) - f_phi = paddle.reshape(f_phi, [shape[0] * shape[1], shape[2], 128]) - f_g = paddle.transpose(f_g, [0, 2, 3, 1]) - f_g = paddle.reshape(f_g, [shape[0] * shape[1], shape[2], 128]) - #correlation - f_attn = paddle.matmul(f_theta, paddle.transpose(f_phi, [0, 2, 1])) - #scale - f_attn = f_attn / (128**0.5) - f_attn = F.softmax(f_attn) - #weighted sum - f_weight = paddle.matmul(f_attn, f_g) - f_weight = paddle.reshape( - f_weight, [shape[0], shape[1], shape[2], 128]) - return f_weight - - def forward(self, f_common): - f_shape = paddle.shape(f_common) - # print('f_shape: ', f_shape) - - f_theta = self.theta_conv(f_common) - f_phi = self.phi_conv(f_common) - f_g = self.g_conv(f_common) - - ######## horizon ######## - fh_weight = self._cal_fweight([f_theta, f_phi, f_g], - [f_shape[0], f_shape[2], f_shape[3]]) - fh_weight = paddle.transpose(fh_weight, [0, 3, 1, 2]) - fh_weight = self.fh_weight_conv(fh_weight) - #short cut - fh_sc = self.fh_sc_conv(f_common) - f_h = F.relu(fh_weight + fh_sc) - - ######## vertical ######## - fv_theta = paddle.transpose(f_theta, [0, 1, 3, 2]) - fv_phi = paddle.transpose(f_phi, [0, 1, 3, 2]) - fv_g = paddle.transpose(f_g, [0, 1, 3, 2]) - fv_weight = self._cal_fweight([fv_theta, fv_phi, fv_g], - [f_shape[0], f_shape[3], f_shape[2]]) - fv_weight = paddle.transpose(fv_weight, [0, 3, 2, 1]) - fv_weight = self.fv_weight_conv(fv_weight) - #short cut - fv_sc = self.fv_sc_conv(f_common) - f_v = F.relu(fv_weight + fv_sc) - - ######## merge ######## - f_attn = paddle.concat([f_h, f_v], axis=1) - f_attn = self.f_attn_conv(f_attn) - return f_attn - - -class SASTFPN(nn.Layer): - def __init__(self, in_channels, with_cab=False, **kwargs): - super(SASTFPN, self).__init__() - self.in_channels = in_channels - self.with_cab = with_cab - self.FPN_Down_Fusion = FPN_Down_Fusion(self.in_channels) - self.FPN_Up_Fusion = FPN_Up_Fusion(self.in_channels) - self.out_channels = 128 - self.cross_attention = Cross_Attention(self.out_channels) - - def forward(self, x): - #down fpn - f_down = self.FPN_Down_Fusion(x) - - #up fpn - f_up = self.FPN_Up_Fusion(x) - - #fusion - f_common = paddle.add(x=f_down, y=f_up) - f_common = F.relu(f_common) - - if self.with_cab: - # print('enhence f_common with CAB.') - f_common = self.cross_attention(f_common) - - return f_common diff --git a/backend/ppocr/modeling/necks/table_fpn.py b/backend/ppocr/modeling/necks/table_fpn.py deleted file mode 100644 index 734f15a..0000000 --- a/backend/ppocr/modeling/necks/table_fpn.py +++ /dev/null @@ -1,110 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - - -class TableFPN(nn.Layer): - def __init__(self, in_channels, out_channels, **kwargs): - super(TableFPN, self).__init__() - self.out_channels = 512 - weight_attr = paddle.nn.initializer.KaimingUniform() - self.in2_conv = nn.Conv2D( - in_channels=in_channels[0], - out_channels=self.out_channels, - kernel_size=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.in3_conv = nn.Conv2D( - in_channels=in_channels[1], - out_channels=self.out_channels, - kernel_size=1, - stride = 1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.in4_conv = nn.Conv2D( - in_channels=in_channels[2], - out_channels=self.out_channels, - kernel_size=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.in5_conv = nn.Conv2D( - in_channels=in_channels[3], - out_channels=self.out_channels, - kernel_size=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.p5_conv = nn.Conv2D( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.p4_conv = nn.Conv2D( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.p3_conv = nn.Conv2D( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.p2_conv = nn.Conv2D( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - weight_attr=ParamAttr(initializer=weight_attr), - bias_attr=False) - self.fuse_conv = nn.Conv2D( - in_channels=self.out_channels * 4, - out_channels=512, - kernel_size=3, - padding=1, - weight_attr=ParamAttr(initializer=weight_attr), bias_attr=False) - - def forward(self, x): - c2, c3, c4, c5 = x - - in5 = self.in5_conv(c5) - in4 = self.in4_conv(c4) - in3 = self.in3_conv(c3) - in2 = self.in2_conv(c2) - - out4 = in4 + F.upsample( - in5, size=in4.shape[2:4], mode="nearest", align_mode=1) # 1/16 - out3 = in3 + F.upsample( - out4, size=in3.shape[2:4], mode="nearest", align_mode=1) # 1/8 - out2 = in2 + F.upsample( - out3, size=in2.shape[2:4], mode="nearest", align_mode=1) # 1/4 - - p4 = F.upsample(out4, size=in5.shape[2:4], mode="nearest", align_mode=1) - p3 = F.upsample(out3, size=in5.shape[2:4], mode="nearest", align_mode=1) - p2 = F.upsample(out2, size=in5.shape[2:4], mode="nearest", align_mode=1) - fuse = paddle.concat([in5, p4, p3, p2], axis=1) - fuse_conv = self.fuse_conv(fuse) * 0.005 - return [c5 + fuse_conv] diff --git a/backend/ppocr/modeling/transforms/__init__.py b/backend/ppocr/modeling/transforms/__init__.py deleted file mode 100755 index 405ab3c..0000000 --- a/backend/ppocr/modeling/transforms/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -__all__ = ['build_transform'] - - -def build_transform(config): - from .tps import TPS - from .stn import STN_ON - - support_dict = ['TPS', 'STN_ON'] - - module_name = config.pop('name') - assert module_name in support_dict, Exception( - 'transform only support {}'.format(support_dict)) - module_class = eval(module_name)(**config) - return module_class diff --git a/backend/ppocr/modeling/transforms/stn.py b/backend/ppocr/modeling/transforms/stn.py deleted file mode 100644 index 6f2bdda..0000000 --- a/backend/ppocr/modeling/transforms/stn.py +++ /dev/null @@ -1,135 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/ayumiymk/aster.pytorch/blob/master/lib/models/stn_head.py -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn, ParamAttr -from paddle.nn import functional as F -import numpy as np - -from .tps_spatial_transformer import TPSSpatialTransformer - - -def conv3x3_block(in_channels, out_channels, stride=1): - n = 3 * 3 * out_channels - w = math.sqrt(2. / n) - conv_layer = nn.Conv2D( - in_channels, - out_channels, - kernel_size=3, - stride=stride, - padding=1, - weight_attr=nn.initializer.Normal( - mean=0.0, std=w), - bias_attr=nn.initializer.Constant(0)) - block = nn.Sequential(conv_layer, nn.BatchNorm2D(out_channels), nn.ReLU()) - return block - - -class STN(nn.Layer): - def __init__(self, in_channels, num_ctrlpoints, activation='none'): - super(STN, self).__init__() - self.in_channels = in_channels - self.num_ctrlpoints = num_ctrlpoints - self.activation = activation - self.stn_convnet = nn.Sequential( - conv3x3_block(in_channels, 32), #32x64 - nn.MaxPool2D( - kernel_size=2, stride=2), - conv3x3_block(32, 64), #16x32 - nn.MaxPool2D( - kernel_size=2, stride=2), - conv3x3_block(64, 128), # 8*16 - nn.MaxPool2D( - kernel_size=2, stride=2), - conv3x3_block(128, 256), # 4*8 - nn.MaxPool2D( - kernel_size=2, stride=2), - conv3x3_block(256, 256), # 2*4, - nn.MaxPool2D( - kernel_size=2, stride=2), - conv3x3_block(256, 256)) # 1*2 - self.stn_fc1 = nn.Sequential( - nn.Linear( - 2 * 256, - 512, - weight_attr=nn.initializer.Normal(0, 0.001), - bias_attr=nn.initializer.Constant(0)), - nn.BatchNorm1D(512), - nn.ReLU()) - fc2_bias = self.init_stn() - self.stn_fc2 = nn.Linear( - 512, - num_ctrlpoints * 2, - weight_attr=nn.initializer.Constant(0.0), - bias_attr=nn.initializer.Assign(fc2_bias)) - - def init_stn(self): - margin = 0.01 - sampling_num_per_side = int(self.num_ctrlpoints / 2) - ctrl_pts_x = np.linspace(margin, 1. - margin, sampling_num_per_side) - ctrl_pts_y_top = np.ones(sampling_num_per_side) * margin - ctrl_pts_y_bottom = np.ones(sampling_num_per_side) * (1 - margin) - ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1) - ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1) - ctrl_points = np.concatenate( - [ctrl_pts_top, ctrl_pts_bottom], axis=0).astype(np.float32) - if self.activation == 'none': - pass - elif self.activation == 'sigmoid': - ctrl_points = -np.log(1. / ctrl_points - 1.) - ctrl_points = paddle.to_tensor(ctrl_points) - fc2_bias = paddle.reshape( - ctrl_points, shape=[ctrl_points.shape[0] * ctrl_points.shape[1]]) - return fc2_bias - - def forward(self, x): - x = self.stn_convnet(x) - batch_size, _, h, w = x.shape - x = paddle.reshape(x, shape=(batch_size, -1)) - img_feat = self.stn_fc1(x) - x = self.stn_fc2(0.1 * img_feat) - if self.activation == 'sigmoid': - x = F.sigmoid(x) - x = paddle.reshape(x, shape=[-1, self.num_ctrlpoints, 2]) - return img_feat, x - - -class STN_ON(nn.Layer): - def __init__(self, in_channels, tps_inputsize, tps_outputsize, - num_control_points, tps_margins, stn_activation): - super(STN_ON, self).__init__() - self.tps = TPSSpatialTransformer( - output_image_size=tuple(tps_outputsize), - num_control_points=num_control_points, - margins=tuple(tps_margins)) - self.stn_head = STN(in_channels=in_channels, - num_ctrlpoints=num_control_points, - activation=stn_activation) - self.tps_inputsize = tps_inputsize - self.out_channels = in_channels - - def forward(self, image): - stn_input = paddle.nn.functional.interpolate( - image, self.tps_inputsize, mode="bilinear", align_corners=True) - stn_img_feat, ctrl_points = self.stn_head(stn_input) - x, _ = self.tps(image, ctrl_points) - return x diff --git a/backend/ppocr/modeling/transforms/tps.py b/backend/ppocr/modeling/transforms/tps.py deleted file mode 100644 index 9bdab0f..0000000 --- a/backend/ppocr/modeling/transforms/tps.py +++ /dev/null @@ -1,308 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/clovaai/deep-text-recognition-benchmark/blob/master/modules/transformation.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn, ParamAttr -from paddle.nn import functional as F -import numpy as np - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - groups=1, - act=None, - name=None): - super(ConvBNLayer, self).__init__() - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - bn_name = "bn_" + name - self.bn = nn.BatchNorm( - out_channels, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - return x - - -class LocalizationNetwork(nn.Layer): - def __init__(self, in_channels, num_fiducial, loc_lr, model_name): - super(LocalizationNetwork, self).__init__() - self.F = num_fiducial - F = num_fiducial - if model_name == "large": - num_filters_list = [64, 128, 256, 512] - fc_dim = 256 - else: - num_filters_list = [16, 32, 64, 128] - fc_dim = 64 - - self.block_list = [] - for fno in range(0, len(num_filters_list)): - num_filters = num_filters_list[fno] - name = "loc_conv%d" % fno - conv = self.add_sublayer( - name, - ConvBNLayer( - in_channels=in_channels, - out_channels=num_filters, - kernel_size=3, - act='relu', - name=name)) - self.block_list.append(conv) - if fno == len(num_filters_list) - 1: - pool = nn.AdaptiveAvgPool2D(1) - else: - pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0) - in_channels = num_filters - self.block_list.append(pool) - name = "loc_fc1" - stdv = 1.0 / math.sqrt(num_filters_list[-1] * 1.0) - self.fc1 = nn.Linear( - in_channels, - fc_dim, - weight_attr=ParamAttr( - learning_rate=loc_lr, - name=name + "_w", - initializer=nn.initializer.Uniform(-stdv, stdv)), - bias_attr=ParamAttr(name=name + '.b_0'), - name=name) - - # Init fc2 in LocalizationNetwork - initial_bias = self.get_initial_fiducials() - initial_bias = initial_bias.reshape(-1) - name = "loc_fc2" - param_attr = ParamAttr( - learning_rate=loc_lr, - initializer=nn.initializer.Assign(np.zeros([fc_dim, F * 2])), - name=name + "_w") - bias_attr = ParamAttr( - learning_rate=loc_lr, - initializer=nn.initializer.Assign(initial_bias), - name=name + "_b") - self.fc2 = nn.Linear( - fc_dim, - F * 2, - weight_attr=param_attr, - bias_attr=bias_attr, - name=name) - self.out_channels = F * 2 - - def forward(self, x): - """ - Estimating parameters of geometric transformation - Args: - image: input - Return: - batch_C_prime: the matrix of the geometric transformation - """ - B = x.shape[0] - i = 0 - for block in self.block_list: - x = block(x) - x = x.squeeze(axis=2).squeeze(axis=2) - x = self.fc1(x) - - x = F.relu(x) - x = self.fc2(x) - x = x.reshape(shape=[-1, self.F, 2]) - return x - - def get_initial_fiducials(self): - """ see RARE paper Fig. 6 (a) """ - F = self.F - ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2)) - ctrl_pts_y_top = np.linspace(0.0, -1.0, num=int(F / 2)) - ctrl_pts_y_bottom = np.linspace(1.0, 0.0, num=int(F / 2)) - ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1) - ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1) - initial_bias = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0) - return initial_bias - - -class GridGenerator(nn.Layer): - def __init__(self, in_channels, num_fiducial): - super(GridGenerator, self).__init__() - self.eps = 1e-6 - self.F = num_fiducial - - name = "ex_fc" - initializer = nn.initializer.Constant(value=0.0) - param_attr = ParamAttr( - learning_rate=0.0, initializer=initializer, name=name + "_w") - bias_attr = ParamAttr( - learning_rate=0.0, initializer=initializer, name=name + "_b") - self.fc = nn.Linear( - in_channels, - 6, - weight_attr=param_attr, - bias_attr=bias_attr, - name=name) - - def forward(self, batch_C_prime, I_r_size): - """ - Generate the grid for the grid_sampler. - Args: - batch_C_prime: the matrix of the geometric transformation - I_r_size: the shape of the input image - Return: - batch_P_prime: the grid for the grid_sampler - """ - C = self.build_C_paddle() - P = self.build_P_paddle(I_r_size) - - inv_delta_C_tensor = self.build_inv_delta_C_paddle(C).astype('float32') - P_hat_tensor = self.build_P_hat_paddle( - C, paddle.to_tensor(P)).astype('float32') - - inv_delta_C_tensor.stop_gradient = True - P_hat_tensor.stop_gradient = True - - batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime) - - batch_C_ex_part_tensor.stop_gradient = True - - batch_C_prime_with_zeros = paddle.concat( - [batch_C_prime, batch_C_ex_part_tensor], axis=1) - batch_T = paddle.matmul(inv_delta_C_tensor, batch_C_prime_with_zeros) - batch_P_prime = paddle.matmul(P_hat_tensor, batch_T) - return batch_P_prime - - def build_C_paddle(self): - """ Return coordinates of fiducial points in I_r; C """ - F = self.F - ctrl_pts_x = paddle.linspace(-1.0, 1.0, int(F / 2), dtype='float64') - ctrl_pts_y_top = -1 * paddle.ones([int(F / 2)], dtype='float64') - ctrl_pts_y_bottom = paddle.ones([int(F / 2)], dtype='float64') - ctrl_pts_top = paddle.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1) - ctrl_pts_bottom = paddle.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1) - C = paddle.concat([ctrl_pts_top, ctrl_pts_bottom], axis=0) - return C # F x 2 - - def build_P_paddle(self, I_r_size): - I_r_height, I_r_width = I_r_size - I_r_grid_x = (paddle.arange( - -I_r_width, I_r_width, 2, dtype='float64') + 1.0 - ) / paddle.to_tensor(np.array([I_r_width])) - - I_r_grid_y = (paddle.arange( - -I_r_height, I_r_height, 2, dtype='float64') + 1.0 - ) / paddle.to_tensor(np.array([I_r_height])) - - # P: self.I_r_width x self.I_r_height x 2 - P = paddle.stack(paddle.meshgrid(I_r_grid_x, I_r_grid_y), axis=2) - P = paddle.transpose(P, perm=[1, 0, 2]) - # n (= self.I_r_width x self.I_r_height) x 2 - return P.reshape([-1, 2]) - - def build_inv_delta_C_paddle(self, C): - """ Return inv_delta_C which is needed to calculate T """ - F = self.F - hat_eye = paddle.eye(F, dtype='float64') # F x F - hat_C = paddle.norm( - C.reshape([1, F, 2]) - C.reshape([F, 1, 2]), axis=2) + hat_eye - hat_C = (hat_C**2) * paddle.log(hat_C) - delta_C = paddle.concat( # F+3 x F+3 - [ - paddle.concat( - [paddle.ones( - (F, 1), dtype='float64'), C, hat_C], axis=1), # F x F+3 - paddle.concat( - [ - paddle.zeros( - (2, 3), dtype='float64'), paddle.transpose( - C, perm=[1, 0]) - ], - axis=1), # 2 x F+3 - paddle.concat( - [ - paddle.zeros( - (1, 3), dtype='float64'), paddle.ones( - (1, F), dtype='float64') - ], - axis=1) # 1 x F+3 - ], - axis=0) - inv_delta_C = paddle.inverse(delta_C) - return inv_delta_C # F+3 x F+3 - - def build_P_hat_paddle(self, C, P): - F = self.F - eps = self.eps - n = P.shape[0] # n (= self.I_r_width x self.I_r_height) - # P_tile: n x 2 -> n x 1 x 2 -> n x F x 2 - P_tile = paddle.tile(paddle.unsqueeze(P, axis=1), (1, F, 1)) - C_tile = paddle.unsqueeze(C, axis=0) # 1 x F x 2 - P_diff = P_tile - C_tile # n x F x 2 - # rbf_norm: n x F - rbf_norm = paddle.norm(P_diff, p=2, axis=2, keepdim=False) - - # rbf: n x F - rbf = paddle.multiply( - paddle.square(rbf_norm), paddle.log(rbf_norm + eps)) - P_hat = paddle.concat( - [paddle.ones( - (n, 1), dtype='float64'), P, rbf], axis=1) - return P_hat # n x F+3 - - def get_expand_tensor(self, batch_C_prime): - B, H, C = batch_C_prime.shape - batch_C_prime = batch_C_prime.reshape([B, H * C]) - batch_C_ex_part_tensor = self.fc(batch_C_prime) - batch_C_ex_part_tensor = batch_C_ex_part_tensor.reshape([-1, 3, 2]) - return batch_C_ex_part_tensor - - -class TPS(nn.Layer): - def __init__(self, in_channels, num_fiducial, loc_lr, model_name): - super(TPS, self).__init__() - self.loc_net = LocalizationNetwork(in_channels, num_fiducial, loc_lr, - model_name) - self.grid_generator = GridGenerator(self.loc_net.out_channels, - num_fiducial) - self.out_channels = in_channels - - def forward(self, image): - image.stop_gradient = False - batch_C_prime = self.loc_net(image) - batch_P_prime = self.grid_generator(batch_C_prime, image.shape[2:]) - batch_P_prime = batch_P_prime.reshape( - [-1, image.shape[2], image.shape[3], 2]) - batch_I_r = F.grid_sample(x=image, grid=batch_P_prime) - return batch_I_r diff --git a/backend/ppocr/modeling/transforms/tps_spatial_transformer.py b/backend/ppocr/modeling/transforms/tps_spatial_transformer.py deleted file mode 100644 index cb1cb10..0000000 --- a/backend/ppocr/modeling/transforms/tps_spatial_transformer.py +++ /dev/null @@ -1,156 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/ayumiymk/aster.pytorch/blob/master/lib/models/tps_spatial_transformer.py -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import paddle -from paddle import nn, ParamAttr -from paddle.nn import functional as F -import numpy as np -import itertools - - -def grid_sample(input, grid, canvas=None): - input.stop_gradient = False - output = F.grid_sample(input, grid) - if canvas is None: - return output - else: - input_mask = paddle.ones(shape=input.shape) - output_mask = F.grid_sample(input_mask, grid) - padded_output = output * output_mask + canvas * (1 - output_mask) - return padded_output - - -# phi(x1, x2) = r^2 * log(r), where r = ||x1 - x2||_2 -def compute_partial_repr(input_points, control_points): - N = input_points.shape[0] - M = control_points.shape[0] - pairwise_diff = paddle.reshape( - input_points, shape=[N, 1, 2]) - paddle.reshape( - control_points, shape=[1, M, 2]) - # original implementation, very slow - # pairwise_dist = torch.sum(pairwise_diff ** 2, dim = 2) # square of distance - pairwise_diff_square = pairwise_diff * pairwise_diff - pairwise_dist = pairwise_diff_square[:, :, 0] + pairwise_diff_square[:, :, - 1] - repr_matrix = 0.5 * pairwise_dist * paddle.log(pairwise_dist) - # fix numerical error for 0 * log(0), substitute all nan with 0 - mask = np.array(repr_matrix != repr_matrix) - repr_matrix[mask] = 0 - return repr_matrix - - -# output_ctrl_pts are specified, according to our task. -def build_output_control_points(num_control_points, margins): - margin_x, margin_y = margins - num_ctrl_pts_per_side = num_control_points // 2 - ctrl_pts_x = np.linspace(margin_x, 1.0 - margin_x, num_ctrl_pts_per_side) - ctrl_pts_y_top = np.ones(num_ctrl_pts_per_side) * margin_y - ctrl_pts_y_bottom = np.ones(num_ctrl_pts_per_side) * (1.0 - margin_y) - ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1) - ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1) - output_ctrl_pts_arr = np.concatenate( - [ctrl_pts_top, ctrl_pts_bottom], axis=0) - output_ctrl_pts = paddle.to_tensor(output_ctrl_pts_arr) - return output_ctrl_pts - - -class TPSSpatialTransformer(nn.Layer): - def __init__(self, - output_image_size=None, - num_control_points=None, - margins=None): - super(TPSSpatialTransformer, self).__init__() - self.output_image_size = output_image_size - self.num_control_points = num_control_points - self.margins = margins - - self.target_height, self.target_width = output_image_size - target_control_points = build_output_control_points(num_control_points, - margins) - N = num_control_points - - # create padded kernel matrix - forward_kernel = paddle.zeros(shape=[N + 3, N + 3]) - target_control_partial_repr = compute_partial_repr( - target_control_points, target_control_points) - target_control_partial_repr = paddle.cast(target_control_partial_repr, - forward_kernel.dtype) - forward_kernel[:N, :N] = target_control_partial_repr - forward_kernel[:N, -3] = 1 - forward_kernel[-3, :N] = 1 - target_control_points = paddle.cast(target_control_points, - forward_kernel.dtype) - forward_kernel[:N, -2:] = target_control_points - forward_kernel[-2:, :N] = paddle.transpose( - target_control_points, perm=[1, 0]) - # compute inverse matrix - inverse_kernel = paddle.inverse(forward_kernel) - - # create target cordinate matrix - HW = self.target_height * self.target_width - target_coordinate = list( - itertools.product( - range(self.target_height), range(self.target_width))) - target_coordinate = paddle.to_tensor(target_coordinate) # HW x 2 - Y, X = paddle.split( - target_coordinate, target_coordinate.shape[1], axis=1) - Y = Y / (self.target_height - 1) - X = X / (self.target_width - 1) - target_coordinate = paddle.concat( - [X, Y], axis=1) # convert from (y, x) to (x, y) - target_coordinate_partial_repr = compute_partial_repr( - target_coordinate, target_control_points) - target_coordinate_repr = paddle.concat( - [ - target_coordinate_partial_repr, paddle.ones(shape=[HW, 1]), - target_coordinate - ], - axis=1) - - # register precomputed matrices - self.inverse_kernel = inverse_kernel - self.padding_matrix = paddle.zeros(shape=[3, 2]) - self.target_coordinate_repr = target_coordinate_repr - self.target_control_points = target_control_points - - def forward(self, input, source_control_points): - assert source_control_points.ndimension() == 3 - assert source_control_points.shape[1] == self.num_control_points - assert source_control_points.shape[2] == 2 - batch_size = paddle.shape(source_control_points)[0] - - padding_matrix = paddle.expand( - self.padding_matrix, shape=[batch_size, 3, 2]) - Y = paddle.concat([source_control_points, padding_matrix], 1) - mapping_matrix = paddle.matmul(self.inverse_kernel, Y) - source_coordinate = paddle.matmul(self.target_coordinate_repr, - mapping_matrix) - - grid = paddle.reshape( - source_coordinate, - shape=[-1, self.target_height, self.target_width, 2]) - grid = paddle.clip(grid, 0, - 1) # the source_control_points may be out of [0, 1]. - # the input to grid_sample is normalized [-1, 1], but what we get is [0, 1] - grid = 2.0 * grid - 1.0 - output_maps = grid_sample(input, grid, canvas=None) - return output_maps, source_coordinate diff --git a/backend/ppocr/optimizer/__init__.py b/backend/ppocr/optimizer/__init__.py deleted file mode 100644 index a6bd2eb..0000000 --- a/backend/ppocr/optimizer/__init__.py +++ /dev/null @@ -1,62 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -import copy -import paddle - -__all__ = ['build_optimizer'] - - -def build_lr_scheduler(lr_config, epochs, step_each_epoch): - from . import learning_rate - lr_config.update({'epochs': epochs, 'step_each_epoch': step_each_epoch}) - lr_name = lr_config.pop('name', 'Const') - lr = getattr(learning_rate, lr_name)(**lr_config)() - return lr - - -def build_optimizer(config, epochs, step_each_epoch, model): - from . import regularizer, optimizer - config = copy.deepcopy(config) - # step1 build lr - lr = build_lr_scheduler(config.pop('lr'), epochs, step_each_epoch) - - # step2 build regularization - if 'regularizer' in config and config['regularizer'] is not None: - reg_config = config.pop('regularizer') - reg_name = reg_config.pop('name') - if not hasattr(regularizer, reg_name): - reg_name += 'Decay' - reg = getattr(regularizer, reg_name)(**reg_config)() - elif 'weight_decay' in config: - reg = config.pop('weight_decay') - else: - reg = None - - # step3 build optimizer - optim_name = config.pop('name') - if 'clip_norm' in config: - clip_norm = config.pop('clip_norm') - grad_clip = paddle.nn.ClipGradByNorm(clip_norm=clip_norm) - else: - grad_clip = None - optim = getattr(optimizer, optim_name)(learning_rate=lr, - weight_decay=reg, - grad_clip=grad_clip, - **config) - return optim(model), lr diff --git a/backend/ppocr/optimizer/learning_rate.py b/backend/ppocr/optimizer/learning_rate.py deleted file mode 100644 index fe251f3..0000000 --- a/backend/ppocr/optimizer/learning_rate.py +++ /dev/null @@ -1,310 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from paddle.optimizer import lr -from .lr_scheduler import CyclicalCosineDecay, OneCycleDecay - - -class Linear(object): - """ - Linear learning rate decay - Args: - lr (float): The initial learning rate. It is a python float number. - epochs(int): The decay step size. It determines the decay cycle. - end_lr(float, optional): The minimum final learning rate. Default: 0.0001. - power(float, optional): Power of polynomial. Default: 1.0. - last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. - """ - - def __init__(self, - learning_rate, - epochs, - step_each_epoch, - end_lr=0.0, - power=1.0, - warmup_epoch=0, - last_epoch=-1, - **kwargs): - super(Linear, self).__init__() - self.learning_rate = learning_rate - self.epochs = epochs * step_each_epoch - self.end_lr = end_lr - self.power = power - self.last_epoch = last_epoch - self.warmup_epoch = round(warmup_epoch * step_each_epoch) - - def __call__(self): - learning_rate = lr.PolynomialDecay( - learning_rate=self.learning_rate, - decay_steps=self.epochs, - end_lr=self.end_lr, - power=self.power, - last_epoch=self.last_epoch) - if self.warmup_epoch > 0: - learning_rate = lr.LinearWarmup( - learning_rate=learning_rate, - warmup_steps=self.warmup_epoch, - start_lr=0.0, - end_lr=self.learning_rate, - last_epoch=self.last_epoch) - return learning_rate - - -class Cosine(object): - """ - Cosine learning rate decay - lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1) - Args: - lr(float): initial learning rate - step_each_epoch(int): steps each epoch - epochs(int): total training epochs - last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. - """ - - def __init__(self, - learning_rate, - step_each_epoch, - epochs, - warmup_epoch=0, - last_epoch=-1, - **kwargs): - super(Cosine, self).__init__() - self.learning_rate = learning_rate - self.T_max = step_each_epoch * epochs - self.last_epoch = last_epoch - self.warmup_epoch = round(warmup_epoch * step_each_epoch) - - def __call__(self): - learning_rate = lr.CosineAnnealingDecay( - learning_rate=self.learning_rate, - T_max=self.T_max, - last_epoch=self.last_epoch) - if self.warmup_epoch > 0: - learning_rate = lr.LinearWarmup( - learning_rate=learning_rate, - warmup_steps=self.warmup_epoch, - start_lr=0.0, - end_lr=self.learning_rate, - last_epoch=self.last_epoch) - return learning_rate - - -class Step(object): - """ - Piecewise learning rate decay - Args: - step_each_epoch(int): steps each epoch - learning_rate (float): The initial learning rate. It is a python float number. - step_size (int): the interval to update. - gamma (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` . - It should be less than 1.0. Default: 0.1. - last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. - """ - - def __init__(self, - learning_rate, - step_size, - step_each_epoch, - gamma, - warmup_epoch=0, - last_epoch=-1, - **kwargs): - super(Step, self).__init__() - self.step_size = step_each_epoch * step_size - self.learning_rate = learning_rate - self.gamma = gamma - self.last_epoch = last_epoch - self.warmup_epoch = round(warmup_epoch * step_each_epoch) - - def __call__(self): - learning_rate = lr.StepDecay( - learning_rate=self.learning_rate, - step_size=self.step_size, - gamma=self.gamma, - last_epoch=self.last_epoch) - if self.warmup_epoch > 0: - learning_rate = lr.LinearWarmup( - learning_rate=learning_rate, - warmup_steps=self.warmup_epoch, - start_lr=0.0, - end_lr=self.learning_rate, - last_epoch=self.last_epoch) - return learning_rate - - -class Piecewise(object): - """ - Piecewise learning rate decay - Args: - boundaries(list): A list of steps numbers. The type of element in the list is python int. - values(list): A list of learning rate values that will be picked during different epoch boundaries. - The type of element in the list is python float. - last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. - """ - - def __init__(self, - step_each_epoch, - decay_epochs, - values, - warmup_epoch=0, - last_epoch=-1, - **kwargs): - super(Piecewise, self).__init__() - self.boundaries = [step_each_epoch * e for e in decay_epochs] - self.values = values - self.last_epoch = last_epoch - self.warmup_epoch = round(warmup_epoch * step_each_epoch) - - def __call__(self): - learning_rate = lr.PiecewiseDecay( - boundaries=self.boundaries, - values=self.values, - last_epoch=self.last_epoch) - if self.warmup_epoch > 0: - learning_rate = lr.LinearWarmup( - learning_rate=learning_rate, - warmup_steps=self.warmup_epoch, - start_lr=0.0, - end_lr=self.values[0], - last_epoch=self.last_epoch) - return learning_rate - - -class CyclicalCosine(object): - """ - Cyclical cosine learning rate decay - Args: - learning_rate(float): initial learning rate - step_each_epoch(int): steps each epoch - epochs(int): total training epochs - cycle(int): period of the cosine learning rate - last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. - """ - - def __init__(self, - learning_rate, - step_each_epoch, - epochs, - cycle, - warmup_epoch=0, - last_epoch=-1, - **kwargs): - super(CyclicalCosine, self).__init__() - self.learning_rate = learning_rate - self.T_max = step_each_epoch * epochs - self.last_epoch = last_epoch - self.warmup_epoch = round(warmup_epoch * step_each_epoch) - self.cycle = round(cycle * step_each_epoch) - - def __call__(self): - learning_rate = CyclicalCosineDecay( - learning_rate=self.learning_rate, - T_max=self.T_max, - cycle=self.cycle, - last_epoch=self.last_epoch) - if self.warmup_epoch > 0: - learning_rate = lr.LinearWarmup( - learning_rate=learning_rate, - warmup_steps=self.warmup_epoch, - start_lr=0.0, - end_lr=self.learning_rate, - last_epoch=self.last_epoch) - return learning_rate - - -class OneCycle(object): - """ - One Cycle learning rate decay - Args: - max_lr(float): Upper learning rate boundaries - epochs(int): total training epochs - step_each_epoch(int): steps each epoch - anneal_strategy(str): {‘cos’, ‘linear’} Specifies the annealing strategy: “cos” for cosine annealing, “linear” for linear annealing. - Default: ‘cos’ - three_phase(bool): If True, use a third phase of the schedule to annihilate the learning rate according to ‘final_div_factor’ - instead of modifying the second phase (the first two phases will be symmetrical about the step indicated by ‘pct_start’). - last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. - """ - - def __init__(self, - max_lr, - epochs, - step_each_epoch, - anneal_strategy='cos', - three_phase=False, - warmup_epoch=0, - last_epoch=-1, - **kwargs): - super(OneCycle, self).__init__() - self.max_lr = max_lr - self.epochs = epochs - self.steps_per_epoch = step_each_epoch - self.anneal_strategy = anneal_strategy - self.three_phase = three_phase - self.last_epoch = last_epoch - self.warmup_epoch = round(warmup_epoch * step_each_epoch) - - def __call__(self): - learning_rate = OneCycleDecay( - max_lr=self.max_lr, - epochs=self.epochs, - steps_per_epoch=self.steps_per_epoch, - anneal_strategy=self.anneal_strategy, - three_phase=self.three_phase, - last_epoch=self.last_epoch) - if self.warmup_epoch > 0: - learning_rate = lr.LinearWarmup( - learning_rate=learning_rate, - warmup_steps=self.warmup_epoch, - start_lr=0.0, - end_lr=self.max_lr, - last_epoch=self.last_epoch) - return learning_rate - - -class Const(object): - """ - Const learning rate decay - Args: - learning_rate(float): initial learning rate - step_each_epoch(int): steps each epoch - last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. - """ - - def __init__(self, - learning_rate, - step_each_epoch, - warmup_epoch=0, - last_epoch=-1, - **kwargs): - super(Const, self).__init__() - self.learning_rate = learning_rate - self.last_epoch = last_epoch - self.warmup_epoch = round(warmup_epoch * step_each_epoch) - - def __call__(self): - learning_rate = self.learning_rate - if self.warmup_epoch > 0: - learning_rate = lr.LinearWarmup( - learning_rate=learning_rate, - warmup_steps=self.warmup_epoch, - start_lr=0.0, - end_lr=self.learning_rate, - last_epoch=self.last_epoch) - return learning_rate diff --git a/backend/ppocr/optimizer/lr_scheduler.py b/backend/ppocr/optimizer/lr_scheduler.py deleted file mode 100644 index f62f1f3..0000000 --- a/backend/ppocr/optimizer/lr_scheduler.py +++ /dev/null @@ -1,162 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math -from paddle.optimizer.lr import LRScheduler - - -class CyclicalCosineDecay(LRScheduler): - def __init__(self, - learning_rate, - T_max, - cycle=1, - last_epoch=-1, - eta_min=0.0, - verbose=False): - """ - Cyclical cosine learning rate decay - A learning rate which can be referred in https://arxiv.org/pdf/2012.12645.pdf - Args: - learning rate(float): learning rate - T_max(int): maximum epoch num - cycle(int): period of the cosine decay - last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. - eta_min(float): minimum learning rate during training - verbose(bool): whether to print learning rate for each epoch - """ - super(CyclicalCosineDecay, self).__init__(learning_rate, last_epoch, - verbose) - self.cycle = cycle - self.eta_min = eta_min - - def get_lr(self): - if self.last_epoch == 0: - return self.base_lr - reletive_epoch = self.last_epoch % self.cycle - lr = self.eta_min + 0.5 * (self.base_lr - self.eta_min) * \ - (1 + math.cos(math.pi * reletive_epoch / self.cycle)) - return lr - - -class OneCycleDecay(LRScheduler): - """ - One Cycle learning rate decay - A learning rate which can be referred in https://arxiv.org/abs/1708.07120 - Code refered in https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR - """ - - def __init__(self, - max_lr, - epochs=None, - steps_per_epoch=None, - pct_start=0.3, - anneal_strategy='cos', - div_factor=25., - final_div_factor=1e4, - three_phase=False, - last_epoch=-1, - verbose=False): - - # Validate total_steps - if epochs <= 0 or not isinstance(epochs, int): - raise ValueError( - "Expected positive integer epochs, but got {}".format(epochs)) - if steps_per_epoch <= 0 or not isinstance(steps_per_epoch, int): - raise ValueError( - "Expected positive integer steps_per_epoch, but got {}".format( - steps_per_epoch)) - self.total_steps = epochs * steps_per_epoch - - self.max_lr = max_lr - self.initial_lr = self.max_lr / div_factor - self.min_lr = self.initial_lr / final_div_factor - - if three_phase: - self._schedule_phases = [ - { - 'end_step': float(pct_start * self.total_steps) - 1, - 'start_lr': self.initial_lr, - 'end_lr': self.max_lr, - }, - { - 'end_step': float(2 * pct_start * self.total_steps) - 2, - 'start_lr': self.max_lr, - 'end_lr': self.initial_lr, - }, - { - 'end_step': self.total_steps - 1, - 'start_lr': self.initial_lr, - 'end_lr': self.min_lr, - }, - ] - else: - self._schedule_phases = [ - { - 'end_step': float(pct_start * self.total_steps) - 1, - 'start_lr': self.initial_lr, - 'end_lr': self.max_lr, - }, - { - 'end_step': self.total_steps - 1, - 'start_lr': self.max_lr, - 'end_lr': self.min_lr, - }, - ] - - # Validate pct_start - if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float): - raise ValueError( - "Expected float between 0 and 1 pct_start, but got {}".format( - pct_start)) - - # Validate anneal_strategy - if anneal_strategy not in ['cos', 'linear']: - raise ValueError( - "anneal_strategy must by one of 'cos' or 'linear', instead got {}". - format(anneal_strategy)) - elif anneal_strategy == 'cos': - self.anneal_func = self._annealing_cos - elif anneal_strategy == 'linear': - self.anneal_func = self._annealing_linear - - super(OneCycleDecay, self).__init__(max_lr, last_epoch, verbose) - - def _annealing_cos(self, start, end, pct): - "Cosine anneal from `start` to `end` as pct goes from 0.0 to 1.0." - cos_out = math.cos(math.pi * pct) + 1 - return end + (start - end) / 2.0 * cos_out - - def _annealing_linear(self, start, end, pct): - "Linearly anneal from `start` to `end` as pct goes from 0.0 to 1.0." - return (end - start) * pct + start - - def get_lr(self): - computed_lr = 0.0 - step_num = self.last_epoch - - if step_num > self.total_steps: - raise ValueError( - "Tried to step {} times. The specified number of total steps is {}" - .format(step_num + 1, self.total_steps)) - start_step = 0 - for i, phase in enumerate(self._schedule_phases): - end_step = phase['end_step'] - if step_num <= end_step or i == len(self._schedule_phases) - 1: - pct = (step_num - start_step) / (end_step - start_step) - computed_lr = self.anneal_func(phase['start_lr'], - phase['end_lr'], pct) - break - start_step = phase['end_step'] - - return computed_lr diff --git a/backend/ppocr/optimizer/optimizer.py b/backend/ppocr/optimizer/optimizer.py deleted file mode 100644 index dd8544e..0000000 --- a/backend/ppocr/optimizer/optimizer.py +++ /dev/null @@ -1,234 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from paddle import optimizer as optim - - -class Momentum(object): - """ - Simple Momentum optimizer with velocity state. - Args: - learning_rate (float|Variable) - The learning rate used to update parameters. - Can be a float value or a Variable with one float value as data element. - momentum (float) - Momentum factor. - regularization (WeightDecayRegularizer, optional) - The strategy of regularization. - """ - - def __init__(self, - learning_rate, - momentum, - weight_decay=None, - grad_clip=None, - **args): - super(Momentum, self).__init__() - self.learning_rate = learning_rate - self.momentum = momentum - self.weight_decay = weight_decay - self.grad_clip = grad_clip - - def __call__(self, model): - train_params = [ - param for param in model.parameters() if param.trainable is True - ] - opt = optim.Momentum( - learning_rate=self.learning_rate, - momentum=self.momentum, - weight_decay=self.weight_decay, - grad_clip=self.grad_clip, - parameters=train_params) - return opt - - -class Adam(object): - def __init__(self, - learning_rate=0.001, - beta1=0.9, - beta2=0.999, - epsilon=1e-08, - parameter_list=None, - weight_decay=None, - grad_clip=None, - name=None, - lazy_mode=False, - **kwargs): - self.learning_rate = learning_rate - self.beta1 = beta1 - self.beta2 = beta2 - self.epsilon = epsilon - self.parameter_list = parameter_list - self.learning_rate = learning_rate - self.weight_decay = weight_decay - self.grad_clip = grad_clip - self.name = name - self.lazy_mode = lazy_mode - - def __call__(self, model): - train_params = [ - param for param in model.parameters() if param.trainable is True - ] - opt = optim.Adam( - learning_rate=self.learning_rate, - beta1=self.beta1, - beta2=self.beta2, - epsilon=self.epsilon, - weight_decay=self.weight_decay, - grad_clip=self.grad_clip, - name=self.name, - lazy_mode=self.lazy_mode, - parameters=train_params) - return opt - - -class RMSProp(object): - """ - Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning rate method. - Args: - learning_rate (float|Variable) - The learning rate used to update parameters. - Can be a float value or a Variable with one float value as data element. - momentum (float) - Momentum factor. - rho (float) - rho value in equation. - epsilon (float) - avoid division by zero, default is 1e-6. - regularization (WeightDecayRegularizer, optional) - The strategy of regularization. - """ - - def __init__(self, - learning_rate, - momentum=0.0, - rho=0.95, - epsilon=1e-6, - weight_decay=None, - grad_clip=None, - **args): - super(RMSProp, self).__init__() - self.learning_rate = learning_rate - self.momentum = momentum - self.rho = rho - self.epsilon = epsilon - self.weight_decay = weight_decay - self.grad_clip = grad_clip - - def __call__(self, model): - train_params = [ - param for param in model.parameters() if param.trainable is True - ] - opt = optim.RMSProp( - learning_rate=self.learning_rate, - momentum=self.momentum, - rho=self.rho, - epsilon=self.epsilon, - weight_decay=self.weight_decay, - grad_clip=self.grad_clip, - parameters=train_params) - return opt - - -class Adadelta(object): - def __init__(self, - learning_rate=0.001, - epsilon=1e-08, - rho=0.95, - parameter_list=None, - weight_decay=None, - grad_clip=None, - name=None, - **kwargs): - self.learning_rate = learning_rate - self.epsilon = epsilon - self.rho = rho - self.parameter_list = parameter_list - self.learning_rate = learning_rate - self.weight_decay = weight_decay - self.grad_clip = grad_clip - self.name = name - - def __call__(self, model): - train_params = [ - param for param in model.parameters() if param.trainable is True - ] - opt = optim.Adadelta( - learning_rate=self.learning_rate, - epsilon=self.epsilon, - rho=self.rho, - weight_decay=self.weight_decay, - grad_clip=self.grad_clip, - name=self.name, - parameters=train_params) - return opt - - -class AdamW(object): - def __init__(self, - learning_rate=0.001, - beta1=0.9, - beta2=0.999, - epsilon=1e-8, - weight_decay=0.01, - multi_precision=False, - grad_clip=None, - no_weight_decay_name=None, - one_dim_param_no_weight_decay=False, - name=None, - lazy_mode=False, - **args): - super().__init__() - self.learning_rate = learning_rate - self.beta1 = beta1 - self.beta2 = beta2 - self.epsilon = epsilon - self.grad_clip = grad_clip - self.weight_decay = 0.01 if weight_decay is None else weight_decay - self.grad_clip = grad_clip - self.name = name - self.lazy_mode = lazy_mode - self.multi_precision = multi_precision - self.no_weight_decay_name_list = no_weight_decay_name.split( - ) if no_weight_decay_name else [] - self.one_dim_param_no_weight_decay = one_dim_param_no_weight_decay - - def __call__(self, model): - parameters = [ - param for param in model.parameters() if param.trainable is True - ] - - self.no_weight_decay_param_name_list = [ - p.name for n, p in model.named_parameters() - if any(nd in n for nd in self.no_weight_decay_name_list) - ] - - if self.one_dim_param_no_weight_decay: - self.no_weight_decay_param_name_list += [ - p.name for n, p in model.named_parameters() if len(p.shape) == 1 - ] - - opt = optim.AdamW( - learning_rate=self.learning_rate, - beta1=self.beta1, - beta2=self.beta2, - epsilon=self.epsilon, - parameters=parameters, - weight_decay=self.weight_decay, - multi_precision=self.multi_precision, - grad_clip=self.grad_clip, - name=self.name, - lazy_mode=self.lazy_mode, - apply_decay_param_fun=self._apply_decay_param_fun) - return opt - - def _apply_decay_param_fun(self, name): - return name not in self.no_weight_decay_param_name_list diff --git a/backend/ppocr/optimizer/regularizer.py b/backend/ppocr/optimizer/regularizer.py deleted file mode 100644 index 2ce68f7..0000000 --- a/backend/ppocr/optimizer/regularizer.py +++ /dev/null @@ -1,51 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import paddle - - -class L1Decay(object): - """ - L1 Weight Decay Regularization, which encourages the weights to be sparse. - Args: - factor(float): regularization coeff. Default:0.0. - """ - - def __init__(self, factor=0.0): - super(L1Decay, self).__init__() - self.coeff = factor - - def __call__(self): - reg = paddle.regularizer.L1Decay(self.coeff) - return reg - - -class L2Decay(object): - """ - L2 Weight Decay Regularization, which helps to prevent the model over-fitting. - Args: - factor(float): regularization coeff. Default:0.0. - """ - - def __init__(self, factor=0.0): - super(L2Decay, self).__init__() - self.coeff = float(factor) - - def __call__(self): - return self.coeff \ No newline at end of file diff --git a/backend/ppocr/postprocess/__init__.py b/backend/ppocr/postprocess/__init__.py deleted file mode 100644 index f50b5f1..0000000 --- a/backend/ppocr/postprocess/__init__.py +++ /dev/null @@ -1,61 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import copy - -__all__ = ['build_post_process'] - -from .db_postprocess import DBPostProcess, DistillationDBPostProcess -from .east_postprocess import EASTPostProcess -from .sast_postprocess import SASTPostProcess -from .fce_postprocess import FCEPostProcess -from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, \ - DistillationCTCLabelDecode, TableLabelDecode, NRTRLabelDecode, SARLabelDecode, \ - SEEDLabelDecode, PRENLabelDecode -from .cls_postprocess import ClsPostProcess -from .pg_postprocess import PGPostProcess -from .vqa_token_ser_layoutlm_postprocess import VQASerTokenLayoutLMPostProcess -from .vqa_token_re_layoutlm_postprocess import VQAReTokenLayoutLMPostProcess - - -def build_post_process(config, global_config=None): - support_dict = [ - 'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'FCEPostProcess', - 'CTCLabelDecode', 'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', - 'PGPostProcess', 'DistillationCTCLabelDecode', 'TableLabelDecode', - 'DistillationDBPostProcess', 'NRTRLabelDecode', 'SARLabelDecode', - 'SEEDLabelDecode', 'VQASerTokenLayoutLMPostProcess', - 'VQAReTokenLayoutLMPostProcess', 'PRENLabelDecode', - 'DistillationSARLabelDecode' - ] - - if config['name'] == 'PSEPostProcess': - from .pse_postprocess import PSEPostProcess - support_dict.append('PSEPostProcess') - - config = copy.deepcopy(config) - module_name = config.pop('name') - if module_name == "None": - return - if global_config is not None: - config.update(global_config) - assert module_name in support_dict, Exception( - 'post process only support {}'.format(support_dict)) - module_class = eval(module_name)(**config) - return module_class diff --git a/backend/ppocr/postprocess/cls_postprocess.py b/backend/ppocr/postprocess/cls_postprocess.py deleted file mode 100644 index 9a27ba0..0000000 --- a/backend/ppocr/postprocess/cls_postprocess.py +++ /dev/null @@ -1,42 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import paddle - - -class ClsPostProcess(object): - """ Convert between text-label and text-index """ - - def __init__(self, label_list=None, key=None, **kwargs): - super(ClsPostProcess, self).__init__() - self.label_list = label_list - self.key = key - - def __call__(self, preds, label=None, *args, **kwargs): - if self.key is not None: - preds = preds[self.key] - - label_list = self.label_list - if label_list is None: - label_list = {idx: idx for idx in range(preds.shape[-1])} - - if isinstance(preds, paddle.Tensor): - preds = preds.numpy() - - pred_idxs = preds.argmax(axis=1) - decode_out = [(label_list[idx], preds[i, idx]) - for i, idx in enumerate(pred_idxs)] - if label is None: - return decode_out - label = [(label_list[idx], 1.0) for idx in label] - return decode_out, label diff --git a/backend/ppocr/postprocess/db_postprocess.py b/backend/ppocr/postprocess/db_postprocess.py deleted file mode 100755 index 27b428e..0000000 --- a/backend/ppocr/postprocess/db_postprocess.py +++ /dev/null @@ -1,220 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refered from: -https://github.com/WenmuZhou/DBNet.pytorch/blob/master/post_processing/seg_detector_representer.py -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import cv2 -import paddle -from shapely.geometry import Polygon -import pyclipper - - -class DBPostProcess(object): - """ - The post process for Differentiable Binarization (DB). - """ - - def __init__(self, - thresh=0.3, - box_thresh=0.7, - max_candidates=1000, - unclip_ratio=2.0, - use_dilation=False, - score_mode="fast", - **kwargs): - self.thresh = thresh - self.box_thresh = box_thresh - self.max_candidates = max_candidates - self.unclip_ratio = unclip_ratio - self.min_size = 3 - self.score_mode = score_mode - assert score_mode in [ - "slow", "fast" - ], "Score mode must be in [slow, fast] but got: {}".format(score_mode) - - self.dilation_kernel = None if not use_dilation else np.array( - [[1, 1], [1, 1]]) - - def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height): - ''' - _bitmap: single map with shape (1, H, W), - whose values are binarized as {0, 1} - ''' - - bitmap = _bitmap - height, width = bitmap.shape - - outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, - cv2.CHAIN_APPROX_SIMPLE) - if len(outs) == 3: - img, contours, _ = outs[0], outs[1], outs[2] - elif len(outs) == 2: - contours, _ = outs[0], outs[1] - - num_contours = min(len(contours), self.max_candidates) - - boxes = [] - scores = [] - for index in range(num_contours): - contour = contours[index] - points, sside = self.get_mini_boxes(contour) - if sside < self.min_size: - continue - points = np.array(points) - if self.score_mode == "fast": - score = self.box_score_fast(pred, points.reshape(-1, 2)) - else: - score = self.box_score_slow(pred, contour) - if self.box_thresh > score: - continue - - box = self.unclip(points).reshape(-1, 1, 2) - box, sside = self.get_mini_boxes(box) - if sside < self.min_size + 2: - continue - box = np.array(box) - - box[:, 0] = np.clip( - np.round(box[:, 0] / width * dest_width), 0, dest_width) - box[:, 1] = np.clip( - np.round(box[:, 1] / height * dest_height), 0, dest_height) - boxes.append(box.astype(np.int16)) - scores.append(score) - return np.array(boxes, dtype=np.int16), scores - - def unclip(self, box): - unclip_ratio = self.unclip_ratio - poly = Polygon(box) - distance = poly.area * unclip_ratio / poly.length - offset = pyclipper.PyclipperOffset() - offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) - expanded = np.array(offset.Execute(distance)) - return expanded - - def get_mini_boxes(self, contour): - bounding_box = cv2.minAreaRect(contour) - points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) - - index_1, index_2, index_3, index_4 = 0, 1, 2, 3 - if points[1][1] > points[0][1]: - index_1 = 0 - index_4 = 1 - else: - index_1 = 1 - index_4 = 0 - if points[3][1] > points[2][1]: - index_2 = 2 - index_3 = 3 - else: - index_2 = 3 - index_3 = 2 - - box = [ - points[index_1], points[index_2], points[index_3], points[index_4] - ] - return box, min(bounding_box[1]) - - def box_score_fast(self, bitmap, _box): - ''' - box_score_fast: use bbox mean score as the mean score - ''' - h, w = bitmap.shape[:2] - box = _box.copy() - xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1) - xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1) - ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1) - ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int), 0, h - 1) - - mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) - box[:, 0] = box[:, 0] - xmin - box[:, 1] = box[:, 1] - ymin - cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1) - return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] - - def box_score_slow(self, bitmap, contour): - ''' - box_score_slow: use polyon mean score as the mean score - ''' - h, w = bitmap.shape[:2] - contour = contour.copy() - contour = np.reshape(contour, (-1, 2)) - - xmin = np.clip(np.min(contour[:, 0]), 0, w - 1) - xmax = np.clip(np.max(contour[:, 0]), 0, w - 1) - ymin = np.clip(np.min(contour[:, 1]), 0, h - 1) - ymax = np.clip(np.max(contour[:, 1]), 0, h - 1) - - mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) - - contour[:, 0] = contour[:, 0] - xmin - contour[:, 1] = contour[:, 1] - ymin - - cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1) - return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] - - def __call__(self, outs_dict, shape_list): - pred = outs_dict['maps'] - if isinstance(pred, paddle.Tensor): - pred = pred.numpy() - pred = pred[:, 0, :, :] - segmentation = pred > self.thresh - - boxes_batch = [] - for batch_index in range(pred.shape[0]): - src_h, src_w, ratio_h, ratio_w = shape_list[batch_index] - if self.dilation_kernel is not None: - mask = cv2.dilate( - np.array(segmentation[batch_index]).astype(np.uint8), - self.dilation_kernel) - else: - mask = segmentation[batch_index] - boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask, - src_w, src_h) - - boxes_batch.append({'points': boxes}) - return boxes_batch - - -class DistillationDBPostProcess(object): - def __init__(self, - model_name=["student"], - key=None, - thresh=0.3, - box_thresh=0.6, - max_candidates=1000, - unclip_ratio=1.5, - use_dilation=False, - score_mode="fast", - **kwargs): - self.model_name = model_name - self.key = key - self.post_process = DBPostProcess( - thresh=thresh, - box_thresh=box_thresh, - max_candidates=max_candidates, - unclip_ratio=unclip_ratio, - use_dilation=use_dilation, - score_mode=score_mode) - - def __call__(self, predicts, shape_list): - results = {} - for k in self.model_name: - results[k] = self.post_process(predicts[k], shape_list=shape_list) - return results diff --git a/backend/ppocr/postprocess/east_postprocess.py b/backend/ppocr/postprocess/east_postprocess.py deleted file mode 100755 index c194c81..0000000 --- a/backend/ppocr/postprocess/east_postprocess.py +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -from .locality_aware_nms import nms_locality -import cv2 -import paddle - -import os -import sys - - -class EASTPostProcess(object): - """ - The post process for EAST. - """ - - def __init__(self, - score_thresh=0.8, - cover_thresh=0.1, - nms_thresh=0.2, - **kwargs): - - self.score_thresh = score_thresh - self.cover_thresh = cover_thresh - self.nms_thresh = nms_thresh - - def restore_rectangle_quad(self, origin, geometry): - """ - Restore rectangle from quadrangle. - """ - # quad - origin_concat = np.concatenate( - (origin, origin, origin, origin), axis=1) # (n, 8) - pred_quads = origin_concat - geometry - pred_quads = pred_quads.reshape((-1, 4, 2)) # (n, 4, 2) - return pred_quads - - def detect(self, - score_map, - geo_map, - score_thresh=0.8, - cover_thresh=0.1, - nms_thresh=0.2): - """ - restore text boxes from score map and geo map - """ - - score_map = score_map[0] - geo_map = np.swapaxes(geo_map, 1, 0) - geo_map = np.swapaxes(geo_map, 1, 2) - # filter the score map - xy_text = np.argwhere(score_map > score_thresh) - if len(xy_text) == 0: - return [] - # sort the text boxes via the y axis - xy_text = xy_text[np.argsort(xy_text[:, 0])] - #restore quad proposals - text_box_restored = self.restore_rectangle_quad( - xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :]) - boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32) - boxes[:, :8] = text_box_restored.reshape((-1, 8)) - boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]] - - try: - import lanms - boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh) - except: - print( - 'you should install lanms by pip3 install lanms-nova to speed up nms_locality' - ) - boxes = nms_locality(boxes.astype(np.float64), nms_thresh) - if boxes.shape[0] == 0: - return [] - # Here we filter some low score boxes by the average score map, - # this is different from the orginal paper. - for i, box in enumerate(boxes): - mask = np.zeros_like(score_map, dtype=np.uint8) - cv2.fillPoly(mask, box[:8].reshape( - (-1, 4, 2)).astype(np.int32) // 4, 1) - boxes[i, 8] = cv2.mean(score_map, mask)[0] - boxes = boxes[boxes[:, 8] > cover_thresh] - return boxes - - def sort_poly(self, p): - """ - Sort polygons. - """ - min_axis = np.argmin(np.sum(p, axis=1)) - p = p[[min_axis, (min_axis + 1) % 4,\ - (min_axis + 2) % 4, (min_axis + 3) % 4]] - if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]): - return p - else: - return p[[0, 3, 2, 1]] - - def __call__(self, outs_dict, shape_list): - score_list = outs_dict['f_score'] - geo_list = outs_dict['f_geo'] - if isinstance(score_list, paddle.Tensor): - score_list = score_list.numpy() - geo_list = geo_list.numpy() - img_num = len(shape_list) - dt_boxes_list = [] - for ino in range(img_num): - score = score_list[ino] - geo = geo_list[ino] - boxes = self.detect( - score_map=score, - geo_map=geo, - score_thresh=self.score_thresh, - cover_thresh=self.cover_thresh, - nms_thresh=self.nms_thresh) - boxes_norm = [] - if len(boxes) > 0: - h, w = score.shape[1:] - src_h, src_w, ratio_h, ratio_w = shape_list[ino] - boxes = boxes[:, :8].reshape((-1, 4, 2)) - boxes[:, :, 0] /= ratio_w - boxes[:, :, 1] /= ratio_h - for i_box, box in enumerate(boxes): - box = self.sort_poly(box.astype(np.int32)) - if np.linalg.norm(box[0] - box[1]) < 5 \ - or np.linalg.norm(box[3] - box[0]) < 5: - continue - boxes_norm.append(box) - dt_boxes_list.append({'points': np.array(boxes_norm)}) - return dt_boxes_list diff --git a/backend/ppocr/postprocess/fce_postprocess.py b/backend/ppocr/postprocess/fce_postprocess.py deleted file mode 100755 index 8e0716f..0000000 --- a/backend/ppocr/postprocess/fce_postprocess.py +++ /dev/null @@ -1,241 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/open-mmlab/mmocr/blob/v0.3.0/mmocr/models/textdet/postprocess/wrapper.py -""" - -import cv2 -import paddle -import numpy as np -from numpy.fft import ifft -from ppocr.utils.poly_nms import poly_nms, valid_boundary - - -def fill_hole(input_mask): - h, w = input_mask.shape - canvas = np.zeros((h + 2, w + 2), np.uint8) - canvas[1:h + 1, 1:w + 1] = input_mask.copy() - - mask = np.zeros((h + 4, w + 4), np.uint8) - - cv2.floodFill(canvas, mask, (0, 0), 1) - canvas = canvas[1:h + 1, 1:w + 1].astype(np.bool) - - return ~canvas | input_mask - - -def fourier2poly(fourier_coeff, num_reconstr_points=50): - """ Inverse Fourier transform - Args: - fourier_coeff (ndarray): Fourier coefficients shaped (n, 2k+1), - with n and k being candidates number and Fourier degree - respectively. - num_reconstr_points (int): Number of reconstructed polygon points. - Returns: - Polygons (ndarray): The reconstructed polygons shaped (n, n') - """ - - a = np.zeros((len(fourier_coeff), num_reconstr_points), dtype='complex') - k = (len(fourier_coeff[0]) - 1) // 2 - - a[:, 0:k + 1] = fourier_coeff[:, k:] - a[:, -k:] = fourier_coeff[:, :k] - - poly_complex = ifft(a) * num_reconstr_points - polygon = np.zeros((len(fourier_coeff), num_reconstr_points, 2)) - polygon[:, :, 0] = poly_complex.real - polygon[:, :, 1] = poly_complex.imag - return polygon.astype('int32').reshape((len(fourier_coeff), -1)) - - -class FCEPostProcess(object): - """ - The post process for FCENet. - """ - - def __init__(self, - scales, - fourier_degree=5, - num_reconstr_points=50, - decoding_type='fcenet', - score_thr=0.3, - nms_thr=0.1, - alpha=1.0, - beta=1.0, - box_type='poly', - **kwargs): - - self.scales = scales - self.fourier_degree = fourier_degree - self.num_reconstr_points = num_reconstr_points - self.decoding_type = decoding_type - self.score_thr = score_thr - self.nms_thr = nms_thr - self.alpha = alpha - self.beta = beta - self.box_type = box_type - - def __call__(self, preds, shape_list): - score_maps = [] - for key, value in preds.items(): - if isinstance(value, paddle.Tensor): - value = value.numpy() - cls_res = value[:, :4, :, :] - reg_res = value[:, 4:, :, :] - score_maps.append([cls_res, reg_res]) - - return self.get_boundary(score_maps, shape_list) - - def resize_boundary(self, boundaries, scale_factor): - """Rescale boundaries via scale_factor. - - Args: - boundaries (list[list[float]]): The boundary list. Each boundary - with size 2k+1 with k>=4. - scale_factor(ndarray): The scale factor of size (4,). - - Returns: - boundaries (list[list[float]]): The scaled boundaries. - """ - boxes = [] - scores = [] - for b in boundaries: - sz = len(b) - valid_boundary(b, True) - scores.append(b[-1]) - b = (np.array(b[:sz - 1]) * - (np.tile(scale_factor[:2], int( - (sz - 1) / 2)).reshape(1, sz - 1))).flatten().tolist() - boxes.append(np.array(b).reshape([-1, 2])) - - return np.array(boxes, dtype=np.float32), scores - - def get_boundary(self, score_maps, shape_list): - assert len(score_maps) == len(self.scales) - boundaries = [] - for idx, score_map in enumerate(score_maps): - scale = self.scales[idx] - boundaries = boundaries + self._get_boundary_single(score_map, - scale) - - # nms - boundaries = poly_nms(boundaries, self.nms_thr) - boundaries, scores = self.resize_boundary( - boundaries, (1 / shape_list[0, 2:]).tolist()[::-1]) - - boxes_batch = [dict(points=boundaries, scores=scores)] - return boxes_batch - - def _get_boundary_single(self, score_map, scale): - assert len(score_map) == 2 - assert score_map[1].shape[1] == 4 * self.fourier_degree + 2 - - return self.fcenet_decode( - preds=score_map, - fourier_degree=self.fourier_degree, - num_reconstr_points=self.num_reconstr_points, - scale=scale, - alpha=self.alpha, - beta=self.beta, - box_type=self.box_type, - score_thr=self.score_thr, - nms_thr=self.nms_thr) - - def fcenet_decode(self, - preds, - fourier_degree, - num_reconstr_points, - scale, - alpha=1.0, - beta=2.0, - box_type='poly', - score_thr=0.3, - nms_thr=0.1): - """Decoding predictions of FCENet to instances. - - Args: - preds (list(Tensor)): The head output tensors. - fourier_degree (int): The maximum Fourier transform degree k. - num_reconstr_points (int): The points number of the polygon - reconstructed from predicted Fourier coefficients. - scale (int): The down-sample scale of the prediction. - alpha (float) : The parameter to calculate final scores. Score_{final} - = (Score_{text region} ^ alpha) - * (Score_{text center region}^ beta) - beta (float) : The parameter to calculate final score. - box_type (str): Boundary encoding type 'poly' or 'quad'. - score_thr (float) : The threshold used to filter out the final - candidates. - nms_thr (float) : The threshold of nms. - - Returns: - boundaries (list[list[float]]): The instance boundary and confidence - list. - """ - assert isinstance(preds, list) - assert len(preds) == 2 - assert box_type in ['poly', 'quad'] - - cls_pred = preds[0][0] - tr_pred = cls_pred[0:2] - tcl_pred = cls_pred[2:] - - reg_pred = preds[1][0].transpose([1, 2, 0]) - x_pred = reg_pred[:, :, :2 * fourier_degree + 1] - y_pred = reg_pred[:, :, 2 * fourier_degree + 1:] - - score_pred = (tr_pred[1]**alpha) * (tcl_pred[1]**beta) - tr_pred_mask = (score_pred) > score_thr - tr_mask = fill_hole(tr_pred_mask) - - tr_contours, _ = cv2.findContours( - tr_mask.astype(np.uint8), cv2.RETR_TREE, - cv2.CHAIN_APPROX_SIMPLE) # opencv4 - - mask = np.zeros_like(tr_mask) - boundaries = [] - for cont in tr_contours: - deal_map = mask.copy().astype(np.int8) - cv2.drawContours(deal_map, [cont], -1, 1, -1) - - score_map = score_pred * deal_map - score_mask = score_map > 0 - xy_text = np.argwhere(score_mask) - dxy = xy_text[:, 1] + xy_text[:, 0] * 1j - - x, y = x_pred[score_mask], y_pred[score_mask] - c = x + y * 1j - c[:, fourier_degree] = c[:, fourier_degree] + dxy - c *= scale - - polygons = fourier2poly(c, num_reconstr_points) - score = score_map[score_mask].reshape(-1, 1) - polygons = poly_nms(np.hstack((polygons, score)).tolist(), nms_thr) - - boundaries = boundaries + polygons - - boundaries = poly_nms(boundaries, nms_thr) - - if box_type == 'quad': - new_boundaries = [] - for boundary in boundaries: - poly = np.array(boundary[:-1]).reshape(-1, 2).astype(np.float32) - score = boundary[-1] - points = cv2.boxPoints(cv2.minAreaRect(poly)) - points = np.int0(points) - new_boundaries.append(points.reshape(-1).tolist() + [score]) - boundaries = new_boundaries - - return boundaries diff --git a/backend/ppocr/postprocess/locality_aware_nms.py b/backend/ppocr/postprocess/locality_aware_nms.py deleted file mode 100644 index d305ef6..0000000 --- a/backend/ppocr/postprocess/locality_aware_nms.py +++ /dev/null @@ -1,200 +0,0 @@ -""" -Locality aware nms. -This code is refered from: https://github.com/songdejia/EAST/blob/master/locality_aware_nms.py -""" - -import numpy as np -from shapely.geometry import Polygon - - -def intersection(g, p): - """ - Intersection. - """ - g = Polygon(g[:8].reshape((4, 2))) - p = Polygon(p[:8].reshape((4, 2))) - g = g.buffer(0) - p = p.buffer(0) - if not g.is_valid or not p.is_valid: - return 0 - inter = Polygon(g).intersection(Polygon(p)).area - union = g.area + p.area - inter - if union == 0: - return 0 - else: - return inter / union - - -def intersection_iog(g, p): - """ - Intersection_iog. - """ - g = Polygon(g[:8].reshape((4, 2))) - p = Polygon(p[:8].reshape((4, 2))) - if not g.is_valid or not p.is_valid: - return 0 - inter = Polygon(g).intersection(Polygon(p)).area - #union = g.area + p.area - inter - union = p.area - if union == 0: - print("p_area is very small") - return 0 - else: - return inter / union - - -def weighted_merge(g, p): - """ - Weighted merge. - """ - g[:8] = (g[8] * g[:8] + p[8] * p[:8]) / (g[8] + p[8]) - g[8] = (g[8] + p[8]) - return g - - -def standard_nms(S, thres): - """ - Standard nms. - """ - order = np.argsort(S[:, 8])[::-1] - keep = [] - while order.size > 0: - i = order[0] - keep.append(i) - ovr = np.array([intersection(S[i], S[t]) for t in order[1:]]) - - inds = np.where(ovr <= thres)[0] - order = order[inds + 1] - - return S[keep] - - -def standard_nms_inds(S, thres): - """ - Standard nms, retun inds. - """ - order = np.argsort(S[:, 8])[::-1] - keep = [] - while order.size > 0: - i = order[0] - keep.append(i) - ovr = np.array([intersection(S[i], S[t]) for t in order[1:]]) - - inds = np.where(ovr <= thres)[0] - order = order[inds + 1] - - return keep - - -def nms(S, thres): - """ - nms. - """ - order = np.argsort(S[:, 8])[::-1] - keep = [] - while order.size > 0: - i = order[0] - keep.append(i) - ovr = np.array([intersection(S[i], S[t]) for t in order[1:]]) - - inds = np.where(ovr <= thres)[0] - order = order[inds + 1] - - return keep - - -def soft_nms(boxes_in, Nt_thres=0.3, threshold=0.8, sigma=0.5, method=2): - """ - soft_nms - :para boxes_in, N x 9 (coords + score) - :para threshould, eliminate cases min score(0.001) - :para Nt_thres, iou_threshi - :para sigma, gaussian weght - :method, linear or gaussian - """ - boxes = boxes_in.copy() - N = boxes.shape[0] - if N is None or N < 1: - return np.array([]) - pos, maxpos = 0, 0 - weight = 0.0 - inds = np.arange(N) - tbox, sbox = boxes[0].copy(), boxes[0].copy() - for i in range(N): - maxscore = boxes[i, 8] - maxpos = i - tbox = boxes[i].copy() - ti = inds[i] - pos = i + 1 - #get max box - while pos < N: - if maxscore < boxes[pos, 8]: - maxscore = boxes[pos, 8] - maxpos = pos - pos = pos + 1 - #add max box as a detection - boxes[i, :] = boxes[maxpos, :] - inds[i] = inds[maxpos] - #swap - boxes[maxpos, :] = tbox - inds[maxpos] = ti - tbox = boxes[i].copy() - pos = i + 1 - #NMS iteration - while pos < N: - sbox = boxes[pos].copy() - ts_iou_val = intersection(tbox, sbox) - if ts_iou_val > 0: - if method == 1: - if ts_iou_val > Nt_thres: - weight = 1 - ts_iou_val - else: - weight = 1 - elif method == 2: - weight = np.exp(-1.0 * ts_iou_val**2 / sigma) - else: - if ts_iou_val > Nt_thres: - weight = 0 - else: - weight = 1 - boxes[pos, 8] = weight * boxes[pos, 8] - #if box score falls below thresold, discard the box by - #swaping last box update N - if boxes[pos, 8] < threshold: - boxes[pos, :] = boxes[N - 1, :] - inds[pos] = inds[N - 1] - N = N - 1 - pos = pos - 1 - pos = pos + 1 - - return boxes[:N] - - -def nms_locality(polys, thres=0.3): - """ - locality aware nms of EAST - :param polys: a N*9 numpy array. first 8 coordinates, then prob - :return: boxes after nms - """ - S = [] - p = None - for g in polys: - if p is not None and intersection(g, p) > thres: - p = weighted_merge(g, p) - else: - if p is not None: - S.append(p) - p = g - if p is not None: - S.append(p) - - if len(S) == 0: - return np.array([]) - return standard_nms(np.array(S), thres) - - -if __name__ == '__main__': - # 343,350,448,135,474,143,369,359 - print( - Polygon(np.array([[343, 350], [448, 135], [474, 143], [369, 359]])) - .area) \ No newline at end of file diff --git a/backend/ppocr/postprocess/pg_postprocess.py b/backend/ppocr/postprocess/pg_postprocess.py deleted file mode 100644 index 0b14551..0000000 --- a/backend/ppocr/postprocess/pg_postprocess.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys - -__dir__ = os.path.dirname(__file__) -sys.path.append(__dir__) -sys.path.append(os.path.join(__dir__, '..')) -from ppocr.utils.e2e_utils.pgnet_pp_utils import PGNet_PostProcess - - -class PGPostProcess(object): - """ - The post process for PGNet. - """ - - def __init__(self, character_dict_path, valid_set, score_thresh, mode, - **kwargs): - self.character_dict_path = character_dict_path - self.valid_set = valid_set - self.score_thresh = score_thresh - self.mode = mode - - # c++ la-nms is faster, but only support python 3.5 - self.is_python35 = False - if sys.version_info.major == 3 and sys.version_info.minor == 5: - self.is_python35 = True - - def __call__(self, outs_dict, shape_list): - post = PGNet_PostProcess(self.character_dict_path, self.valid_set, - self.score_thresh, outs_dict, shape_list) - if self.mode == 'fast': - data = post.pg_postprocess_fast() - else: - data = post.pg_postprocess_slow() - return data diff --git a/backend/ppocr/postprocess/pse_postprocess/__init__.py b/backend/ppocr/postprocess/pse_postprocess/__init__.py deleted file mode 100644 index 680473b..0000000 --- a/backend/ppocr/postprocess/pse_postprocess/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .pse_postprocess import PSEPostProcess \ No newline at end of file diff --git a/backend/ppocr/postprocess/pse_postprocess/pse/README.md b/backend/ppocr/postprocess/pse_postprocess/pse/README.md deleted file mode 100644 index 6a19d5d..0000000 --- a/backend/ppocr/postprocess/pse_postprocess/pse/README.md +++ /dev/null @@ -1,6 +0,0 @@ -## 编译 -This code is refer from: -https://github.com/whai362/PSENet/blob/python3/models/post_processing/pse -```python -python3 setup.py build_ext --inplace -``` diff --git a/backend/ppocr/postprocess/pse_postprocess/pse/__init__.py b/backend/ppocr/postprocess/pse_postprocess/pse/__init__.py deleted file mode 100644 index 1903a91..0000000 --- a/backend/ppocr/postprocess/pse_postprocess/pse/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys -import os -import subprocess - -python_path = sys.executable - -ori_path = os.getcwd() -os.chdir('ppocr/postprocess/pse_postprocess/pse') -if subprocess.call( - '{} setup.py build_ext --inplace'.format(python_path), shell=True) != 0: - raise RuntimeError( - 'Cannot compile pse: {}, if your system is windows, you need to install all the default components of `desktop development using C++` in visual studio 2019+'. - format(os.path.dirname(os.path.realpath(__file__)))) -os.chdir(ori_path) - -from .pse import pse diff --git a/backend/ppocr/postprocess/pse_postprocess/pse/pse.pyx b/backend/ppocr/postprocess/pse_postprocess/pse/pse.pyx deleted file mode 100644 index b2be49e..0000000 --- a/backend/ppocr/postprocess/pse_postprocess/pse/pse.pyx +++ /dev/null @@ -1,70 +0,0 @@ - -import numpy as np -import cv2 -cimport numpy as np -cimport cython -cimport libcpp -cimport libcpp.pair -cimport libcpp.queue -from libcpp.pair cimport * -from libcpp.queue cimport * - -@cython.boundscheck(False) -@cython.wraparound(False) -cdef np.ndarray[np.int32_t, ndim=2] _pse(np.ndarray[np.uint8_t, ndim=3] kernels, - np.ndarray[np.int32_t, ndim=2] label, - int kernel_num, - int label_num, - float min_area=0): - cdef np.ndarray[np.int32_t, ndim=2] pred - pred = np.zeros((label.shape[0], label.shape[1]), dtype=np.int32) - - for label_idx in range(1, label_num): - if np.sum(label == label_idx) < min_area: - label[label == label_idx] = 0 - - cdef libcpp.queue.queue[libcpp.pair.pair[np.int16_t,np.int16_t]] que = \ - queue[libcpp.pair.pair[np.int16_t,np.int16_t]]() - cdef libcpp.queue.queue[libcpp.pair.pair[np.int16_t,np.int16_t]] nxt_que = \ - queue[libcpp.pair.pair[np.int16_t,np.int16_t]]() - cdef np.int16_t* dx = [-1, 1, 0, 0] - cdef np.int16_t* dy = [0, 0, -1, 1] - cdef np.int16_t tmpx, tmpy - - points = np.array(np.where(label > 0)).transpose((1, 0)) - for point_idx in range(points.shape[0]): - tmpx, tmpy = points[point_idx, 0], points[point_idx, 1] - que.push(pair[np.int16_t,np.int16_t](tmpx, tmpy)) - pred[tmpx, tmpy] = label[tmpx, tmpy] - - cdef libcpp.pair.pair[np.int16_t,np.int16_t] cur - cdef int cur_label - for kernel_idx in range(kernel_num - 1, -1, -1): - while not que.empty(): - cur = que.front() - que.pop() - cur_label = pred[cur.first, cur.second] - - is_edge = True - for j in range(4): - tmpx = cur.first + dx[j] - tmpy = cur.second + dy[j] - if tmpx < 0 or tmpx >= label.shape[0] or tmpy < 0 or tmpy >= label.shape[1]: - continue - if kernels[kernel_idx, tmpx, tmpy] == 0 or pred[tmpx, tmpy] > 0: - continue - - que.push(pair[np.int16_t,np.int16_t](tmpx, tmpy)) - pred[tmpx, tmpy] = cur_label - is_edge = False - if is_edge: - nxt_que.push(cur) - - que, nxt_que = nxt_que, que - - return pred - -def pse(kernels, min_area): - kernel_num = kernels.shape[0] - label_num, label = cv2.connectedComponents(kernels[-1], connectivity=4) - return _pse(kernels[:-1], label, kernel_num, label_num, min_area) \ No newline at end of file diff --git a/backend/ppocr/postprocess/pse_postprocess/pse/setup.py b/backend/ppocr/postprocess/pse_postprocess/pse/setup.py deleted file mode 100644 index 0374678..0000000 --- a/backend/ppocr/postprocess/pse_postprocess/pse/setup.py +++ /dev/null @@ -1,14 +0,0 @@ -from distutils.core import setup, Extension -from Cython.Build import cythonize -import numpy - -setup(ext_modules=cythonize(Extension( - 'pse', - sources=['pse.pyx'], - language='c++', - include_dirs=[numpy.get_include()], - library_dirs=[], - libraries=[], - extra_compile_args=['-O3'], - extra_link_args=[] -))) diff --git a/backend/ppocr/postprocess/pse_postprocess/pse_postprocess.py b/backend/ppocr/postprocess/pse_postprocess/pse_postprocess.py deleted file mode 100755 index 34f1b8c..0000000 --- a/backend/ppocr/postprocess/pse_postprocess/pse_postprocess.py +++ /dev/null @@ -1,118 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/whai362/PSENet/blob/python3/models/head/psenet_head.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import cv2 -import paddle -from paddle.nn import functional as F - -from ppocr.postprocess.pse_postprocess.pse import pse - - -class PSEPostProcess(object): - """ - The post process for PSE. - """ - - def __init__(self, - thresh=0.5, - box_thresh=0.85, - min_area=16, - box_type='quad', - scale=4, - **kwargs): - assert box_type in ['quad', 'poly'], 'Only quad and poly is supported' - self.thresh = thresh - self.box_thresh = box_thresh - self.min_area = min_area - self.box_type = box_type - self.scale = scale - - def __call__(self, outs_dict, shape_list): - pred = outs_dict['maps'] - if not isinstance(pred, paddle.Tensor): - pred = paddle.to_tensor(pred) - pred = F.interpolate( - pred, scale_factor=4 // self.scale, mode='bilinear') - - score = F.sigmoid(pred[:, 0, :, :]) - - kernels = (pred > self.thresh).astype('float32') - text_mask = kernels[:, 0, :, :] - kernels[:, 0:, :, :] = kernels[:, 0:, :, :] * text_mask - - score = score.numpy() - kernels = kernels.numpy().astype(np.uint8) - - boxes_batch = [] - for batch_index in range(pred.shape[0]): - boxes, scores = self.boxes_from_bitmap(score[batch_index], - kernels[batch_index], - shape_list[batch_index]) - - boxes_batch.append({'points': boxes, 'scores': scores}) - return boxes_batch - - def boxes_from_bitmap(self, score, kernels, shape): - label = pse(kernels, self.min_area) - return self.generate_box(score, label, shape) - - def generate_box(self, score, label, shape): - src_h, src_w, ratio_h, ratio_w = shape - label_num = np.max(label) + 1 - - boxes = [] - scores = [] - for i in range(1, label_num): - ind = label == i - points = np.array(np.where(ind)).transpose((1, 0))[:, ::-1] - - if points.shape[0] < self.min_area: - label[ind] = 0 - continue - - score_i = np.mean(score[ind]) - if score_i < self.box_thresh: - label[ind] = 0 - continue - - if self.box_type == 'quad': - rect = cv2.minAreaRect(points) - bbox = cv2.boxPoints(rect) - elif self.box_type == 'poly': - box_height = np.max(points[:, 1]) + 10 - box_width = np.max(points[:, 0]) + 10 - - mask = np.zeros((box_height, box_width), np.uint8) - mask[points[:, 1], points[:, 0]] = 255 - - contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, - cv2.CHAIN_APPROX_SIMPLE) - bbox = np.squeeze(contours[0], 1) - else: - raise NotImplementedError - - bbox[:, 0] = np.clip(np.round(bbox[:, 0] / ratio_w), 0, src_w) - bbox[:, 1] = np.clip(np.round(bbox[:, 1] / ratio_h), 0, src_h) - boxes.append(bbox) - scores.append(score_i) - return boxes, scores diff --git a/backend/ppocr/postprocess/rec_postprocess.py b/backend/ppocr/postprocess/rec_postprocess.py deleted file mode 100644 index bf0fd89..0000000 --- a/backend/ppocr/postprocess/rec_postprocess.py +++ /dev/null @@ -1,754 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import paddle -from paddle.nn import functional as F -import re - - -class BaseRecLabelDecode(object): - """ Convert between text-label and text-index """ - - def __init__(self, character_dict_path=None, use_space_char=False): - self.beg_str = "sos" - self.end_str = "eos" - - self.character_str = [] - if character_dict_path is None: - self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" - dict_character = list(self.character_str) - else: - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - for line in lines: - line = line.decode('utf-8').strip("\n").strip("\r\n") - self.character_str.append(line) - if use_space_char: - self.character_str.append(" ") - dict_character = list(self.character_str) - - dict_character = self.add_special_char(dict_character) - self.dict = {} - for i, char in enumerate(dict_character): - self.dict[char] = i - self.character = dict_character - - def add_special_char(self, dict_character): - return dict_character - - def decode(self, text_index, text_prob=None, is_remove_duplicate=False): - """ convert text-index into text-label. """ - result_list = [] - ignored_tokens = self.get_ignored_tokens() - batch_size = len(text_index) - for batch_idx in range(batch_size): - selection = np.ones(len(text_index[batch_idx]), dtype=bool) - if is_remove_duplicate: - selection[1:] = text_index[batch_idx][1:] != text_index[ - batch_idx][:-1] - for ignored_token in ignored_tokens: - selection &= text_index[batch_idx] != ignored_token - - char_list = [ - self.character[text_id] - for text_id in text_index[batch_idx][selection] - ] - if text_prob is not None: - conf_list = text_prob[batch_idx][selection] - else: - conf_list = [1] * len(selection) - if len(conf_list) == 0: - conf_list = [0] - - text = ''.join(char_list) - result_list.append((text, np.mean(conf_list).tolist())) - return result_list - - def get_ignored_tokens(self): - return [0] # for ctc blank - - -class CTCLabelDecode(BaseRecLabelDecode): - """ Convert between text-label and text-index """ - - def __init__(self, character_dict_path=None, use_space_char=False, - **kwargs): - super(CTCLabelDecode, self).__init__(character_dict_path, - use_space_char) - - def __call__(self, preds, label=None, *args, **kwargs): - if isinstance(preds, tuple) or isinstance(preds, list): - preds = preds[-1] - if isinstance(preds, paddle.Tensor): - preds = preds.numpy() - preds_idx = preds.argmax(axis=2) - preds_prob = preds.max(axis=2) - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True) - if label is None: - return text - label = self.decode(label) - return text, label - - def add_special_char(self, dict_character): - dict_character = ['blank'] + dict_character - return dict_character - - -class DistillationCTCLabelDecode(CTCLabelDecode): - """ - Convert - Convert between text-label and text-index - """ - - def __init__(self, - character_dict_path=None, - use_space_char=False, - model_name=["student"], - key=None, - multi_head=False, - **kwargs): - super(DistillationCTCLabelDecode, self).__init__(character_dict_path, - use_space_char) - if not isinstance(model_name, list): - model_name = [model_name] - self.model_name = model_name - - self.key = key - self.multi_head = multi_head - - def __call__(self, preds, label=None, *args, **kwargs): - output = dict() - for name in self.model_name: - pred = preds[name] - if self.key is not None: - pred = pred[self.key] - if self.multi_head and isinstance(pred, dict): - pred = pred['ctc'] - output[name] = super().__call__(pred, label=label, *args, **kwargs) - return output - - -class NRTRLabelDecode(BaseRecLabelDecode): - """ Convert between text-label and text-index """ - - def __init__(self, character_dict_path=None, use_space_char=True, **kwargs): - super(NRTRLabelDecode, self).__init__(character_dict_path, - use_space_char) - - def __call__(self, preds, label=None, *args, **kwargs): - - if len(preds) == 2: - preds_id = preds[0] - preds_prob = preds[1] - if isinstance(preds_id, paddle.Tensor): - preds_id = preds_id.numpy() - if isinstance(preds_prob, paddle.Tensor): - preds_prob = preds_prob.numpy() - if preds_id[0][0] == 2: - preds_idx = preds_id[:, 1:] - preds_prob = preds_prob[:, 1:] - else: - preds_idx = preds_id - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) - if label is None: - return text - label = self.decode(label[:, 1:]) - else: - if isinstance(preds, paddle.Tensor): - preds = preds.numpy() - preds_idx = preds.argmax(axis=2) - preds_prob = preds.max(axis=2) - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) - if label is None: - return text - label = self.decode(label[:, 1:]) - return text, label - - def add_special_char(self, dict_character): - dict_character = ['blank', '', '', ''] + dict_character - return dict_character - - def decode(self, text_index, text_prob=None, is_remove_duplicate=False): - """ convert text-index into text-label. """ - result_list = [] - batch_size = len(text_index) - for batch_idx in range(batch_size): - char_list = [] - conf_list = [] - for idx in range(len(text_index[batch_idx])): - if text_index[batch_idx][idx] == 3: # end - break - try: - char_list.append(self.character[int(text_index[batch_idx][ - idx])]) - except: - continue - if text_prob is not None: - conf_list.append(text_prob[batch_idx][idx]) - else: - conf_list.append(1) - text = ''.join(char_list) - result_list.append((text.lower(), np.mean(conf_list).tolist())) - return result_list - - -class AttnLabelDecode(BaseRecLabelDecode): - """ Convert between text-label and text-index """ - - def __init__(self, character_dict_path=None, use_space_char=False, - **kwargs): - super(AttnLabelDecode, self).__init__(character_dict_path, - use_space_char) - - def add_special_char(self, dict_character): - self.beg_str = "sos" - self.end_str = "eos" - dict_character = dict_character - dict_character = [self.beg_str] + dict_character + [self.end_str] - return dict_character - - def decode(self, text_index, text_prob=None, is_remove_duplicate=False): - """ convert text-index into text-label. """ - result_list = [] - ignored_tokens = self.get_ignored_tokens() - [beg_idx, end_idx] = self.get_ignored_tokens() - batch_size = len(text_index) - for batch_idx in range(batch_size): - char_list = [] - conf_list = [] - for idx in range(len(text_index[batch_idx])): - if text_index[batch_idx][idx] in ignored_tokens: - continue - if int(text_index[batch_idx][idx]) == int(end_idx): - break - if is_remove_duplicate: - # only for predict - if idx > 0 and text_index[batch_idx][idx - 1] == text_index[ - batch_idx][idx]: - continue - char_list.append(self.character[int(text_index[batch_idx][ - idx])]) - if text_prob is not None: - conf_list.append(text_prob[batch_idx][idx]) - else: - conf_list.append(1) - text = ''.join(char_list) - result_list.append((text, np.mean(conf_list).tolist())) - return result_list - - def __call__(self, preds, label=None, *args, **kwargs): - """ - text = self.decode(text) - if label is None: - return text - else: - label = self.decode(label, is_remove_duplicate=False) - return text, label - """ - if isinstance(preds, paddle.Tensor): - preds = preds.numpy() - - preds_idx = preds.argmax(axis=2) - preds_prob = preds.max(axis=2) - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) - if label is None: - return text - label = self.decode(label, is_remove_duplicate=False) - return text, label - - def get_ignored_tokens(self): - beg_idx = self.get_beg_end_flag_idx("beg") - end_idx = self.get_beg_end_flag_idx("end") - return [beg_idx, end_idx] - - def get_beg_end_flag_idx(self, beg_or_end): - if beg_or_end == "beg": - idx = np.array(self.dict[self.beg_str]) - elif beg_or_end == "end": - idx = np.array(self.dict[self.end_str]) - else: - assert False, "unsupport type %s in get_beg_end_flag_idx" \ - % beg_or_end - return idx - - -class SEEDLabelDecode(BaseRecLabelDecode): - """ Convert between text-label and text-index """ - - def __init__(self, character_dict_path=None, use_space_char=False, - **kwargs): - super(SEEDLabelDecode, self).__init__(character_dict_path, - use_space_char) - - def add_special_char(self, dict_character): - self.padding_str = "padding" - self.end_str = "eos" - self.unknown = "unknown" - dict_character = dict_character + [ - self.end_str, self.padding_str, self.unknown - ] - return dict_character - - def get_ignored_tokens(self): - end_idx = self.get_beg_end_flag_idx("eos") - return [end_idx] - - def get_beg_end_flag_idx(self, beg_or_end): - if beg_or_end == "sos": - idx = np.array(self.dict[self.beg_str]) - elif beg_or_end == "eos": - idx = np.array(self.dict[self.end_str]) - else: - assert False, "unsupport type %s in get_beg_end_flag_idx" % beg_or_end - return idx - - def decode(self, text_index, text_prob=None, is_remove_duplicate=False): - """ convert text-index into text-label. """ - result_list = [] - [end_idx] = self.get_ignored_tokens() - batch_size = len(text_index) - for batch_idx in range(batch_size): - char_list = [] - conf_list = [] - for idx in range(len(text_index[batch_idx])): - if int(text_index[batch_idx][idx]) == int(end_idx): - break - if is_remove_duplicate: - # only for predict - if idx > 0 and text_index[batch_idx][idx - 1] == text_index[ - batch_idx][idx]: - continue - char_list.append(self.character[int(text_index[batch_idx][ - idx])]) - if text_prob is not None: - conf_list.append(text_prob[batch_idx][idx]) - else: - conf_list.append(1) - text = ''.join(char_list) - result_list.append((text, np.mean(conf_list).tolist())) - return result_list - - def __call__(self, preds, label=None, *args, **kwargs): - """ - text = self.decode(text) - if label is None: - return text - else: - label = self.decode(label, is_remove_duplicate=False) - return text, label - """ - preds_idx = preds["rec_pred"] - if isinstance(preds_idx, paddle.Tensor): - preds_idx = preds_idx.numpy() - if "rec_pred_scores" in preds: - preds_idx = preds["rec_pred"] - preds_prob = preds["rec_pred_scores"] - else: - preds_idx = preds["rec_pred"].argmax(axis=2) - preds_prob = preds["rec_pred"].max(axis=2) - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) - if label is None: - return text - label = self.decode(label, is_remove_duplicate=False) - return text, label - - -class SRNLabelDecode(BaseRecLabelDecode): - """ Convert between text-label and text-index """ - - def __init__(self, character_dict_path=None, use_space_char=False, - **kwargs): - super(SRNLabelDecode, self).__init__(character_dict_path, - use_space_char) - self.max_text_length = kwargs.get('max_text_length', 25) - - def __call__(self, preds, label=None, *args, **kwargs): - pred = preds['predict'] - char_num = len(self.character_str) + 2 - if isinstance(pred, paddle.Tensor): - pred = pred.numpy() - pred = np.reshape(pred, [-1, char_num]) - - preds_idx = np.argmax(pred, axis=1) - preds_prob = np.max(pred, axis=1) - - preds_idx = np.reshape(preds_idx, [-1, self.max_text_length]) - - preds_prob = np.reshape(preds_prob, [-1, self.max_text_length]) - - text = self.decode(preds_idx, preds_prob) - - if label is None: - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) - return text - label = self.decode(label) - return text, label - - def decode(self, text_index, text_prob=None, is_remove_duplicate=False): - """ convert text-index into text-label. """ - result_list = [] - ignored_tokens = self.get_ignored_tokens() - batch_size = len(text_index) - - for batch_idx in range(batch_size): - char_list = [] - conf_list = [] - for idx in range(len(text_index[batch_idx])): - if text_index[batch_idx][idx] in ignored_tokens: - continue - if is_remove_duplicate: - # only for predict - if idx > 0 and text_index[batch_idx][idx - 1] == text_index[ - batch_idx][idx]: - continue - char_list.append(self.character[int(text_index[batch_idx][ - idx])]) - if text_prob is not None: - conf_list.append(text_prob[batch_idx][idx]) - else: - conf_list.append(1) - - text = ''.join(char_list) - result_list.append((text, np.mean(conf_list).tolist())) - return result_list - - def add_special_char(self, dict_character): - dict_character = dict_character + [self.beg_str, self.end_str] - return dict_character - - def get_ignored_tokens(self): - beg_idx = self.get_beg_end_flag_idx("beg") - end_idx = self.get_beg_end_flag_idx("end") - return [beg_idx, end_idx] - - def get_beg_end_flag_idx(self, beg_or_end): - if beg_or_end == "beg": - idx = np.array(self.dict[self.beg_str]) - elif beg_or_end == "end": - idx = np.array(self.dict[self.end_str]) - else: - assert False, "unsupport type %s in get_beg_end_flag_idx" \ - % beg_or_end - return idx - - -class TableLabelDecode(object): - """ """ - - def __init__(self, character_dict_path, **kwargs): - list_character, list_elem = self.load_char_elem_dict( - character_dict_path) - list_character = self.add_special_char(list_character) - list_elem = self.add_special_char(list_elem) - self.dict_character = {} - self.dict_idx_character = {} - for i, char in enumerate(list_character): - self.dict_idx_character[i] = char - self.dict_character[char] = i - self.dict_elem = {} - self.dict_idx_elem = {} - for i, elem in enumerate(list_elem): - self.dict_idx_elem[i] = elem - self.dict_elem[elem] = i - - def load_char_elem_dict(self, character_dict_path): - list_character = [] - list_elem = [] - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - substr = lines[0].decode('utf-8').strip("\n").strip("\r\n").split( - "\t") - character_num = int(substr[0]) - elem_num = int(substr[1]) - for cno in range(1, 1 + character_num): - character = lines[cno].decode('utf-8').strip("\n").strip("\r\n") - list_character.append(character) - for eno in range(1 + character_num, 1 + character_num + elem_num): - elem = lines[eno].decode('utf-8').strip("\n").strip("\r\n") - list_elem.append(elem) - return list_character, list_elem - - def add_special_char(self, list_character): - self.beg_str = "sos" - self.end_str = "eos" - list_character = [self.beg_str] + list_character + [self.end_str] - return list_character - - def __call__(self, preds): - structure_probs = preds['structure_probs'] - loc_preds = preds['loc_preds'] - if isinstance(structure_probs, paddle.Tensor): - structure_probs = structure_probs.numpy() - if isinstance(loc_preds, paddle.Tensor): - loc_preds = loc_preds.numpy() - structure_idx = structure_probs.argmax(axis=2) - structure_probs = structure_probs.max(axis=2) - structure_str, structure_pos, result_score_list, result_elem_idx_list = self.decode( - structure_idx, structure_probs, 'elem') - res_html_code_list = [] - res_loc_list = [] - batch_num = len(structure_str) - for bno in range(batch_num): - res_loc = [] - for sno in range(len(structure_str[bno])): - text = structure_str[bno][sno] - if text in ['', ' 0 and tmp_elem_idx == end_idx: - break - if tmp_elem_idx in ignored_tokens: - continue - - char_list.append(current_dict[tmp_elem_idx]) - elem_pos_list.append(idx) - score_list.append(structure_probs[batch_idx, idx]) - elem_idx_list.append(tmp_elem_idx) - result_list.append(char_list) - result_pos_list.append(elem_pos_list) - result_score_list.append(score_list) - result_elem_idx_list.append(elem_idx_list) - return result_list, result_pos_list, result_score_list, result_elem_idx_list - - def get_ignored_tokens(self, char_or_elem): - beg_idx = self.get_beg_end_flag_idx("beg", char_or_elem) - end_idx = self.get_beg_end_flag_idx("end", char_or_elem) - return [beg_idx, end_idx] - - def get_beg_end_flag_idx(self, beg_or_end, char_or_elem): - if char_or_elem == "char": - if beg_or_end == "beg": - idx = self.dict_character[self.beg_str] - elif beg_or_end == "end": - idx = self.dict_character[self.end_str] - else: - assert False, "Unsupport type %s in get_beg_end_flag_idx of char" \ - % beg_or_end - elif char_or_elem == "elem": - if beg_or_end == "beg": - idx = self.dict_elem[self.beg_str] - elif beg_or_end == "end": - idx = self.dict_elem[self.end_str] - else: - assert False, "Unsupport type %s in get_beg_end_flag_idx of elem" \ - % beg_or_end - else: - assert False, "Unsupport type %s in char_or_elem" \ - % char_or_elem - return idx - - -class SARLabelDecode(BaseRecLabelDecode): - """ Convert between text-label and text-index """ - - def __init__(self, character_dict_path=None, use_space_char=False, - **kwargs): - super(SARLabelDecode, self).__init__(character_dict_path, - use_space_char) - - self.rm_symbol = kwargs.get('rm_symbol', False) - - def add_special_char(self, dict_character): - beg_end_str = "" - unknown_str = "" - padding_str = "" - dict_character = dict_character + [unknown_str] - self.unknown_idx = len(dict_character) - 1 - dict_character = dict_character + [beg_end_str] - self.start_idx = len(dict_character) - 1 - self.end_idx = len(dict_character) - 1 - dict_character = dict_character + [padding_str] - self.padding_idx = len(dict_character) - 1 - return dict_character - - def decode(self, text_index, text_prob=None, is_remove_duplicate=False): - """ convert text-index into text-label. """ - result_list = [] - ignored_tokens = self.get_ignored_tokens() - - batch_size = len(text_index) - for batch_idx in range(batch_size): - char_list = [] - conf_list = [] - for idx in range(len(text_index[batch_idx])): - if text_index[batch_idx][idx] in ignored_tokens: - continue - if int(text_index[batch_idx][idx]) == int(self.end_idx): - if text_prob is None and idx == 0: - continue - else: - break - if is_remove_duplicate: - # only for predict - if idx > 0 and text_index[batch_idx][idx - 1] == text_index[ - batch_idx][idx]: - continue - char_list.append(self.character[int(text_index[batch_idx][ - idx])]) - if text_prob is not None: - conf_list.append(text_prob[batch_idx][idx]) - else: - conf_list.append(1) - text = ''.join(char_list) - if self.rm_symbol: - comp = re.compile('[^A-Z^a-z^0-9^\u4e00-\u9fa5]') - text = text.lower() - text = comp.sub('', text) - result_list.append((text, np.mean(conf_list).tolist())) - return result_list - - def __call__(self, preds, label=None, *args, **kwargs): - if isinstance(preds, paddle.Tensor): - preds = preds.numpy() - preds_idx = preds.argmax(axis=2) - preds_prob = preds.max(axis=2) - - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) - - if label is None: - return text - label = self.decode(label, is_remove_duplicate=False) - return text, label - - def get_ignored_tokens(self): - return [self.padding_idx] - - -class DistillationSARLabelDecode(SARLabelDecode): - """ - Convert - Convert between text-label and text-index - """ - - def __init__(self, - character_dict_path=None, - use_space_char=False, - model_name=["student"], - key=None, - multi_head=False, - **kwargs): - super(DistillationSARLabelDecode, self).__init__(character_dict_path, - use_space_char) - if not isinstance(model_name, list): - model_name = [model_name] - self.model_name = model_name - - self.key = key - self.multi_head = multi_head - - def __call__(self, preds, label=None, *args, **kwargs): - output = dict() - for name in self.model_name: - pred = preds[name] - if self.key is not None: - pred = pred[self.key] - if self.multi_head and isinstance(pred, dict): - pred = pred['sar'] - output[name] = super().__call__(pred, label=label, *args, **kwargs) - return output - - -class PRENLabelDecode(BaseRecLabelDecode): - """ Convert between text-label and text-index """ - - def __init__(self, character_dict_path=None, use_space_char=False, - **kwargs): - super(PRENLabelDecode, self).__init__(character_dict_path, - use_space_char) - - def add_special_char(self, dict_character): - padding_str = '' # 0 - end_str = '' # 1 - unknown_str = '' # 2 - - dict_character = [padding_str, end_str, unknown_str] + dict_character - self.padding_idx = 0 - self.end_idx = 1 - self.unknown_idx = 2 - - return dict_character - - def decode(self, text_index, text_prob=None): - """ convert text-index into text-label. """ - result_list = [] - batch_size = len(text_index) - - for batch_idx in range(batch_size): - char_list = [] - conf_list = [] - for idx in range(len(text_index[batch_idx])): - if text_index[batch_idx][idx] == self.end_idx: - break - if text_index[batch_idx][idx] in \ - [self.padding_idx, self.unknown_idx]: - continue - char_list.append(self.character[int(text_index[batch_idx][ - idx])]) - if text_prob is not None: - conf_list.append(text_prob[batch_idx][idx]) - else: - conf_list.append(1) - - text = ''.join(char_list) - if len(text) > 0: - result_list.append((text, np.mean(conf_list).tolist())) - else: - # here confidence of empty recog result is 1 - result_list.append(('', 1)) - return result_list - - def __call__(self, preds, label=None, *args, **kwargs): - preds = preds.numpy() - preds_idx = preds.argmax(axis=2) - preds_prob = preds.max(axis=2) - text = self.decode(preds_idx, preds_prob) - if label is None: - return text - label = self.decode(label) - return text, label diff --git a/backend/ppocr/postprocess/sast_postprocess.py b/backend/ppocr/postprocess/sast_postprocess.py deleted file mode 100755 index bee75c0..0000000 --- a/backend/ppocr/postprocess/sast_postprocess.py +++ /dev/null @@ -1,355 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys - -__dir__ = os.path.dirname(__file__) -sys.path.append(__dir__) -sys.path.append(os.path.join(__dir__, '..')) - -import numpy as np -from .locality_aware_nms import nms_locality -import paddle -import cv2 -import time - - -class SASTPostProcess(object): - """ - The post process for SAST. - """ - - def __init__(self, - score_thresh=0.5, - nms_thresh=0.2, - sample_pts_num=2, - shrink_ratio_of_width=0.3, - expand_scale=1.0, - tcl_map_thresh=0.5, - **kwargs): - - self.score_thresh = score_thresh - self.nms_thresh = nms_thresh - self.sample_pts_num = sample_pts_num - self.shrink_ratio_of_width = shrink_ratio_of_width - self.expand_scale = expand_scale - self.tcl_map_thresh = tcl_map_thresh - - # c++ la-nms is faster, but only support python 3.5 - self.is_python35 = False - if sys.version_info.major == 3 and sys.version_info.minor == 5: - self.is_python35 = True - - def point_pair2poly(self, point_pair_list): - """ - Transfer vertical point_pairs into poly point in clockwise. - """ - # constract poly - point_num = len(point_pair_list) * 2 - point_list = [0] * point_num - for idx, point_pair in enumerate(point_pair_list): - point_list[idx] = point_pair[0] - point_list[point_num - 1 - idx] = point_pair[1] - return np.array(point_list).reshape(-1, 2) - - def shrink_quad_along_width(self, - quad, - begin_width_ratio=0., - end_width_ratio=1.): - """ - Generate shrink_quad_along_width. - """ - ratio_pair = np.array( - [[begin_width_ratio], [end_width_ratio]], dtype=np.float32) - p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair - p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair - return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]]) - - def expand_poly_along_width(self, poly, shrink_ratio_of_width=0.3): - """ - expand poly along width. - """ - point_num = poly.shape[0] - left_quad = np.array( - [poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32) - left_ratio = -shrink_ratio_of_width * np.linalg.norm(left_quad[0] - left_quad[3]) / \ - (np.linalg.norm(left_quad[0] - left_quad[1]) + 1e-6) - left_quad_expand = self.shrink_quad_along_width(left_quad, left_ratio, - 1.0) - right_quad = np.array( - [ - poly[point_num // 2 - 2], poly[point_num // 2 - 1], - poly[point_num // 2], poly[point_num // 2 + 1] - ], - dtype=np.float32) - right_ratio = 1.0 + \ - shrink_ratio_of_width * np.linalg.norm(right_quad[0] - right_quad[3]) / \ - (np.linalg.norm(right_quad[0] - right_quad[1]) + 1e-6) - right_quad_expand = self.shrink_quad_along_width(right_quad, 0.0, - right_ratio) - poly[0] = left_quad_expand[0] - poly[-1] = left_quad_expand[-1] - poly[point_num // 2 - 1] = right_quad_expand[1] - poly[point_num // 2] = right_quad_expand[2] - return poly - - def restore_quad(self, tcl_map, tcl_map_thresh, tvo_map): - """Restore quad.""" - xy_text = np.argwhere(tcl_map[:, :, 0] > tcl_map_thresh) - xy_text = xy_text[:, ::-1] # (n, 2) - - # Sort the text boxes via the y axis - xy_text = xy_text[np.argsort(xy_text[:, 1])] - - scores = tcl_map[xy_text[:, 1], xy_text[:, 0], 0] - scores = scores[:, np.newaxis] - - # Restore - point_num = int(tvo_map.shape[-1] / 2) - assert point_num == 4 - tvo_map = tvo_map[xy_text[:, 1], xy_text[:, 0], :] - xy_text_tile = np.tile(xy_text, (1, point_num)) # (n, point_num * 2) - quads = xy_text_tile - tvo_map - - return scores, quads, xy_text - - def quad_area(self, quad): - """ - compute area of a quad. - """ - edge = [(quad[1][0] - quad[0][0]) * (quad[1][1] + quad[0][1]), - (quad[2][0] - quad[1][0]) * (quad[2][1] + quad[1][1]), - (quad[3][0] - quad[2][0]) * (quad[3][1] + quad[2][1]), - (quad[0][0] - quad[3][0]) * (quad[0][1] + quad[3][1])] - return np.sum(edge) / 2. - - def nms(self, dets): - if self.is_python35: - import lanms - dets = lanms.merge_quadrangle_n9(dets, self.nms_thresh) - else: - dets = nms_locality(dets, self.nms_thresh) - return dets - - def cluster_by_quads_tco(self, tcl_map, tcl_map_thresh, quads, tco_map): - """ - Cluster pixels in tcl_map based on quads. - """ - instance_count = quads.shape[0] + 1 # contain background - instance_label_map = np.zeros(tcl_map.shape[:2], dtype=np.int32) - if instance_count == 1: - return instance_count, instance_label_map - - # predict text center - xy_text = np.argwhere(tcl_map[:, :, 0] > tcl_map_thresh) - n = xy_text.shape[0] - xy_text = xy_text[:, ::-1] # (n, 2) - tco = tco_map[xy_text[:, 1], xy_text[:, 0], :] # (n, 2) - pred_tc = xy_text - tco - - # get gt text center - m = quads.shape[0] - gt_tc = np.mean(quads, axis=1) # (m, 2) - - pred_tc_tile = np.tile(pred_tc[:, np.newaxis, :], - (1, m, 1)) # (n, m, 2) - gt_tc_tile = np.tile(gt_tc[np.newaxis, :, :], (n, 1, 1)) # (n, m, 2) - dist_mat = np.linalg.norm(pred_tc_tile - gt_tc_tile, axis=2) # (n, m) - xy_text_assign = np.argmin(dist_mat, axis=1) + 1 # (n,) - - instance_label_map[xy_text[:, 1], xy_text[:, 0]] = xy_text_assign - return instance_count, instance_label_map - - def estimate_sample_pts_num(self, quad, xy_text): - """ - Estimate sample points number. - """ - eh = (np.linalg.norm(quad[0] - quad[3]) + - np.linalg.norm(quad[1] - quad[2])) / 2.0 - ew = (np.linalg.norm(quad[0] - quad[1]) + - np.linalg.norm(quad[2] - quad[3])) / 2.0 - - dense_sample_pts_num = max(2, int(ew)) - dense_xy_center_line = xy_text[np.linspace( - 0, - xy_text.shape[0] - 1, - dense_sample_pts_num, - endpoint=True, - dtype=np.float32).astype(np.int32)] - - dense_xy_center_line_diff = dense_xy_center_line[ - 1:] - dense_xy_center_line[:-1] - estimate_arc_len = np.sum( - np.linalg.norm( - dense_xy_center_line_diff, axis=1)) - - sample_pts_num = max(2, int(estimate_arc_len / eh)) - return sample_pts_num - - def detect_sast(self, - tcl_map, - tvo_map, - tbo_map, - tco_map, - ratio_w, - ratio_h, - src_w, - src_h, - shrink_ratio_of_width=0.3, - tcl_map_thresh=0.5, - offset_expand=1.0, - out_strid=4.0): - """ - first resize the tcl_map, tvo_map and tbo_map to the input_size, then restore the polys - """ - # restore quad - scores, quads, xy_text = self.restore_quad(tcl_map, tcl_map_thresh, - tvo_map) - dets = np.hstack((quads, scores)).astype(np.float32, copy=False) - dets = self.nms(dets) - if dets.shape[0] == 0: - return [] - quads = dets[:, :-1].reshape(-1, 4, 2) - - # Compute quad area - quad_areas = [] - for quad in quads: - quad_areas.append(-self.quad_area(quad)) - - # instance segmentation - # instance_count, instance_label_map = cv2.connectedComponents(tcl_map.astype(np.uint8), connectivity=8) - instance_count, instance_label_map = self.cluster_by_quads_tco( - tcl_map, tcl_map_thresh, quads, tco_map) - - # restore single poly with tcl instance. - poly_list = [] - for instance_idx in range(1, instance_count): - xy_text = np.argwhere(instance_label_map == instance_idx)[:, ::-1] - quad = quads[instance_idx - 1] - q_area = quad_areas[instance_idx - 1] - if q_area < 5: - continue - - # - len1 = float(np.linalg.norm(quad[0] - quad[1])) - len2 = float(np.linalg.norm(quad[1] - quad[2])) - min_len = min(len1, len2) - if min_len < 3: - continue - - # filter small CC - if xy_text.shape[0] <= 0: - continue - - # filter low confidence instance - xy_text_scores = tcl_map[xy_text[:, 1], xy_text[:, 0], 0] - if np.sum(xy_text_scores) / quad_areas[instance_idx - 1] < 0.1: - # if np.sum(xy_text_scores) / quad_areas[instance_idx - 1] < 0.05: - continue - - # sort xy_text - left_center_pt = np.array( - [[(quad[0, 0] + quad[-1, 0]) / 2.0, - (quad[0, 1] + quad[-1, 1]) / 2.0]]) # (1, 2) - right_center_pt = np.array( - [[(quad[1, 0] + quad[2, 0]) / 2.0, - (quad[1, 1] + quad[2, 1]) / 2.0]]) # (1, 2) - proj_unit_vec = (right_center_pt - left_center_pt) / \ - (np.linalg.norm(right_center_pt - left_center_pt) + 1e-6) - proj_value = np.sum(xy_text * proj_unit_vec, axis=1) - xy_text = xy_text[np.argsort(proj_value)] - - # Sample pts in tcl map - if self.sample_pts_num == 0: - sample_pts_num = self.estimate_sample_pts_num(quad, xy_text) - else: - sample_pts_num = self.sample_pts_num - xy_center_line = xy_text[np.linspace( - 0, - xy_text.shape[0] - 1, - sample_pts_num, - endpoint=True, - dtype=np.float32).astype(np.int32)] - - point_pair_list = [] - for x, y in xy_center_line: - # get corresponding offset - offset = tbo_map[y, x, :].reshape(2, 2) - if offset_expand != 1.0: - offset_length = np.linalg.norm( - offset, axis=1, keepdims=True) - expand_length = np.clip( - offset_length * (offset_expand - 1), - a_min=0.5, - a_max=3.0) - offset_detal = offset / offset_length * expand_length - offset = offset + offset_detal - # original point - ori_yx = np.array([y, x], dtype=np.float32) - point_pair = (ori_yx + offset)[:, ::-1] * out_strid / np.array( - [ratio_w, ratio_h]).reshape(-1, 2) - point_pair_list.append(point_pair) - - # ndarry: (x, 2), expand poly along width - detected_poly = self.point_pair2poly(point_pair_list) - detected_poly = self.expand_poly_along_width(detected_poly, - shrink_ratio_of_width) - detected_poly[:, 0] = np.clip( - detected_poly[:, 0], a_min=0, a_max=src_w) - detected_poly[:, 1] = np.clip( - detected_poly[:, 1], a_min=0, a_max=src_h) - poly_list.append(detected_poly) - - return poly_list - - def __call__(self, outs_dict, shape_list): - score_list = outs_dict['f_score'] - border_list = outs_dict['f_border'] - tvo_list = outs_dict['f_tvo'] - tco_list = outs_dict['f_tco'] - if isinstance(score_list, paddle.Tensor): - score_list = score_list.numpy() - border_list = border_list.numpy() - tvo_list = tvo_list.numpy() - tco_list = tco_list.numpy() - - img_num = len(shape_list) - poly_lists = [] - for ino in range(img_num): - p_score = score_list[ino].transpose((1, 2, 0)) - p_border = border_list[ino].transpose((1, 2, 0)) - p_tvo = tvo_list[ino].transpose((1, 2, 0)) - p_tco = tco_list[ino].transpose((1, 2, 0)) - src_h, src_w, ratio_h, ratio_w = shape_list[ino] - - poly_list = self.detect_sast( - p_score, - p_tvo, - p_border, - p_tco, - ratio_w, - ratio_h, - src_w, - src_h, - shrink_ratio_of_width=self.shrink_ratio_of_width, - tcl_map_thresh=self.tcl_map_thresh, - offset_expand=self.expand_scale) - poly_lists.append({'points': np.array(poly_list)}) - - return poly_lists diff --git a/backend/ppocr/postprocess/vqa_token_re_layoutlm_postprocess.py b/backend/ppocr/postprocess/vqa_token_re_layoutlm_postprocess.py deleted file mode 100644 index 1d55d13..0000000 --- a/backend/ppocr/postprocess/vqa_token_re_layoutlm_postprocess.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import paddle - - -class VQAReTokenLayoutLMPostProcess(object): - """ Convert between text-label and text-index """ - - def __init__(self, **kwargs): - super(VQAReTokenLayoutLMPostProcess, self).__init__() - - def __call__(self, preds, label=None, *args, **kwargs): - if label is not None: - return self._metric(preds, label) - else: - return self._infer(preds, *args, **kwargs) - - def _metric(self, preds, label): - return preds['pred_relations'], label[6], label[5] - - def _infer(self, preds, *args, **kwargs): - ser_results = kwargs['ser_results'] - entity_idx_dict_batch = kwargs['entity_idx_dict_batch'] - pred_relations = preds['pred_relations'] - - # merge relations and ocr info - results = [] - for pred_relation, ser_result, entity_idx_dict in zip( - pred_relations, ser_results, entity_idx_dict_batch): - result = [] - used_tail_id = [] - for relation in pred_relation: - if relation['tail_id'] in used_tail_id: - continue - used_tail_id.append(relation['tail_id']) - ocr_info_head = ser_result[entity_idx_dict[relation['head_id']]] - ocr_info_tail = ser_result[entity_idx_dict[relation['tail_id']]] - result.append((ocr_info_head, ocr_info_tail)) - results.append(result) - return results diff --git a/backend/ppocr/postprocess/vqa_token_ser_layoutlm_postprocess.py b/backend/ppocr/postprocess/vqa_token_ser_layoutlm_postprocess.py deleted file mode 100644 index 782cdea..0000000 --- a/backend/ppocr/postprocess/vqa_token_ser_layoutlm_postprocess.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -import paddle -from ppocr.utils.utility import load_vqa_bio_label_maps - - -class VQASerTokenLayoutLMPostProcess(object): - """ Convert between text-label and text-index """ - - def __init__(self, class_path, **kwargs): - super(VQASerTokenLayoutLMPostProcess, self).__init__() - label2id_map, self.id2label_map = load_vqa_bio_label_maps(class_path) - - self.label2id_map_for_draw = dict() - for key in label2id_map: - if key.startswith("I-"): - self.label2id_map_for_draw[key] = label2id_map["B" + key[1:]] - else: - self.label2id_map_for_draw[key] = label2id_map[key] - - self.id2label_map_for_show = dict() - for key in self.label2id_map_for_draw: - val = self.label2id_map_for_draw[key] - if key == "O": - self.id2label_map_for_show[val] = key - if key.startswith("B-") or key.startswith("I-"): - self.id2label_map_for_show[val] = key[2:] - else: - self.id2label_map_for_show[val] = key - - def __call__(self, preds, batch=None, *args, **kwargs): - if isinstance(preds, paddle.Tensor): - preds = preds.numpy() - - if batch is not None: - return self._metric(preds, batch[1]) - else: - return self._infer(preds, **kwargs) - - def _metric(self, preds, label): - pred_idxs = preds.argmax(axis=2) - decode_out_list = [[] for _ in range(pred_idxs.shape[0])] - label_decode_out_list = [[] for _ in range(pred_idxs.shape[0])] - - for i in range(pred_idxs.shape[0]): - for j in range(pred_idxs.shape[1]): - if label[i, j] != -100: - label_decode_out_list[i].append(self.id2label_map[label[i, - j]]) - decode_out_list[i].append(self.id2label_map[pred_idxs[i, - j]]) - return decode_out_list, label_decode_out_list - - def _infer(self, preds, attention_masks, segment_offset_ids, ocr_infos): - results = [] - - for pred, attention_mask, segment_offset_id, ocr_info in zip( - preds, attention_masks, segment_offset_ids, ocr_infos): - pred = np.argmax(pred, axis=1) - pred = [self.id2label_map[idx] for idx in pred] - - for idx in range(len(segment_offset_id)): - if idx == 0: - start_id = 0 - else: - start_id = segment_offset_id[idx - 1] - - end_id = segment_offset_id[idx] - - curr_pred = pred[start_id:end_id] - curr_pred = [self.label2id_map_for_draw[p] for p in curr_pred] - - if len(curr_pred) <= 0: - pred_id = 0 - else: - counts = np.bincount(curr_pred) - pred_id = np.argmax(counts) - ocr_info[idx]["pred_id"] = int(pred_id) - ocr_info[idx]["pred"] = self.id2label_map_for_show[int(pred_id)] - results.append(ocr_info) - return results diff --git a/backend/ppocr/utils/__init__.py b/backend/ppocr/utils/__init__.py deleted file mode 100755 index abf198b..0000000 --- a/backend/ppocr/utils/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/backend/ppocr/utils/dict/ar_dict.txt b/backend/ppocr/utils/dict/ar_dict.txt deleted file mode 100644 index fc63802..0000000 --- a/backend/ppocr/utils/dict/ar_dict.txt +++ /dev/null @@ -1,117 +0,0 @@ -a -r -b -i -c -_ -m -g -/ -1 -0 -I -L -S -V -R -C -2 -v -l -6 -3 -9 -. -j -p -ا -ل -م -ر -ج -و -ح -ي -ة -5 -8 -7 -أ -ب -ض -4 -ك -س -ه -ث -ن -ط -ع -ت -غ -خ -ف -ئ -ز -إ -د -ص -ظ -ذ -ش -ى -ق -ؤ -آ -ء -s -e -n -w -t -u -z -d -A -N -G -h -o -E -T -H -O -B -y -F -U -J -X -W -P -Z -M -k -q -Y -Q -D -f -K -x -' -% -- -# -@ -! -& -$ -, -: -é -? -+ -É -( - diff --git a/backend/ppocr/utils/dict/arabic_dict.txt b/backend/ppocr/utils/dict/arabic_dict.txt deleted file mode 100644 index 916d421..0000000 --- a/backend/ppocr/utils/dict/arabic_dict.txt +++ /dev/null @@ -1,161 +0,0 @@ -! -# -$ -% -& -' -( -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -É -é -ء -آ -أ -ؤ -إ -ئ -ا -ب -ة -ت -ث -ج -ح -خ -د -ذ -ر -ز -س -ش -ص -ض -ط -ظ -ع -غ -ف -ق -ك -ل -م -ن -ه -و -ى -ي -ً -ٌ -ٍ -َ -ُ -ِ -ّ -ْ -ٓ -ٔ -ٰ -ٱ -ٹ -پ -چ -ڈ -ڑ -ژ -ک -ڭ -گ -ں -ھ -ۀ -ہ -ۂ -ۃ -ۆ -ۇ -ۈ -ۋ -ی -ې -ے -ۓ -ە -١ -٢ -٣ -٤ -٥ -٦ -٧ -٨ -٩ diff --git a/backend/ppocr/utils/dict/be_dict.txt b/backend/ppocr/utils/dict/be_dict.txt deleted file mode 100644 index f8458ba..0000000 --- a/backend/ppocr/utils/dict/be_dict.txt +++ /dev/null @@ -1,145 +0,0 @@ -b -e -_ -i -m -g -/ -2 -0 -I -L -S -V -R -C -1 -v -a -l -6 -9 -4 -3 -. -j -p -п -а -з -б -у -г -н -ц -ь -8 -м -л -і -о -ў -ы -7 -5 -М -х -с -р -ф -я -е -д -ж -ю -ч -й -к -Д -в -Б -т -І -ш -ё -э -К -Л -Н -А -Ж -Г -В -П -З -Е -О -Р -С -У -Ё -Й -Т -Ч -Э -Ц -Ю -Ш -Ф -Х -Я -Ь -Ы -Ў -s -c -n -w -M -o -t -T -E -A -B -u -h -y -k -r -H -d -Y -O -U -F -f -x -D -G -N -K -P -z -J -X -W -Z -Q -% -- -q -@ -' -! -# -& -, -: -$ -( -? -é -+ -É - diff --git a/backend/ppocr/utils/dict/bg_dict.txt b/backend/ppocr/utils/dict/bg_dict.txt deleted file mode 100644 index 84713c3..0000000 --- a/backend/ppocr/utils/dict/bg_dict.txt +++ /dev/null @@ -1,140 +0,0 @@ -! -# -$ -% -& -' -( -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -É -é -А -Б -В -Г -Д -Е -Ж -З -И -Й -К -Л -М -Н -О -П -Р -С -Т -У -Ф -Х -Ц -Ч -Ш -Щ -Ъ -Ю -Я -а -б -в -г -д -е -ж -з -и -й -к -л -м -н -о -п -р -с -т -у -ф -х -ц -ч -ш -щ -ъ -ь -ю -я - diff --git a/backend/ppocr/utils/dict/ch_dict.txt b/backend/ppocr/utils/dict/ch_dict.txt deleted file mode 100644 index 84b885d..0000000 --- a/backend/ppocr/utils/dict/ch_dict.txt +++ /dev/null @@ -1,6623 +0,0 @@ -' -疗 -绚 -诚 -娇 -溜 -题 -贿 -者 -廖 -更 -纳 -加 -奉 -公 -一 -就 -汴 -计 -与 -路 -房 -原 -妇 -2 -0 -8 -- -7 -其 -> -: -] -, -, -骑 -刈 -全 -消 -昏 -傈 -安 -久 -钟 -嗅 -不 -影 -处 -驽 -蜿 -资 -关 -椤 -地 -瘸 -专 -问 -忖 -票 -嫉 -炎 -韵 -要 -月 -田 -节 -陂 -鄙 -捌 -备 -拳 -伺 -眼 -网 -盎 -大 -傍 -心 -东 -愉 -汇 -蹿 -科 -每 -业 -里 -航 -晏 -字 -平 -录 -先 -1 -3 -彤 -鲶 -产 -稍 -督 -腴 -有 -象 -岳 -注 -绍 -在 -泺 -文 -定 -核 -名 -水 -过 -理 -让 -偷 -率 -等 -这 -发 -” -为 -含 -肥 -酉 -相 -鄱 -七 -编 -猥 -锛 -日 -镀 -蒂 -掰 -倒 -辆 -栾 -栗 -综 -涩 -州 -雌 -滑 -馀 -了 -机 -块 -司 -宰 -甙 -兴 -矽 -抚 -保 -用 -沧 -秩 -如 -收 -息 -滥 -页 -疑 -埠 -! -! -姥 -异 -橹 -钇 -向 -下 -跄 -的 -椴 -沫 -国 -绥 -獠 -报 -开 -民 -蜇 -何 -分 -凇 -长 -讥 -藏 -掏 -施 -羽 -中 -讲 -派 -嘟 -人 -提 -浼 -间 -世 -而 -古 -多 -倪 -唇 -饯 -控 -庚 -首 -赛 -蜓 -味 -断 -制 -觉 -技 -替 -艰 -溢 -潮 -夕 -钺 -外 -摘 -枋 -动 -双 -单 -啮 -户 -枇 -确 -锦 -曜 -杜 -或 -能 -效 -霜 -盒 -然 -侗 -电 -晁 -放 -步 -鹃 -新 -杖 -蜂 -吒 -濂 -瞬 -评 -总 -隍 -对 -独 -合 -也 -是 -府 -青 -天 -诲 -墙 -组 -滴 -级 -邀 -帘 -示 -已 -时 -骸 -仄 -泅 -和 -遨 -店 -雇 -疫 -持 -巍 -踮 -境 -只 -亨 -目 -鉴 -崤 -闲 -体 -泄 -杂 -作 -般 -轰 -化 -解 -迂 -诿 -蛭 -璀 -腾 -告 -版 -服 -省 -师 -小 -规 -程 -线 -海 -办 -引 -二 -桧 -牌 -砺 -洄 -裴 -修 -图 -痫 -胡 -许 -犊 -事 -郛 -基 -柴 -呼 -食 -研 -奶 -律 -蛋 -因 -葆 -察 -戏 -褒 -戒 -再 -李 -骁 -工 -貂 -油 -鹅 -章 -啄 -休 -场 -给 -睡 -纷 -豆 -器 -捎 -说 -敏 -学 -会 -浒 -设 -诊 -格 -廓 -查 -来 -霓 -室 -溆 -¢ -诡 -寥 -焕 -舜 -柒 -狐 -回 -戟 -砾 -厄 -实 -翩 -尿 -五 -入 -径 -惭 -喹 -股 -宇 -篝 -| -; -美 -期 -云 -九 -祺 -扮 -靠 -锝 -槌 -系 -企 -酰 -阊 -暂 -蚕 -忻 -豁 -本 -羹 -执 -条 -钦 -H -獒 -限 -进 -季 -楦 -于 -芘 -玖 -铋 -茯 -未 -答 -粘 -括 -样 -精 -欠 -矢 -甥 -帷 -嵩 -扣 -令 -仔 -风 -皈 -行 -支 -部 -蓉 -刮 -站 -蜡 -救 -钊 -汗 -松 -嫌 -成 -可 -. -鹤 -院 -从 -交 -政 -怕 -活 -调 -球 -局 -验 -髌 -第 -韫 -谗 -串 -到 -圆 -年 -米 -/ -* -友 -忿 -检 -区 -看 -自 -敢 -刃 -个 -兹 -弄 -流 -留 -同 -没 -齿 -星 -聆 -轼 -湖 -什 -三 -建 -蛔 -儿 -椋 -汕 -震 -颧 -鲤 -跟 -力 -情 -璺 -铨 -陪 -务 -指 -族 -训 -滦 -鄣 -濮 -扒 -商 -箱 -十 -召 -慷 -辗 -所 -莞 -管 -护 -臭 -横 -硒 -嗓 -接 -侦 -六 -露 -党 -馋 -驾 -剖 -高 -侬 -妪 -幂 -猗 -绺 -骐 -央 -酐 -孝 -筝 -课 -徇 -缰 -门 -男 -西 -项 -句 -谙 -瞒 -秃 -篇 -教 -碲 -罚 -声 -呐 -景 -前 -富 -嘴 -鳌 -稀 -免 -朋 -啬 -睐 -去 -赈 -鱼 -住 -肩 -愕 -速 -旁 -波 -厅 -健 -茼 -厥 -鲟 -谅 -投 -攸 -炔 -数 -方 -击 -呋 -谈 -绩 -别 -愫 -僚 -躬 -鹧 -胪 -炳 -招 -喇 -膨 -泵 -蹦 -毛 -结 -5 -4 -谱 -识 -陕 -粽 -婚 -拟 -构 -且 -搜 -任 -潘 -比 -郢 -妨 -醪 -陀 -桔 -碘 -扎 -选 -哈 -骷 -楷 -亿 -明 -缆 -脯 -监 -睫 -逻 -婵 -共 -赴 -淝 -凡 -惦 -及 -达 -揖 -谩 -澹 -减 -焰 -蛹 -番 -祁 -柏 -员 -禄 -怡 -峤 -龙 -白 -叽 -生 -闯 -起 -细 -装 -谕 -竟 -聚 -钙 -上 -导 -渊 -按 -艾 -辘 -挡 -耒 -盹 -饪 -臀 -记 -邮 -蕙 -受 -各 -医 -搂 -普 -滇 -朗 -茸 -带 -翻 -酚 -( -光 -堤 -墟 -蔷 -万 -幻 -〓 -瑙 -辈 -昧 -盏 -亘 -蛀 -吉 -铰 -请 -子 -假 -闻 -税 -井 -诩 -哨 -嫂 -好 -面 -琐 -校 -馊 -鬣 -缂 -营 -访 -炖 -占 -农 -缀 -否 -经 -钚 -棵 -趟 -张 -亟 -吏 -茶 -谨 -捻 -论 -迸 -堂 -玉 -信 -吧 -瞠 -乡 -姬 -寺 -咬 -溏 -苄 -皿 -意 -赉 -宝 -尔 -钰 -艺 -特 -唳 -踉 -都 -荣 -倚 -登 -荐 -丧 -奇 -涵 -批 -炭 -近 -符 -傩 -感 -道 -着 -菊 -虹 -仲 -众 -懈 -濯 -颞 -眺 -南 -释 -北 -缝 -标 -既 -茗 -整 -撼 -迤 -贲 -挎 -耱 -拒 -某 -妍 -卫 -哇 -英 -矶 -藩 -治 -他 -元 -领 -膜 -遮 -穗 -蛾 -飞 -荒 -棺 -劫 -么 -市 -火 -温 -拈 -棚 -洼 -转 -果 -奕 -卸 -迪 -伸 -泳 -斗 -邡 -侄 -涨 -屯 -萋 -胭 -氡 -崮 -枞 -惧 -冒 -彩 -斜 -手 -豚 -随 -旭 -淑 -妞 -形 -菌 -吲 -沱 -争 -驯 -歹 -挟 -兆 -柱 -传 -至 -包 -内 -响 -临 -红 -功 -弩 -衡 -寂 -禁 -老 -棍 -耆 -渍 -织 -害 -氵 -渑 -布 -载 -靥 -嗬 -虽 -苹 -咨 -娄 -库 -雉 -榜 -帜 -嘲 -套 -瑚 -亲 -簸 -欧 -边 -6 -腿 -旮 -抛 -吹 -瞳 -得 -镓 -梗 -厨 -继 -漾 -愣 -憨 -士 -策 -窑 -抑 -躯 -襟 -脏 -参 -贸 -言 -干 -绸 -鳄 -穷 -藜 -音 -折 -详 -) -举 -悍 -甸 -癌 -黎 -谴 -死 -罩 -迁 -寒 -驷 -袖 -媒 -蒋 -掘 -模 -纠 -恣 -观 -祖 -蛆 -碍 -位 -稿 -主 -澧 -跌 -筏 -京 -锏 -帝 -贴 -证 -糠 -才 -黄 -鲸 -略 -炯 -饱 -四 -出 -园 -犀 -牧 -容 -汉 -杆 -浈 -汰 -瑷 -造 -虫 -瘩 -怪 -驴 -济 -应 -花 -沣 -谔 -夙 -旅 -价 -矿 -以 -考 -s -u -呦 -晒 -巡 -茅 -准 -肟 -瓴 -詹 -仟 -褂 -译 -桌 -混 -宁 -怦 -郑 -抿 -些 -余 -鄂 -饴 -攒 -珑 -群 -阖 -岔 -琨 -藓 -预 -环 -洮 -岌 -宀 -杲 -瀵 -最 -常 -囡 -周 -踊 -女 -鼓 -袭 -喉 -简 -范 -薯 -遐 -疏 -粱 -黜 -禧 -法 -箔 -斤 -遥 -汝 -奥 -直 -贞 -撑 -置 -绱 -集 -她 -馅 -逗 -钧 -橱 -魉 -[ -恙 -躁 -唤 -9 -旺 -膘 -待 -脾 -惫 -购 -吗 -依 -盲 -度 -瘿 -蠖 -俾 -之 -镗 -拇 -鲵 -厝 -簧 -续 -款 -展 -啃 -表 -剔 -品 -钻 -腭 -损 -清 -锶 -统 -涌 -寸 -滨 -贪 -链 -吠 -冈 -伎 -迥 -咏 -吁 -览 -防 -迅 -失 -汾 -阔 -逵 -绀 -蔑 -列 -川 -凭 -努 -熨 -揪 -利 -俱 -绉 -抢 -鸨 -我 -即 -责 -膦 -易 -毓 -鹊 -刹 -玷 -岿 -空 -嘞 -绊 -排 -术 -估 -锷 -违 -们 -苟 -铜 -播 -肘 -件 -烫 -审 -鲂 -广 -像 -铌 -惰 -铟 -巳 -胍 -鲍 -康 -憧 -色 -恢 -想 -拷 -尤 -疳 -知 -S -Y -F -D -A -峄 -裕 -帮 -握 -搔 -氐 -氘 -难 -墒 -沮 -雨 -叁 -缥 -悴 -藐 -湫 -娟 -苑 -稠 -颛 -簇 -后 -阕 -闭 -蕤 -缚 -怎 -佞 -码 -嘤 -蔡 -痊 -舱 -螯 -帕 -赫 -昵 -升 -烬 -岫 -、 -疵 -蜻 -髁 -蕨 -隶 -烛 -械 -丑 -盂 -梁 -强 -鲛 -由 -拘 -揉 -劭 -龟 -撤 -钩 -呕 -孛 -费 -妻 -漂 -求 -阑 -崖 -秤 -甘 -通 -深 -补 -赃 -坎 -床 -啪 -承 -吼 -量 -暇 -钼 -烨 -阂 -擎 -脱 -逮 -称 -P -神 -属 -矗 -华 -届 -狍 -葑 -汹 -育 -患 -窒 -蛰 -佼 -静 -槎 -运 -鳗 -庆 -逝 -曼 -疱 -克 -代 -官 -此 -麸 -耧 -蚌 -晟 -例 -础 -榛 -副 -测 -唰 -缢 -迹 -灬 -霁 -身 -岁 -赭 -扛 -又 -菡 -乜 -雾 -板 -读 -陷 -徉 -贯 -郁 -虑 -变 -钓 -菜 -圾 -现 -琢 -式 -乐 -维 -渔 -浜 -左 -吾 -脑 -钡 -警 -T -啵 -拴 -偌 -漱 -湿 -硕 -止 -骼 -魄 -积 -燥 -联 -踢 -玛 -则 -窿 -见 -振 -畿 -送 -班 -钽 -您 -赵 -刨 -印 -讨 -踝 -籍 -谡 -舌 -崧 -汽 -蔽 -沪 -酥 -绒 -怖 -财 -帖 -肱 -私 -莎 -勋 -羔 -霸 -励 -哼 -帐 -将 -帅 -渠 -纪 -婴 -娩 -岭 -厘 -滕 -吻 -伤 -坝 -冠 -戊 -隆 -瘁 -介 -涧 -物 -黍 -并 -姗 -奢 -蹑 -掣 -垸 -锴 -命 -箍 -捉 -病 -辖 -琰 -眭 -迩 -艘 -绌 -繁 -寅 -若 -毋 -思 -诉 -类 -诈 -燮 -轲 -酮 -狂 -重 -反 -职 -筱 -县 -委 -磕 -绣 -奖 -晋 -濉 -志 -徽 -肠 -呈 -獐 -坻 -口 -片 -碰 -几 -村 -柿 -劳 -料 -获 -亩 -惕 -晕 -厌 -号 -罢 -池 -正 -鏖 -煨 -家 -棕 -复 -尝 -懋 -蜥 -锅 -岛 -扰 -队 -坠 -瘾 -钬 -@ -卧 -疣 -镇 -譬 -冰 -彷 -频 -黯 -据 -垄 -采 -八 -缪 -瘫 -型 -熹 -砰 -楠 -襁 -箐 -但 -嘶 -绳 -啤 -拍 -盥 -穆 -傲 -洗 -盯 -塘 -怔 -筛 -丿 -台 -恒 -喂 -葛 -永 -¥ -烟 -酒 -桦 -书 -砂 -蚝 -缉 -态 -瀚 -袄 -圳 -轻 -蛛 -超 -榧 -遛 -姒 -奘 -铮 -右 -荽 -望 -偻 -卡 -丶 -氰 -附 -做 -革 -索 -戚 -坨 -桷 -唁 -垅 -榻 -岐 -偎 -坛 -莨 -山 -殊 -微 -骇 -陈 -爨 -推 -嗝 -驹 -澡 -藁 -呤 -卤 -嘻 -糅 -逛 -侵 -郓 -酌 -德 -摇 -※ -鬃 -被 -慨 -殡 -羸 -昌 -泡 -戛 -鞋 -河 -宪 -沿 -玲 -鲨 -翅 -哽 -源 -铅 -语 -照 -邯 -址 -荃 -佬 -顺 -鸳 -町 -霭 -睾 -瓢 -夸 -椁 -晓 -酿 -痈 -咔 -侏 -券 -噎 -湍 -签 -嚷 -离 -午 -尚 -社 -锤 -背 -孟 -使 -浪 -缦 -潍 -鞅 -军 -姹 -驶 -笑 -鳟 -鲁 -》 -孽 -钜 -绿 -洱 -礴 -焯 -椰 -颖 -囔 -乌 -孔 -巴 -互 -性 -椽 -哞 -聘 -昨 -早 -暮 -胶 -炀 -隧 -低 -彗 -昝 -铁 -呓 -氽 -藉 -喔 -癖 -瑗 -姨 -权 -胱 -韦 -堑 -蜜 -酋 -楝 -砝 -毁 -靓 -歙 -锲 -究 -屋 -喳 -骨 -辨 -碑 -武 -鸠 -宫 -辜 -烊 -适 -坡 -殃 -培 -佩 -供 -走 -蜈 -迟 -翼 -况 -姣 -凛 -浔 -吃 -飘 -债 -犟 -金 -促 -苛 -崇 -坂 -莳 -畔 -绂 -兵 -蠕 -斋 -根 -砍 -亢 -欢 -恬 -崔 -剁 -餐 -榫 -快 -扶 -‖ -濒 -缠 -鳜 -当 -彭 -驭 -浦 -篮 -昀 -锆 -秸 -钳 -弋 -娣 -瞑 -夷 -龛 -苫 -拱 -致 -% -嵊 -障 -隐 -弑 -初 -娓 -抉 -汩 -累 -蓖 -" -唬 -助 -苓 -昙 -押 -毙 -破 -城 -郧 -逢 -嚏 -獭 -瞻 -溱 -婿 -赊 -跨 -恼 -璧 -萃 -姻 -貉 -灵 -炉 -密 -氛 -陶 -砸 -谬 -衔 -点 -琛 -沛 -枳 -层 -岱 -诺 -脍 -榈 -埂 -征 -冷 -裁 -打 -蹴 -素 -瘘 -逞 -蛐 -聊 -激 -腱 -萘 -踵 -飒 -蓟 -吆 -取 -咙 -簋 -涓 -矩 -曝 -挺 -揣 -座 -你 -史 -舵 -焱 -尘 -苏 -笈 -脚 -溉 -榨 -诵 -樊 -邓 -焊 -义 -庶 -儋 -蟋 -蒲 -赦 -呷 -杞 -诠 -豪 -还 -试 -颓 -茉 -太 -除 -紫 -逃 -痴 -草 -充 -鳕 -珉 -祗 -墨 -渭 -烩 -蘸 -慕 -璇 -镶 -穴 -嵘 -恶 -骂 -险 -绋 -幕 -碉 -肺 -戳 -刘 -潞 -秣 -纾 -潜 -銮 -洛 -须 -罘 -销 -瘪 -汞 -兮 -屉 -r -林 -厕 -质 -探 -划 -狸 -殚 -善 -煊 -烹 -〒 -锈 -逯 -宸 -辍 -泱 -柚 -袍 -远 -蹋 -嶙 -绝 -峥 -娥 -缍 -雀 -徵 -认 -镱 -谷 -= -贩 -勉 -撩 -鄯 -斐 -洋 -非 -祚 -泾 -诒 -饿 -撬 -威 -晷 -搭 -芍 -锥 -笺 -蓦 -候 -琊 -档 -礁 -沼 -卵 -荠 -忑 -朝 -凹 -瑞 -头 -仪 -弧 -孵 -畏 -铆 -突 -衲 -车 -浩 -气 -茂 -悖 -厢 -枕 -酝 -戴 -湾 -邹 -飚 -攘 -锂 -写 -宵 -翁 -岷 -无 -喜 -丈 -挑 -嗟 -绛 -殉 -议 -槽 -具 -醇 -淞 -笃 -郴 -阅 -饼 -底 -壕 -砚 -弈 -询 -缕 -庹 -翟 -零 -筷 -暨 -舟 -闺 -甯 -撞 -麂 -茌 -蔼 -很 -珲 -捕 -棠 -角 -阉 -媛 -娲 -诽 -剿 -尉 -爵 -睬 -韩 -诰 -匣 -危 -糍 -镯 -立 -浏 -阳 -少 -盆 -舔 -擘 -匪 -申 -尬 -铣 -旯 -抖 -赘 -瓯 -居 -ˇ -哮 -游 -锭 -茏 -歌 -坏 -甚 -秒 -舞 -沙 -仗 -劲 -潺 -阿 -燧 -郭 -嗖 -霏 -忠 -材 -奂 -耐 -跺 -砀 -输 -岖 -媳 -氟 -极 -摆 -灿 -今 -扔 -腻 -枝 -奎 -药 -熄 -吨 -话 -q -额 -慑 -嘌 -协 -喀 -壳 -埭 -视 -著 -於 -愧 -陲 -翌 -峁 -颅 -佛 -腹 -聋 -侯 -咎 -叟 -秀 -颇 -存 -较 -罪 -哄 -岗 -扫 -栏 -钾 -羌 -己 -璨 -枭 -霉 -煌 -涸 -衿 -键 -镝 -益 -岢 -奏 -连 -夯 -睿 -冥 -均 -糖 -狞 -蹊 -稻 -爸 -刿 -胥 -煜 -丽 -肿 -璃 -掸 -跚 -灾 -垂 -樾 -濑 -乎 -莲 -窄 -犹 -撮 -战 -馄 -软 -络 -显 -鸢 -胸 -宾 -妲 -恕 -埔 -蝌 -份 -遇 -巧 -瞟 -粒 -恰 -剥 -桡 -博 -讯 -凯 -堇 -阶 -滤 -卖 -斌 -骚 -彬 -兑 -磺 -樱 -舷 -两 -娱 -福 -仃 -差 -找 -桁 -÷ -净 -把 -阴 -污 -戬 -雷 -碓 -蕲 -楚 -罡 -焖 -抽 -妫 -咒 -仑 -闱 -尽 -邑 -菁 -爱 -贷 -沥 -鞑 -牡 -嗉 -崴 -骤 -塌 -嗦 -订 -拮 -滓 -捡 -锻 -次 -坪 -杩 -臃 -箬 -融 -珂 -鹗 -宗 -枚 -降 -鸬 -妯 -阄 -堰 -盐 -毅 -必 -杨 -崃 -俺 -甬 -状 -莘 -货 -耸 -菱 -腼 -铸 -唏 -痤 -孚 -澳 -懒 -溅 -翘 -疙 -杷 -淼 -缙 -骰 -喊 -悉 -砻 -坷 -艇 -赁 -界 -谤 -纣 -宴 -晃 -茹 -归 -饭 -梢 -铡 -街 -抄 -肼 -鬟 -苯 -颂 -撷 -戈 -炒 -咆 -茭 -瘙 -负 -仰 -客 -琉 -铢 -封 -卑 -珥 -椿 -镧 -窨 -鬲 -寿 -御 -袤 -铃 -萎 -砖 -餮 -脒 -裳 -肪 -孕 -嫣 -馗 -嵇 -恳 -氯 -江 -石 -褶 -冢 -祸 -阻 -狈 -羞 -银 -靳 -透 -咳 -叼 -敷 -芷 -啥 -它 -瓤 -兰 -痘 -懊 -逑 -肌 -往 -捺 -坊 -甩 -呻 -〃 -沦 -忘 -膻 -祟 -菅 -剧 -崆 -智 -坯 -臧 -霍 -墅 -攻 -眯 -倘 -拢 -骠 -铐 -庭 -岙 -瓠 -′ -缺 -泥 -迢 -捶 -? -? -郏 -喙 -掷 -沌 -纯 -秘 -种 -听 -绘 -固 -螨 -团 -香 -盗 -妒 -埚 -蓝 -拖 -旱 -荞 -铀 -血 -遏 -汲 -辰 -叩 -拽 -幅 -硬 -惶 -桀 -漠 -措 -泼 -唑 -齐 -肾 -念 -酱 -虚 -屁 -耶 -旗 -砦 -闵 -婉 -馆 -拭 -绅 -韧 -忏 -窝 -醋 -葺 -顾 -辞 -倜 -堆 -辋 -逆 -玟 -贱 -疾 -董 -惘 -倌 -锕 -淘 -嘀 -莽 -俭 -笏 -绑 -鲷 -杈 -择 -蟀 -粥 -嗯 -驰 -逾 -案 -谪 -褓 -胫 -哩 -昕 -颚 -鲢 -绠 -躺 -鹄 -崂 -儒 -俨 -丝 -尕 -泌 -啊 -萸 -彰 -幺 -吟 -骄 -苣 -弦 -脊 -瑰 -〈 -诛 -镁 -析 -闪 -剪 -侧 -哟 -框 -螃 -守 -嬗 -燕 -狭 -铈 -缮 -概 -迳 -痧 -鲲 -俯 -售 -笼 -痣 -扉 -挖 -满 -咋 -援 -邱 -扇 -歪 -便 -玑 -绦 -峡 -蛇 -叨 -〖 -泽 -胃 -斓 -喋 -怂 -坟 -猪 -该 -蚬 -炕 -弥 -赞 -棣 -晔 -娠 -挲 -狡 -创 -疖 -铕 -镭 -稷 -挫 -弭 -啾 -翔 -粉 -履 -苘 -哦 -楼 -秕 -铂 -土 -锣 -瘟 -挣 -栉 -习 -享 -桢 -袅 -磨 -桂 -谦 -延 -坚 -蔚 -噗 -署 -谟 -猬 -钎 -恐 -嬉 -雒 -倦 -衅 -亏 -璩 -睹 -刻 -殿 -王 -算 -雕 -麻 -丘 -柯 -骆 -丸 -塍 -谚 -添 -鲈 -垓 -桎 -蚯 -芥 -予 -飕 -镦 -谌 -窗 -醚 -菀 -亮 -搪 -莺 -蒿 -羁 -足 -J -真 -轶 -悬 -衷 -靛 -翊 -掩 -哒 -炅 -掐 -冼 -妮 -l -谐 -稚 -荆 -擒 -犯 -陵 -虏 -浓 -崽 -刍 -陌 -傻 -孜 -千 -靖 -演 -矜 -钕 -煽 -杰 -酗 -渗 -伞 -栋 -俗 -泫 -戍 -罕 -沾 -疽 -灏 -煦 -芬 -磴 -叱 -阱 -榉 -湃 -蜀 -叉 -醒 -彪 -租 -郡 -篷 -屎 -良 -垢 -隗 -弱 -陨 -峪 -砷 -掴 -颁 -胎 -雯 -绵 -贬 -沐 -撵 -隘 -篙 -暖 -曹 -陡 -栓 -填 -臼 -彦 -瓶 -琪 -潼 -哪 -鸡 -摩 -啦 -俟 -锋 -域 -耻 -蔫 -疯 -纹 -撇 -毒 -绶 -痛 -酯 -忍 -爪 -赳 -歆 -嘹 -辕 -烈 -册 -朴 -钱 -吮 -毯 -癜 -娃 -谀 -邵 -厮 -炽 -璞 -邃 -丐 -追 -词 -瓒 -忆 -轧 -芫 -谯 -喷 -弟 -半 -冕 -裙 -掖 -墉 -绮 -寝 -苔 -势 -顷 -褥 -切 -衮 -君 -佳 -嫒 -蚩 -霞 -佚 -洙 -逊 -镖 -暹 -唛 -& -殒 -顶 -碗 -獗 -轭 -铺 -蛊 -废 -恹 -汨 -崩 -珍 -那 -杵 -曲 -纺 -夏 -薰 -傀 -闳 -淬 -姘 -舀 -拧 -卷 -楂 -恍 -讪 -厩 -寮 -篪 -赓 -乘 -灭 -盅 -鞣 -沟 -慎 -挂 -饺 -鼾 -杳 -树 -缨 -丛 -絮 -娌 -臻 -嗳 -篡 -侩 -述 -衰 -矛 -圈 -蚜 -匕 -筹 -匿 -濞 -晨 -叶 -骋 -郝 -挚 -蚴 -滞 -增 -侍 -描 -瓣 -吖 -嫦 -蟒 -匾 -圣 -赌 -毡 -癞 -恺 -百 -曳 -需 -篓 -肮 -庖 -帏 -卿 -驿 -遗 -蹬 -鬓 -骡 -歉 -芎 -胳 -屐 -禽 -烦 -晌 -寄 -媾 -狄 -翡 -苒 -船 -廉 -终 -痞 -殇 -々 -畦 -饶 -改 -拆 -悻 -萄 -£ -瓿 -乃 -訾 -桅 -匮 -溧 -拥 -纱 -铍 -骗 -蕃 -龋 -缬 -父 -佐 -疚 -栎 -醍 -掳 -蓄 -x -惆 -颜 -鲆 -榆 -〔 -猎 -敌 -暴 -谥 -鲫 -贾 -罗 -玻 -缄 -扦 -芪 -癣 -落 -徒 -臾 -恿 -猩 -托 -邴 -肄 -牵 -春 -陛 -耀 -刊 -拓 -蓓 -邳 -堕 -寇 -枉 -淌 -啡 -湄 -兽 -酷 -萼 -碚 -濠 -萤 -夹 -旬 -戮 -梭 -琥 -椭 -昔 -勺 -蜊 -绐 -晚 -孺 -僵 -宣 -摄 -冽 -旨 -萌 -忙 -蚤 -眉 -噼 -蟑 -付 -契 -瓜 -悼 -颡 -壁 -曾 -窕 -颢 -澎 -仿 -俑 -浑 -嵌 -浣 -乍 -碌 -褪 -乱 -蔟 -隙 -玩 -剐 -葫 -箫 -纲 -围 -伐 -决 -伙 -漩 -瑟 -刑 -肓 -镳 -缓 -蹭 -氨 -皓 -典 -畲 -坍 -铑 -檐 -塑 -洞 -倬 -储 -胴 -淳 -戾 -吐 -灼 -惺 -妙 -毕 -珐 -缈 -虱 -盖 -羰 -鸿 -磅 -谓 -髅 -娴 -苴 -唷 -蚣 -霹 -抨 -贤 -唠 -犬 -誓 -逍 -庠 -逼 -麓 -籼 -釉 -呜 -碧 -秧 -氩 -摔 -霄 -穸 -纨 -辟 -妈 -映 -完 -牛 -缴 -嗷 -炊 -恩 -荔 -茆 -掉 -紊 -慌 -莓 -羟 -阙 -萁 -磐 -另 -蕹 -辱 -鳐 -湮 -吡 -吩 -唐 -睦 -垠 -舒 -圜 -冗 -瞿 -溺 -芾 -囱 -匠 -僳 -汐 -菩 -饬 -漓 -黑 -霰 -浸 -濡 -窥 -毂 -蒡 -兢 -驻 -鹉 -芮 -诙 -迫 -雳 -厂 -忐 -臆 -猴 -鸣 -蚪 -栈 -箕 -羡 -渐 -莆 -捍 -眈 -哓 -趴 -蹼 -埕 -嚣 -骛 -宏 -淄 -斑 -噜 -严 -瑛 -垃 -椎 -诱 -压 -庾 -绞 -焘 -廿 -抡 -迄 -棘 -夫 -纬 -锹 -眨 -瞌 -侠 -脐 -竞 -瀑 -孳 -骧 -遁 -姜 -颦 -荪 -滚 -萦 -伪 -逸 -粳 -爬 -锁 -矣 -役 -趣 -洒 -颔 -诏 -逐 -奸 -甭 -惠 -攀 -蹄 -泛 -尼 -拼 -阮 -鹰 -亚 -颈 -惑 -勒 -〉 -际 -肛 -爷 -刚 -钨 -丰 -养 -冶 -鲽 -辉 -蔻 -画 -覆 -皴 -妊 -麦 -返 -醉 -皂 -擀 -〗 -酶 -凑 -粹 -悟 -诀 -硖 -港 -卜 -z -杀 -涕 -± -舍 -铠 -抵 -弛 -段 -敝 -镐 -奠 -拂 -轴 -跛 -袱 -e -t -沉 -菇 -俎 -薪 -峦 -秭 -蟹 -历 -盟 -菠 -寡 -液 -肢 -喻 -染 -裱 -悱 -抱 -氙 -赤 -捅 -猛 -跑 -氮 -谣 -仁 -尺 -辊 -窍 -烙 -衍 -架 -擦 -倏 -璐 -瑁 -币 -楞 -胖 -夔 -趸 -邛 -惴 -饕 -虔 -蝎 -§ -哉 -贝 -宽 -辫 -炮 -扩 -饲 -籽 -魏 -菟 -锰 -伍 -猝 -末 -琳 -哚 -蛎 -邂 -呀 -姿 -鄞 -却 -歧 -仙 -恸 -椐 -森 -牒 -寤 -袒 -婆 -虢 -雅 -钉 -朵 -贼 -欲 -苞 -寰 -故 -龚 -坭 -嘘 -咫 -礼 -硷 -兀 -睢 -汶 -’ -铲 -烧 -绕 -诃 -浃 -钿 -哺 -柜 -讼 -颊 -璁 -腔 -洽 -咐 -脲 -簌 -筠 -镣 -玮 -鞠 -谁 -兼 -姆 -挥 -梯 -蝴 -谘 -漕 -刷 -躏 -宦 -弼 -b -垌 -劈 -麟 -莉 -揭 -笙 -渎 -仕 -嗤 -仓 -配 -怏 -抬 -错 -泯 -镊 -孰 -猿 -邪 -仍 -秋 -鼬 -壹 -歇 -吵 -炼 -< -尧 -射 -柬 -廷 -胧 -霾 -凳 -隋 -肚 -浮 -梦 -祥 -株 -堵 -退 -L -鹫 -跎 -凶 -毽 -荟 -炫 -栩 -玳 -甜 -沂 -鹿 -顽 -伯 -爹 -赔 -蛴 -徐 -匡 -欣 -狰 -缸 -雹 -蟆 -疤 -默 -沤 -啜 -痂 -衣 -禅 -w -i -h -辽 -葳 -黝 -钗 -停 -沽 -棒 -馨 -颌 -肉 -吴 -硫 -悯 -劾 -娈 -马 -啧 -吊 -悌 -镑 -峭 -帆 -瀣 -涉 -咸 -疸 -滋 -泣 -翦 -拙 -癸 -钥 -蜒 -+ -尾 -庄 -凝 -泉 -婢 -渴 -谊 -乞 -陆 -锉 -糊 -鸦 -淮 -I -B -N -晦 -弗 -乔 -庥 -葡 -尻 -席 -橡 -傣 -渣 -拿 -惩 -麋 -斛 -缃 -矮 -蛏 -岘 -鸽 -姐 -膏 -催 -奔 -镒 -喱 -蠡 -摧 -钯 -胤 -柠 -拐 -璋 -鸥 -卢 -荡 -倾 -^ -_ -珀 -逄 -萧 -塾 -掇 -贮 -笆 -聂 -圃 -冲 -嵬 -M -滔 -笕 -值 -炙 -偶 -蜱 -搐 -梆 -汪 -蔬 -腑 -鸯 -蹇 -敞 -绯 -仨 -祯 -谆 -梧 -糗 -鑫 -啸 -豺 -囹 -猾 -巢 -柄 -瀛 -筑 -踌 -沭 -暗 -苁 -鱿 -蹉 -脂 -蘖 -牢 -热 -木 -吸 -溃 -宠 -序 -泞 -偿 -拜 -檩 -厚 -朐 -毗 -螳 -吞 -媚 -朽 -担 -蝗 -橘 -畴 -祈 -糟 -盱 -隼 -郜 -惜 -珠 -裨 -铵 -焙 -琚 -唯 -咚 -噪 -骊 -丫 -滢 -勤 -棉 -呸 -咣 -淀 -隔 -蕾 -窈 -饨 -挨 -煅 -短 -匙 -粕 -镜 -赣 -撕 -墩 -酬 -馁 -豌 -颐 -抗 -酣 -氓 -佑 -搁 -哭 -递 -耷 -涡 -桃 -贻 -碣 -截 -瘦 -昭 -镌 -蔓 -氚 -甲 -猕 -蕴 -蓬 -散 -拾 -纛 -狼 -猷 -铎 -埋 -旖 -矾 -讳 -囊 -糜 -迈 -粟 -蚂 -紧 -鲳 -瘢 -栽 -稼 -羊 -锄 -斟 -睁 -桥 -瓮 -蹙 -祉 -醺 -鼻 -昱 -剃 -跳 -篱 -跷 -蒜 -翎 -宅 -晖 -嗑 -壑 -峻 -癫 -屏 -狠 -陋 -袜 -途 -憎 -祀 -莹 -滟 -佶 -溥 -臣 -约 -盛 -峰 -磁 -慵 -婪 -拦 -莅 -朕 -鹦 -粲 -裤 -哎 -疡 -嫖 -琵 -窟 -堪 -谛 -嘉 -儡 -鳝 -斩 -郾 -驸 -酊 -妄 -胜 -贺 -徙 -傅 -噌 -钢 -栅 -庇 -恋 -匝 -巯 -邈 -尸 -锚 -粗 -佟 -蛟 -薹 -纵 -蚊 -郅 -绢 -锐 -苗 -俞 -篆 -淆 -膀 -鲜 -煎 -诶 -秽 -寻 -涮 -刺 -怀 -噶 -巨 -褰 -魅 -灶 -灌 -桉 -藕 -谜 -舸 -薄 -搀 -恽 -借 -牯 -痉 -渥 -愿 -亓 -耘 -杠 -柩 -锔 -蚶 -钣 -珈 -喘 -蹒 -幽 -赐 -稗 -晤 -莱 -泔 -扯 -肯 -菪 -裆 -腩 -豉 -疆 -骜 -腐 -倭 -珏 -唔 -粮 -亡 -润 -慰 -伽 -橄 -玄 -誉 -醐 -胆 -龊 -粼 -塬 -陇 -彼 -削 -嗣 -绾 -芽 -妗 -垭 -瘴 -爽 -薏 -寨 -龈 -泠 -弹 -赢 -漪 -猫 -嘧 -涂 -恤 -圭 -茧 -烽 -屑 -痕 -巾 -赖 -荸 -凰 -腮 -畈 -亵 -蹲 -偃 -苇 -澜 -艮 -换 -骺 -烘 -苕 -梓 -颉 -肇 -哗 -悄 -氤 -涠 -葬 -屠 -鹭 -植 -竺 -佯 -诣 -鲇 -瘀 -鲅 -邦 -移 -滁 -冯 -耕 -癔 -戌 -茬 -沁 -巩 -悠 -湘 -洪 -痹 -锟 -循 -谋 -腕 -鳃 -钠 -捞 -焉 -迎 -碱 -伫 -急 -榷 -奈 -邝 -卯 -辄 -皲 -卟 -醛 -畹 -忧 -稳 -雄 -昼 -缩 -阈 -睑 -扌 -耗 -曦 -涅 -捏 -瞧 -邕 -淖 -漉 -铝 -耦 -禹 -湛 -喽 -莼 -琅 -诸 -苎 -纂 -硅 -始 -嗨 -傥 -燃 -臂 -赅 -嘈 -呆 -贵 -屹 -壮 -肋 -亍 -蚀 -卅 -豹 -腆 -邬 -迭 -浊 -} -童 -螂 -捐 -圩 -勐 -触 -寞 -汊 -壤 -荫 -膺 -渌 -芳 -懿 -遴 -螈 -泰 -蓼 -蛤 -茜 -舅 -枫 -朔 -膝 -眙 -避 -梅 -判 -鹜 -璜 -牍 -缅 -垫 -藻 -黔 -侥 -惚 -懂 -踩 -腰 -腈 -札 -丞 -唾 -慈 -顿 -摹 -荻 -琬 -~ -斧 -沈 -滂 -胁 -胀 -幄 -莜 -Z -匀 -鄄 -掌 -绰 -茎 -焚 -赋 -萱 -谑 -汁 -铒 -瞎 -夺 -蜗 -野 -娆 -冀 -弯 -篁 -懵 -灞 -隽 -芡 -脘 -俐 -辩 -芯 -掺 -喏 -膈 -蝈 -觐 -悚 -踹 -蔗 -熠 -鼠 -呵 -抓 -橼 -峨 -畜 -缔 -禾 -崭 -弃 -熊 -摒 -凸 -拗 -穹 -蒙 -抒 -祛 -劝 -闫 -扳 -阵 -醌 -踪 -喵 -侣 -搬 -仅 -荧 -赎 -蝾 -琦 -买 -婧 -瞄 -寓 -皎 -冻 -赝 -箩 -莫 -瞰 -郊 -笫 -姝 -筒 -枪 -遣 -煸 -袋 -舆 -痱 -涛 -母 -〇 -启 -践 -耙 -绲 -盘 -遂 -昊 -搞 -槿 -诬 -纰 -泓 -惨 -檬 -亻 -越 -C -o -憩 -熵 -祷 -钒 -暧 -塔 -阗 -胰 -咄 -娶 -魔 -琶 -钞 -邻 -扬 -杉 -殴 -咽 -弓 -〆 -髻 -】 -吭 -揽 -霆 -拄 -殖 -脆 -彻 -岩 -芝 -勃 -辣 -剌 -钝 -嘎 -甄 -佘 -皖 -伦 -授 -徕 -憔 -挪 -皇 -庞 -稔 -芜 -踏 -溴 -兖 -卒 -擢 -饥 -鳞 -煲 -‰ -账 -颗 -叻 -斯 -捧 -鳍 -琮 -讹 -蛙 -纽 -谭 -酸 -兔 -莒 -睇 -伟 -觑 -羲 -嗜 -宜 -褐 -旎 -辛 -卦 -诘 -筋 -鎏 -溪 -挛 -熔 -阜 -晰 -鳅 -丢 -奚 -灸 -呱 -献 -陉 -黛 -鸪 -甾 -萨 -疮 -拯 -洲 -疹 -辑 -叙 -恻 -谒 -允 -柔 -烂 -氏 -逅 -漆 -拎 -惋 -扈 -湟 -纭 -啕 -掬 -擞 -哥 -忽 -涤 -鸵 -靡 -郗 -瓷 -扁 -廊 -怨 -雏 -钮 -敦 -E -懦 -憋 -汀 -拚 -啉 -腌 -岸 -f -痼 -瞅 -尊 -咀 -眩 -飙 -忌 -仝 -迦 -熬 -毫 -胯 -篑 -茄 -腺 -凄 -舛 -碴 -锵 -诧 -羯 -後 -漏 -汤 -宓 -仞 -蚁 -壶 -谰 -皑 -铄 -棰 -罔 -辅 -晶 -苦 -牟 -闽 -\ -烃 -饮 -聿 -丙 -蛳 -朱 -煤 -涔 -鳖 -犁 -罐 -荼 -砒 -淦 -妤 -黏 -戎 -孑 -婕 -瑾 -戢 -钵 -枣 -捋 -砥 -衩 -狙 -桠 -稣 -阎 -肃 -梏 -诫 -孪 -昶 -婊 -衫 -嗔 -侃 -塞 -蜃 -樵 -峒 -貌 -屿 -欺 -缫 -阐 -栖 -诟 -珞 -荭 -吝 -萍 -嗽 -恂 -啻 -蜴 -磬 -峋 -俸 -豫 -谎 -徊 -镍 -韬 -魇 -晴 -U -囟 -猜 -蛮 -坐 -囿 -伴 -亭 -肝 -佗 -蝠 -妃 -胞 -滩 -榴 -氖 -垩 -苋 -砣 -扪 -馏 -姓 -轩 -厉 -夥 -侈 -禀 -垒 -岑 -赏 -钛 -辐 -痔 -披 -纸 -碳 -“ -坞 -蠓 -挤 -荥 -沅 -悔 -铧 -帼 -蒌 -蝇 -a -p -y -n -g -哀 -浆 -瑶 -凿 -桶 -馈 -皮 -奴 -苜 -佤 -伶 -晗 -铱 -炬 -优 -弊 -氢 -恃 -甫 -攥 -端 -锌 -灰 -稹 -炝 -曙 -邋 -亥 -眶 -碾 -拉 -萝 -绔 -捷 -浍 -腋 -姑 -菖 -凌 -涞 -麽 -锢 -桨 -潢 -绎 -镰 -殆 -锑 -渝 -铬 -困 -绽 -觎 -匈 -糙 -暑 -裹 -鸟 -盔 -肽 -迷 -綦 -『 -亳 -佝 -俘 -钴 -觇 -骥 -仆 -疝 -跪 -婶 -郯 -瀹 -唉 -脖 -踞 -针 -晾 -忒 -扼 -瞩 -叛 -椒 -疟 -嗡 -邗 -肆 -跆 -玫 -忡 -捣 -咧 -唆 -艄 -蘑 -潦 -笛 -阚 -沸 -泻 -掊 -菽 -贫 -斥 -髂 -孢 -镂 -赂 -麝 -鸾 -屡 -衬 -苷 -恪 -叠 -希 -粤 -爻 -喝 -茫 -惬 -郸 -绻 -庸 -撅 -碟 -宄 -妹 -膛 -叮 -饵 -崛 -嗲 -椅 -冤 -搅 -咕 -敛 -尹 -垦 -闷 -蝉 -霎 -勰 -败 -蓑 -泸 -肤 -鹌 -幌 -焦 -浠 -鞍 -刁 -舰 -乙 -竿 -裔 -。 -茵 -函 -伊 -兄 -丨 -娜 -匍 -謇 -莪 -宥 -似 -蝽 -翳 -酪 -翠 -粑 -薇 -祢 -骏 -赠 -叫 -Q -噤 -噻 -竖 -芗 -莠 -潭 -俊 -羿 -耜 -O -郫 -趁 -嗪 -囚 -蹶 -芒 -洁 -笋 -鹑 -敲 -硝 -啶 -堡 -渲 -揩 -』 -携 -宿 -遒 -颍 -扭 -棱 -割 -萜 -蔸 -葵 -琴 -捂 -饰 -衙 -耿 -掠 -募 -岂 -窖 -涟 -蔺 -瘤 -柞 -瞪 -怜 -匹 -距 -楔 -炜 -哆 -秦 -缎 -幼 -茁 -绪 -痨 -恨 -楸 -娅 -瓦 -桩 -雪 -嬴 -伏 -榔 -妥 -铿 -拌 -眠 -雍 -缇 -‘ -卓 -搓 -哌 -觞 -噩 -屈 -哧 -髓 -咦 -巅 -娑 -侑 -淫 -膳 -祝 -勾 -姊 -莴 -胄 -疃 -薛 -蜷 -胛 -巷 -芙 -芋 -熙 -闰 -勿 -窃 -狱 -剩 -钏 -幢 -陟 -铛 -慧 -靴 -耍 -k -浙 -浇 -飨 -惟 -绗 -祜 -澈 -啼 -咪 -磷 -摞 -诅 -郦 -抹 -跃 -壬 -吕 -肖 -琏 -颤 -尴 -剡 -抠 -凋 -赚 -泊 -津 -宕 -殷 -倔 -氲 -漫 -邺 -涎 -怠 -$ -垮 -荬 -遵 -俏 -叹 -噢 -饽 -蜘 -孙 -筵 -疼 -鞭 -羧 -牦 -箭 -潴 -c -眸 -祭 -髯 -啖 -坳 -愁 -芩 -驮 -倡 -巽 -穰 -沃 -胚 -怒 -凤 -槛 -剂 -趵 -嫁 -v -邢 -灯 -鄢 -桐 -睽 -檗 -锯 -槟 -婷 -嵋 -圻 -诗 -蕈 -颠 -遭 -痢 -芸 -怯 -馥 -竭 -锗 -徜 -恭 -遍 -籁 -剑 -嘱 -苡 -龄 -僧 -桑 -潸 -弘 -澶 -楹 -悲 -讫 -愤 -腥 -悸 -谍 -椹 -呢 -桓 -葭 -攫 -阀 -翰 -躲 -敖 -柑 -郎 -笨 -橇 -呃 -魁 -燎 -脓 -葩 -磋 -垛 -玺 -狮 -沓 -砜 -蕊 -锺 -罹 -蕉 -翱 -虐 -闾 -巫 -旦 -茱 -嬷 -枯 -鹏 -贡 -芹 -汛 -矫 -绁 -拣 -禺 -佃 -讣 -舫 -惯 -乳 -趋 -疲 -挽 -岚 -虾 -衾 -蠹 -蹂 -飓 -氦 -铖 -孩 -稞 -瑜 -壅 -掀 -勘 -妓 -畅 -髋 -W -庐 -牲 -蓿 -榕 -练 -垣 -唱 -邸 -菲 -昆 -婺 -穿 -绡 -麒 -蚱 -掂 -愚 -泷 -涪 -漳 -妩 -娉 -榄 -讷 -觅 -旧 -藤 -煮 -呛 -柳 -腓 -叭 -庵 -烷 -阡 -罂 -蜕 -擂 -猖 -咿 -媲 -脉 -【 -沏 -貅 -黠 -熏 -哲 -烁 -坦 -酵 -兜 -× -潇 -撒 -剽 -珩 -圹 -乾 -摸 -樟 -帽 -嗒 -襄 -魂 -轿 -憬 -锡 -〕 -喃 -皆 -咖 -隅 -脸 -残 -泮 -袂 -鹂 -珊 -囤 -捆 -咤 -误 -徨 -闹 -淙 -芊 -淋 -怆 -囗 -拨 -梳 -渤 -R -G -绨 -蚓 -婀 -幡 -狩 -麾 -谢 -唢 -裸 -旌 -伉 -纶 -裂 -驳 -砼 -咛 -澄 -樨 -蹈 -宙 -澍 -倍 -貔 -操 -勇 -蟠 -摈 -砧 -虬 -够 -缁 -悦 -藿 -撸 -艹 -摁 -淹 -豇 -虎 -榭 -ˉ -吱 -d -° -喧 -荀 -踱 -侮 -奋 -偕 -饷 -犍 -惮 -坑 -璎 -徘 -宛 -妆 -袈 -倩 -窦 -昂 -荏 -乖 -K -怅 -撰 -鳙 -牙 -袁 -酞 -X -痿 -琼 -闸 -雁 -趾 -荚 -虻 -涝 -《 -杏 -韭 -偈 -烤 -绫 -鞘 -卉 -症 -遢 -蓥 -诋 -杭 -荨 -匆 -竣 -簪 -辙 -敕 -虞 -丹 -缭 -咩 -黟 -m -淤 -瑕 -咂 -铉 -硼 -茨 -嶂 -痒 -畸 -敬 -涿 -粪 -窘 -熟 -叔 -嫔 -盾 -忱 -裘 -憾 -梵 -赡 -珙 -咯 -娘 -庙 -溯 -胺 -葱 -痪 -摊 -荷 -卞 -乒 -髦 -寐 -铭 -坩 -胗 -枷 -爆 -溟 -嚼 -羚 -砬 -轨 -惊 -挠 -罄 -竽 -菏 -氧 -浅 -楣 -盼 -枢 -炸 -阆 -杯 -谏 -噬 -淇 -渺 -俪 -秆 -墓 -泪 -跻 -砌 -痰 -垡 -渡 -耽 -釜 -讶 -鳎 -煞 -呗 -韶 -舶 -绷 -鹳 -缜 -旷 -铊 -皱 -龌 -檀 -霖 -奄 -槐 -艳 -蝶 -旋 -哝 -赶 -骞 -蚧 -腊 -盈 -丁 -` -蜚 -矸 -蝙 -睨 -嚓 -僻 -鬼 -醴 -夜 -彝 -磊 -笔 -拔 -栀 -糕 -厦 -邰 -纫 -逭 -纤 -眦 -膊 -馍 -躇 -烯 -蘼 -冬 -诤 -暄 -骶 -哑 -瘠 -」 -臊 -丕 -愈 -咱 -螺 -擅 -跋 -搏 -硪 -谄 -笠 -淡 -嘿 -骅 -谧 -鼎 -皋 -姚 -歼 -蠢 -驼 -耳 -胬 -挝 -涯 -狗 -蒽 -孓 -犷 -凉 -芦 -箴 -铤 -孤 -嘛 -坤 -V -茴 -朦 -挞 -尖 -橙 -诞 -搴 -碇 -洵 -浚 -帚 -蜍 -漯 -柘 -嚎 -讽 -芭 -荤 -咻 -祠 -秉 -跖 -埃 -吓 -糯 -眷 -馒 -惹 -娼 -鲑 -嫩 -讴 -轮 -瞥 -靶 -褚 -乏 -缤 -宋 -帧 -删 -驱 -碎 -扑 -俩 -俄 -偏 -涣 -竹 -噱 -皙 -佰 -渚 -唧 -斡 -# -镉 -刀 -崎 -筐 -佣 -夭 -贰 -肴 -峙 -哔 -艿 -匐 -牺 -镛 -缘 -仡 -嫡 -劣 -枸 -堀 -梨 -簿 -鸭 -蒸 -亦 -稽 -浴 -{ -衢 -束 -槲 -j -阁 -揍 -疥 -棋 -潋 -聪 -窜 -乓 -睛 -插 -冉 -阪 -苍 -搽 -「 -蟾 -螟 -幸 -仇 -樽 -撂 -慢 -跤 -幔 -俚 -淅 -覃 -觊 -溶 -妖 -帛 -侨 -曰 -妾 -泗 -· -: -瀘 -風 -Ë -( -) -∶ -紅 -紗 -瑭 -雲 -頭 -鶏 -財 -許 -• -¥ -樂 -焗 -麗 -— -; -滙 -東 -榮 -繪 -興 -… -門 -業 -π -楊 -國 -顧 -é -盤 -寳 -Λ -龍 -鳳 -島 -誌 -緣 -結 -銭 -萬 -勝 -祎 -璟 -優 -歡 -臨 -時 -購 -= -★ -藍 -昇 -鐵 -觀 -勅 -農 -聲 -畫 -兿 -術 -發 -劉 -記 -專 -耑 -園 -書 -壴 -種 -Ο -● -褀 -號 -銀 -匯 -敟 -锘 -葉 -橪 -廣 -進 -蒄 -鑽 -阝 -祙 -貢 -鍋 -豊 -夬 -喆 -團 -閣 -開 -燁 -賓 -館 -酡 -沔 -順 -+ -硚 -劵 -饸 -陽 -車 -湓 -復 -萊 -氣 -軒 -華 -堃 -迮 -纟 -戶 -馬 -學 -裡 -電 -嶽 -獨 -マ -シ -サ -ジ -燘 -袪 -環 -❤ -臺 -灣 -専 -賣 -孖 -聖 -攝 -線 -▪ -α -傢 -俬 -夢 -達 -莊 -喬 -貝 -薩 -劍 -羅 -壓 -棛 -饦 -尃 -璈 -囍 -醫 -G -I -A -# -N -鷄 -髙 -嬰 -啓 -約 -隹 -潔 -賴 -藝 -~ -寶 -籣 -麺 -  -嶺 -√ -義 -網 -峩 -長 -∧ -魚 -機 -構 -② -鳯 -偉 -L -B -㙟 -畵 -鴿 -' -詩 -溝 -嚞 -屌 -藔 -佧 -玥 -蘭 -織 -1 -3 -9 -0 -7 -點 -砭 -鴨 -鋪 -銘 -廳 -弍 -‧ -創 -湯 -坶 -℃ -卩 -骝 -& -烜 -荘 -當 -潤 -扞 -係 -懷 -碶 -钅 -蚨 -讠 -☆ -叢 -爲 -埗 -涫 -塗 -→ -楽 -現 -鯨 -愛 -瑪 -鈺 -忄 -悶 -藥 -飾 -樓 -視 -孬 -ㆍ -燚 -苪 -師 -① -丼 -锽 -│ -韓 -標 -è -兒 -閏 -匋 -張 -漢 -Ü -髪 -會 -閑 -檔 -習 -裝 -の -峯 -菘 -輝 -И -雞 -釣 -億 -浐 -K -O -R -8 -H -E -P -T -W -D -S -C -M -F -姌 -饹 -» -晞 -廰 -ä -嵯 -鷹 -負 -飲 -絲 -冚 -楗 -澤 -綫 -區 -❋ -← -質 -靑 -揚 -③ -滬 -統 -産 -協 -﹑ -乸 -畐 -經 -運 -際 -洺 -岽 -為 -粵 -諾 -崋 -豐 -碁 -ɔ -V -2 -6 -齋 -誠 -訂 -´ -勑 -雙 -陳 -無 -í -泩 -媄 -夌 -刂 -i -c -t -o -r -a -嘢 -耄 -燴 -暃 -壽 -媽 -靈 -抻 -體 -唻 -É -冮 -甹 -鎮 -錦 -ʌ -蜛 -蠄 -尓 -駕 -戀 -飬 -逹 -倫 -貴 -極 -Я -Й -寬 -磚 -嶪 -郎 -職 -| -間 -n -d -剎 -伈 -課 -飛 -橋 -瘊 -№ -譜 -骓 -圗 -滘 -縣 -粿 -咅 -養 -濤 -彳 -® -% -Ⅱ -啰 -㴪 -見 -矞 -薬 -糁 -邨 -鲮 -顔 -罱 -З -選 -話 -贏 -氪 -俵 -競 -瑩 -繡 -枱 -β -綉 -á -獅 -爾 -™ -麵 -戋 -淩 -徳 -個 -劇 -場 -務 -簡 -寵 -h -實 -膠 -轱 -圖 -築 -嘣 -樹 -㸃 -營 -耵 -孫 -饃 -鄺 -飯 -麯 -遠 -輸 -坫 -孃 -乚 -閃 -鏢 -㎡ -題 -廠 -關 -↑ -爺 -將 -軍 -連 -篦 -覌 -參 -箸 -- -窠 -棽 -寕 -夀 -爰 -歐 -呙 -閥 -頡 -熱 -雎 -垟 -裟 -凬 -勁 -帑 -馕 -夆 -疌 -枼 -馮 -貨 -蒤 -樸 -彧 -旸 -靜 -龢 -暢 -㐱 -鳥 -珺 -鏡 -灡 -爭 -堷 -廚 -Ó -騰 -診 -┅ -蘇 -褔 -凱 -頂 -豕 -亞 -帥 -嘬 -⊥ -仺 -桖 -複 -饣 -絡 -穂 -顏 -棟 -納 -▏ -濟 -親 -設 -計 -攵 -埌 -烺 -ò -頤 -燦 -蓮 -撻 -節 -講 -濱 -濃 -娽 -洳 -朿 -燈 -鈴 -護 -膚 -铔 -過 -補 -Z -U -5 -4 -坋 -闿 -䖝 -餘 -缐 -铞 -貿 -铪 -桼 -趙 -鍊 -[ -㐂 -垚 -菓 -揸 -捲 -鐘 -滏 -𣇉 -爍 -輪 -燜 -鴻 -鮮 -動 -鹞 -鷗 -丄 -慶 -鉌 -翥 -飮 -腸 -⇋ -漁 -覺 -來 -熘 -昴 -翏 -鲱 -圧 -鄉 -萭 -頔 -爐 -嫚 -г -貭 -類 -聯 -幛 -輕 -訓 -鑒 -夋 -锨 -芃 -珣 -䝉 -扙 -嵐 -銷 -處 -ㄱ -語 -誘 -苝 -歸 -儀 -燒 -楿 -內 -粢 -葒 -奧 -麥 -礻 -滿 -蠔 -穵 -瞭 -態 -鱬 -榞 -硂 -鄭 -黃 -煙 -祐 -奓 -逺 -* -瑄 -獲 -聞 -薦 -讀 -這 -樣 -決 -問 -啟 -們 -執 -説 -轉 -單 -隨 -唘 -帶 -倉 -庫 -還 -贈 -尙 -皺 -■ -餅 -產 -○ -∈ -報 -狀 -楓 -賠 -琯 -嗮 -禮 -` -傳 -> -≤ -嗞 -Φ -≥ -換 -咭 -∣ -↓ -曬 -ε -応 -寫 -″ -終 -様 -純 -費 -療 -聨 -凍 -壐 -郵 -ü -黒 -∫ -製 -塊 -調 -軽 -確 -撃 -級 -馴 -Ⅲ -涇 -繹 -數 -碼 -證 -狒 -処 -劑 -< -晧 -賀 -衆 -] -櫥 -兩 -陰 -絶 -對 -鯉 -憶 -◎ -p -e -Y -蕒 -煖 -頓 -測 -試 -鼽 -僑 -碩 -妝 -帯 -≈ -鐡 -舖 -權 -喫 -倆 -ˋ -該 -悅 -ā -俫 -. -f -s -b -m -k -g -u -j -貼 -淨 -濕 -針 -適 -備 -l -/ -給 -謢 -強 -觸 -衛 -與 -⊙ -$ -緯 -變 -⑴ -⑵ -⑶ -㎏ -殺 -∩ -幚 -─ -價 -▲ -離 -ú -ó -飄 -烏 -関 -閟 -﹝ -﹞ -邏 -輯 -鍵 -驗 -訣 -導 -歷 -屆 -層 -▼ -儱 -錄 -熳 -ē -艦 -吋 -錶 -辧 -飼 -顯 -④ -禦 -販 -気 -対 -枰 -閩 -紀 -幹 -瞓 -貊 -淚 -△ -眞 -墊 -Ω -獻 -褲 -縫 -緑 -亜 -鉅 -餠 -{ -} -◆ -蘆 -薈 -█ -◇ -溫 -彈 -晳 -粧 -犸 -穩 -訊 -崬 -凖 -熥 -П -舊 -條 -紋 -圍 -Ⅳ -筆 -尷 -難 -雜 -錯 -綁 -識 -頰 -鎖 -艶 -□ -殁 -殼 -⑧ -├ -▕ -鵬 -ǐ -ō -ǒ -糝 -綱 -▎ -μ -盜 -饅 -醬 -籤 -蓋 -釀 -鹽 -據 -à -ɡ -辦 -◥ -彐 -┌ -婦 -獸 -鲩 -伱 -ī -蒟 -蒻 -齊 -袆 -腦 -寧 -凈 -妳 -煥 -詢 -偽 -謹 -啫 -鯽 -騷 -鱸 -損 -傷 -鎻 -髮 -買 -冏 -儥 -両 -﹢ -∞ -載 -喰 -z -羙 -悵 -燙 -曉 -員 -組 -徹 -艷 -痠 -鋼 -鼙 -縮 -細 -嚒 -爯 -≠ -維 -" -鱻 -壇 -厍 -帰 -浥 -犇 -薡 -軎 -² -應 -醜 -刪 -緻 -鶴 -賜 -噁 -軌 -尨 -镔 -鷺 -槗 -彌 -葚 -濛 -請 -溇 -緹 -賢 -訪 -獴 -瑅 -資 -縤 -陣 -蕟 -栢 -韻 -祼 -恁 -伢 -謝 -劃 -涑 -總 -衖 -踺 -砋 -凉 -籃 -駿 -苼 -瘋 -昽 -紡 -驊 -腎 -﹗ -響 -杋 -剛 -嚴 -禪 -歓 -槍 -傘 -檸 -檫 -炣 -勢 -鏜 -鎢 -銑 -尐 -減 -奪 -惡 -θ -僮 -婭 -臘 -ū -ì -殻 -鉄 -∑ -蛲 -焼 -緖 -續 -紹 -懮 \ No newline at end of file diff --git a/backend/ppocr/utils/dict/chinese_cht_dict.txt b/backend/ppocr/utils/dict/chinese_cht_dict.txt deleted file mode 100644 index cc1aa47..0000000 --- a/backend/ppocr/utils/dict/chinese_cht_dict.txt +++ /dev/null @@ -1,8421 +0,0 @@ -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -\ -] -^ -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -{ -| -} -~ -¥ -® -° -± -² -´ -· -» -É -Ë -Ó -× -Ü -à -á -ä -è -é -ì -í -ò -ó -÷ -ú -ü -ā -ē -ī -ō -ū -ǐ -ǒ -ɔ -ɡ -ʌ -ˋ -Λ -Ο -Φ -Ω -α -β -ε -θ -μ -π -З -И -Й -П -Я -г -— -‖ -‘ -’ -“ -” -• -… -‧ -′ -″ -※ -℃ -№ -™ -Ⅱ -Ⅲ -Ⅳ -← -↑ -→ -↓ -⇋ -∈ -∑ -√ -∞ -∣ -∧ -∩ -∫ -∶ -≈ -≠ -≤ -≥ -⊙ -⊥ -① -② -③ -④ -⑧ -⑴ -⑵ -⑶ -─ -│ -┅ -┌ -├ -█ -▎ -▏ -▕ -■ -□ -▪ -▲ -△ -▼ -◆ -◇ -○ -◎ -● -◥ -★ -☆ -❋ -❤ -  -、 -。 -〇 -〉 -《 -》 -「 -」 -『 -』 -【 -】 -〔 -〕 -〖 -〗 -の -サ -シ -ジ -マ -ㄱ -ㆍ -㎏ -㎡ -㐂 -㐱 -㙟 -㴪 -㸃 -䖝 -䝉 -䰾 -䲁 -一 -丁 -七 -丄 -丈 -三 -上 -下 -丌 -不 -与 -丏 -丐 -丑 -且 -丕 -世 -丘 -丙 -丞 -丟 -両 -並 -丨 -丫 -中 -丰 -串 -丶 -丸 -丹 -主 -丼 -丿 -乂 -乃 -久 -么 -之 -乍 -乎 -乏 -乒 -乓 -乖 -乗 -乘 -乙 -乚 -乜 -九 -乞 -也 -乩 -乭 -乳 -乸 -乹 -乾 -亀 -亂 -亅 -了 -予 -亊 -事 -二 -亍 -云 -互 -亓 -五 -井 -亘 -些 -亜 -亞 -亟 -亠 -亡 -亢 -交 -亥 -亦 -亨 -享 -京 -亭 -亮 -亰 -亳 -亶 -亹 -人 -亻 -什 -仁 -仂 -仃 -仄 -仇 -仉 -今 -介 -仍 -仏 -仔 -仕 -他 -仗 -付 -仙 -仛 -仝 -仞 -仟 -仡 -代 -令 -以 -仨 -仫 -仮 -仰 -仲 -仳 -仵 -件 -仺 -任 -仼 -份 -仿 -企 -伃 -伈 -伉 -伊 -伋 -伍 -伎 -伏 -伐 -休 -伕 -伙 -伝 -伢 -伯 -估 -伱 -伴 -伶 -伷 -伸 -伺 -似 -伽 -伾 -佀 -佁 -佃 -但 -佇 -佈 -佉 -佋 -位 -低 -住 -佐 -佑 -体 -佔 -何 -佗 -佘 -余 -佚 -佛 -作 -佝 -佞 -佟 -你 -佣 -佤 -佧 -佩 -佬 -佯 -佰 -佳 -併 -佶 -佹 -佺 -佼 -佾 -使 -侁 -侃 -侄 -侅 -來 -侈 -侊 -例 -侍 -侏 -侑 -侖 -侗 -侘 -侚 -供 -依 -侞 -価 -侮 -侯 -侵 -侶 -侷 -侹 -便 -俁 -係 -促 -俄 -俅 -俊 -俋 -俌 -俍 -俎 -俏 -俐 -俑 -俗 -俘 -俚 -俛 -保 -俞 -俟 -俠 -信 -俬 -修 -俯 -俱 -俳 -俴 -俵 -俶 -俸 -俺 -俽 -俾 -倆 -倈 -倉 -個 -倌 -倍 -們 -倒 -倓 -倔 -倖 -倗 -倘 -候 -倚 -倜 -倞 -借 -倡 -倢 -倣 -値 -倦 -倧 -倩 -倪 -倫 -倬 -倭 -倮 -倻 -值 -偁 -偃 -假 -偈 -偉 -偊 -偌 -偍 -偎 -偏 -偓 -偕 -做 -停 -健 -偪 -偲 -側 -偵 -偶 -偷 -偸 -偽 -傀 -傃 -傅 -傈 -傉 -傍 -傑 -傒 -傕 -傖 -傘 -備 -傜 -傢 -傣 -催 -傭 -傲 -傳 -債 -傷 -傻 -傾 -僅 -僉 -僊 -働 -像 -僑 -僔 -僕 -僖 -僙 -僚 -僜 -僡 -僧 -僩 -僭 -僮 -僰 -僱 -僳 -僴 -僵 -價 -僻 -儀 -儁 -儂 -億 -儆 -儇 -儈 -儉 -儋 -儐 -儒 -儔 -儕 -儘 -儚 -儞 -償 -儡 -儥 -儦 -優 -儫 -儱 -儲 -儷 -儺 -儻 -儼 -兀 -允 -元 -兄 -充 -兆 -先 -光 -克 -兌 -免 -児 -兒 -兔 -兕 -兗 -兜 -入 -內 -全 -兩 -兪 -八 -公 -六 -兮 -共 -兵 -其 -具 -典 -兼 -兿 -冀 -冂 -円 -冇 -冉 -冊 -再 -冏 -冑 -冒 -冕 -冖 -冗 -冚 -冠 -冢 -冤 -冥 -冧 -冨 -冪 -冫 -冬 -冮 -冰 -冴 -冶 -冷 -冼 -冽 -凃 -凄 -准 -凈 -凋 -凌 -凍 -凖 -凜 -凝 -凞 -几 -凡 -処 -凪 -凬 -凰 -凱 -凳 -凵 -凶 -凸 -凹 -出 -函 -刀 -刁 -刂 -刃 -刄 -分 -切 -刈 -刊 -刎 -刑 -划 -列 -初 -判 -別 -刦 -刧 -刨 -利 -刪 -刮 -到 -制 -刷 -券 -刺 -刻 -刼 -剁 -剃 -則 -削 -剋 -剌 -前 -剎 -剏 -剔 -剖 -剛 -剝 -剡 -剣 -剩 -剪 -剮 -副 -割 -創 -剿 -劃 -劄 -劇 -劈 -劉 -劊 -劌 -劍 -劑 -劔 -力 -功 -加 -劣 -助 -努 -劫 -劬 -劭 -劵 -効 -劼 -劾 -勁 -勃 -勅 -勇 -勉 -勐 -勑 -勒 -勔 -動 -勖 -勗 -勘 -務 -勛 -勝 -勞 -募 -勢 -勣 -勤 -勦 -勰 -勱 -勲 -勳 -勵 -勷 -勸 -勺 -勻 -勾 -勿 -匂 -匄 -包 -匆 -匈 -匋 -匍 -匏 -匐 -匕 -化 -北 -匙 -匚 -匝 -匠 -匡 -匣 -匪 -匯 -匱 -匸 -匹 -匾 -匿 -區 -十 -千 -卅 -升 -午 -卉 -半 -卋 -卍 -卐 -卑 -卒 -卓 -協 -南 -博 -卜 -卞 -卟 -占 -卡 -卣 -卦 -卧 -卩 -卬 -卮 -卯 -印 -危 -卲 -即 -卵 -卷 -卸 -卹 -卺 -卻 -卽 -卿 -厄 -厓 -厔 -厙 -厚 -厝 -原 -厥 -厭 -厰 -厲 -厴 -厶 -去 -參 -叄 -又 -叉 -及 -友 -反 -収 -叔 -叕 -取 -受 -叛 -叟 -叡 -叢 -口 -古 -句 -另 -叨 -叩 -只 -叫 -召 -叭 -叮 -可 -台 -叱 -史 -右 -叵 -司 -叻 -叼 -吁 -吃 -各 -吆 -合 -吉 -吊 -吋 -同 -名 -后 -吏 -吐 -向 -吒 -吔 -吖 -君 -吝 -吞 -吟 -吠 -吡 -吥 -否 -吧 -吩 -含 -吮 -吱 -吲 -吳 -吵 -吶 -吸 -吹 -吻 -吼 -吾 -呀 -呂 -呃 -呈 -呉 -告 -呋 -呎 -呢 -呤 -呦 -周 -呱 -味 -呵 -呷 -呸 -呼 -命 -呾 -咀 -咁 -咂 -咄 -咅 -咆 -咋 -和 -咎 -咑 -咒 -咔 -咕 -咖 -咗 -咘 -咚 -咟 -咤 -咥 -咧 -咨 -咩 -咪 -咫 -咬 -咭 -咯 -咱 -咲 -咳 -咸 -咻 -咼 -咽 -咾 -咿 -哀 -品 -哂 -哄 -哆 -哇 -哈 -哉 -哌 -哎 -哏 -哐 -哖 -哚 -哞 -員 -哥 -哦 -哨 -哩 -哪 -哭 -哮 -哱 -哲 -哺 -哼 -唃 -唄 -唆 -唇 -唉 -唏 -唐 -唑 -唔 -唘 -唧 -唫 -唬 -唭 -售 -唯 -唱 -唳 -唵 -唷 -唸 -唻 -唾 -啁 -啃 -啄 -商 -啉 -啊 -啍 -問 -啓 -啖 -啚 -啜 -啞 -啟 -啡 -啣 -啤 -啥 -啦 -啪 -啫 -啯 -啰 -啱 -啲 -啵 -啶 -啷 -啻 -啼 -啾 -喀 -喂 -喃 -善 -喆 -喇 -喈 -喉 -喊 -喋 -喏 -喔 -喘 -喙 -喚 -喜 -喝 -喢 -喦 -喧 -喪 -喫 -喬 -單 -喰 -喱 -喲 -喳 -喵 -喹 -喻 -喼 -嗄 -嗅 -嗆 -嗇 -嗊 -嗎 -嗑 -嗒 -嗓 -嗔 -嗖 -嗚 -嗜 -嗝 -嗞 -嗡 -嗢 -嗣 -嗦 -嗨 -嗩 -嗪 -嗮 -嗯 -嗲 -嗶 -嗹 -嗽 -嘀 -嘅 -嘆 -嘉 -嘌 -嘍 -嘎 -嘏 -嘔 -嘗 -嘚 -嘛 -嘜 -嘞 -嘟 -嘢 -嘣 -嘥 -嘧 -嘩 -嘬 -嘮 -嘯 -嘰 -嘲 -嘴 -嘶 -嘸 -嘹 -嘻 -嘿 -噁 -噌 -噍 -噏 -噓 -噗 -噝 -噠 -噢 -噤 -噥 -噦 -器 -噩 -噪 -噬 -噯 -噰 -噲 -噴 -噶 -噸 -噹 -噻 -嚇 -嚈 -嚎 -嚏 -嚐 -嚒 -嚓 -嚕 -嚗 -嚙 -嚞 -嚟 -嚤 -嚦 -嚧 -嚨 -嚩 -嚮 -嚳 -嚴 -嚶 -嚷 -嚼 -嚿 -囀 -囂 -囃 -囉 -囊 -囍 -囑 -囒 -囓 -囗 -囚 -四 -囝 -回 -因 -囡 -団 -囤 -囧 -囪 -囮 -囯 -困 -囲 -図 -囶 -囷 -囹 -固 -囿 -圂 -圃 -圄 -圈 -圉 -國 -圍 -圏 -園 -圓 -圖 -圗 -團 -圜 -土 -圧 -在 -圩 -圪 -圭 -圯 -地 -圳 -圻 -圾 -址 -均 -坊 -坋 -坌 -坍 -坎 -坐 -坑 -坖 -坡 -坣 -坤 -坦 -坨 -坩 -坪 -坫 -坬 -坭 -坮 -坯 -坳 -坵 -坶 -坷 -坻 -垂 -垃 -垈 -型 -垍 -垓 -垕 -垚 -垛 -垞 -垟 -垠 -垢 -垣 -垮 -垯 -垰 -垵 -垸 -垻 -垿 -埃 -埅 -埇 -埈 -埋 -埌 -城 -埏 -埒 -埔 -埕 -埗 -埜 -域 -埠 -埡 -埤 -埧 -埨 -埪 -埭 -埮 -埴 -埵 -執 -培 -基 -埻 -埼 -堀 -堂 -堃 -堅 -堆 -堇 -堈 -堉 -堊 -堍 -堖 -堝 -堡 -堤 -堦 -堪 -堮 -堯 -堰 -報 -場 -堵 -堷 -堺 -塀 -塅 -塆 -塊 -塋 -塌 -塍 -塏 -塑 -塔 -塗 -塘 -塙 -塜 -塞 -塡 -塢 -塤 -塨 -塩 -填 -塬 -塭 -塰 -塱 -塲 -塵 -塹 -塽 -塾 -墀 -境 -墅 -墉 -墊 -墎 -墓 -増 -墘 -墜 -增 -墟 -墡 -墣 -墨 -墩 -墫 -墬 -墮 -墱 -墳 -墺 -墼 -墾 -壁 -壄 -壆 -壇 -壋 -壌 -壎 -壐 -壑 -壓 -壔 -壕 -壘 -壙 -壞 -壟 -壠 -壢 -壤 -壩 -士 -壬 -壯 -壱 -壴 -壹 -壺 -壽 -夀 -夆 -変 -夊 -夋 -夌 -夏 -夔 -夕 -外 -夙 -多 -夜 -夠 -夢 -夤 -夥 -大 -天 -太 -夫 -夬 -夭 -央 -夯 -失 -夷 -夾 -奀 -奄 -奇 -奈 -奉 -奎 -奏 -奐 -契 -奓 -奔 -奕 -套 -奘 -奚 -奠 -奢 -奣 -奧 -奩 -奪 -奫 -奭 -奮 -女 -奴 -奶 -她 -好 -妀 -妁 -如 -妃 -妄 -妊 -妍 -妏 -妑 -妒 -妓 -妖 -妙 -妝 -妞 -妠 -妤 -妥 -妧 -妨 -妭 -妮 -妯 -妲 -妳 -妸 -妹 -妺 -妻 -妾 -姀 -姁 -姃 -姆 -姈 -姉 -姊 -始 -姌 -姍 -姐 -姑 -姒 -姓 -委 -姚 -姜 -姝 -姣 -姥 -姦 -姨 -姪 -姫 -姬 -姮 -姵 -姶 -姸 -姻 -姿 -威 -娃 -娉 -娋 -娌 -娍 -娎 -娑 -娖 -娘 -娛 -娜 -娟 -娠 -娣 -娥 -娩 -娫 -娳 -娶 -娸 -娼 -娽 -婀 -婁 -婆 -婉 -婊 -婑 -婕 -婚 -婢 -婦 -婧 -婪 -婭 -婯 -婷 -婺 -婻 -婼 -婿 -媃 -媄 -媊 -媐 -媒 -媓 -媖 -媗 -媚 -媛 -媜 -媞 -媧 -媭 -媯 -媲 -媳 -媺 -媼 -媽 -媾 -媿 -嫁 -嫂 -嫄 -嫈 -嫉 -嫌 -嫖 -嫘 -嫚 -嫡 -嫣 -嫦 -嫩 -嫪 -嫲 -嫳 -嫵 -嫺 -嫻 -嬅 -嬈 -嬉 -嬋 -嬌 -嬗 -嬛 -嬝 -嬡 -嬤 -嬨 -嬪 -嬬 -嬭 -嬰 -嬴 -嬸 -嬾 -嬿 -孀 -孃 -孆 -孋 -孌 -子 -孑 -孔 -孕 -孖 -字 -存 -孚 -孛 -孜 -孝 -孟 -孢 -季 -孤 -孩 -孫 -孬 -孮 -孰 -孳 -孵 -學 -孺 -孻 -孽 -孿 -宀 -它 -宅 -宇 -守 -安 -宋 -完 -宍 -宏 -宓 -宕 -宗 -官 -宙 -定 -宛 -宜 -実 -客 -宣 -室 -宥 -宦 -宧 -宮 -宰 -害 -宴 -宵 -家 -宸 -容 -宿 -寀 -寁 -寂 -寄 -寅 -密 -寇 -寈 -寊 -富 -寐 -寒 -寓 -寔 -寕 -寖 -寗 -寘 -寛 -寜 -寞 -察 -寡 -寢 -寤 -寥 -實 -寧 -寨 -審 -寫 -寬 -寮 -寯 -寰 -寳 -寵 -寶 -寸 -寺 -対 -封 -専 -尃 -射 -將 -專 -尉 -尊 -尋 -對 -導 -小 -尐 -少 -尓 -尕 -尖 -尗 -尙 -尚 -尢 -尤 -尨 -尪 -尬 -就 -尷 -尹 -尺 -尻 -尼 -尾 -尿 -局 -屁 -屄 -居 -屆 -屇 -屈 -屋 -屌 -屍 -屎 -屏 -屐 -屑 -屓 -展 -屚 -屜 -屠 -屢 -層 -履 -屬 -屭 -屯 -山 -屹 -屺 -屻 -岀 -岈 -岌 -岐 -岑 -岔 -岡 -岢 -岣 -岧 -岩 -岪 -岫 -岬 -岰 -岱 -岳 -岵 -岷 -岸 -岻 -峁 -峅 -峇 -峋 -峍 -峒 -峘 -峙 -峚 -峠 -峨 -峩 -峪 -峭 -峯 -峰 -峴 -島 -峻 -峼 -峽 -崁 -崆 -崇 -崈 -崋 -崍 -崎 -崐 -崑 -崒 -崔 -崖 -崗 -崘 -崙 -崚 -崛 -崞 -崟 -崠 -崢 -崤 -崧 -崩 -崬 -崮 -崱 -崴 -崵 -崶 -崽 -嵇 -嵊 -嵋 -嵌 -嵎 -嵐 -嵒 -嵕 -嵖 -嵗 -嵙 -嵛 -嵜 -嵨 -嵩 -嵬 -嵮 -嵯 -嵰 -嵴 -嵻 -嵿 -嶁 -嶂 -嶃 -嶄 -嶇 -嶋 -嶌 -嶍 -嶒 -嶔 -嶗 -嶝 -嶠 -嶢 -嶦 -嶧 -嶪 -嶬 -嶰 -嶲 -嶴 -嶷 -嶸 -嶺 -嶼 -嶽 -巂 -巄 -巆 -巋 -巌 -巍 -巎 -巑 -巒 -巔 -巖 -巘 -巛 -川 -州 -巡 -巢 -工 -左 -巧 -巨 -巫 -差 -巰 -己 -已 -巳 -巴 -巶 -巷 -巻 -巽 -巾 -巿 -市 -布 -帆 -希 -帑 -帔 -帕 -帖 -帘 -帙 -帚 -帛 -帝 -帡 -帢 -帥 -師 -席 -帯 -帰 -帳 -帶 -帷 -常 -帽 -幀 -幃 -幄 -幅 -幌 -幔 -幕 -幗 -幚 -幛 -幟 -幡 -幢 -幣 -幪 -幫 -干 -平 -年 -幵 -幷 -幸 -幹 -幺 -幻 -幼 -幽 -幾 -庀 -庁 -広 -庇 -床 -序 -底 -庖 -店 -庚 -府 -庠 -庢 -庥 -度 -座 -庫 -庭 -庲 -庵 -庶 -康 -庸 -庹 -庼 -庾 -廁 -廂 -廄 -廆 -廈 -廉 -廊 -廋 -廌 -廍 -廑 -廓 -廔 -廕 -廖 -廙 -廚 -廝 -廞 -廟 -廠 -廡 -廢 -廣 -廧 -廨 -廩 -廬 -廰 -廱 -廳 -延 -廷 -廸 -建 -廻 -廼 -廿 -弁 -弄 -弅 -弇 -弈 -弉 -弊 -弋 -弍 -式 -弐 -弒 -弓 -弔 -引 -弖 -弗 -弘 -弛 -弟 -弢 -弦 -弧 -弨 -弩 -弭 -弱 -張 -強 -弸 -弼 -弾 -彀 -彄 -彅 -彆 -彈 -彊 -彌 -彎 -彐 -彔 -彖 -彗 -彘 -彙 -彜 -彞 -彠 -彡 -形 -彣 -彤 -彥 -彧 -彩 -彪 -彫 -彬 -彭 -彰 -影 -彳 -彷 -役 -彼 -彿 -往 -征 -徂 -待 -徇 -很 -徉 -徊 -律 -後 -徐 -徑 -徒 -得 -徘 -徙 -徜 -從 -徠 -御 -徧 -徨 -復 -循 -徫 -徬 -徭 -微 -徳 -徴 -徵 -德 -徸 -徹 -徽 -心 -忄 -必 -忉 -忌 -忍 -忐 -忑 -忒 -志 -忘 -忙 -応 -忝 -忞 -忠 -快 -忬 -忯 -忱 -忳 -念 -忻 -忽 -忿 -怍 -怎 -怒 -怕 -怖 -怙 -怛 -思 -怠 -怡 -急 -怦 -性 -怨 -怪 -怯 -怵 -恁 -恂 -恃 -恆 -恊 -恍 -恐 -恕 -恙 -恢 -恣 -恤 -恥 -恨 -恩 -恪 -恬 -恭 -息 -恰 -恵 -恿 -悄 -悅 -悆 -悉 -悌 -悍 -悔 -悖 -悚 -悛 -悝 -悞 -悟 -悠 -患 -悧 -您 -悪 -悰 -悲 -悳 -悵 -悶 -悸 -悼 -情 -惆 -惇 -惑 -惔 -惕 -惘 -惚 -惜 -惟 -惠 -惡 -惣 -惦 -惰 -惱 -惲 -想 -惶 -惹 -惺 -愁 -愃 -愆 -愈 -愉 -愍 -意 -愐 -愒 -愔 -愕 -愚 -愛 -愜 -感 -愣 -愧 -愨 -愫 -愭 -愴 -愷 -愼 -愾 -愿 -慄 -慈 -態 -慌 -慎 -慕 -慘 -慚 -慜 -慟 -慢 -慣 -慥 -慧 -慨 -慮 -慰 -慳 -慵 -慶 -慷 -慾 -憂 -憊 -憋 -憍 -憎 -憐 -憑 -憓 -憕 -憙 -憚 -憤 -憧 -憨 -憩 -憫 -憬 -憲 -憶 -憺 -憻 -憾 -懂 -懃 -懇 -懈 -應 -懋 -懌 -懍 -懐 -懣 -懦 -懮 -懲 -懵 -懶 -懷 -懸 -懺 -懼 -懽 -懾 -懿 -戀 -戇 -戈 -戊 -戌 -戍 -戎 -成 -我 -戒 -戔 -戕 -或 -戙 -戚 -戛 -戟 -戡 -戢 -戥 -戦 -戩 -截 -戮 -戰 -戱 -戲 -戳 -戴 -戶 -戸 -戻 -戽 -戾 -房 -所 -扁 -扆 -扇 -扈 -扉 -手 -扌 -才 -扎 -扒 -打 -扔 -托 -扙 -扛 -扞 -扣 -扥 -扦 -扭 -扮 -扯 -扳 -扶 -批 -扼 -找 -承 -技 -抃 -抄 -抇 -抉 -把 -抑 -抒 -抓 -投 -抖 -抗 -折 -抦 -披 -抬 -抱 -抵 -抹 -抻 -押 -抽 -抿 -拂 -拆 -拇 -拈 -拉 -拋 -拌 -拍 -拎 -拏 -拐 -拒 -拓 -拔 -拖 -拗 -拘 -拙 -拚 -招 -拜 -拝 -拡 -括 -拭 -拮 -拯 -拱 -拳 -拴 -拷 -拺 -拼 -拽 -拾 -拿 -持 -指 -按 -挎 -挑 -挖 -挙 -挨 -挪 -挫 -振 -挲 -挵 -挹 -挺 -挻 -挾 -捂 -捆 -捉 -捌 -捍 -捎 -捏 -捐 -捒 -捕 -捜 -捦 -捧 -捨 -捩 -捫 -捭 -捱 -捲 -捶 -捷 -捺 -捻 -掀 -掂 -掃 -掄 -掇 -授 -掉 -掌 -掏 -掐 -排 -掖 -掘 -掙 -掛 -掞 -掟 -掠 -採 -探 -掣 -接 -控 -推 -掩 -措 -掬 -掰 -掾 -揀 -揄 -揆 -揉 -揍 -描 -提 -插 -揔 -揖 -揚 -換 -握 -揪 -揭 -揮 -援 -揸 -揺 -損 -搏 -搐 -搓 -搔 -搖 -搗 -搜 -搞 -搠 -搢 -搪 -搬 -搭 -搳 -搴 -搵 -搶 -搽 -搾 -摂 -摒 -摔 -摘 -摜 -摞 -摟 -摠 -摧 -摩 -摭 -摯 -摳 -摴 -摵 -摶 -摸 -摹 -摺 -摻 -摽 -撃 -撇 -撈 -撐 -撒 -撓 -撕 -撖 -撙 -撚 -撞 -撣 -撤 -撥 -撩 -撫 -撬 -播 -撮 -撰 -撲 -撳 -撻 -撼 -撾 -撿 -擀 -擁 -擂 -擅 -擇 -擊 -擋 -操 -擎 -擒 -擔 -擘 -據 -擠 -擢 -擥 -擦 -擬 -擯 -擰 -擱 -擲 -擴 -擷 -擺 -擼 -擾 -攀 -攏 -攔 -攖 -攘 -攜 -攝 -攞 -攢 -攣 -攤 -攪 -攫 -攬 -支 -攴 -攵 -收 -攷 -攸 -改 -攻 -攽 -放 -政 -故 -效 -敍 -敎 -敏 -救 -敔 -敕 -敖 -敗 -敘 -教 -敝 -敞 -敟 -敢 -散 -敦 -敫 -敬 -敭 -敲 -整 -敵 -敷 -數 -敻 -敾 -斂 -斃 -文 -斌 -斎 -斐 -斑 -斕 -斖 -斗 -料 -斛 -斜 -斝 -斟 -斡 -斤 -斥 -斧 -斬 -斯 -新 -斷 -方 -於 -施 -斿 -旁 -旂 -旃 -旄 -旅 -旉 -旋 -旌 -旎 -族 -旖 -旗 -旙 -旛 -旡 -既 -日 -旦 -旨 -早 -旬 -旭 -旱 -旲 -旳 -旺 -旻 -旼 -旽 -旾 -旿 -昀 -昂 -昃 -昆 -昇 -昉 -昊 -昌 -昍 -明 -昏 -昐 -易 -昔 -昕 -昚 -昛 -昜 -昝 -昞 -星 -映 -昡 -昣 -昤 -春 -昧 -昨 -昪 -昫 -昭 -是 -昰 -昱 -昴 -昵 -昶 -昺 -晁 -時 -晃 -晈 -晉 -晊 -晏 -晗 -晙 -晚 -晛 -晝 -晞 -晟 -晤 -晦 -晧 -晨 -晩 -晪 -晫 -晭 -普 -景 -晰 -晳 -晴 -晶 -晷 -晸 -智 -晾 -暃 -暄 -暅 -暇 -暈 -暉 -暊 -暌 -暎 -暏 -暐 -暑 -暕 -暖 -暗 -暘 -暝 -暟 -暠 -暢 -暦 -暨 -暫 -暮 -暱 -暲 -暴 -暸 -暹 -暻 -暾 -曄 -曅 -曆 -曇 -曉 -曌 -曔 -曖 -曙 -曜 -曝 -曠 -曦 -曧 -曨 -曩 -曬 -曮 -曰 -曲 -曳 -更 -曶 -曷 -書 -曹 -曺 -曼 -曽 -曾 -替 -最 -會 -月 -有 -朊 -朋 -服 -朏 -朐 -朓 -朔 -朕 -朖 -朗 -望 -朝 -期 -朦 -朧 -木 -未 -末 -本 -札 -朱 -朴 -朵 -朶 -朽 -朿 -杁 -杉 -杋 -杌 -李 -杏 -材 -村 -杓 -杖 -杙 -杜 -杞 -束 -杠 -杣 -杤 -杧 -杬 -杭 -杯 -東 -杲 -杳 -杴 -杵 -杷 -杻 -杼 -松 -板 -极 -枇 -枉 -枋 -枏 -析 -枕 -枖 -林 -枚 -枛 -果 -枝 -枠 -枡 -枯 -枰 -枱 -枲 -枳 -架 -枷 -枸 -枹 -枼 -柁 -柃 -柄 -柉 -柊 -柎 -柏 -某 -柑 -柒 -染 -柔 -柘 -柚 -柜 -柝 -柞 -柟 -查 -柩 -柬 -柯 -柰 -柱 -柳 -柴 -柵 -柶 -柷 -査 -柾 -柿 -栃 -栄 -栐 -栒 -栓 -栜 -栝 -栞 -校 -栢 -栨 -栩 -株 -栲 -栴 -核 -根 -栻 -格 -栽 -桀 -桁 -桂 -桃 -桄 -桅 -框 -案 -桉 -桌 -桎 -桐 -桑 -桓 -桔 -桕 -桖 -桙 -桜 -桝 -桫 -桱 -桲 -桴 -桶 -桷 -桼 -桿 -梀 -梁 -梂 -梃 -梅 -梆 -梉 -梏 -梓 -梔 -梗 -梘 -條 -梟 -梠 -梢 -梣 -梧 -梨 -梫 -梭 -梯 -械 -梱 -梳 -梵 -梶 -梽 -棄 -棆 -棉 -棋 -棍 -棐 -棒 -棓 -棕 -棖 -棗 -棘 -棚 -棛 -棟 -棠 -棡 -棣 -棧 -棨 -棩 -棪 -棫 -森 -棱 -棲 -棵 -棶 -棹 -棺 -棻 -棼 -棽 -椅 -椆 -椇 -椋 -植 -椎 -椏 -椒 -椙 -椥 -椪 -椰 -椲 -椴 -椵 -椹 -椽 -椿 -楂 -楊 -楓 -楔 -楗 -楙 -楚 -楝 -楞 -楠 -楡 -楢 -楣 -楤 -楦 -楧 -楨 -楫 -業 -楮 -楯 -楳 -極 -楷 -楸 -楹 -楽 -楿 -概 -榆 -榊 -榍 -榎 -榑 -榔 -榕 -榖 -榗 -榘 -榛 -榜 -榞 -榢 -榣 -榤 -榦 -榧 -榨 -榫 -榭 -榮 -榲 -榴 -榷 -榻 -榿 -槀 -槁 -槃 -槊 -構 -槌 -槍 -槎 -槐 -槓 -槔 -槗 -様 -槙 -槤 -槩 -槭 -槰 -槱 -槲 -槳 -槺 -槻 -槼 -槽 -槿 -樀 -樁 -樂 -樅 -樆 -樊 -樋 -樑 -樓 -樗 -樘 -標 -樞 -樟 -模 -樣 -樨 -権 -樫 -樵 -樸 -樹 -樺 -樻 -樽 -樾 -橄 -橇 -橈 -橋 -橐 -橒 -橓 -橘 -橙 -橚 -機 -橡 -橢 -橪 -橫 -橿 -檀 -檄 -檇 -檉 -檊 -檎 -檐 -檔 -檗 -檜 -檞 -檠 -檡 -檢 -檣 -檦 -檨 -檫 -檬 -檯 -檳 -檵 -檸 -檻 -檽 -櫂 -櫃 -櫆 -櫈 -櫓 -櫚 -櫛 -櫞 -櫟 -櫥 -櫨 -櫪 -櫱 -櫸 -櫻 -櫾 -櫿 -欄 -欉 -權 -欏 -欒 -欖 -欞 -欠 -次 -欣 -欥 -欲 -欸 -欹 -欺 -欽 -款 -歆 -歇 -歉 -歊 -歌 -歎 -歐 -歓 -歙 -歛 -歡 -止 -正 -此 -步 -武 -歧 -歩 -歪 -歲 -歳 -歴 -歷 -歸 -歹 -死 -歿 -殂 -殃 -殄 -殆 -殉 -殊 -殑 -殖 -殘 -殛 -殞 -殟 -殤 -殭 -殮 -殯 -殲 -殳 -段 -殷 -殺 -殻 -殼 -殿 -毀 -毅 -毆 -毉 -毋 -毌 -母 -毎 -每 -毐 -毒 -毓 -比 -毖 -毗 -毘 -毛 -毫 -毬 -毯 -毴 -毸 -毽 -毿 -氂 -氈 -氍 -氏 -氐 -民 -氓 -氖 -気 -氘 -氙 -氚 -氛 -氟 -氣 -氦 -氧 -氨 -氪 -氫 -氬 -氮 -氯 -氰 -水 -氵 -氷 -永 -氹 -氻 -氽 -氾 -汀 -汁 -求 -汊 -汎 -汐 -汕 -汗 -汛 -汜 -汝 -汞 -江 -池 -污 -汧 -汨 -汩 -汪 -汭 -汰 -汲 -汴 -汶 -決 -汽 -汾 -沁 -沂 -沃 -沄 -沅 -沆 -沇 -沈 -沉 -沌 -沍 -沏 -沐 -沒 -沓 -沔 -沖 -沘 -沙 -沚 -沛 -沜 -沢 -沨 -沫 -沭 -沮 -沯 -沱 -河 -沸 -油 -沺 -治 -沼 -沽 -沾 -沿 -況 -泂 -泄 -泆 -泇 -泉 -泊 -泌 -泐 -泓 -泔 -法 -泖 -泗 -泚 -泛 -泠 -泡 -波 -泣 -泥 -泩 -泫 -泮 -泯 -泰 -泱 -泳 -泵 -洄 -洋 -洌 -洎 -洗 -洙 -洛 -洞 -洢 -洣 -洤 -津 -洨 -洩 -洪 -洮 -洱 -洲 -洳 -洵 -洸 -洹 -洺 -活 -洽 -派 -流 -浄 -浙 -浚 -浛 -浜 -浞 -浟 -浠 -浡 -浣 -浤 -浥 -浦 -浩 -浪 -浮 -浯 -浴 -浵 -海 -浸 -浹 -涅 -涇 -消 -涉 -涌 -涎 -涑 -涓 -涔 -涕 -涙 -涪 -涫 -涮 -涯 -液 -涵 -涸 -涼 -涿 -淄 -淅 -淆 -淇 -淋 -淌 -淍 -淎 -淏 -淑 -淓 -淖 -淘 -淙 -淚 -淛 -淝 -淞 -淠 -淡 -淤 -淥 -淦 -淨 -淩 -淪 -淫 -淬 -淮 -淯 -淰 -深 -淳 -淵 -淶 -混 -淸 -淹 -淺 -添 -淼 -淽 -渃 -清 -済 -渉 -渋 -渕 -渙 -渚 -減 -渝 -渟 -渠 -渡 -渣 -渤 -渥 -渦 -渫 -測 -渭 -港 -渲 -渴 -游 -渺 -渼 -渽 -渾 -湃 -湄 -湉 -湊 -湍 -湓 -湔 -湖 -湘 -湛 -湜 -湞 -湟 -湣 -湥 -湧 -湫 -湮 -湯 -湳 -湴 -湼 -満 -溁 -溇 -溈 -溉 -溋 -溎 -溏 -源 -準 -溙 -溜 -溝 -溟 -溢 -溥 -溦 -溧 -溪 -溫 -溯 -溱 -溲 -溴 -溵 -溶 -溺 -溼 -滀 -滁 -滂 -滄 -滅 -滇 -滈 -滉 -滋 -滌 -滎 -滏 -滑 -滓 -滔 -滕 -滘 -滙 -滝 -滬 -滯 -滲 -滴 -滷 -滸 -滹 -滻 -滽 -滾 -滿 -漁 -漂 -漆 -漇 -漈 -漎 -漏 -漓 -演 -漕 -漚 -漠 -漢 -漣 -漩 -漪 -漫 -漬 -漯 -漱 -漲 -漳 -漴 -漵 -漷 -漸 -漼 -漾 -漿 -潁 -潑 -潔 -潘 -潛 -潞 -潟 -潢 -潤 -潭 -潮 -潯 -潰 -潲 -潺 -潼 -潽 -潾 -潿 -澀 -澁 -澂 -澄 -澆 -澇 -澈 -澉 -澋 -澌 -澍 -澎 -澔 -澗 -澠 -澡 -澣 -澤 -澥 -澧 -澪 -澮 -澯 -澱 -澳 -澶 -澹 -澻 -激 -濁 -濂 -濃 -濉 -濊 -濋 -濕 -濘 -濙 -濛 -濞 -濟 -濠 -濡 -濤 -濫 -濬 -濮 -濯 -濰 -濱 -濲 -濶 -濺 -濼 -濾 -瀁 -瀅 -瀆 -瀉 -瀍 -瀏 -瀑 -瀔 -瀕 -瀘 -瀚 -瀛 -瀝 -瀞 -瀟 -瀠 -瀣 -瀦 -瀧 -瀨 -瀬 -瀰 -瀲 -瀴 -瀶 -瀹 -瀾 -灃 -灊 -灌 -灑 -灘 -灝 -灞 -灡 -灣 -灤 -灧 -火 -灰 -灴 -灸 -灼 -災 -炁 -炅 -炆 -炊 -炎 -炒 -炔 -炕 -炘 -炙 -炟 -炣 -炤 -炫 -炬 -炭 -炮 -炯 -炱 -炲 -炳 -炷 -炸 -為 -炻 -烈 -烉 -烊 -烋 -烏 -烒 -烔 -烘 -烙 -烜 -烝 -烤 -烯 -烱 -烴 -烷 -烹 -烺 -烽 -焃 -焄 -焉 -焊 -焌 -焓 -焗 -焙 -焚 -焜 -焞 -無 -焦 -焯 -焰 -焱 -焴 -然 -焻 -焼 -焿 -煇 -煉 -煊 -煌 -煎 -煐 -煒 -煔 -煕 -煖 -煙 -煚 -煜 -煞 -煠 -煤 -煥 -煦 -照 -煨 -煩 -煬 -煮 -煲 -煳 -煵 -煶 -煸 -煽 -熄 -熅 -熇 -熈 -熊 -熏 -熒 -熔 -熖 -熗 -熘 -熙 -熜 -熟 -熠 -熤 -熥 -熨 -熬 -熯 -熱 -熲 -熳 -熵 -熹 -熺 -熼 -熾 -熿 -燁 -燃 -燄 -燈 -燉 -燊 -燎 -燏 -燐 -燒 -燔 -燕 -燘 -燙 -燚 -燜 -燝 -營 -燥 -燦 -燧 -燫 -燬 -燭 -燮 -燴 -燹 -燻 -燼 -燾 -燿 -爀 -爆 -爌 -爍 -爐 -爔 -爚 -爛 -爝 -爨 -爪 -爬 -爭 -爯 -爰 -爲 -爵 -父 -爸 -爹 -爺 -爻 -爽 -爾 -爿 -牁 -牂 -牆 -片 -版 -牌 -牒 -牕 -牖 -牘 -牙 -牛 -牝 -牟 -牠 -牡 -牢 -牧 -物 -牯 -牲 -特 -牻 -牼 -牽 -犀 -犁 -犂 -犇 -犍 -犎 -犖 -犛 -犢 -犧 -犨 -犬 -犯 -犰 -犴 -犽 -狀 -狂 -狄 -狍 -狎 -狐 -狒 -狓 -狗 -狙 -狛 -狟 -狠 -狡 -狦 -狨 -狩 -狳 -狶 -狷 -狸 -狹 -狻 -狼 -猁 -猄 -猇 -猊 -猗 -猙 -猛 -猜 -猝 -猞 -猢 -猥 -猨 -猩 -猳 -猴 -猶 -猷 -猺 -猻 -猾 -猿 -獁 -獃 -獄 -獅 -獇 -獎 -獏 -獐 -獒 -獠 -獢 -獣 -獨 -獬 -獮 -獯 -獰 -獲 -獴 -獵 -獷 -獸 -獺 -獻 -獼 -獾 -玀 -玄 -玆 -率 -玉 -王 -玎 -玏 -玓 -玕 -玖 -玗 -玘 -玙 -玟 -玠 -玡 -玢 -玥 -玧 -玨 -玩 -玫 -玭 -玲 -玳 -玶 -玷 -玹 -玻 -玾 -珀 -珂 -珅 -珈 -珉 -珊 -珌 -珍 -珎 -珏 -珖 -珙 -珝 -珞 -珠 -珡 -珣 -珤 -珥 -珦 -珧 -珩 -珪 -班 -珮 -珵 -珹 -珺 -珽 -現 -琁 -球 -琄 -琅 -理 -琇 -琉 -琊 -琍 -琎 -琚 -琛 -琡 -琢 -琤 -琥 -琦 -琨 -琪 -琬 -琮 -琯 -琰 -琱 -琳 -琴 -琵 -琶 -琹 -琺 -琿 -瑀 -瑁 -瑂 -瑄 -瑅 -瑆 -瑈 -瑊 -瑋 -瑑 -瑒 -瑕 -瑗 -瑙 -瑚 -瑛 -瑜 -瑝 -瑞 -瑟 -瑠 -瑢 -瑣 -瑤 -瑥 -瑧 -瑨 -瑩 -瑪 -瑭 -瑯 -瑰 -瑱 -瑳 -瑴 -瑺 -瑾 -璀 -璁 -璃 -璄 -璆 -璇 -璈 -璉 -璋 -璌 -璐 -璕 -璘 -璙 -璚 -璜 -璞 -璟 -璠 -璡 -璣 -璥 -璦 -璧 -璨 -璩 -璪 -璫 -璬 -璮 -環 -璱 -璵 -璸 -璹 -璽 -璿 -瓈 -瓊 -瓌 -瓏 -瓑 -瓔 -瓖 -瓘 -瓚 -瓛 -瓜 -瓞 -瓠 -瓢 -瓣 -瓤 -瓦 -瓮 -瓴 -瓶 -瓷 -瓿 -甂 -甄 -甌 -甍 -甑 -甕 -甘 -甙 -甚 -甜 -生 -甡 -產 -産 -甥 -甦 -用 -甩 -甪 -甫 -甬 -甯 -田 -由 -甲 -申 -男 -甸 -甹 -町 -甾 -畀 -畇 -畈 -畊 -畋 -界 -畎 -畏 -畐 -畑 -畔 -留 -畜 -畝 -畠 -畢 -略 -畦 -畧 -番 -畫 -畬 -畯 -異 -畲 -畳 -畵 -當 -畷 -畸 -畹 -畿 -疃 -疆 -疇 -疊 -疋 -疌 -疍 -疏 -疑 -疒 -疕 -疙 -疚 -疝 -疣 -疤 -疥 -疫 -疲 -疳 -疵 -疸 -疹 -疼 -疽 -疾 -痂 -病 -症 -痊 -痍 -痔 -痕 -痘 -痙 -痛 -痞 -痟 -痠 -痢 -痣 -痤 -痧 -痩 -痰 -痱 -痲 -痴 -痹 -痺 -痿 -瘀 -瘁 -瘊 -瘋 -瘍 -瘓 -瘙 -瘜 -瘞 -瘟 -瘠 -瘡 -瘢 -瘤 -瘦 -瘧 -瘩 -瘰 -瘴 -瘺 -癀 -療 -癆 -癇 -癌 -癒 -癖 -癘 -癜 -癟 -癡 -癢 -癤 -癥 -癩 -癬 -癭 -癮 -癯 -癰 -癱 -癲 -癸 -発 -登 -發 -白 -百 -皂 -的 -皆 -皇 -皈 -皋 -皎 -皐 -皓 -皖 -皙 -皚 -皛 -皝 -皞 -皮 -皰 -皴 -皷 -皸 -皺 -皿 -盂 -盃 -盅 -盆 -盈 -益 -盋 -盌 -盎 -盒 -盔 -盛 -盜 -盞 -盟 -盡 -監 -盤 -盥 -盦 -盧 -盨 -盩 -盪 -盫 -目 -盯 -盱 -盲 -直 -盷 -相 -盹 -盺 -盼 -盾 -眀 -省 -眉 -看 -県 -眙 -眛 -眜 -眞 -真 -眠 -眥 -眨 -眩 -眭 -眯 -眵 -眶 -眷 -眸 -眺 -眼 -眾 -着 -睇 -睛 -睜 -睞 -睡 -睢 -督 -睥 -睦 -睨 -睪 -睫 -睭 -睹 -睺 -睽 -睾 -睿 -瞄 -瞅 -瞋 -瞌 -瞎 -瞑 -瞓 -瞞 -瞢 -瞥 -瞧 -瞪 -瞫 -瞬 -瞭 -瞰 -瞳 -瞻 -瞼 -瞽 -瞿 -矇 -矍 -矗 -矚 -矛 -矜 -矞 -矢 -矣 -知 -矧 -矩 -短 -矮 -矯 -石 -矸 -矽 -砂 -砋 -砌 -砍 -砒 -研 -砝 -砢 -砥 -砦 -砧 -砩 -砫 -砭 -砮 -砯 -砰 -砲 -砳 -破 -砵 -砷 -砸 -砼 -硂 -硃 -硅 -硇 -硏 -硐 -硒 -硓 -硚 -硜 -硝 -硤 -硨 -硫 -硬 -硭 -硯 -硼 -碁 -碇 -碉 -碌 -碎 -碑 -碓 -碕 -碗 -碘 -碚 -碟 -碡 -碣 -碧 -碩 -碪 -碭 -碰 -碲 -碳 -碴 -碶 -碸 -確 -碻 -碼 -碽 -碾 -磁 -磅 -磊 -磋 -磐 -磔 -磕 -磘 -磙 -磚 -磜 -磡 -磨 -磪 -磬 -磯 -磱 -磲 -磵 -磷 -磺 -磻 -磾 -礁 -礄 -礎 -礐 -礑 -礒 -礙 -礠 -礦 -礪 -礫 -礬 -礮 -礱 -礴 -示 -礻 -礽 -社 -祀 -祁 -祂 -祆 -祇 -祈 -祉 -祋 -祏 -祐 -祓 -祕 -祖 -祗 -祙 -祚 -祛 -祜 -祝 -神 -祟 -祠 -祥 -祧 -票 -祭 -祹 -祺 -祼 -祿 -禁 -禃 -禇 -禍 -禎 -福 -禑 -禓 -禔 -禕 -禘 -禛 -禟 -禠 -禤 -禦 -禧 -禨 -禩 -禪 -禮 -禰 -禱 -禵 -禹 -禺 -禼 -禽 -禾 -禿 -秀 -私 -秈 -秉 -秋 -科 -秒 -秕 -秘 -租 -秠 -秣 -秤 -秦 -秧 -秩 -秭 -秳 -秸 -移 -稀 -稅 -稈 -稉 -程 -稍 -稑 -稔 -稗 -稘 -稙 -稚 -稜 -稞 -稟 -稠 -種 -稱 -稲 -稷 -稹 -稺 -稻 -稼 -稽 -稾 -稿 -穀 -穂 -穆 -穈 -穉 -穌 -積 -穎 -穗 -穟 -穠 -穡 -穢 -穣 -穩 -穫 -穰 -穴 -穵 -究 -穹 -空 -穿 -突 -窄 -窅 -窈 -窋 -窒 -窕 -窖 -窗 -窘 -窟 -窠 -窣 -窨 -窩 -窪 -窮 -窯 -窰 -窶 -窺 -窿 -竄 -竅 -竇 -竈 -竊 -立 -竑 -站 -竜 -竟 -章 -竣 -童 -竦 -竩 -竭 -端 -競 -竹 -竺 -竻 -竿 -笄 -笆 -笈 -笏 -笑 -笘 -笙 -笛 -笞 -笠 -笥 -符 -笨 -笩 -笪 -第 -笭 -笮 -笯 -笱 -笳 -笹 -筅 -筆 -等 -筊 -筋 -筌 -筍 -筏 -筐 -筒 -答 -策 -筘 -筠 -筥 -筦 -筧 -筬 -筭 -筱 -筲 -筳 -筵 -筶 -筷 -筻 -箆 -箇 -箋 -箍 -箏 -箐 -箑 -箒 -箔 -箕 -算 -箜 -管 -箬 -箭 -箱 -箴 -箸 -節 -篁 -範 -篆 -篇 -築 -篊 -篋 -篌 -篔 -篙 -篝 -篠 -篡 -篤 -篥 -篦 -篩 -篪 -篭 -篯 -篳 -篷 -簀 -簃 -簇 -簉 -簋 -簍 -簑 -簕 -簗 -簞 -簠 -簡 -簧 -簪 -簫 -簷 -簸 -簹 -簺 -簽 -簾 -簿 -籀 -籃 -籌 -籍 -籐 -籙 -籛 -籜 -籝 -籟 -籠 -籣 -籤 -籥 -籪 -籬 -籮 -籲 -米 -籽 -籾 -粄 -粉 -粍 -粑 -粒 -粕 -粗 -粘 -粟 -粢 -粥 -粦 -粧 -粩 -粱 -粲 -粳 -粵 -粹 -粼 -粽 -精 -粿 -糀 -糅 -糊 -糌 -糍 -糎 -糕 -糖 -糙 -糜 -糝 -糞 -糟 -糠 -糢 -糧 -糬 -糯 -糰 -糴 -糶 -糸 -糹 -糺 -系 -糾 -紀 -紂 -約 -紅 -紆 -紇 -紈 -紉 -紊 -紋 -納 -紐 -紑 -紓 -純 -紕 -紗 -紘 -紙 -級 -紛 -紜 -紝 -紞 -素 -紡 -索 -紫 -紮 -累 -細 -紱 -紲 -紳 -紵 -紹 -紺 -紿 -終 -絃 -組 -絆 -経 -絎 -結 -絕 -絛 -絜 -絞 -絡 -絢 -給 -絨 -絪 -絮 -統 -絲 -絳 -絵 -絶 -絹 -絺 -綁 -綃 -綈 -綉 -綎 -綏 -經 -綖 -継 -続 -綜 -綝 -綞 -綠 -綢 -綣 -綦 -綧 -綫 -綬 -維 -綮 -綰 -綱 -網 -綳 -綴 -綸 -綺 -綻 -綽 -綾 -綿 -緁 -緃 -緄 -緈 -緊 -緋 -総 -緑 -緒 -緖 -緘 -線 -緜 -緝 -緞 -締 -緡 -緣 -緤 -編 -緩 -緬 -緯 -緱 -緲 -練 -緹 -緻 -縂 -縄 -縈 -縉 -縊 -縕 -縛 -縝 -縞 -縠 -縡 -縣 -縤 -縫 -縮 -縯 -縱 -縴 -縵 -縷 -縹 -縻 -總 -績 -繁 -繃 -繆 -繇 -繒 -織 -繕 -繖 -繙 -繚 -繞 -繡 -繩 -繪 -繫 -繭 -繰 -繳 -繹 -繻 -繼 -繽 -繾 -纁 -纂 -纈 -續 -纍 -纏 -纓 -纔 -纕 -纖 -纘 -纛 -纜 -缐 -缶 -缸 -缺 -缽 -罃 -罄 -罅 -罈 -罉 -罌 -罍 -罐 -罔 -罕 -罘 -罟 -罡 -罨 -罩 -罪 -置 -罰 -罱 -署 -罳 -罵 -罶 -罷 -罹 -罽 -羂 -羅 -羆 -羈 -羊 -羋 -羌 -美 -羔 -羕 -羗 -羙 -羚 -羞 -羡 -羣 -群 -羥 -羧 -羨 -義 -羯 -羰 -羱 -羲 -羸 -羹 -羽 -羿 -翀 -翁 -翂 -翃 -翅 -翊 -翌 -翎 -翏 -習 -翔 -翕 -翙 -翜 -翟 -翠 -翡 -翥 -翦 -翩 -翬 -翮 -翰 -翱 -翳 -翹 -翻 -翼 -耀 -老 -考 -耄 -者 -耆 -而 -耍 -耎 -耐 -耑 -耒 -耔 -耕 -耗 -耘 -耙 -耜 -耦 -耨 -耬 -耳 -耵 -耶 -耷 -耽 -耿 -聃 -聆 -聊 -聒 -聖 -聘 -聚 -聞 -聟 -聨 -聯 -聰 -聱 -聲 -聳 -聴 -聶 -職 -聽 -聾 -聿 -肄 -肅 -肆 -肇 -肉 -肋 -肌 -肏 -肖 -肘 -肚 -肛 -肜 -肝 -肟 -股 -肢 -肥 -肩 -肪 -肫 -肯 -肱 -育 -肸 -肹 -肺 -肼 -肽 -胂 -胃 -胄 -胅 -胇 -胊 -背 -胍 -胎 -胖 -胗 -胙 -胚 -胛 -胝 -胞 -胡 -胤 -胥 -胬 -胭 -胰 -胱 -胳 -胴 -胸 -胺 -胼 -能 -脂 -脅 -脆 -脇 -脈 -脊 -脒 -脖 -脘 -脛 -脣 -脩 -脫 -脬 -脭 -脯 -脲 -脳 -脷 -脹 -脾 -腆 -腈 -腊 -腋 -腌 -腎 -腐 -腑 -腓 -腔 -腕 -腥 -腦 -腧 -腩 -腫 -腮 -腰 -腱 -腳 -腴 -腸 -腹 -腺 -腿 -膀 -膂 -膈 -膊 -膏 -膚 -膛 -膜 -膝 -膠 -膣 -膥 -膦 -膨 -膩 -膮 -膳 -膺 -膽 -膾 -膿 -臀 -臂 -臃 -臆 -臉 -臊 -臍 -臏 -臘 -臚 -臞 -臟 -臠 -臣 -臧 -臨 -自 -臭 -臯 -至 -致 -臺 -臻 -臼 -臾 -舂 -舅 -與 -興 -舉 -舊 -舌 -舍 -舎 -舒 -舔 -舖 -舘 -舛 -舜 -舞 -舟 -舢 -舥 -舨 -舩 -航 -舫 -般 -舲 -舵 -舶 -舷 -舸 -船 -舺 -艅 -艇 -艉 -艋 -艎 -艏 -艔 -艘 -艙 -艚 -艦 -艮 -良 -艱 -色 -艶 -艷 -艸 -艽 -艾 -艿 -芃 -芊 -芋 -芍 -芎 -芑 -芒 -芘 -芙 -芛 -芝 -芡 -芥 -芨 -芩 -芪 -芫 -芬 -芭 -芮 -芯 -花 -芳 -芴 -芷 -芸 -芹 -芻 -芽 -芾 -苄 -苅 -苑 -苒 -苓 -苔 -苕 -苗 -苛 -苜 -苝 -苞 -苟 -苡 -苣 -苤 -若 -苦 -苧 -苪 -苫 -苯 -英 -苳 -苴 -苷 -苺 -苻 -苼 -苾 -茀 -茁 -茂 -范 -茄 -茅 -茆 -茇 -茈 -茉 -茌 -茗 -茘 -茚 -茛 -茜 -茝 -茨 -茫 -茬 -茭 -茮 -茯 -茱 -茲 -茴 -茵 -茶 -茷 -茸 -茹 -茺 -茼 -荀 -荃 -荅 -荇 -草 -荊 -荎 -荏 -荒 -荔 -荖 -荘 -荳 -荷 -荸 -荻 -荼 -荽 -莆 -莉 -莊 -莎 -莒 -莓 -莕 -莖 -莘 -莙 -莛 -莜 -莞 -莠 -莢 -莧 -莨 -莩 -莪 -莫 -莽 -莿 -菀 -菁 -菅 -菇 -菈 -菉 -菊 -菌 -菍 -菏 -菑 -菓 -菔 -菖 -菘 -菜 -菝 -菟 -菠 -菡 -菥 -菩 -菪 -菫 -華 -菰 -菱 -菲 -菴 -菶 -菸 -菹 -菺 -菼 -菽 -菾 -萁 -萃 -萄 -萇 -萊 -萌 -萍 -萎 -萐 -萘 -萜 -萠 -萡 -萣 -萩 -萬 -萭 -萱 -萵 -萸 -萹 -萼 -落 -葃 -葆 -葉 -葊 -葎 -葑 -葒 -著 -葙 -葚 -葛 -葜 -葝 -葡 -董 -葦 -葩 -葫 -葬 -葭 -葯 -葰 -葳 -葵 -葶 -葷 -葺 -蒂 -蒄 -蒍 -蒎 -蒐 -蒓 -蒔 -蒗 -蒙 -蒜 -蒞 -蒟 -蒡 -蒢 -蒤 -蒧 -蒨 -蒭 -蒯 -蒲 -蒴 -蒸 -蒹 -蒺 -蒻 -蒼 -蒽 -蒾 -蒿 -蓀 -蓁 -蓂 -蓄 -蓆 -蓉 -蓋 -蓍 -蓑 -蓓 -蓖 -蓘 -蓚 -蓧 -蓨 -蓪 -蓬 -蓭 -蓮 -蓯 -蓳 -蓼 -蓽 -蓿 -蔆 -蔎 -蔑 -蔓 -蔔 -蔕 -蔗 -蔘 -蔚 -蔝 -蔞 -蔡 -蔣 -蔥 -蔦 -蔬 -蔭 -蔴 -蔵 -蔻 -蔽 -蕁 -蕃 -蕅 -蕈 -蕉 -蕊 -蕎 -蕑 -蕒 -蕖 -蕘 -蕙 -蕚 -蕟 -蕡 -蕢 -蕤 -蕨 -蕩 -蕪 -蕭 -蕷 -蕹 -蕺 -蕻 -蕾 -薀 -薄 -薆 -薇 -薈 -薊 -薌 -薏 -薐 -薑 -薔 -薗 -薘 -薙 -薛 -薜 -薞 -薟 -薡 -薦 -薨 -薩 -薪 -薫 -薬 -薯 -薰 -薲 -薷 -薸 -薹 -薺 -薾 -薿 -藁 -藉 -藍 -藎 -藏 -藐 -藔 -藕 -藜 -藝 -藟 -藤 -藥 -藦 -藨 -藩 -藪 -藶 -藸 -藹 -藺 -藻 -藿 -蘂 -蘄 -蘅 -蘆 -蘇 -蘊 -蘋 -蘐 -蘑 -蘓 -蘗 -蘘 -蘚 -蘞 -蘢 -蘧 -蘩 -蘭 -蘵 -蘶 -蘸 -蘼 -蘿 -虉 -虎 -虐 -虓 -虔 -處 -虖 -虛 -虜 -虞 -號 -虢 -虧 -虨 -虯 -虱 -虵 -虹 -虺 -虻 -蚆 -蚊 -蚋 -蚌 -蚍 -蚓 -蚖 -蚜 -蚝 -蚡 -蚢 -蚣 -蚤 -蚧 -蚨 -蚩 -蚪 -蚯 -蚱 -蚴 -蚵 -蚶 -蚺 -蚼 -蛀 -蛄 -蛇 -蛉 -蛋 -蛍 -蛐 -蛑 -蛔 -蛙 -蛛 -蛞 -蛟 -蛤 -蛭 -蛯 -蛸 -蛹 -蛺 -蛻 -蛾 -蜀 -蜂 -蜃 -蜆 -蜇 -蜈 -蜉 -蜊 -蜍 -蜑 -蜒 -蜓 -蜘 -蜚 -蜛 -蜜 -蜞 -蜢 -蜣 -蜥 -蜨 -蜮 -蜯 -蜱 -蜴 -蜷 -蜻 -蜾 -蜿 -蝀 -蝌 -蝍 -蝎 -蝓 -蝕 -蝗 -蝘 -蝙 -蝚 -蝟 -蝠 -蝣 -蝤 -蝦 -蝨 -蝮 -蝯 -蝰 -蝲 -蝴 -蝶 -蝸 -蝽 -螂 -螃 -螄 -螅 -螈 -螋 -融 -螐 -螔 -螞 -螟 -螠 -螢 -螣 -螥 -螫 -螭 -螯 -螳 -螶 -螺 -螻 -螽 -螾 -蟀 -蟄 -蟅 -蟆 -蟊 -蟋 -蟌 -蟎 -蟑 -蟒 -蟜 -蟠 -蟥 -蟪 -蟫 -蟬 -蟯 -蟲 -蟳 -蟴 -蟶 -蟹 -蟻 -蟾 -蠂 -蠃 -蠄 -蠅 -蠆 -蠊 -蠋 -蠍 -蠐 -蠑 -蠓 -蠔 -蠕 -蠖 -蠘 -蠙 -蠟 -蠡 -蠢 -蠣 -蠱 -蠲 -蠵 -蠶 -蠷 -蠹 -蠻 -血 -衂 -衆 -行 -衍 -衎 -術 -衕 -衖 -街 -衙 -衚 -衛 -衜 -衝 -衞 -衡 -衢 -衣 -表 -衩 -衫 -衰 -衲 -衷 -衽 -衾 -衿 -袁 -袂 -袈 -袋 -袍 -袓 -袖 -袛 -袞 -袤 -袪 -被 -袱 -袴 -袾 -裁 -裂 -裊 -裎 -裒 -裔 -裕 -裖 -裘 -裙 -補 -裝 -裟 -裡 -裨 -裬 -裱 -裳 -裴 -裵 -裸 -裹 -製 -裾 -裿 -褀 -褂 -複 -褌 -褍 -褎 -褐 -褒 -褓 -褔 -褘 -褙 -褚 -褞 -褥 -褧 -褪 -褫 -褭 -褲 -褶 -褸 -褻 -襄 -襌 -襖 -襞 -襟 -襠 -襤 -襦 -襪 -襯 -襲 -襴 -襶 -襻 -襾 -西 -要 -覃 -覆 -覇 -覈 -見 -覌 -規 -覓 -視 -覚 -覡 -覦 -覧 -親 -覬 -覲 -観 -覺 -覽 -覿 -觀 -角 -觔 -觙 -觚 -觜 -解 -觭 -觱 -觴 -觶 -觸 -觿 -言 -訁 -訂 -訃 -訇 -計 -訊 -訌 -討 -訏 -訐 -訒 -訓 -訔 -訕 -訖 -託 -記 -訛 -訝 -訟 -訣 -訥 -訪 -設 -許 -訴 -訶 -診 -註 -証 -訾 -詁 -詆 -詈 -詐 -詒 -詔 -評 -詛 -詞 -詠 -詡 -詢 -詣 -詥 -試 -詧 -詩 -詫 -詭 -詮 -詰 -話 -該 -詳 -詵 -詹 -詼 -誄 -誅 -誇 -誌 -認 -誒 -誓 -誕 -誘 -語 -誠 -誡 -誣 -誤 -誥 -誦 -誨 -說 -説 -読 -誰 -課 -誴 -誹 -誼 -誾 -調 -談 -請 -諍 -諏 -諒 -論 -諗 -諜 -諟 -諠 -諡 -諤 -諦 -諧 -諪 -諫 -諭 -諮 -諱 -諲 -諳 -諴 -諶 -諷 -諸 -諺 -諼 -諾 -謀 -謁 -謂 -謄 -謇 -謊 -謌 -謎 -謏 -謐 -謔 -謖 -謗 -謙 -謚 -講 -謜 -謝 -謠 -謢 -謤 -謨 -謩 -謫 -謬 -謳 -謹 -謾 -證 -譏 -譓 -譔 -識 -譙 -譚 -譜 -譞 -警 -譫 -譬 -譭 -譯 -議 -譲 -譳 -譴 -護 -譽 -譿 -讀 -讃 -變 -讌 -讎 -讓 -讖 -讙 -讚 -讜 -讞 -谷 -谿 -豁 -豆 -豇 -豈 -豉 -豊 -豌 -豎 -豐 -豔 -豕 -豚 -象 -豢 -豨 -豪 -豫 -豬 -豳 -豸 -豹 -豺 -豿 -貂 -貅 -貉 -貊 -貌 -貐 -貒 -貓 -貔 -貘 -貝 -貞 -負 -財 -貢 -貤 -貧 -貨 -販 -貪 -貫 -責 -貭 -貮 -貯 -貲 -貳 -貴 -貶 -買 -貸 -貺 -費 -貼 -貽 -貿 -賀 -賁 -賂 -賃 -賄 -資 -賈 -賊 -賑 -賒 -賓 -賔 -賕 -賚 -賜 -賞 -賠 -賡 -賢 -賣 -賤 -賦 -賨 -質 -賬 -賭 -賴 -賹 -賺 -賻 -購 -賽 -賾 -贄 -贅 -贇 -贈 -贊 -贌 -贍 -贏 -贓 -贔 -贖 -贛 -赤 -赦 -赧 -赫 -赬 -赭 -走 -赳 -赴 -起 -趁 -超 -越 -趐 -趕 -趖 -趙 -趟 -趣 -趨 -足 -趴 -趵 -趺 -趼 -趾 -跅 -跆 -跋 -跌 -跏 -跑 -跖 -跗 -跛 -距 -跟 -跡 -跣 -跤 -跨 -跩 -跪 -路 -跳 -踎 -踏 -踐 -踝 -踞 -踢 -踩 -踰 -踴 -踹 -踺 -蹂 -蹄 -蹇 -蹈 -蹉 -蹊 -蹋 -蹕 -蹙 -蹟 -蹠 -蹤 -蹦 -蹬 -蹭 -蹯 -蹲 -蹴 -蹶 -蹺 -蹻 -蹼 -躁 -躂 -躄 -躉 -躋 -躍 -躑 -躒 -躔 -躝 -躪 -身 -躬 -躰 -躲 -躺 -軀 -車 -軋 -軌 -軍 -軎 -軒 -軔 -軛 -軟 -転 -軫 -軲 -軸 -軹 -軺 -軻 -軼 -軽 -軾 -較 -輄 -輅 -載 -輋 -輒 -輓 -輔 -輕 -輛 -輝 -輞 -輟 -輥 -輦 -輩 -輪 -輬 -輭 -輯 -輶 -輸 -輻 -輾 -輿 -轀 -轂 -轄 -轅 -轆 -轉 -轍 -轎 -轘 -轝 -轟 -轤 -辛 -辜 -辟 -辣 -辦 -辧 -辨 -辭 -辮 -辯 -辰 -辱 -農 -辵 -辺 -辻 -込 -迂 -迄 -迅 -迎 -近 -返 -迢 -迤 -迥 -迦 -迪 -迫 -迭 -迮 -述 -迴 -迵 -迷 -迸 -迺 -追 -退 -送 -逃 -逄 -逅 -逆 -逈 -逋 -逌 -逍 -逎 -透 -逐 -逑 -途 -逕 -逖 -逗 -這 -通 -逛 -逝 -逞 -速 -造 -逢 -連 -逤 -逨 -逮 -逯 -進 -逴 -逵 -逸 -逹 -逺 -逼 -逾 -遁 -遂 -遄 -遇 -遊 -運 -遍 -過 -遏 -遐 -遒 -道 -達 -違 -遘 -遙 -遛 -遜 -遞 -遠 -遢 -遣 -遨 -適 -遭 -遮 -遯 -遲 -遴 -遵 -遶 -遷 -選 -遹 -遺 -遼 -避 -邀 -邁 -邂 -邃 -還 -邇 -邈 -邉 -邊 -邋 -邏 -邑 -邕 -邗 -邙 -邛 -邠 -邡 -邢 -那 -邦 -邨 -邪 -邯 -邰 -邱 -邲 -邳 -邴 -邵 -邸 -邽 -邾 -郁 -郃 -郄 -郅 -郇 -郊 -郋 -郎 -郗 -郛 -郜 -郝 -郞 -郟 -郡 -郢 -郤 -部 -郪 -郫 -郭 -郯 -郳 -郴 -郵 -郷 -都 -郾 -郿 -鄂 -鄃 -鄄 -鄆 -鄉 -鄋 -鄑 -鄒 -鄔 -鄖 -鄗 -鄘 -鄙 -鄚 -鄜 -鄞 -鄠 -鄢 -鄣 -鄤 -鄧 -鄩 -鄫 -鄭 -鄯 -鄰 -鄱 -鄲 -鄳 -鄴 -鄺 -酃 -酆 -酈 -酉 -酊 -酋 -酌 -配 -酎 -酏 -酐 -酒 -酔 -酗 -酚 -酞 -酡 -酢 -酣 -酥 -酩 -酪 -酬 -酮 -酯 -酰 -酴 -酵 -酶 -酷 -酸 -酺 -酼 -醁 -醂 -醃 -醅 -醇 -醉 -醋 -醌 -醍 -醐 -醒 -醚 -醛 -醜 -醞 -醢 -醣 -醪 -醫 -醬 -醮 -醯 -醴 -醺 -醾 -醿 -釀 -釁 -釆 -采 -釉 -釋 -里 -重 -野 -量 -釐 -金 -釒 -釓 -釔 -釕 -釗 -釘 -釙 -釚 -釜 -針 -釣 -釤 -釦 -釧 -釩 -釪 -釭 -釴 -釵 -釷 -釹 -釺 -鈀 -鈁 -鈄 -鈇 -鈈 -鈉 -鈊 -鈍 -鈏 -鈐 -鈑 -鈔 -鈕 -鈖 -鈞 -鈢 -鈣 -鈥 -鈦 -鈫 -鈮 -鈰 -鈳 -鈴 -鈷 -鈸 -鈹 -鈺 -鈾 -鈿 -鉀 -鉄 -鉅 -鉆 -鉈 -鉉 -鉋 -鉌 -鉍 -鉏 -鉑 -鉓 -鉗 -鉚 -鉛 -鉞 -鉟 -鉤 -鉦 -鉬 -鉭 -鉲 -鉶 -鉷 -鉸 -鉻 -鉾 -鉿 -銀 -銂 -銃 -銅 -銋 -銍 -銑 -銓 -銕 -銖 -銘 -銚 -銜 -銠 -銣 -銥 -銦 -銨 -銩 -銪 -銫 -銬 -銭 -銱 -銲 -銳 -銶 -銷 -銹 -銻 -銼 -銾 -鋁 -鋅 -鋆 -鋇 -鋌 -鋏 -鋐 -鋒 -鋕 -鋗 -鋙 -鋡 -鋤 -鋥 -鋦 -鋨 -鋪 -鋮 -鋯 -鋰 -鋱 -鋳 -鋶 -鋸 -鋹 -鋼 -錀 -錄 -錏 -錐 -錒 -錕 -錘 -錚 -錞 -錟 -錠 -錡 -錢 -錦 -錨 -錫 -錬 -錮 -錯 -錳 -錶 -錸 -錻 -鍀 -鍇 -鍈 -鍉 -鍊 -鍋 -鍍 -鍏 -鍔 -鍘 -鍛 -鍝 -鍟 -鍠 -鍥 -鍩 -鍬 -鍱 -鍳 -鍵 -鍶 -鍷 -鍺 -鍼 -鍾 -鎂 -鎅 -鎊 -鎌 -鎏 -鎓 -鎔 -鎖 -鎗 -鎘 -鎚 -鎛 -鎢 -鎣 -鎦 -鎧 -鎪 -鎬 -鎭 -鎮 -鎰 -鎳 -鎵 -鎻 -鏃 -鏇 -鏈 -鏊 -鏌 -鏐 -鏑 -鏓 -鏖 -鏗 -鏘 -鏜 -鏝 -鏞 -鏟 -鏡 -鏢 -鏤 -鏦 -鏳 -鏴 -鏵 -鏷 -鏻 -鏽 -鐃 -鐇 -鐈 -鐓 -鐔 -鐘 -鐙 -鐠 -鐡 -鐤 -鐦 -鐧 -鐫 -鐬 -鐭 -鐮 -鐲 -鐳 -鐵 -鐸 -鐺 -鐽 -鐿 -鑀 -鑁 -鑂 -鑄 -鑅 -鑊 -鑌 -鑑 -鑒 -鑛 -鑠 -鑣 -鑨 -鑪 -鑫 -鑭 -鑰 -鑲 -鑴 -鑷 -鑼 -鑽 -鑾 -鑿 -長 -門 -閂 -閃 -閆 -閉 -開 -閎 -閏 -閑 -閒 -間 -閔 -閘 -閜 -閞 -閟 -関 -閣 -閥 -閦 -閨 -閩 -閬 -閭 -閰 -閱 -閶 -閹 -閻 -閼 -閾 -閿 -闆 -闇 -闈 -闊 -闋 -闌 -闍 -闐 -闓 -闔 -闕 -闖 -闘 -關 -闞 -闡 -闢 -闥 -阜 -阝 -阡 -阪 -阭 -阮 -阯 -阱 -防 -阻 -阿 -陀 -陁 -陂 -附 -陋 -陌 -降 -限 -陔 -陘 -陛 -陜 -陝 -陞 -陟 -陡 -院 -陣 -除 -陪 -陬 -陰 -陲 -陳 -陵 -陶 -陷 -陸 -険 -陽 -隄 -隅 -隆 -隈 -隊 -隋 -隍 -階 -隔 -隕 -隗 -隘 -隙 -際 -障 -隣 -隧 -隨 -險 -隰 -隱 -隲 -隳 -隴 -隷 -隸 -隹 -隻 -隼 -雀 -雁 -雄 -雅 -集 -雇 -雉 -雋 -雌 -雍 -雎 -雑 -雒 -雕 -雖 -雙 -雛 -雜 -雝 -雞 -離 -難 -雨 -雩 -雪 -雫 -雯 -雱 -雲 -零 -雷 -雹 -電 -需 -霄 -霅 -霆 -震 -霈 -霉 -霊 -霍 -霎 -霏 -霑 -霓 -霖 -霙 -霜 -霞 -霤 -霧 -霨 -霰 -露 -霶 -霸 -霹 -霽 -霾 -靁 -靂 -靄 -靈 -靉 -靑 -青 -靖 -靚 -靛 -靜 -非 -靠 -靡 -面 -革 -靫 -靬 -靭 -靳 -靴 -靶 -靺 -靼 -鞅 -鞆 -鞋 -鞍 -鞏 -鞘 -鞞 -鞠 -鞣 -鞥 -鞦 -鞨 -鞭 -鞮 -鞴 -韁 -韃 -韆 -韋 -韌 -韑 -韓 -韙 -韜 -韞 -韠 -韡 -韭 -韮 -音 -韶 -韺 -韻 -韾 -響 -頁 -頂 -頃 -項 -順 -須 -頊 -頌 -頍 -頎 -頏 -預 -頑 -頒 -頓 -頔 -頗 -領 -頜 -頠 -頡 -頤 -頦 -頫 -頭 -頰 -頴 -頵 -頷 -頸 -頹 -頻 -頼 -顆 -題 -額 -顎 -顏 -顒 -顓 -顔 -顕 -顗 -願 -顙 -顛 -類 -顥 -顧 -顫 -顯 -顰 -顱 -顳 -顴 -風 -颮 -颯 -颱 -颶 -颺 -颼 -飄 -飆 -飈 -飛 -食 -飠 -飡 -飢 -飥 -飩 -飪 -飫 -飬 -飭 -飮 -飯 -飲 -飴 -飼 -飽 -飾 -餃 -餄 -餅 -餉 -養 -餌 -餎 -餐 -餒 -餓 -餗 -餘 -餚 -餛 -餞 -餠 -餡 -館 -餮 -餵 -餺 -餾 -餿 -饃 -饅 -饋 -饌 -饑 -饒 -饕 -饗 -饞 -饟 -饢 -首 -馗 -馘 -香 -馛 -馥 -馦 -馨 -馬 -馭 -馮 -馯 -馱 -馳 -馴 -馼 -駁 -駄 -駅 -駆 -駐 -駑 -駒 -駔 -駕 -駘 -駙 -駛 -駝 -駟 -駢 -駭 -駰 -駱 -駿 -騁 -騂 -騄 -騅 -騋 -騎 -騏 -験 -騖 -騙 -騤 -騨 -騫 -騭 -騮 -騰 -騶 -騷 -騾 -驁 -驃 -驄 -驅 -驊 -驌 -驍 -驎 -驒 -驕 -驗 -驚 -驛 -驟 -驢 -驤 -驥 -驩 -驪 -骨 -骯 -骰 -骶 -骷 -骸 -骼 -髀 -髂 -髎 -髏 -髑 -髒 -髓 -體 -高 -髙 -髡 -髦 -髪 -髭 -髮 -髯 -髲 -髷 -髹 -髻 -鬃 -鬄 -鬅 -鬆 -鬍 -鬚 -鬟 -鬢 -鬣 -鬥 -鬧 -鬨 -鬩 -鬪 -鬬 -鬮 -鬯 -鬱 -鬲 -鬹 -鬻 -鬼 -魁 -魂 -魃 -魄 -魅 -魈 -魋 -魍 -魎 -魏 -魔 -魕 -魘 -魚 -魛 -魞 -魟 -魣 -魨 -魩 -魮 -魯 -魴 -魷 -鮀 -鮁 -鮃 -鮄 -鮊 -鮋 -鮍 -鮐 -鮑 -鮒 -鮓 -鮗 -鮜 -鮟 -鮠 -鮡 -鮣 -鮨 -鮪 -鮫 -鮭 -鮮 -鮰 -鮸 -鮹 -鮻 -鯀 -鯁 -鯃 -鯇 -鯉 -鯊 -鯏 -鯒 -鯓 -鯔 -鯕 -鯖 -鯗 -鯙 -鯛 -鯡 -鯢 -鯤 -鯧 -鯨 -鯪 -鯭 -鯮 -鯰 -鯶 -鯷 -鯻 -鯽 -鯿 -鰂 -鰃 -鰆 -鰈 -鰉 -鰍 -鰏 -鰒 -鰓 -鰕 -鰗 -鰛 -鰜 -鰟 -鰣 -鰤 -鰧 -鰨 -鰩 -鰭 -鰮 -鰱 -鰲 -鰳 -鰶 -鰷 -鰹 -鰺 -鰻 -鰼 -鰾 -鱀 -鱂 -鱅 -鱇 -鱈 -鱉 -鱊 -鱒 -鱓 -鱔 -鱖 -鱗 -鱘 -鱚 -鱝 -鱟 -鱠 -鱣 -鱥 -鱧 -鱨 -鱬 -鱮 -鱰 -鱲 -鱵 -鱷 -鱸 -鱺 -鱻 -鳥 -鳧 -鳩 -鳯 -鳰 -鳳 -鳴 -鳶 -鳽 -鴆 -鴇 -鴉 -鴒 -鴓 -鴕 -鴗 -鴛 -鴝 -鴞 -鴟 -鴡 -鴣 -鴦 -鴨 -鴫 -鴯 -鴰 -鴴 -鴻 -鴿 -鵂 -鵄 -鵎 -鵐 -鵑 -鵒 -鵓 -鵙 -鵜 -鵝 -鵞 -鵟 -鵠 -鵡 -鵪 -鵬 -鵯 -鵰 -鵲 -鵵 -鵼 -鵾 -鶆 -鶇 -鶉 -鶏 -鶒 -鶓 -鶘 -鶚 -鶡 -鶥 -鶩 -鶬 -鶯 -鶲 -鶴 -鶹 -鶺 -鶻 -鶼 -鶿 -鷂 -鷄 -鷉 -鷎 -鷓 -鷗 -鷙 -鷚 -鷟 -鷥 -鷦 -鷫 -鷯 -鷲 -鷳 -鷸 -鷹 -鷺 -鸊 -鸌 -鸐 -鸑 -鸕 -鸘 -鸚 -鸛 -鸜 -鸝 -鸞 -鹮 -鹵 -鹹 -鹼 -鹽 -鹿 -麂 -麅 -麇 -麈 -麊 -麋 -麐 -麒 -麓 -麗 -麝 -麞 -麟 -麥 -麩 -麪 -麯 -麴 -麵 -麹 -麺 -麻 -麼 -麽 -麾 -麿 -黁 -黃 -黇 -黌 -黍 -黎 -黏 -黐 -黑 -黒 -黔 -默 -黙 -黛 -黜 -黝 -點 -黟 -黥 -黧 -黨 -黯 -黴 -黶 -黻 -黼 -黽 -黿 -鼂 -鼇 -鼈 -鼉 -鼎 -鼐 -鼒 -鼓 -鼕 -鼙 -鼠 -鼢 -鼩 -鼬 -鼯 -鼱 -鼴 -鼷 -鼻 -鼽 -鼾 -齊 -齋 -齒 -齕 -齡 -齣 -齦 -齧 -齲 -齶 -龍 -龎 -龐 -龑 -龔 -龕 -龜 -龝 -龠 -龢 -郎 -凉 -﹑ -﹗ -﹝ -﹞ -﹢ -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? -A -B -C -D -E -F -G -H -I -K -L -M -N -O -P -R -S -T -U -V -W -Y -Z -[ -] -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -r -s -t -u -z -{ -| -} -~ -¥ -𣇉 - diff --git a/backend/ppocr/utils/dict/cyrillic_dict.txt b/backend/ppocr/utils/dict/cyrillic_dict.txt deleted file mode 100644 index 2b6f664..0000000 --- a/backend/ppocr/utils/dict/cyrillic_dict.txt +++ /dev/null @@ -1,163 +0,0 @@ - -! -# -$ -% -& -' -( -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -É -é -Ё -Є -І -Ј -Љ -Ў -А -Б -В -Г -Д -Е -Ж -З -И -Й -К -Л -М -Н -О -П -Р -С -Т -У -Ф -Х -Ц -Ч -Ш -Щ -Ъ -Ы -Ь -Э -Ю -Я -а -б -в -г -д -е -ж -з -и -й -к -л -м -н -о -п -р -с -т -у -ф -х -ц -ч -ш -щ -ъ -ы -ь -э -ю -я -ё -ђ -є -і -ј -љ -њ -ћ -ў -џ -Ґ -ґ diff --git a/backend/ppocr/utils/dict/devanagari_dict.txt b/backend/ppocr/utils/dict/devanagari_dict.txt deleted file mode 100644 index f559230..0000000 --- a/backend/ppocr/utils/dict/devanagari_dict.txt +++ /dev/null @@ -1,167 +0,0 @@ - -! -# -$ -% -& -' -( -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -É -é -ँ -ं -ः -अ -आ -इ -ई -उ -ऊ -ऋ -ए -ऐ -ऑ -ओ -औ -क -ख -ग -घ -ङ -च -छ -ज -झ -ञ -ट -ठ -ड -ढ -ण -त -थ -द -ध -न -ऩ -प -फ -ब -भ -म -य -र -ऱ -ल -ळ -व -श -ष -स -ह -़ -ा -ि -ी -ु -ू -ृ -ॅ -े -ै -ॉ -ो -ौ -् -॒ -क़ -ख़ -ग़ -ज़ -ड़ -ढ़ -फ़ -ॠ -। -० -१ -२ -३ -४ -५ -६ -७ -८ -९ -॰ diff --git a/backend/ppocr/utils/dict/en_dict.txt b/backend/ppocr/utils/dict/en_dict.txt deleted file mode 100644 index 7677d31..0000000 --- a/backend/ppocr/utils/dict/en_dict.txt +++ /dev/null @@ -1,95 +0,0 @@ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -\ -] -^ -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -{ -| -} -~ -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ - diff --git a/backend/ppocr/utils/dict/es_dict.txt b/backend/ppocr/utils/dict/es_dict.txt deleted file mode 100644 index f195f1e..0000000 --- a/backend/ppocr/utils/dict/es_dict.txt +++ /dev/null @@ -1,110 +0,0 @@ -x -i -_ -m -g -/ -1 -0 -I -L -S -V -R -C -2 -v -a -l -3 -6 -4 -5 -. -j -p - -Q -u -e -r -o -8 -7 -n -c -9 -t -b -é -q -d -ó -y -F -s -, -O -í -T -f -" -U -M -h -: -P -H -A -E -D -z -N -á -ñ -ú -% -; -è -+ -Y -- -B -G -( -) -¿ -? -w -¡ -! -X -É -K -k -Á -ü -Ú -« -» -J -' -ö -W -Z -º -Ö -­ -[ -] -Ç -ç -à -ä -û -ò -Í -ê -ô -ø -ª diff --git a/backend/ppocr/utils/dict/fa_dict.txt b/backend/ppocr/utils/dict/fa_dict.txt deleted file mode 100644 index 2328fbd..0000000 --- a/backend/ppocr/utils/dict/fa_dict.txt +++ /dev/null @@ -1,136 +0,0 @@ -f -a -_ -i -m -g -/ -1 -3 -I -L -S -V -R -C -2 -0 -v -l -6 -8 -5 -. -j -p -و -د -ر -ك -ن -ش -ه -ا -4 -9 -ی -ج -ِ -7 -غ -ل -س -ز -ّ -ت -ک -گ -ي -م -ب -ف -چ -خ -ق -ژ -آ -ص -پ -َ -ع -ئ -ح -ٔ -ض -ُ -ذ -أ -ى -ط -ظ -ث -ة -ً -ء -ؤ -ْ -ۀ -إ -ٍ -ٌ -ٰ -ٓ -ٱ -s -c -e -n -w -N -E -W -Y -D -O -H -A -d -z -r -T -G -o -t -x -h -b -B -M -Z -u -P -F -y -q -U -K -k -J -Q -' -X -# -? -% -$ -, -: -& -! -- -( -É -@ -é -+ - diff --git a/backend/ppocr/utils/dict/french_dict.txt b/backend/ppocr/utils/dict/french_dict.txt deleted file mode 100644 index e8f657d..0000000 --- a/backend/ppocr/utils/dict/french_dict.txt +++ /dev/null @@ -1,136 +0,0 @@ -f -e -n -c -h -_ -i -m -g -/ -r -v -a -l -t -w -o -d -6 -1 -. -p -B -u -2 -à -3 -R -y -4 -U -E -A -5 -P -O -S -T -D -7 -Z -8 -I -N -L -G -M -H -0 -J -K -- -9 -F -C -V -é -X -' -s -Q -: -è -x -b -Y -Œ -É -z -W -Ç -È -k -Ô -ô -€ -À -Ê -q -ù -° -ê -î -* - -j -" -, -â -% -û -ç -ü -? -! -; -ö -( -) -ï -º -ó -ø -å -+ -™ -á -Ë -< -² -Á -Î -& -@ -œ -ε -Ü -ë -[ -] -í -ò -Ö -ä -ß -« -» -ú -ñ -æ -µ -³ -Å -$ -# - diff --git a/backend/ppocr/utils/dict/german_dict.txt b/backend/ppocr/utils/dict/german_dict.txt deleted file mode 100644 index 5e121af..0000000 --- a/backend/ppocr/utils/dict/german_dict.txt +++ /dev/null @@ -1,143 +0,0 @@ - -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -= -> -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -] -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -£ -§ -­ -° -´ -µ -· -º -¿ -Á -Ä -Å -É -Ï -Ô -Ö -Ü -ß -à -á -â -ã -ä -å -æ -ç -è -é -ê -ë -í -ï -ñ -ò -ó -ô -ö -ø -ù -ú -û -ü -ō -Š -Ÿ -ʒ -β -δ -з -Ṡ -‘ -€ -© -ª -« -¬ diff --git a/backend/ppocr/utils/dict/hi_dict.txt b/backend/ppocr/utils/dict/hi_dict.txt deleted file mode 100644 index 8dfedb5..0000000 --- a/backend/ppocr/utils/dict/hi_dict.txt +++ /dev/null @@ -1,162 +0,0 @@ - -! -# -$ -% -& -' -( -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -É -é -ँ -ं -ः -अ -आ -इ -ई -उ -ऊ -ऋ -ए -ऐ -ऑ -ओ -औ -क -ख -ग -घ -ङ -च -छ -ज -झ -ञ -ट -ठ -ड -ढ -ण -त -थ -द -ध -न -प -फ -ब -भ -म -य -र -ल -ळ -व -श -ष -स -ह -़ -ा -ि -ी -ु -ू -ृ -ॅ -े -ै -ॉ -ो -ौ -् -क़ -ख़ -ग़ -ज़ -ड़ -ढ़ -फ़ -० -१ -२ -३ -४ -५ -६ -७ -८ -९ -॰ diff --git a/backend/ppocr/utils/dict/it_dict.txt b/backend/ppocr/utils/dict/it_dict.txt deleted file mode 100644 index e692c6d..0000000 --- a/backend/ppocr/utils/dict/it_dict.txt +++ /dev/null @@ -1,118 +0,0 @@ -i -t -_ -m -g -/ -5 -I -L -S -V -R -C -2 -0 -1 -v -a -l -7 -8 -9 -6 -. -j -p - -e -r -o -d -s -n -3 -4 -P -u -c -A -- -, -" -z -h -f -b -q -ì -' -à -O -è -G -ù -é -ò -; -F -E -B -N -H -k -: -U -T -X -D -K -? -[ -M -­ -x -y -( -) -W -ö -º -w -] -Q -J -+ -ü -! -È -á -% -= -» -ñ -Ö -Y -ä -í -Z -« -@ -ó -ø -ï -ú -ê -ç -Á -É -Å -ß -{ -} -& -` -û -î -# -$ diff --git a/backend/ppocr/utils/dict/japan_dict.txt b/backend/ppocr/utils/dict/japan_dict.txt deleted file mode 100644 index 339d4b8..0000000 --- a/backend/ppocr/utils/dict/japan_dict.txt +++ /dev/null @@ -1,4399 +0,0 @@ -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -] -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -© -° -² -´ -½ -Á -Ä -Å -Ç -È -É -Í -Ó -Ö -× -Ü -ß -à -á -â -ã -ä -å -æ -ç -è -é -ê -ë -í -ð -ñ -ò -ó -ô -õ -ö -ø -ú -û -ü -ý -ā -ă -ą -ć -Č -č -đ -ē -ė -ę -ğ -ī -ı -Ł -ł -ń -ň -ō -ř -Ş -ş -Š -š -ţ -ū -ż -Ž -ž -Ș -ș -ț -Δ -α -λ -μ -φ -Г -О -а -в -л -о -р -с -т -я -ồ -​ -— -― -’ -“ -” -… -℃ -→ -∇ -− -■ -☆ -  -、 -。 -々 -〆 -〈 -〉 -「 -」 -『 -』 -〔 -〕 -〜 -ぁ -あ -ぃ -い -う -ぇ -え -ぉ -お -か -が -き -ぎ -く -ぐ -け -げ -こ -ご -さ -ざ -し -じ -す -ず -せ -ぜ -そ -ぞ -た -だ -ち -ぢ -っ -つ -づ -て -で -と -ど -な -に -ぬ -ね -の -は -ば -ぱ -ひ -び -ぴ -ふ -ぶ -ぷ -へ -べ -ぺ -ほ -ぼ -ぽ -ま -み -む -め -も -ゃ -や -ゅ -ゆ -ょ -よ -ら -り -る -れ -ろ -わ -ゑ -を -ん -ゝ -ゞ -ァ -ア -ィ -イ -ゥ -ウ -ェ -エ -ォ -オ -カ -ガ -キ -ギ -ク -グ -ケ -ゲ -コ -ゴ -サ -ザ -シ -ジ -ス -ズ -セ -ゼ -ソ -ゾ -タ -ダ -チ -ヂ -ッ -ツ -ヅ -テ -デ -ト -ド -ナ -ニ -ヌ -ネ -ノ -ハ -バ -パ -ヒ -ビ -ピ -フ -ブ -プ -ヘ -ベ -ペ -ホ -ボ -ポ -マ -ミ -ム -メ -モ -ャ -ヤ -ュ -ユ -ョ -ヨ -ラ -リ -ル -レ -ロ -ワ -ヰ -ン -ヴ -ヵ -ヶ -・ -ー -㈱ -一 -丁 -七 -万 -丈 -三 -上 -下 -不 -与 -丑 -且 -世 -丘 -丙 -丞 -両 -並 -中 -串 -丸 -丹 -主 -丼 -丿 -乃 -久 -之 -乎 -乏 -乗 -乘 -乙 -九 -乞 -也 -乱 -乳 -乾 -亀 -了 -予 -争 -事 -二 -于 -互 -五 -井 -亘 -亙 -些 -亜 -亟 -亡 -交 -亥 -亦 -亨 -享 -京 -亭 -亮 -人 -什 -仁 -仇 -今 -介 -仍 -仏 -仔 -仕 -他 -仗 -付 -仙 -代 -令 -以 -仮 -仰 -仲 -件 -任 -企 -伊 -伍 -伎 -伏 -伐 -休 -会 -伝 -伯 -估 -伴 -伶 -伸 -伺 -似 -伽 -佃 -但 -位 -低 -住 -佐 -佑 -体 -何 -余 -佚 -佛 -作 -佩 -佳 -併 -佶 -使 -侈 -例 -侍 -侏 -侑 -侘 -供 -依 -侠 -価 -侮 -侯 -侵 -侶 -便 -係 -促 -俄 -俊 -俔 -俗 -俘 -保 -信 -俣 -俤 -修 -俯 -俳 -俵 -俸 -俺 -倉 -個 -倍 -倒 -候 -借 -倣 -値 -倫 -倭 -倶 -倹 -偃 -假 -偈 -偉 -偏 -偐 -偕 -停 -健 -側 -偵 -偶 -偽 -傀 -傅 -傍 -傑 -傘 -備 -催 -傭 -傲 -傳 -債 -傷 -傾 -僊 -働 -像 -僑 -僕 -僚 -僧 -僭 -僮 -儀 -億 -儇 -儒 -儛 -償 -儡 -優 -儲 -儺 -儼 -兀 -允 -元 -兄 -充 -兆 -先 -光 -克 -兌 -免 -兎 -児 -党 -兜 -入 -全 -八 -公 -六 -共 -兵 -其 -具 -典 -兼 -内 -円 -冊 -再 -冑 -冒 -冗 -写 -冠 -冤 -冥 -冨 -冬 -冲 -决 -冶 -冷 -准 -凉 -凋 -凌 -凍 -凛 -凝 -凞 -几 -凡 -処 -凪 -凰 -凱 -凶 -凸 -凹 -出 -函 -刀 -刃 -分 -切 -刈 -刊 -刎 -刑 -列 -初 -判 -別 -利 -刪 -到 -制 -刷 -券 -刹 -刺 -刻 -剃 -則 -削 -剋 -前 -剖 -剛 -剣 -剤 -剥 -剪 -副 -剰 -割 -創 -剽 -劇 -劉 -劔 -力 -功 -加 -劣 -助 -努 -劫 -劭 -励 -労 -効 -劾 -勃 -勅 -勇 -勉 -勒 -動 -勘 -務 -勝 -募 -勢 -勤 -勧 -勲 -勺 -勾 -勿 -匁 -匂 -包 -匏 -化 -北 -匙 -匝 -匠 -匡 -匣 -匯 -匲 -匹 -区 -医 -匿 -十 -千 -升 -午 -卉 -半 -卍 -卑 -卒 -卓 -協 -南 -単 -博 -卜 -占 -卦 -卯 -印 -危 -即 -却 -卵 -卸 -卿 -厄 -厚 -原 -厠 -厨 -厩 -厭 -厳 -去 -参 -又 -叉 -及 -友 -双 -反 -収 -叔 -取 -受 -叙 -叛 -叟 -叡 -叢 -口 -古 -句 -叩 -只 -叫 -召 -可 -台 -叱 -史 -右 -叶 -号 -司 -吃 -各 -合 -吉 -吊 -同 -名 -后 -吏 -吐 -向 -君 -吝 -吟 -吠 -否 -含 -吸 -吹 -吻 -吽 -吾 -呂 -呆 -呈 -呉 -告 -呑 -周 -呪 -呰 -味 -呼 -命 -咀 -咄 -咋 -和 -咒 -咫 -咲 -咳 -咸 -哀 -品 -哇 -哉 -員 -哨 -哩 -哭 -哲 -哺 -唄 -唆 -唇 -唐 -唖 -唯 -唱 -唳 -唸 -唾 -啄 -商 -問 -啓 -啼 -善 -喋 -喚 -喜 -喝 -喧 -喩 -喪 -喫 -喬 -單 -喰 -営 -嗅 -嗇 -嗔 -嗚 -嗜 -嗣 -嘆 -嘉 -嘗 -嘘 -嘩 -嘯 -嘱 -嘲 -嘴 -噂 -噌 -噛 -器 -噴 -噺 -嚆 -嚢 -囀 -囃 -囉 -囚 -四 -回 -因 -団 -困 -囲 -図 -固 -国 -圀 -圃 -國 -圏 -園 -圓 -團 -圜 -土 -圧 -在 -圭 -地 -址 -坂 -均 -坊 -坐 -坑 -坡 -坤 -坦 -坪 -垂 -型 -垢 -垣 -埃 -埋 -城 -埒 -埔 -域 -埠 -埴 -埵 -執 -培 -基 -埼 -堀 -堂 -堅 -堆 -堕 -堤 -堪 -堯 -堰 -報 -場 -堵 -堺 -塀 -塁 -塊 -塑 -塔 -塗 -塘 -塙 -塚 -塞 -塩 -填 -塵 -塾 -境 -墉 -墓 -増 -墜 -墟 -墨 -墳 -墺 -墻 -墾 -壁 -壇 -壊 -壌 -壕 -士 -壬 -壮 -声 -壱 -売 -壷 -壹 -壺 -壽 -変 -夏 -夕 -外 -夙 -多 -夜 -夢 -夥 -大 -天 -太 -夫 -夬 -夭 -央 -失 -夷 -夾 -奄 -奇 -奈 -奉 -奎 -奏 -契 -奔 -奕 -套 -奘 -奠 -奢 -奥 -奨 -奪 -奮 -女 -奴 -奸 -好 -如 -妃 -妄 -妊 -妍 -妓 -妖 -妙 -妥 -妨 -妬 -妲 -妹 -妻 -妾 -姉 -始 -姐 -姓 -委 -姚 -姜 -姞 -姥 -姦 -姨 -姪 -姫 -姶 -姻 -姿 -威 -娑 -娘 -娟 -娠 -娩 -娯 -娼 -婆 -婉 -婚 -婢 -婦 -婬 -婿 -媄 -媒 -媓 -媚 -媛 -媞 -媽 -嫁 -嫄 -嫉 -嫌 -嫐 -嫗 -嫡 -嬉 -嬌 -嬢 -嬪 -嬬 -嬾 -孁 -子 -孔 -字 -存 -孚 -孝 -孟 -季 -孤 -学 -孫 -孵 -學 -宅 -宇 -守 -安 -宋 -完 -宍 -宏 -宕 -宗 -官 -宙 -定 -宛 -宜 -宝 -実 -客 -宣 -室 -宥 -宮 -宰 -害 -宴 -宵 -家 -宸 -容 -宿 -寂 -寄 -寅 -密 -寇 -富 -寒 -寓 -寔 -寛 -寝 -察 -寡 -實 -寧 -審 -寮 -寵 -寶 -寸 -寺 -対 -寿 -封 -専 -射 -将 -尉 -尊 -尋 -對 -導 -小 -少 -尖 -尚 -尤 -尪 -尭 -就 -尹 -尺 -尻 -尼 -尽 -尾 -尿 -局 -居 -屈 -届 -屋 -屍 -屎 -屏 -屑 -屓 -展 -属 -屠 -層 -履 -屯 -山 -岐 -岑 -岡 -岩 -岫 -岬 -岳 -岷 -岸 -峠 -峡 -峨 -峯 -峰 -島 -峻 -崇 -崋 -崎 -崑 -崖 -崗 -崛 -崩 -嵌 -嵐 -嵩 -嵯 -嶂 -嶋 -嶠 -嶺 -嶼 -嶽 -巀 -巌 -巒 -巖 -川 -州 -巡 -巣 -工 -左 -巧 -巨 -巫 -差 -己 -巳 -巴 -巷 -巻 -巽 -巾 -市 -布 -帆 -希 -帖 -帚 -帛 -帝 -帥 -師 -席 -帯 -帰 -帳 -帷 -常 -帽 -幄 -幅 -幇 -幌 -幔 -幕 -幟 -幡 -幢 -幣 -干 -平 -年 -并 -幸 -幹 -幻 -幼 -幽 -幾 -庁 -広 -庄 -庇 -床 -序 -底 -庖 -店 -庚 -府 -度 -座 -庫 -庭 -庵 -庶 -康 -庸 -廂 -廃 -廉 -廊 -廓 -廟 -廠 -廣 -廬 -延 -廷 -建 -廻 -廼 -廿 -弁 -弄 -弉 -弊 -弌 -式 -弐 -弓 -弔 -引 -弖 -弗 -弘 -弛 -弟 -弥 -弦 -弧 -弱 -張 -強 -弼 -弾 -彈 -彊 -彌 -彎 -当 -彗 -彙 -彝 -形 -彦 -彩 -彫 -彬 -彭 -彰 -影 -彷 -役 -彼 -往 -征 -徂 -径 -待 -律 -後 -徐 -徑 -徒 -従 -得 -徠 -御 -徧 -徨 -復 -循 -徭 -微 -徳 -徴 -德 -徹 -徽 -心 -必 -忉 -忌 -忍 -志 -忘 -忙 -応 -忠 -快 -忯 -念 -忻 -忽 -忿 -怒 -怖 -思 -怠 -怡 -急 -性 -怨 -怪 -怯 -恂 -恋 -恐 -恒 -恕 -恣 -恤 -恥 -恨 -恩 -恬 -恭 -息 -恵 -悉 -悌 -悍 -悔 -悟 -悠 -患 -悦 -悩 -悪 -悲 -悼 -情 -惇 -惑 -惚 -惜 -惟 -惠 -惣 -惧 -惨 -惰 -想 -惹 -惺 -愈 -愉 -愍 -意 -愔 -愚 -愛 -感 -愷 -愿 -慈 -態 -慌 -慎 -慕 -慢 -慣 -慧 -慨 -慮 -慰 -慶 -憂 -憎 -憐 -憑 -憙 -憤 -憧 -憩 -憬 -憲 -憶 -憾 -懇 -應 -懌 -懐 -懲 -懸 -懺 -懽 -懿 -戈 -戊 -戌 -戎 -成 -我 -戒 -戔 -或 -戚 -戟 -戦 -截 -戮 -戯 -戴 -戸 -戻 -房 -所 -扁 -扇 -扈 -扉 -手 -才 -打 -払 -托 -扮 -扱 -扶 -批 -承 -技 -抄 -把 -抑 -抓 -投 -抗 -折 -抜 -択 -披 -抱 -抵 -抹 -押 -抽 -担 -拇 -拈 -拉 -拍 -拏 -拐 -拒 -拓 -拘 -拙 -招 -拝 -拠 -拡 -括 -拭 -拳 -拵 -拶 -拾 -拿 -持 -挂 -指 -按 -挑 -挙 -挟 -挨 -振 -挺 -挽 -挿 -捉 -捕 -捗 -捜 -捧 -捨 -据 -捺 -捻 -掃 -掄 -授 -掌 -排 -掖 -掘 -掛 -掟 -採 -探 -掣 -接 -控 -推 -掩 -措 -掬 -掲 -掴 -掻 -掾 -揃 -揄 -揆 -揉 -描 -提 -揖 -揚 -換 -握 -揮 -援 -揶 -揺 -損 -搦 -搬 -搭 -携 -搾 -摂 -摘 -摩 -摸 -摺 -撃 -撒 -撞 -撤 -撥 -撫 -播 -撮 -撰 -撲 -撹 -擁 -操 -擔 -擦 -擬 -擾 -攘 -攝 -攣 -支 -收 -改 -攻 -放 -政 -故 -敏 -救 -敗 -教 -敢 -散 -敦 -敬 -数 -整 -敵 -敷 -斂 -文 -斉 -斎 -斐 -斑 -斗 -料 -斜 -斟 -斤 -斥 -斧 -斬 -断 -斯 -新 -方 -於 -施 -旁 -旅 -旋 -旌 -族 -旗 -旛 -无 -旡 -既 -日 -旦 -旧 -旨 -早 -旬 -旭 -旺 -旻 -昂 -昆 -昇 -昉 -昌 -明 -昏 -易 -昔 -星 -映 -春 -昧 -昨 -昪 -昭 -是 -昵 -昼 -晁 -時 -晃 -晋 -晏 -晒 -晟 -晦 -晧 -晩 -普 -景 -晴 -晶 -智 -暁 -暇 -暈 -暉 -暑 -暖 -暗 -暘 -暢 -暦 -暫 -暮 -暲 -暴 -暹 -暾 -曄 -曇 -曉 -曖 -曙 -曜 -曝 -曠 -曰 -曲 -曳 -更 -書 -曹 -曼 -曽 -曾 -替 -最 -會 -月 -有 -朋 -服 -朏 -朔 -朕 -朗 -望 -朝 -期 -朧 -木 -未 -末 -本 -札 -朱 -朴 -机 -朽 -杁 -杉 -李 -杏 -材 -村 -杓 -杖 -杜 -杞 -束 -条 -杢 -杣 -来 -杭 -杮 -杯 -東 -杲 -杵 -杷 -杼 -松 -板 -枅 -枇 -析 -枓 -枕 -林 -枚 -果 -枝 -枠 -枡 -枢 -枯 -枳 -架 -柄 -柊 -柏 -某 -柑 -染 -柔 -柘 -柚 -柯 -柱 -柳 -柴 -柵 -査 -柾 -柿 -栂 -栃 -栄 -栖 -栗 -校 -株 -栲 -栴 -核 -根 -栻 -格 -栽 -桁 -桂 -桃 -框 -案 -桐 -桑 -桓 -桔 -桜 -桝 -桟 -桧 -桴 -桶 -桾 -梁 -梅 -梆 -梓 -梔 -梗 -梛 -條 -梟 -梢 -梧 -梨 -械 -梱 -梲 -梵 -梶 -棄 -棋 -棒 -棗 -棘 -棚 -棟 -棠 -森 -棲 -棹 -棺 -椀 -椅 -椋 -植 -椎 -椏 -椒 -椙 -検 -椥 -椹 -椿 -楊 -楓 -楕 -楚 -楞 -楠 -楡 -楢 -楨 -楪 -楫 -業 -楮 -楯 -楳 -極 -楷 -楼 -楽 -概 -榊 -榎 -榕 -榛 -榜 -榮 -榱 -榴 -槃 -槇 -槊 -構 -槌 -槍 -槐 -様 -槙 -槻 -槽 -槿 -樂 -樋 -樓 -樗 -標 -樟 -模 -権 -横 -樫 -樵 -樹 -樺 -樽 -橇 -橋 -橘 -機 -橿 -檀 -檄 -檎 -檐 -檗 -檜 -檣 -檥 -檬 -檮 -檸 -檻 -櫃 -櫓 -櫛 -櫟 -櫨 -櫻 -欄 -欅 -欠 -次 -欣 -欧 -欲 -欺 -欽 -款 -歌 -歎 -歓 -止 -正 -此 -武 -歩 -歪 -歯 -歳 -歴 -死 -殆 -殉 -殊 -残 -殖 -殯 -殴 -段 -殷 -殺 -殻 -殿 -毀 -毅 -母 -毎 -毒 -比 -毘 -毛 -毫 -毬 -氈 -氏 -民 -気 -水 -氷 -永 -氾 -汀 -汁 -求 -汎 -汐 -汗 -汚 -汝 -江 -池 -汪 -汰 -汲 -決 -汽 -沂 -沃 -沅 -沆 -沈 -沌 -沐 -沓 -沖 -沙 -没 -沢 -沱 -河 -沸 -油 -治 -沼 -沽 -沿 -況 -泉 -泊 -泌 -法 -泗 -泡 -波 -泣 -泥 -注 -泯 -泰 -泳 -洋 -洒 -洗 -洛 -洞 -津 -洩 -洪 -洲 -洸 -洹 -活 -洽 -派 -流 -浄 -浅 -浙 -浚 -浜 -浣 -浦 -浩 -浪 -浮 -浴 -海 -浸 -涅 -消 -涌 -涙 -涛 -涯 -液 -涵 -涼 -淀 -淄 -淆 -淇 -淋 -淑 -淘 -淡 -淤 -淨 -淫 -深 -淳 -淵 -混 -淹 -添 -清 -済 -渉 -渋 -渓 -渕 -渚 -減 -渟 -渠 -渡 -渤 -渥 -渦 -温 -渫 -測 -港 -游 -渾 -湊 -湖 -湘 -湛 -湧 -湫 -湯 -湾 -湿 -満 -源 -準 -溜 -溝 -溢 -溥 -溪 -溶 -溺 -滄 -滅 -滋 -滌 -滑 -滕 -滝 -滞 -滴 -滸 -滹 -滿 -漁 -漂 -漆 -漉 -漏 -漑 -演 -漕 -漠 -漢 -漣 -漫 -漬 -漱 -漸 -漿 -潅 -潔 -潙 -潜 -潟 -潤 -潭 -潮 -潰 -潴 -澁 -澂 -澄 -澎 -澗 -澤 -澪 -澱 -澳 -激 -濁 -濃 -濟 -濠 -濡 -濤 -濫 -濯 -濱 -濾 -瀉 -瀋 -瀑 -瀕 -瀞 -瀟 -瀧 -瀬 -瀾 -灌 -灑 -灘 -火 -灯 -灰 -灸 -災 -炉 -炊 -炎 -炒 -炭 -炮 -炷 -点 -為 -烈 -烏 -烙 -烝 -烹 -焔 -焙 -焚 -無 -焦 -然 -焼 -煇 -煉 -煌 -煎 -煕 -煙 -煤 -煥 -照 -煩 -煬 -煮 -煽 -熈 -熊 -熙 -熟 -熨 -熱 -熹 -熾 -燃 -燈 -燎 -燔 -燕 -燗 -燥 -燭 -燻 -爆 -爐 -爪 -爬 -爲 -爵 -父 -爺 -爼 -爽 -爾 -片 -版 -牌 -牒 -牘 -牙 -牛 -牝 -牟 -牡 -牢 -牧 -物 -牲 -特 -牽 -犂 -犠 -犬 -犯 -状 -狂 -狄 -狐 -狗 -狙 -狛 -狡 -狩 -独 -狭 -狷 -狸 -狼 -猊 -猛 -猟 -猥 -猨 -猩 -猪 -猫 -献 -猴 -猶 -猷 -猾 -猿 -獄 -獅 -獏 -獣 -獲 -玄 -玅 -率 -玉 -王 -玖 -玩 -玲 -珀 -珂 -珈 -珉 -珊 -珍 -珎 -珞 -珠 -珣 -珥 -珪 -班 -現 -球 -理 -琉 -琢 -琥 -琦 -琮 -琲 -琳 -琴 -琵 -琶 -瑁 -瑋 -瑙 -瑚 -瑛 -瑜 -瑞 -瑠 -瑤 -瑩 -瑪 -瑳 -瑾 -璃 -璋 -璜 -璞 -璧 -璨 -環 -璵 -璽 -璿 -瓊 -瓔 -瓜 -瓢 -瓦 -瓶 -甍 -甑 -甕 -甘 -甚 -甞 -生 -産 -甥 -用 -甫 -田 -由 -甲 -申 -男 -町 -画 -界 -畏 -畑 -畔 -留 -畜 -畝 -畠 -畢 -略 -番 -異 -畳 -當 -畷 -畸 -畺 -畿 -疆 -疇 -疋 -疎 -疏 -疑 -疫 -疱 -疲 -疹 -疼 -疾 -病 -症 -痒 -痔 -痕 -痘 -痙 -痛 -痢 -痩 -痴 -痺 -瘍 -瘡 -瘧 -療 -癇 -癌 -癒 -癖 -癡 -癪 -発 -登 -白 -百 -的 -皆 -皇 -皋 -皐 -皓 -皮 -皺 -皿 -盂 -盃 -盆 -盈 -益 -盒 -盗 -盛 -盞 -盟 -盡 -監 -盤 -盥 -盧 -目 -盲 -直 -相 -盾 -省 -眉 -看 -県 -眞 -真 -眠 -眷 -眺 -眼 -着 -睡 -督 -睦 -睨 -睿 -瞋 -瞑 -瞞 -瞬 -瞭 -瞰 -瞳 -瞻 -瞼 -瞿 -矍 -矛 -矜 -矢 -知 -矧 -矩 -短 -矮 -矯 -石 -砂 -砌 -研 -砕 -砥 -砦 -砧 -砲 -破 -砺 -硝 -硫 -硬 -硯 -碁 -碇 -碌 -碑 -碓 -碕 -碗 -碣 -碧 -碩 -確 -碾 -磁 -磐 -磔 -磧 -磨 -磬 -磯 -礁 -礎 -礒 -礙 -礫 -礬 -示 -礼 -社 -祀 -祁 -祇 -祈 -祉 -祐 -祓 -祕 -祖 -祗 -祚 -祝 -神 -祟 -祠 -祢 -祥 -票 -祭 -祷 -祺 -禁 -禄 -禅 -禊 -禍 -禎 -福 -禔 -禖 -禛 -禦 -禧 -禮 -禰 -禹 -禽 -禿 -秀 -私 -秋 -科 -秒 -秘 -租 -秤 -秦 -秩 -称 -移 -稀 -程 -税 -稔 -稗 -稙 -稚 -稜 -稠 -種 -稱 -稲 -稷 -稻 -稼 -稽 -稿 -穀 -穂 -穆 -積 -穎 -穏 -穗 -穜 -穢 -穣 -穫 -穴 -究 -空 -突 -窃 -窄 -窒 -窓 -窟 -窠 -窩 -窪 -窮 -窯 -竃 -竄 -竈 -立 -站 -竜 -竝 -竟 -章 -童 -竪 -竭 -端 -竴 -競 -竹 -竺 -竽 -竿 -笄 -笈 -笏 -笑 -笙 -笛 -笞 -笠 -笥 -符 -第 -笹 -筅 -筆 -筇 -筈 -等 -筋 -筌 -筍 -筏 -筐 -筑 -筒 -答 -策 -筝 -筥 -筧 -筬 -筮 -筯 -筰 -筵 -箆 -箇 -箋 -箏 -箒 -箔 -箕 -算 -箙 -箜 -管 -箪 -箭 -箱 -箸 -節 -篁 -範 -篆 -篇 -築 -篋 -篌 -篝 -篠 -篤 -篥 -篦 -篩 -篭 -篳 -篷 -簀 -簒 -簡 -簧 -簪 -簫 -簺 -簾 -簿 -籀 -籃 -籌 -籍 -籐 -籟 -籠 -籤 -籬 -米 -籾 -粂 -粉 -粋 -粒 -粕 -粗 -粘 -粛 -粟 -粥 -粧 -粮 -粳 -精 -糊 -糖 -糜 -糞 -糟 -糠 -糧 -糯 -糸 -糺 -系 -糾 -紀 -約 -紅 -紋 -納 -紐 -純 -紗 -紘 -紙 -級 -紛 -素 -紡 -索 -紫 -紬 -累 -細 -紳 -紵 -紹 -紺 -絁 -終 -絃 -組 -絅 -経 -結 -絖 -絞 -絡 -絣 -給 -統 -絲 -絵 -絶 -絹 -絽 -綏 -經 -継 -続 -綜 -綟 -綬 -維 -綱 -網 -綴 -綸 -綺 -綽 -綾 -綿 -緊 -緋 -総 -緑 -緒 -線 -締 -緥 -編 -緩 -緬 -緯 -練 -緻 -縁 -縄 -縅 -縒 -縛 -縞 -縢 -縣 -縦 -縫 -縮 -縹 -總 -績 -繁 -繊 -繋 -繍 -織 -繕 -繝 -繦 -繧 -繰 -繹 -繼 -纂 -纈 -纏 -纐 -纒 -纛 -缶 -罔 -罠 -罧 -罪 -置 -罰 -署 -罵 -罷 -罹 -羂 -羅 -羆 -羇 -羈 -羊 -羌 -美 -群 -羨 -義 -羯 -羲 -羹 -羽 -翁 -翅 -翌 -習 -翔 -翛 -翠 -翡 -翫 -翰 -翺 -翻 -翼 -耀 -老 -考 -者 -耆 -而 -耐 -耕 -耗 -耨 -耳 -耶 -耽 -聊 -聖 -聘 -聚 -聞 -聟 -聡 -聨 -聯 -聰 -聲 -聴 -職 -聾 -肄 -肆 -肇 -肉 -肋 -肌 -肖 -肘 -肛 -肝 -股 -肢 -肥 -肩 -肪 -肯 -肱 -育 -肴 -肺 -胃 -胆 -背 -胎 -胖 -胚 -胝 -胞 -胡 -胤 -胱 -胴 -胸 -能 -脂 -脅 -脆 -脇 -脈 -脊 -脚 -脛 -脩 -脱 -脳 -腋 -腎 -腐 -腑 -腔 -腕 -腫 -腰 -腱 -腸 -腹 -腺 -腿 -膀 -膏 -膚 -膜 -膝 -膠 -膣 -膨 -膩 -膳 -膵 -膾 -膿 -臂 -臆 -臈 -臍 -臓 -臘 -臚 -臣 -臥 -臨 -自 -臭 -至 -致 -臺 -臼 -舂 -舅 -與 -興 -舌 -舍 -舎 -舒 -舖 -舗 -舘 -舜 -舞 -舟 -舩 -航 -般 -舳 -舶 -船 -艇 -艘 -艦 -艮 -良 -色 -艶 -芋 -芒 -芙 -芝 -芥 -芦 -芬 -芭 -芯 -花 -芳 -芸 -芹 -芻 -芽 -芿 -苅 -苑 -苔 -苗 -苛 -苞 -苡 -若 -苦 -苧 -苫 -英 -苴 -苻 -茂 -范 -茄 -茅 -茎 -茗 -茘 -茜 -茨 -茲 -茵 -茶 -茸 -茹 -草 -荊 -荏 -荒 -荘 -荷 -荻 -荼 -莞 -莪 -莫 -莬 -莱 -莵 -莽 -菅 -菊 -菌 -菓 -菖 -菘 -菜 -菟 -菩 -菫 -華 -菱 -菴 -萄 -萊 -萌 -萍 -萎 -萠 -萩 -萬 -萱 -落 -葉 -著 -葛 -葡 -董 -葦 -葩 -葬 -葭 -葱 -葵 -葺 -蒋 -蒐 -蒔 -蒙 -蒟 -蒡 -蒲 -蒸 -蒻 -蒼 -蒿 -蓄 -蓆 -蓉 -蓋 -蓑 -蓬 -蓮 -蓼 -蔀 -蔑 -蔓 -蔚 -蔡 -蔦 -蔬 -蔭 -蔵 -蔽 -蕃 -蕉 -蕊 -蕎 -蕨 -蕩 -蕪 -蕭 -蕾 -薄 -薇 -薊 -薔 -薗 -薙 -薛 -薦 -薨 -薩 -薪 -薫 -薬 -薭 -薮 -藁 -藉 -藍 -藏 -藐 -藝 -藤 -藩 -藪 -藷 -藹 -藺 -藻 -蘂 -蘆 -蘇 -蘊 -蘭 -虎 -虐 -虔 -虚 -虜 -虞 -號 -虫 -虹 -虻 -蚊 -蚕 -蛇 -蛉 -蛍 -蛎 -蛙 -蛛 -蛟 -蛤 -蛭 -蛮 -蛸 -蛹 -蛾 -蜀 -蜂 -蜃 -蜆 -蜊 -蜘 -蜜 -蜷 -蜻 -蝉 -蝋 -蝕 -蝙 -蝠 -蝦 -蝶 -蝿 -螂 -融 -螣 -螺 -蟄 -蟇 -蟠 -蟷 -蟹 -蟻 -蠢 -蠣 -血 -衆 -行 -衍 -衒 -術 -街 -衙 -衛 -衝 -衞 -衡 -衢 -衣 -表 -衫 -衰 -衵 -衷 -衽 -衾 -衿 -袁 -袈 -袋 -袍 -袒 -袖 -袙 -袞 -袢 -被 -袰 -袱 -袴 -袷 -袿 -裁 -裂 -裃 -装 -裏 -裔 -裕 -裘 -裙 -補 -裟 -裡 -裲 -裳 -裴 -裸 -裹 -製 -裾 -褂 -褄 -複 -褌 -褐 -褒 -褥 -褪 -褶 -褻 -襄 -襖 -襞 -襟 -襠 -襦 -襪 -襲 -襴 -襷 -西 -要 -覆 -覇 -覈 -見 -規 -視 -覗 -覚 -覧 -親 -覲 -観 -覺 -觀 -角 -解 -触 -言 -訂 -計 -討 -訓 -託 -記 -訛 -訟 -訢 -訥 -訪 -設 -許 -訳 -訴 -訶 -診 -註 -証 -詐 -詔 -評 -詛 -詞 -詠 -詢 -詣 -試 -詩 -詫 -詮 -詰 -話 -該 -詳 -誄 -誅 -誇 -誉 -誌 -認 -誓 -誕 -誘 -語 -誠 -誡 -誣 -誤 -誥 -誦 -説 -読 -誰 -課 -誼 -誾 -調 -談 -請 -諌 -諍 -諏 -諒 -論 -諚 -諜 -諟 -諡 -諦 -諧 -諫 -諭 -諮 -諱 -諶 -諷 -諸 -諺 -諾 -謀 -謄 -謌 -謎 -謗 -謙 -謚 -講 -謝 -謡 -謫 -謬 -謹 -證 -識 -譚 -譛 -譜 -警 -譬 -譯 -議 -譲 -譴 -護 -讀 -讃 -讐 -讒 -谷 -谿 -豅 -豆 -豊 -豎 -豐 -豚 -象 -豪 -豫 -豹 -貌 -貝 -貞 -負 -財 -貢 -貧 -貨 -販 -貪 -貫 -責 -貯 -貰 -貴 -買 -貸 -費 -貼 -貿 -賀 -賁 -賂 -賃 -賄 -資 -賈 -賊 -賎 -賑 -賓 -賛 -賜 -賞 -賠 -賢 -賣 -賤 -賦 -質 -賭 -購 -賽 -贄 -贅 -贈 -贋 -贔 -贖 -赤 -赦 -走 -赴 -起 -超 -越 -趙 -趣 -足 -趺 -趾 -跋 -跏 -距 -跡 -跨 -跪 -路 -跳 -践 -踊 -踏 -踐 -踞 -踪 -踵 -蹄 -蹉 -蹊 -蹟 -蹲 -蹴 -躅 -躇 -躊 -躍 -躑 -躙 -躪 -身 -躬 -躯 -躰 -車 -軋 -軌 -軍 -軒 -軟 -転 -軸 -軻 -軽 -軾 -較 -載 -輌 -輔 -輜 -輝 -輦 -輩 -輪 -輯 -輸 -輿 -轄 -轍 -轟 -轢 -辛 -辞 -辟 -辥 -辦 -辨 -辰 -辱 -農 -辺 -辻 -込 -迂 -迅 -迎 -近 -返 -迢 -迦 -迪 -迫 -迭 -述 -迷 -迹 -追 -退 -送 -逃 -逅 -逆 -逍 -透 -逐 -逓 -途 -逕 -逗 -這 -通 -逝 -逞 -速 -造 -逢 -連 -逮 -週 -進 -逸 -逼 -遁 -遂 -遅 -遇 -遊 -運 -遍 -過 -遐 -道 -達 -違 -遙 -遜 -遠 -遡 -遣 -遥 -適 -遭 -遮 -遯 -遵 -遷 -選 -遺 -遼 -避 -邀 -邁 -邂 -邃 -還 -邇 -邉 -邊 -邑 -那 -邦 -邨 -邪 -邯 -邵 -邸 -郁 -郊 -郎 -郡 -郢 -部 -郭 -郴 -郵 -郷 -都 -鄂 -鄙 -鄭 -鄰 -鄲 -酉 -酋 -酌 -配 -酎 -酒 -酔 -酢 -酥 -酪 -酬 -酵 -酷 -酸 -醍 -醐 -醒 -醗 -醜 -醤 -醪 -醵 -醸 -采 -釈 -釉 -釋 -里 -重 -野 -量 -釐 -金 -釘 -釜 -針 -釣 -釧 -釿 -鈍 -鈎 -鈐 -鈔 -鈞 -鈦 -鈴 -鈷 -鈸 -鈿 -鉄 -鉇 -鉉 -鉋 -鉛 -鉢 -鉤 -鉦 -鉱 -鉾 -銀 -銃 -銅 -銈 -銑 -銕 -銘 -銚 -銜 -銭 -鋏 -鋒 -鋤 -鋭 -鋲 -鋳 -鋸 -鋺 -鋼 -錆 -錍 -錐 -錘 -錠 -錣 -錦 -錫 -錬 -錯 -録 -錵 -鍋 -鍍 -鍑 -鍔 -鍛 -鍬 -鍮 -鍵 -鍼 -鍾 -鎌 -鎖 -鎗 -鎚 -鎧 -鎬 -鎮 -鎰 -鎹 -鏃 -鏑 -鏡 -鐃 -鐇 -鐐 -鐔 -鐘 -鐙 -鐚 -鐡 -鐵 -鐸 -鑁 -鑊 -鑑 -鑒 -鑚 -鑠 -鑢 -鑰 -鑵 -鑷 -鑼 -鑽 -鑿 -長 -門 -閃 -閇 -閉 -開 -閏 -閑 -間 -閔 -閘 -関 -閣 -閤 -閥 -閦 -閨 -閬 -閲 -閻 -閼 -閾 -闇 -闍 -闔 -闕 -闘 -關 -闡 -闢 -闥 -阜 -阪 -阮 -阯 -防 -阻 -阿 -陀 -陂 -附 -陌 -降 -限 -陛 -陞 -院 -陣 -除 -陥 -陪 -陬 -陰 -陳 -陵 -陶 -陸 -険 -陽 -隅 -隆 -隈 -隊 -隋 -階 -随 -隔 -際 -障 -隠 -隣 -隧 -隷 -隻 -隼 -雀 -雁 -雄 -雅 -集 -雇 -雉 -雊 -雋 -雌 -雍 -雑 -雖 -雙 -雛 -離 -難 -雨 -雪 -雫 -雰 -雲 -零 -雷 -雹 -電 -需 -震 -霊 -霍 -霖 -霜 -霞 -霧 -霰 -露 -靈 -青 -靖 -静 -靜 -非 -面 -革 -靫 -靭 -靱 -靴 -靺 -鞁 -鞄 -鞆 -鞋 -鞍 -鞏 -鞘 -鞠 -鞨 -鞭 -韋 -韓 -韜 -韮 -音 -韶 -韻 -響 -頁 -頂 -頃 -項 -順 -須 -頌 -預 -頑 -頒 -頓 -領 -頚 -頬 -頭 -頴 -頸 -頻 -頼 -顆 -題 -額 -顎 -顔 -顕 -顗 -願 -顛 -類 -顧 -顯 -風 -飛 -食 -飢 -飩 -飫 -飯 -飲 -飴 -飼 -飽 -飾 -餃 -餅 -餉 -養 -餌 -餐 -餓 -餘 -餝 -餡 -館 -饂 -饅 -饉 -饋 -饌 -饒 -饗 -首 -馗 -香 -馨 -馬 -馳 -馴 -駄 -駅 -駆 -駈 -駐 -駒 -駕 -駝 -駿 -騁 -騎 -騏 -騒 -験 -騙 -騨 -騰 -驕 -驚 -驛 -驢 -骨 -骸 -髄 -體 -高 -髙 -髢 -髪 -髭 -髮 -髷 -髻 -鬘 -鬚 -鬢 -鬨 -鬯 -鬱 -鬼 -魁 -魂 -魄 -魅 -魏 -魔 -魚 -魯 -鮎 -鮑 -鮒 -鮪 -鮫 -鮭 -鮮 -鯉 -鯔 -鯖 -鯛 -鯨 -鯰 -鯱 -鰐 -鰒 -鰭 -鰯 -鰰 -鰹 -鰻 -鱈 -鱒 -鱗 -鱧 -鳥 -鳩 -鳰 -鳳 -鳴 -鳶 -鴈 -鴉 -鴎 -鴛 -鴟 -鴦 -鴨 -鴫 -鴻 -鵄 -鵜 -鵞 -鵡 -鵬 -鵲 -鵺 -鶉 -鶏 -鶯 -鶴 -鷄 -鷙 -鷲 -鷹 -鷺 -鸚 -鸞 -鹸 -鹽 -鹿 -麁 -麒 -麓 -麗 -麝 -麞 -麟 -麦 -麩 -麹 -麺 -麻 -麾 -麿 -黄 -黌 -黍 -黒 -黙 -黛 -黠 -鼈 -鼉 -鼎 -鼓 -鼠 -鼻 -齊 -齋 -齟 -齢 -齬 -龍 -龕 -龗 -! -# -% -& -( -) -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -= -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -R -S -T -U -V -W -X -Z -a -c -d -e -f -h -i -j -k -l -m -n -o -p -r -s -t -u -y -z -~ -・ - diff --git a/backend/ppocr/utils/dict/ka_dict.txt b/backend/ppocr/utils/dict/ka_dict.txt deleted file mode 100644 index d506b69..0000000 --- a/backend/ppocr/utils/dict/ka_dict.txt +++ /dev/null @@ -1,153 +0,0 @@ -k -a -_ -i -m -g -/ -1 -2 -I -L -S -V -R -C -0 -v -l -6 -4 -8 -. -j -p -ಗ -ು -ಣ -ಪ -ಡ -ಿ -ಸ -ಲ -ಾ -ದ -್ -7 -5 -3 -ವ -ಷ -ಬ -ಹ -ೆ -9 -ಅ -ಳ -ನ -ರ -ಉ -ಕ -ಎ -ೇ -ಂ -ೈ -ೊ -ೀ -ಯ -ೋ -ತ -ಶ -ಭ -ಧ -ಚ -ಜ -ೂ -ಮ -ಒ -ೃ -ಥ -ಇ -ಟ -ಖ -ಆ -ಞ -ಫ -- -ಢ -ಊ -ಓ -ಐ -ಃ -ಘ -ಝ -ೌ -ಠ -ಛ -ಔ -ಏ -ಈ -ಋ -೨ -೦ -೧ -೮ -೯ -೪ -, -೫ -೭ -೩ -೬ -ಙ -s -c -e -n -w -o -u -t -d -E -A -T -B -Z -N -G -O -q -z -r -x -P -K -M -J -U -D -f -F -h -b -W -Y -y -H -X -Q -' -# -& -! -@ -$ -: -% -é -É -( -? -+ - diff --git a/backend/ppocr/utils/dict/kie_dict/xfund_class_list.txt b/backend/ppocr/utils/dict/kie_dict/xfund_class_list.txt deleted file mode 100644 index faded9f..0000000 --- a/backend/ppocr/utils/dict/kie_dict/xfund_class_list.txt +++ /dev/null @@ -1,4 +0,0 @@ -OTHER -QUESTION -ANSWER -HEADER diff --git a/backend/ppocr/utils/dict/kn_dict.txt b/backend/ppocr/utils/dict/kn_dict.txt deleted file mode 100644 index 33d605c..0000000 --- a/backend/ppocr/utils/dict/kn_dict.txt +++ /dev/null @@ -1,153 +0,0 @@ -k -a -_ -i -m -g -/ -1 -2 -I -L -S -V -R -C -0 -v -l -6 -4 -8 -. -j -p -ಗ -ು -ಣ -ಪ -ಡ -ಿ -ಸ -ಲ -ಾ -ದ -್ -7 -5 -3 -ವ -ಷ -ಬ -ಹ -ೆ -9 -ಅ -ಳ -ನ -ರ -ಉ -ಕ -ಎ -ೇ -ಂ -ೈ -ೊ -ೀ -ಯ -ೋ -ತ -ಶ -ಭ -ಧ -ಚ -ಜ -ೂ -ಮ -ಒ -ೃ -ಥ -ಇ -ಟ -ಖ -ಆ -ಞ -ಫ -- -ಢ -ಊ -ಓ -ಐ -ಃ -ಘ -ಝ -ೌ -ಠ -ಛ -ಔ -ಏ -ಈ -ಋ -೨ -೦ -೧ -೮ -೯ -೪ -, -೫ -೭ -೩ -೬ -ಙ -s -c -e -n -w -o -u -t -d -E -A -T -B -Z -N -G -O -q -z -r -x -P -K -M -J -U -D -f -F -h -b -W -Y -y -H -X -Q -' -# -& -! -@ -$ -: -% -é -É -( -? -+ - diff --git a/backend/ppocr/utils/dict/korean_dict.txt b/backend/ppocr/utils/dict/korean_dict.txt deleted file mode 100644 index a13899f..0000000 --- a/backend/ppocr/utils/dict/korean_dict.txt +++ /dev/null @@ -1,3688 +0,0 @@ -! -" -# -$ -% -& -' -* -+ -- -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -\ -] -^ -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -{ -| -} -~ -© -° -² -½ -Á -Ä -Å -Ç -É -Í -Î -Ó -Ö -× -Ü -ß -à -á -â -ã -ä -å -æ -ç -è -é -ê -ë -ì -í -î -ï -ð -ñ -ò -ó -ô -õ -ö -ø -ú -û -ü -ý -ā -ă -ą -ć -Č -č -đ -ē -ė -ę -ě -ğ -ī -İ -ı -Ł -ł -ń -ň -ō -ř -Ş -ş -Š -š -ţ -ū -ź -ż -Ž -ž -Ș -ș -Α -Δ -α -λ -φ -Г -О -а -в -л -о -р -с -т -я -​ -’ -“ -” -→ -∇ -∼ -「 -」 -ア -カ -グ -ニ -ラ -ン -ㄱ -ㄴ -ㄷ -ㄸ -ㄹ -ㅂ -ㅅ -ㅆ -ㅇ -ㅈ -ㅊ -ㅋ -ㅌ -ㅎ -ㅓ -ㅜ -ㅣ -一 -丁 -七 -三 -上 -下 -不 -丑 -世 -丘 -丞 -中 -丸 -丹 -主 -乃 -久 -之 -乎 -乘 -九 -也 -乳 -乾 -事 -二 -云 -互 -五 -井 -亞 -亡 -交 -亥 -亨 -享 -京 -亭 -人 -仁 -今 -他 -仙 -代 -令 -以 -仰 -仲 -件 -任 -企 -伊 -伍 -伎 -伏 -伐 -休 -伯 -伴 -伸 -佃 -佈 -位 -低 -住 -佐 -何 -佛 -作 -使 -來 -供 -依 -侯 -侵 -侶 -便 -俗 -保 -俠 -信 -修 -俱 -俳 -倉 -個 -倍 -倒 -候 -借 -値 -倫 -倭 -假 -偈 -偉 -偏 -停 -偶 -傅 -傑 -傳 -傷 -傾 -像 -僞 -僥 -僧 -價 -儀 -儉 -儒 -優 -儼 -兀 -允 -元 -兆 -先 -光 -克 -兒 -入 -內 -全 -八 -公 -六 -共 -兵 -其 -具 -典 -兼 -再 -冠 -冥 -冶 -准 -凞 -凡 -凱 -出 -函 -刀 -分 -刊 -刑 -列 -初 -判 -別 -利 -到 -制 -券 -刺 -刻 -則 -前 -剛 -副 -創 -劃 -劑 -力 -功 -加 -劣 -助 -劫 -勇 -動 -務 -勝 -勢 -勳 -勸 -匈 -化 -北 -匠 -區 -十 -千 -午 -半 -卍 -卑 -卒 -卓 -南 -博 -卜 -占 -卦 -印 -危 -卵 -卷 -卽 -卿 -厄 -原 -厦 -去 -參 -又 -叉 -友 -反 -叔 -受 -口 -古 -句 -可 -台 -史 -右 -司 -各 -合 -吉 -同 -名 -后 -吏 -吐 -君 -吠 -吳 -呂 -告 -周 -味 -呵 -命 -和 -咳 -咸 -咽 -哀 -品 -哨 -哮 -哲 -唐 -唯 -唱 -商 -問 -啼 -善 -喆 -喉 -喜 -喩 -喪 -嘗 -器 -嚴 -囊 -四 -回 -因 -困 -固 -圈 -國 -圍 -園 -圓 -圖 -團 -土 -在 -地 -均 -坊 -坐 -坑 -坵 -型 -垢 -城 -域 -埴 -執 -培 -基 -堂 -堅 -堆 -堤 -堯 -報 -場 -塔 -塚 -塞 -塵 -境 -墜 -墟 -墨 -墳 -墾 -壁 -壇 -壓 -壤 -士 -壬 -壯 -壺 -壽 -夏 -夕 -外 -多 -夜 -夢 -大 -天 -太 -夫 -央 -失 -夷 -奄 -奇 -奉 -奎 -奏 -契 -奔 -奮 -女 -奴 -好 -如 -妄 -妊 -妖 -妙 -始 -姑 -姓 -姚 -姜 -威 -婆 -婚 -婦 -媒 -媚 -子 -孔 -字 -存 -孝 -孟 -季 -孤 -孫 -學 -孺 -宇 -守 -安 -宋 -宗 -官 -宙 -定 -客 -宣 -室 -宮 -害 -家 -容 -寂 -寃 -寄 -寅 -密 -寇 -富 -寒 -寓 -實 -審 -寫 -寬 -寶 -寸 -寺 -封 -將 -專 -尊 -對 -小 -少 -尙 -尹 -尼 -尿 -局 -居 -屈 -屋 -屍 -屎 -屛 -層 -屬 -山 -岐 -岡 -岩 -岳 -岸 -峙 -峰 -島 -峻 -峽 -崇 -崔 -崖 -崩 -嶋 -巖 -川 -州 -巢 -工 -左 -巧 -巨 -巫 -差 -己 -巷 -市 -布 -帝 -師 -帶 -常 -帽 -幕 -干 -平 -年 -幹 -幻 -幼 -幽 -庇 -序 -店 -府 -度 -座 -庫 -庭 -康 -廟 -廣 -廳 -延 -廷 -建 -廻 -弁 -式 -弑 -弓 -引 -弘 -弟 -弱 -張 -强 -弼 -彌 -彛 -形 -彬 -影 -役 -彼 -彿 -往 -征 -待 -律 -後 -徐 -徑 -得 -從 -循 -微 -德 -徹 -心 -必 -忌 -忍 -志 -忠 -思 -怡 -急 -性 -恐 -恒 -恨 -恩 -悅 -悖 -患 -悲 -情 -惑 -惟 -惠 -惡 -想 -惺 -愁 -意 -愚 -愛 -感 -愼 -慈 -態 -慕 -慣 -慧 -慾 -憂 -憤 -憺 -應 -懸 -戎 -成 -我 -戟 -戮 -戰 -戴 -戶 -房 -所 -手 -才 -打 -批 -承 -技 -抄 -把 -抗 -抱 -抽 -拇 -拓 -拘 -拙 -拜 -拾 -持 -指 -捌 -捨 -捿 -授 -掌 -排 -接 -推 -提 -揚 -揭 -援 -損 -搗 -摩 -播 -操 -擒 -擔 -擘 -據 -擧 -攘 -攝 -攬 -支 -改 -攻 -放 -政 -故 -敍 -敎 -救 -敗 -散 -敬 -整 -數 -文 -斗 -料 -斛 -斜 -斧 -斯 -新 -斷 -方 -於 -施 -旋 -族 -旗 -日 -旨 -早 -旱 -昌 -明 -易 -昔 -星 -春 -昧 -昭 -是 -時 -晉 -晋 -晩 -普 -景 -晴 -晶 -智 -暈 -暑 -暗 -暘 -曉 -曜 -曠 -曦 -曰 -曲 -書 -曹 -曼 -曾 -最 -會 -月 -有 -朋 -服 -望 -朝 -期 -木 -未 -末 -本 -朱 -朴 -李 -材 -村 -杖 -杜 -杞 -杭 -杯 -東 -松 -板 -林 -果 -枝 -枯 -枰 -枾 -柏 -柑 -柱 -栗 -校 -栢 -核 -根 -格 -桀 -桂 -案 -桎 -桑 -桓 -桔 -梁 -梏 -梓 -梗 -條 -梨 -梵 -棗 -棟 -森 -植 -椒 -楊 -楓 -楚 -業 -楮 -極 -榮 -槃 -槍 -樂 -樓 -樗 -樣 -樸 -樹 -樺 -樽 -橄 -橋 -橘 -機 -橡 -檀 -檎 -權 -欌 -欖 -次 -欲 -歌 -歐 -止 -正 -此 -步 -武 -歲 -歸 -死 -殖 -段 -殷 -殺 -殿 -毅 -母 -毒 -比 -毛 -氏 -民 -氣 -水 -永 -求 -汎 -汗 -江 -池 -沅 -沒 -沖 -沙 -沛 -河 -油 -治 -沼 -沿 -泉 -泊 -法 -泗 -泡 -波 -注 -泰 -洋 -洙 -洛 -洞 -津 -洲 -活 -派 -流 -浅 -浦 -浮 -浴 -海 -涅 -涇 -消 -涌 -液 -淑 -淡 -淨 -淫 -深 -淳 -淵 -淸 -渠 -渡 -游 -渾 -湖 -湯 -源 -溪 -溫 -溶 -滄 -滅 -滋 -滯 -滿 -漁 -漆 -漢 -漫 -漸 -潑 -潤 -潭 -澄 -澎 -澤 -澳 -澹 -濁 -濕 -濟 -濤 -濯 -瀋 -瀝 -灣 -火 -灰 -灸 -災 -炎 -炭 -点 -烈 -烏 -烙 -焚 -無 -焦 -然 -煌 -煎 -照 -煬 -煮 -熟 -熱 -燁 -燈 -燔 -燕 -燥 -燧 -燮 -爲 -爵 -父 -片 -版 -牌 -牛 -牝 -牟 -牡 -物 -特 -犧 -犬 -狀 -狗 -猥 -猩 -猪 -獨 -獵 -獸 -獻 -玄 -玉 -王 -玲 -珍 -珠 -珪 -班 -現 -球 -理 -琴 -瑞 -瑟 -瑪 -璃 -璋 -璽 -瓜 -瓦 -甑 -甘 -生 -産 -用 -甫 -田 -由 -甲 -申 -男 -界 -畏 -留 -畜 -畢 -略 -番 -異 -畵 -當 -畸 -疏 -疑 -疫 -疹 -疼 -病 -症 -痔 -痛 -痺 -瘀 -瘍 -瘡 -療 -癌 -癖 -登 -發 -白 -百 -的 -皆 -皇 -皮 -盂 -盆 -益 -盛 -盜 -盟 -盡 -盤 -盧 -目 -直 -相 -省 -看 -眞 -眼 -睡 -督 -瞋 -矢 -矣 -知 -短 -石 -破 -碍 -碑 -磁 -磨 -磬 -示 -社 -祇 -祖 -祝 -神 -祥 -祭 -祺 -禁 -禅 -禍 -福 -禦 -禪 -禮 -禹 -禽 -禾 -秀 -私 -秉 -秋 -科 -秘 -秤 -秦 -秩 -移 -稀 -稗 -種 -稱 -稷 -稼 -稽 -穀 -穆 -積 -空 -窮 -竅 -立 -章 -童 -竭 -端 -竹 -笑 -符 -第 -筆 -等 -筍 -答 -策 -箋 -箕 -管 -箱 -節 -篇 -簡 -米 -粉 -粘 -粥 -精 -糖 -糞 -系 -紀 -紂 -約 -紅 -紋 -純 -紙 -級 -素 -索 -紫 -紬 -累 -細 -紳 -終 -組 -結 -絡 -統 -絲 -絶 -絹 -經 -綠 -維 -綱 -網 -綸 -綽 -緖 -線 -緣 -緯 -縣 -縱 -總 -織 -繡 -繩 -繪 -繭 -纂 -續 -罕 -置 -罰 -羅 -羊 -美 -群 -義 -羽 -翁 -習 -翟 -老 -考 -者 -而 -耐 -耕 -耳 -聃 -聖 -聞 -聰 -聲 -職 -肇 -肉 -肖 -肝 -股 -肥 -育 -肺 -胃 -胎 -胚 -胞 -胡 -胥 -能 -脂 -脈 -脚 -脛 -脣 -脩 -脫 -脯 -脾 -腋 -腎 -腫 -腸 -腹 -膜 -膠 -膨 -膽 -臆 -臟 -臣 -臥 -臨 -自 -至 -致 -臺 -臼 -臾 -與 -興 -舊 -舌 -舍 -舒 -舜 -舟 -般 -船 -艦 -良 -色 -芋 -花 -芳 -芽 -苑 -苔 -苕 -苛 -苞 -若 -苦 -英 -茂 -茵 -茶 -茹 -荀 -荇 -草 -荒 -荷 -莊 -莫 -菊 -菌 -菜 -菩 -菫 -華 -菴 -菽 -萊 -萍 -萬 -落 -葉 -著 -葛 -董 -葬 -蒙 -蒜 -蒲 -蒸 -蒿 -蓮 -蔓 -蔘 -蔡 -蔬 -蕃 -蕉 -蕓 -薄 -薑 -薛 -薩 -薪 -薺 -藏 -藝 -藤 -藥 -藩 -藻 -蘆 -蘇 -蘊 -蘚 -蘭 -虎 -處 -虛 -虞 -虹 -蜀 -蜂 -蜜 -蝕 -蝶 -融 -蟬 -蟲 -蠶 -蠻 -血 -衆 -行 -術 -衛 -衡 -衣 -表 -袁 -裔 -裕 -裙 -補 -製 -複 -襄 -西 -要 -見 -視 -親 -覺 -觀 -角 -解 -言 -訂 -訊 -訓 -託 -記 -訣 -設 -診 -註 -評 -詩 -話 -詵 -誅 -誌 -認 -誕 -語 -誠 -誤 -誥 -誦 -說 -調 -談 -諍 -論 -諡 -諫 -諭 -諸 -謙 -講 -謝 -謠 -證 -識 -譚 -譜 -譯 -議 -護 -讀 -變 -谷 -豆 -豊 -豚 -象 -豪 -豫 -貝 -貞 -財 -貧 -貨 -貪 -貫 -貴 -貸 -費 -資 -賊 -賓 -賞 -賢 -賣 -賦 -質 -贍 -赤 -赫 -走 -起 -超 -越 -趙 -趣 -趨 -足 -趾 -跋 -跡 -路 -踏 -蹟 -身 -躬 -車 -軍 -軒 -軟 -載 -輓 -輕 -輪 -輯 -輸 -輻 -輿 -轅 -轉 -辨 -辭 -辯 -辰 -農 -近 -迦 -述 -追 -逆 -透 -逐 -通 -逝 -造 -逢 -連 -進 -逵 -遂 -遊 -運 -遍 -過 -道 -達 -遠 -遡 -適 -遷 -選 -遺 -遽 -還 -邊 -邑 -那 -邪 -郞 -郡 -部 -都 -鄒 -鄕 -鄭 -鄲 -配 -酒 -酸 -醉 -醫 -醯 -釋 -里 -重 -野 -量 -釐 -金 -針 -鈍 -鈴 -鉞 -銀 -銅 -銘 -鋼 -錄 -錢 -錦 -鎭 -鏡 -鐘 -鐵 -鑑 -鑛 -長 -門 -閃 -開 -間 -閔 -閣 -閥 -閭 -閻 -闕 -關 -阪 -防 -阿 -陀 -降 -限 -陝 -院 -陰 -陳 -陵 -陶 -陸 -陽 -隆 -隊 -隋 -階 -際 -障 -隣 -隨 -隱 -隷 -雀 -雄 -雅 -集 -雇 -雌 -雖 -雙 -雜 -離 -難 -雨 -雪 -雲 -電 -霜 -露 -靈 -靑 -靖 -靜 -非 -面 -革 -靴 -鞏 -韓 -音 -韶 -韻 -順 -須 -頊 -頌 -領 -頭 -顔 -願 -顚 -類 -顯 -風 -飛 -食 -飢 -飮 -飯 -飾 -養 -餓 -餘 -首 -香 -馨 -馬 -駒 -騫 -騷 -驕 -骨 -骸 -髓 -體 -高 -髥 -髮 -鬪 -鬱 -鬼 -魏 -魔 -魚 -魯 -鮮 -鰍 -鰐 -鳥 -鳧 -鳳 -鴨 -鵲 -鶴 -鷄 -鷹 -鹽 -鹿 -麗 -麥 -麻 -黃 -黑 -默 -點 -黨 -鼎 -齊 -齋 -齒 -龍 -龜 -가 -각 -간 -갇 -갈 -갉 -감 -갑 -값 -갓 -갔 -강 -갖 -갗 -같 -갚 -갛 -개 -객 -갠 -갤 -갬 -갭 -갯 -갰 -갱 -갸 -걀 -걔 -걘 -거 -걱 -건 -걷 -걸 -검 -겁 -것 -겄 -겅 -겆 -겉 -겊 -겋 -게 -겐 -겔 -겟 -겠 -겡 -겨 -격 -겪 -견 -결 -겸 -겹 -겻 -겼 -경 -곁 -계 -곕 -곗 -고 -곡 -곤 -곧 -골 -곪 -곬 -곯 -곰 -곱 -곳 -공 -곶 -과 -곽 -관 -괄 -괌 -광 -괘 -괜 -괭 -괴 -괸 -굉 -교 -구 -국 -군 -굳 -굴 -굵 -굶 -굼 -굽 -굿 -궁 -궂 -궈 -권 -궐 -궜 -궝 -궤 -귀 -귄 -귈 -귓 -규 -균 -귤 -그 -극 -근 -글 -긁 -금 -급 -긋 -긍 -기 -긴 -길 -김 -깁 -깃 -깅 -깊 -까 -깍 -깎 -깐 -깔 -깜 -깝 -깟 -깡 -깥 -깨 -깬 -깰 -깻 -깼 -깽 -꺄 -꺼 -꺽 -꺾 -껀 -껄 -껌 -껍 -껏 -껐 -껑 -께 -껴 -꼈 -꼍 -꼐 -꼬 -꼭 -꼴 -꼼 -꼽 -꼿 -꽁 -꽂 -꽃 -꽉 -꽝 -꽤 -꽥 -꾀 -꾜 -꾸 -꾹 -꾼 -꿀 -꿇 -꿈 -꿉 -꿋 -꿍 -꿎 -꿔 -꿨 -꿩 -꿰 -꿴 -뀄 -뀌 -뀐 -뀔 -뀜 -뀝 -끄 -끈 -끊 -끌 -끓 -끔 -끕 -끗 -끙 -끝 -끼 -끽 -낀 -낄 -낌 -낍 -낏 -낑 -나 -낙 -낚 -난 -낟 -날 -낡 -남 -납 -낫 -났 -낭 -낮 -낯 -낱 -낳 -내 -낵 -낸 -낼 -냄 -냅 -냇 -냈 -냉 -냐 -냔 -냘 -냥 -너 -넉 -넋 -넌 -널 -넓 -넘 -넙 -넛 -넜 -넝 -넣 -네 -넥 -넨 -넬 -넴 -넵 -넷 -넸 -넹 -녀 -녁 -년 -념 -녔 -녕 -녘 -녜 -노 -녹 -논 -놀 -놈 -놋 -농 -높 -놓 -놔 -놨 -뇌 -뇨 -뇩 -뇽 -누 -눅 -눈 -눌 -눔 -눕 -눗 -눠 -눴 -뉘 -뉜 -뉩 -뉴 -늄 -늅 -늉 -느 -늑 -는 -늘 -늙 -늠 -늡 -능 -늦 -늪 -늬 -니 -닉 -닌 -닐 -님 -닙 -닛 -닝 -닢 -다 -닥 -닦 -단 -닫 -달 -닭 -닮 -닯 -닳 -담 -답 -닷 -당 -닻 -닿 -대 -댁 -댄 -댈 -댐 -댑 -댓 -댔 -댕 -댜 -더 -덕 -덖 -던 -덜 -덟 -덤 -덥 -덧 -덩 -덫 -덮 -데 -덱 -덴 -델 -뎀 -뎃 -뎅 -뎌 -뎠 -뎨 -도 -독 -돈 -돋 -돌 -돔 -돕 -돗 -동 -돛 -돝 -돼 -됐 -되 -된 -될 -됨 -됩 -됴 -두 -둑 -둔 -둘 -둠 -둡 -둣 -둥 -둬 -뒀 -뒤 -뒬 -뒷 -뒹 -듀 -듈 -듐 -드 -득 -든 -듣 -들 -듦 -듬 -듭 -듯 -등 -듸 -디 -딕 -딘 -딛 -딜 -딤 -딥 -딧 -딨 -딩 -딪 -따 -딱 -딴 -딸 -땀 -땄 -땅 -때 -땐 -땔 -땜 -땝 -땠 -땡 -떠 -떡 -떤 -떨 -떫 -떰 -떱 -떳 -떴 -떵 -떻 -떼 -떽 -뗀 -뗄 -뗍 -뗏 -뗐 -뗑 -또 -똑 -똘 -똥 -뙤 -뚜 -뚝 -뚤 -뚫 -뚱 -뛰 -뛴 -뛸 -뜀 -뜁 -뜨 -뜩 -뜬 -뜯 -뜰 -뜸 -뜻 -띄 -띈 -띌 -띔 -띕 -띠 -띤 -띨 -띱 -띵 -라 -락 -란 -랄 -람 -랍 -랏 -랐 -랑 -랒 -랗 -래 -랙 -랜 -랠 -램 -랩 -랫 -랬 -랭 -랴 -략 -량 -러 -럭 -런 -럴 -럼 -럽 -럿 -렀 -렁 -렇 -레 -렉 -렌 -렐 -렘 -렙 -렛 -렝 -려 -력 -련 -렬 -렴 -렵 -렷 -렸 -령 -례 -로 -록 -론 -롤 -롬 -롭 -롯 -롱 -롸 -롹 -뢰 -뢴 -뢸 -룃 -료 -룐 -룡 -루 -룩 -룬 -룰 -룸 -룹 -룻 -룽 -뤄 -뤘 -뤼 -류 -륙 -륜 -률 -륨 -륭 -르 -륵 -른 -를 -름 -릅 -릇 -릉 -릎 -리 -릭 -린 -릴 -림 -립 -릿 -링 -마 -막 -만 -많 -맏 -말 -맑 -맘 -맙 -맛 -망 -맞 -맡 -맣 -매 -맥 -맨 -맬 -맴 -맵 -맷 -맸 -맹 -맺 -먀 -먁 -머 -먹 -먼 -멀 -멈 -멋 -멍 -멎 -메 -멕 -멘 -멜 -멤 -멥 -멧 -멩 -며 -멱 -면 -멸 -몄 -명 -몇 -모 -목 -몫 -몬 -몰 -몸 -몹 -못 -몽 -뫼 -묘 -무 -묵 -묶 -문 -묻 -물 -묽 -뭄 -뭅 -뭇 -뭉 -뭍 -뭏 -뭐 -뭔 -뭘 -뭡 -뭣 -뮈 -뮌 -뮐 -뮤 -뮬 -므 -믈 -믐 -미 -믹 -민 -믿 -밀 -밈 -밉 -밋 -밌 -밍 -및 -밑 -바 -박 -밖 -반 -받 -발 -밝 -밟 -밤 -밥 -밧 -방 -밭 -배 -백 -밴 -밸 -뱀 -뱁 -뱃 -뱄 -뱅 -뱉 -뱍 -뱐 -버 -벅 -번 -벌 -범 -법 -벗 -벙 -벚 -베 -벡 -벤 -벨 -벰 -벱 -벳 -벵 -벼 -벽 -변 -별 -볍 -볏 -볐 -병 -볕 -보 -복 -볶 -본 -볼 -봄 -봅 -봇 -봉 -봐 -봤 -뵈 -뵐 -뵙 -부 -북 -분 -붇 -불 -붉 -붐 -붓 -붕 -붙 -뷔 -뷰 -뷴 -뷸 -브 -븐 -블 -비 -빅 -빈 -빌 -빔 -빕 -빗 -빙 -빚 -빛 -빠 -빡 -빤 -빨 -빳 -빴 -빵 -빻 -빼 -빽 -뺀 -뺄 -뺌 -뺏 -뺐 -뺑 -뺨 -뻐 -뻑 -뻔 -뻗 -뻘 -뻣 -뻤 -뻥 -뻬 -뼈 -뼉 -뼘 -뽀 -뽈 -뽐 -뽑 -뽕 -뾰 -뿌 -뿍 -뿐 -뿔 -뿜 -쁘 -쁜 -쁠 -쁨 -삐 -삔 -삘 -사 -삭 -삯 -산 -살 -삵 -삶 -삼 -삽 -삿 -샀 -상 -샅 -새 -색 -샌 -샐 -샘 -샙 -샛 -샜 -생 -샤 -샨 -샬 -샴 -샵 -샷 -샹 -서 -석 -섞 -선 -섣 -설 -섬 -섭 -섯 -섰 -성 -섶 -세 -섹 -센 -셀 -셈 -셉 -셋 -셌 -셍 -셔 -션 -셜 -셨 -셰 -셴 -셸 -소 -속 -손 -솔 -솜 -솝 -솟 -송 -솥 -쇄 -쇠 -쇤 -쇳 -쇼 -숀 -숄 -숍 -수 -숙 -순 -숟 -술 -숨 -숩 -숫 -숭 -숯 -숱 -숲 -숴 -쉐 -쉘 -쉬 -쉭 -쉰 -쉴 -쉼 -쉽 -슈 -슐 -슘 -슛 -슝 -스 -슥 -슨 -슬 -슭 -슴 -습 -슷 -승 -시 -식 -신 -싣 -실 -싫 -심 -십 -싯 -싱 -싶 -싸 -싹 -싼 -쌀 -쌈 -쌉 -쌌 -쌍 -쌓 -쌔 -쌘 -쌩 -써 -썩 -썬 -썰 -썸 -썹 -썼 -썽 -쎄 -쎈 -쏘 -쏙 -쏜 -쏟 -쏠 -쏭 -쏴 -쐈 -쐐 -쐬 -쑤 -쑥 -쑨 -쒀 -쒔 -쓰 -쓱 -쓴 -쓸 -씀 -씁 -씌 -씨 -씩 -씬 -씰 -씸 -씹 -씻 -씽 -아 -악 -안 -앉 -않 -알 -앎 -앓 -암 -압 -앗 -았 -앙 -앞 -애 -액 -앤 -앨 -앰 -앱 -앳 -앴 -앵 -야 -약 -얀 -얄 -얇 -얌 -얍 -얏 -양 -얕 -얗 -얘 -얜 -어 -억 -언 -얹 -얻 -얼 -얽 -엄 -업 -없 -엇 -었 -엉 -엊 -엌 -엎 -에 -엑 -엔 -엘 -엠 -엡 -엣 -엥 -여 -역 -엮 -연 -열 -엷 -염 -엽 -엾 -엿 -였 -영 -옅 -옆 -옇 -예 -옌 -옐 -옙 -옛 -오 -옥 -온 -올 -옭 -옮 -옳 -옴 -옵 -옷 -옹 -옻 -와 -왁 -완 -왈 -왑 -왓 -왔 -왕 -왜 -왠 -왱 -외 -왼 -요 -욕 -욘 -욜 -욤 -용 -우 -욱 -운 -울 -움 -웁 -웃 -웅 -워 -웍 -원 -월 -웜 -웠 -웡 -웨 -웬 -웰 -웸 -웹 -위 -윅 -윈 -윌 -윔 -윗 -윙 -유 -육 -윤 -율 -윱 -윳 -융 -으 -윽 -은 -을 -읊 -음 -읍 -응 -의 -읜 -읠 -이 -익 -인 -일 -읽 -잃 -임 -입 -잇 -있 -잉 -잊 -잎 -자 -작 -잔 -잖 -잘 -잠 -잡 -잣 -잤 -장 -잦 -재 -잭 -잰 -잴 -잽 -잿 -쟀 -쟁 -쟈 -쟉 -쟤 -저 -적 -전 -절 -젊 -점 -접 -젓 -정 -젖 -제 -젝 -젠 -젤 -젬 -젭 -젯 -져 -젼 -졀 -졌 -졍 -조 -족 -존 -졸 -좀 -좁 -종 -좇 -좋 -좌 -좍 -좽 -죄 -죠 -죤 -주 -죽 -준 -줄 -줌 -줍 -줏 -중 -줘 -줬 -쥐 -쥔 -쥘 -쥬 -쥴 -즈 -즉 -즌 -즐 -즘 -즙 -증 -지 -직 -진 -짇 -질 -짊 -짐 -집 -짓 -징 -짖 -짙 -짚 -짜 -짝 -짠 -짢 -짤 -짧 -짬 -짭 -짰 -짱 -째 -짹 -짼 -쨀 -쨉 -쨋 -쨌 -쨍 -쩄 -쩌 -쩍 -쩐 -쩔 -쩜 -쩝 -쩡 -쩨 -쪄 -쪘 -쪼 -쪽 -쪾 -쫀 -쫄 -쫑 -쫓 -쫙 -쬐 -쭈 -쭉 -쭐 -쭙 -쯔 -쯤 -쯧 -찌 -찍 -찐 -찔 -찜 -찝 -찡 -찢 -찧 -차 -착 -찬 -찮 -찰 -참 -찹 -찻 -찼 -창 -찾 -채 -책 -챈 -챌 -챔 -챕 -챗 -챘 -챙 -챠 -챤 -처 -척 -천 -철 -첨 -첩 -첫 -청 -체 -첵 -첸 -첼 -쳄 -쳇 -쳉 -쳐 -쳔 -쳤 -초 -촉 -촌 -촘 -촛 -총 -촨 -촬 -최 -쵸 -추 -축 -춘 -출 -춤 -춥 -춧 -충 -춰 -췄 -췌 -취 -췬 -츄 -츠 -측 -츨 -츰 -층 -치 -칙 -친 -칠 -칡 -침 -칩 -칫 -칭 -카 -칵 -칸 -칼 -캄 -캅 -캇 -캉 -캐 -캔 -캘 -캠 -캡 -캣 -캤 -캥 -캬 -커 -컥 -컨 -컫 -컬 -컴 -컵 -컷 -컸 -컹 -케 -켄 -켈 -켐 -켓 -켕 -켜 -켠 -켤 -켭 -켯 -켰 -코 -콕 -콘 -콜 -콤 -콥 -콧 -콩 -콰 -콱 -콴 -콸 -쾅 -쾌 -쾡 -쾨 -쾰 -쿄 -쿠 -쿡 -쿤 -쿨 -쿰 -쿵 -쿼 -퀀 -퀄 -퀘 -퀭 -퀴 -퀵 -퀸 -퀼 -큐 -큘 -크 -큰 -클 -큼 -큽 -키 -킥 -킨 -킬 -킴 -킵 -킷 -킹 -타 -탁 -탄 -탈 -탉 -탐 -탑 -탓 -탔 -탕 -태 -택 -탠 -탤 -탬 -탭 -탯 -탰 -탱 -터 -턱 -턴 -털 -텀 -텁 -텃 -텄 -텅 -테 -텍 -텐 -텔 -템 -텝 -텡 -텨 -톈 -토 -톡 -톤 -톨 -톰 -톱 -톳 -통 -퇴 -툇 -투 -툭 -툰 -툴 -툼 -퉁 -퉈 -퉜 -튀 -튄 -튈 -튕 -튜 -튠 -튤 -튬 -트 -특 -튼 -튿 -틀 -틈 -틉 -틋 -틔 -티 -틱 -틴 -틸 -팀 -팁 -팅 -파 -팍 -팎 -판 -팔 -팜 -팝 -팟 -팠 -팡 -팥 -패 -팩 -팬 -팰 -팸 -팻 -팼 -팽 -퍼 -퍽 -펀 -펄 -펌 -펍 -펐 -펑 -페 -펙 -펜 -펠 -펨 -펩 -펫 -펭 -펴 -편 -펼 -폄 -폈 -평 -폐 -포 -폭 -폰 -폴 -폼 -폿 -퐁 -표 -푭 -푸 -푹 -푼 -풀 -품 -풋 -풍 -퓨 -퓬 -퓰 -퓸 -프 -픈 -플 -픔 -픕 -피 -픽 -핀 -필 -핌 -핍 -핏 -핑 -하 -학 -한 -할 -핥 -함 -합 -핫 -항 -해 -핵 -핸 -핼 -햄 -햅 -햇 -했 -행 -햐 -향 -헀 -허 -헉 -헌 -헐 -험 -헙 -헛 -헝 -헤 -헥 -헨 -헬 -헴 -헵 -헷 -헹 -혀 -혁 -현 -혈 -혐 -협 -혓 -혔 -형 -혜 -호 -혹 -혼 -홀 -홈 -홉 -홋 -홍 -홑 -화 -확 -환 -활 -홧 -황 -홰 -홱 -횃 -회 -획 -횝 -횟 -횡 -효 -후 -훅 -훈 -훌 -훑 -훔 -훗 -훤 -훨 -훼 -휄 -휑 -휘 -휙 -휜 -휠 -휩 -휭 -휴 -휼 -흄 -흉 -흐 -흑 -흔 -흘 -흙 -흠 -흡 -흣 -흥 -흩 -희 -흰 -흽 -히 -힉 -힌 -힐 -힘 -힙 -힝 -車 -滑 -金 -奈 -羅 -洛 -卵 -欄 -蘭 -郎 -來 -盧 -老 -魯 -綠 -鹿 -論 -雷 -樓 -縷 -凌 -樂 -不 -參 -葉 -沈 -若 -兩 -凉 -梁 -呂 -女 -廬 -麗 -黎 -曆 -歷 -戀 -蓮 -連 -列 -烈 -裂 -念 -獵 -靈 -領 -例 -禮 -醴 -惡 -尿 -料 -遼 -龍 -暈 -柳 -流 -類 -六 -陸 -倫 -律 -栗 -利 -李 -梨 -理 -離 -燐 -林 -臨 -立 -茶 -切 -宅 - diff --git a/backend/ppocr/utils/dict/latin_dict.txt b/backend/ppocr/utils/dict/latin_dict.txt deleted file mode 100644 index e166bf3..0000000 --- a/backend/ppocr/utils/dict/latin_dict.txt +++ /dev/null @@ -1,185 +0,0 @@ - -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -] -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -{ -} -¡ -£ -§ -ª -« -­ -° -² -³ -´ -µ -· -º -» -¿ -À -Á - -Ä -Å -Ç -È -É -Ê -Ë -Ì -Í -Î -Ï -Ò -Ó -Ô -Õ -Ö -Ú -Ü -Ý -ß -à -á -â -ã -ä -å -æ -ç -è -é -ê -ë -ì -í -î -ï -ñ -ò -ó -ô -õ -ö -ø -ù -ú -û -ü -ý -ą -Ć -ć -Č -č -Đ -đ -ę -ı -Ł -ł -ō -Œ -œ -Š -š -Ÿ -Ž -ž -ʒ -β -δ -ε -з -Ṡ -‘ -€ -™ diff --git a/backend/ppocr/utils/dict/layout_dict/layout_cdla_dict.txt b/backend/ppocr/utils/dict/layout_dict/layout_cdla_dict.txt deleted file mode 100644 index 8be0f48..0000000 --- a/backend/ppocr/utils/dict/layout_dict/layout_cdla_dict.txt +++ /dev/null @@ -1,10 +0,0 @@ -text -title -figure -figure_caption -table -table_caption -header -footer -reference -equation \ No newline at end of file diff --git a/backend/ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt b/backend/ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt deleted file mode 100644 index ca6acf4..0000000 --- a/backend/ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt +++ /dev/null @@ -1,5 +0,0 @@ -text -title -list -table -figure \ No newline at end of file diff --git a/backend/ppocr/utils/dict/layout_dict/layout_table_dict.txt b/backend/ppocr/utils/dict/layout_dict/layout_table_dict.txt deleted file mode 100644 index faea15e..0000000 --- a/backend/ppocr/utils/dict/layout_dict/layout_table_dict.txt +++ /dev/null @@ -1 +0,0 @@ -table \ No newline at end of file diff --git a/backend/ppocr/utils/dict/mr_dict.txt b/backend/ppocr/utils/dict/mr_dict.txt deleted file mode 100644 index 283b150..0000000 --- a/backend/ppocr/utils/dict/mr_dict.txt +++ /dev/null @@ -1,153 +0,0 @@ - -! -# -$ -% -& -' -( -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -É -é -ँ -ं -ः -अ -आ -इ -ई -उ -ऊ -ए -ऐ -ऑ -ओ -औ -क -ख -ग -घ -च -छ -ज -झ -ञ -ट -ठ -ड -ढ -ण -त -थ -द -ध -न -प -फ -ब -भ -म -य -र -ऱ -ल -ळ -व -श -ष -स -ह -़ -ा -ि -ी -ु -ू -ृ -ॅ -े -ै -ॉ -ो -ौ -् -० -१ -२ -३ -४ -५ -६ -७ -८ -९ diff --git a/backend/ppocr/utils/dict/ne_dict.txt b/backend/ppocr/utils/dict/ne_dict.txt deleted file mode 100644 index 5a7df95..0000000 --- a/backend/ppocr/utils/dict/ne_dict.txt +++ /dev/null @@ -1,153 +0,0 @@ - -! -# -$ -% -& -' -( -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -É -é -ः -अ -आ -इ -ई -उ -ऊ -ऋ -ए -ऐ -ओ -औ -क -ख -ग -घ -ङ -च -छ -ज -झ -ञ -ट -ठ -ड -ढ -ण -त -थ -द -ध -न -ऩ -प -फ -ब -भ -म -य -र -ऱ -ल -व -श -ष -स -ह -़ -ा -ि -ी -ु -ू -ृ -े -ै -ो -ौ -् -॒ -ॠ -। -० -१ -२ -३ -४ -५ -६ -७ -८ -९ diff --git a/backend/ppocr/utils/dict/oc_dict.txt b/backend/ppocr/utils/dict/oc_dict.txt deleted file mode 100644 index e88af8b..0000000 --- a/backend/ppocr/utils/dict/oc_dict.txt +++ /dev/null @@ -1,96 +0,0 @@ -o -c -_ -i -m -g -/ -2 -0 -I -L -S -V -R -C -1 -v -a -l -4 -3 -. -j -p -r -e -è -t -9 -7 -5 -8 -n -' -b -s -6 -q -u -á -d -ò -à -h -z -f -ï -í -A -ç -x -ó -é -P -O -Ò -ü -k -À -F -- -ú -­ -æ -Á -D -E -w -K -T -N -y -U -Z -G -B -J -H -M -W -Y -X -Q -% -$ -, -@ -& -! -: -( -# -? -+ -É - diff --git a/backend/ppocr/utils/dict/pt_dict.txt b/backend/ppocr/utils/dict/pt_dict.txt deleted file mode 100644 index 9500fae..0000000 --- a/backend/ppocr/utils/dict/pt_dict.txt +++ /dev/null @@ -1,130 +0,0 @@ -p -u -_ -i -m -g -/ -8 -I -L -S -V -R -C -2 -0 -1 -v -a -l -6 -7 -4 -5 -. -j - -q -e -s -t -ã -o -x -9 -c -n -r -z -ç -õ -3 -A -U -d -º -ô -­ -, -E -; -ó -á -b -D -? -ú -ê -- -h -P -f -à -N -í -O -M -G -É -é -â -F -: -T -Á -" -Q -) -W -J -B -H -( -ö -% -Ö -« -w -K -y -! -k -] -' -Z -+ -Ç -Õ -Y -À -X -µ -» -ª -Í -ü -ä -´ -è -ñ -ß -ï -Ú -ë -Ô -Ï -Ó -[ -Ì -< - -ò -§ -³ -ø -å -# -$ -& -@ diff --git a/backend/ppocr/utils/dict/pu_dict.txt b/backend/ppocr/utils/dict/pu_dict.txt deleted file mode 100644 index 9500fae..0000000 --- a/backend/ppocr/utils/dict/pu_dict.txt +++ /dev/null @@ -1,130 +0,0 @@ -p -u -_ -i -m -g -/ -8 -I -L -S -V -R -C -2 -0 -1 -v -a -l -6 -7 -4 -5 -. -j - -q -e -s -t -ã -o -x -9 -c -n -r -z -ç -õ -3 -A -U -d -º -ô -­ -, -E -; -ó -á -b -D -? -ú -ê -- -h -P -f -à -N -í -O -M -G -É -é -â -F -: -T -Á -" -Q -) -W -J -B -H -( -ö -% -Ö -« -w -K -y -! -k -] -' -Z -+ -Ç -Õ -Y -À -X -µ -» -ª -Í -ü -ä -´ -è -ñ -ß -ï -Ú -ë -Ô -Ï -Ó -[ -Ì -< - -ò -§ -³ -ø -å -# -$ -& -@ diff --git a/backend/ppocr/utils/dict/rs_cyrillic_dict.txt b/backend/ppocr/utils/dict/rs_cyrillic_dict.txt deleted file mode 100644 index 95dd463..0000000 --- a/backend/ppocr/utils/dict/rs_cyrillic_dict.txt +++ /dev/null @@ -1,134 +0,0 @@ -r -s -c -_ -i -m -g -/ -5 -I -L -S -V -R -C -2 -0 -1 -v -a -l -9 -7 -8 -. -j -p -м -а -с -и -р -ћ -е -ш -3 -4 -о -г -н -з -в -л -6 -т -ж -у -к -п -њ -д -ч -С -ј -ф -ц -љ -х -О -И -А -б -Ш -К -ђ -џ -М -В -З -Д -Р -У -Н -Т -Б -? -П -Х -Ј -Ц -Г -Љ -Л -Ф -e -n -w -E -F -A -N -f -o -b -M -G -t -y -W -k -P -u -H -B -T -z -h -O -Y -d -U -K -D -x -X -J -Z -Q -q -' -- -@ -é -# -! -, -% -$ -: -& -+ -( -É - diff --git a/backend/ppocr/utils/dict/rs_dict.txt b/backend/ppocr/utils/dict/rs_dict.txt deleted file mode 100644 index d1ce46d..0000000 --- a/backend/ppocr/utils/dict/rs_dict.txt +++ /dev/null @@ -1,91 +0,0 @@ -r -s -_ -i -m -g -/ -1 -I -L -S -V -R -C -2 -0 -v -a -l -7 -5 -8 -6 -. -j -p - -t -d -9 -3 -e -š -4 -k -u -ć -c -n -đ -o -z -č -b -ž -f -Z -T -h -M -F -O -Š -B -H -A -E -Đ -Ž -D -P -G -Č -K -U -N -J -Ć -w -y -W -x -Y -X -q -Q -# -& -$ -, -- -% -' -@ -! -: -? -( -É -é -+ diff --git a/backend/ppocr/utils/dict/rs_latin_dict.txt b/backend/ppocr/utils/dict/rs_latin_dict.txt deleted file mode 100644 index d1ce46d..0000000 --- a/backend/ppocr/utils/dict/rs_latin_dict.txt +++ /dev/null @@ -1,91 +0,0 @@ -r -s -_ -i -m -g -/ -1 -I -L -S -V -R -C -2 -0 -v -a -l -7 -5 -8 -6 -. -j -p - -t -d -9 -3 -e -š -4 -k -u -ć -c -n -đ -o -z -č -b -ž -f -Z -T -h -M -F -O -Š -B -H -A -E -Đ -Ž -D -P -G -Č -K -U -N -J -Ć -w -y -W -x -Y -X -q -Q -# -& -$ -, -- -% -' -@ -! -: -? -( -É -é -+ diff --git a/backend/ppocr/utils/dict/rsc_dict.txt b/backend/ppocr/utils/dict/rsc_dict.txt deleted file mode 100644 index 95dd463..0000000 --- a/backend/ppocr/utils/dict/rsc_dict.txt +++ /dev/null @@ -1,134 +0,0 @@ -r -s -c -_ -i -m -g -/ -5 -I -L -S -V -R -C -2 -0 -1 -v -a -l -9 -7 -8 -. -j -p -м -а -с -и -р -ћ -е -ш -3 -4 -о -г -н -з -в -л -6 -т -ж -у -к -п -њ -д -ч -С -ј -ф -ц -љ -х -О -И -А -б -Ш -К -ђ -џ -М -В -З -Д -Р -У -Н -Т -Б -? -П -Х -Ј -Ц -Г -Љ -Л -Ф -e -n -w -E -F -A -N -f -o -b -M -G -t -y -W -k -P -u -H -B -T -z -h -O -Y -d -U -K -D -x -X -J -Z -Q -q -' -- -@ -é -# -! -, -% -$ -: -& -+ -( -É - diff --git a/backend/ppocr/utils/dict/ru_dict.txt b/backend/ppocr/utils/dict/ru_dict.txt deleted file mode 100644 index aff9c16..0000000 --- a/backend/ppocr/utils/dict/ru_dict.txt +++ /dev/null @@ -1,163 +0,0 @@ - -! -# -$ -% -& -' -( -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -É -é -Ё -Є -І -Ј -Љ -Ў -А -Б -В -Г -Д -Е -Ж -З -И -Й -К -Л -М -Н -О -П -Р -С -Т -У -Ф -Х -Ц -Ч -Ш -Щ -Ъ -Ы -Ь -Э -Ю -Я -а -б -в -г -д -е -ж -з -и -й -к -л -м -н -о -п -р -с -т -у -ф -х -ц -ч -ш -щ -ъ -ы -ь -э -ю -я -ё -ђ -є -і -ј -љ -њ -ћ -ў -џ -Ґ -ґ diff --git a/backend/ppocr/utils/dict/spin_dict.txt b/backend/ppocr/utils/dict/spin_dict.txt deleted file mode 100644 index 8ee8347..0000000 --- a/backend/ppocr/utils/dict/spin_dict.txt +++ /dev/null @@ -1,68 +0,0 @@ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -: -( -' -- -, -% -> -. -[ -? -) -" -= -_ -* -] -; -& -+ -$ -@ -/ -| -! -< -# -` -{ -~ -\ -} -^ \ No newline at end of file diff --git a/backend/ppocr/utils/dict/ta_dict.txt b/backend/ppocr/utils/dict/ta_dict.txt deleted file mode 100644 index 19d8189..0000000 --- a/backend/ppocr/utils/dict/ta_dict.txt +++ /dev/null @@ -1,128 +0,0 @@ -t -a -_ -i -m -g -/ -3 -I -L -S -V -R -C -2 -0 -1 -v -l -9 -7 -8 -. -j -p -ப -ூ -த -ம -ி -வ -ர -் -ந -ோ -ன -6 -ஆ -ற -ல -5 -ள -ா -ொ -ழ -ு -4 -ெ -ண -க -ட -ை -ே -ச -ய -ஒ -இ -அ -ங -உ -ீ -ஞ -எ -ஓ -ஃ -ஜ -ஷ -ஸ -ஏ -ஊ -ஹ -ஈ -ஐ -ௌ -ஔ -s -c -e -n -w -F -T -O -P -K -A -N -G -Y -E -M -H -U -B -o -b -D -d -r -W -u -y -f -X -k -q -h -J -z -Z -Q -x -- -' -$ -, -% -@ -é -! -# -+ -É -& -: -( -? - diff --git a/backend/ppocr/utils/dict/table_dict.txt b/backend/ppocr/utils/dict/table_dict.txt deleted file mode 100644 index 2ef028c..0000000 --- a/backend/ppocr/utils/dict/table_dict.txt +++ /dev/null @@ -1,277 +0,0 @@ -← - -☆ -─ -α - - -⋅ -$ -ω -ψ -χ -( -υ -≥ -σ -, -ρ -ε -0 -■ -4 -8 -✗ -b -< -✓ -Ψ -Ω -€ -D -3 -Π -H -║ - -L -Φ -Χ -θ -P -κ -λ -μ -T -ξ -X -β -γ -δ -\ -ζ -η -` -d - -h -f -l -Θ -p -√ -t - -x -Β -Γ -Δ -| -ǂ -ɛ -j -̧ -➢ -⁡ -̌ -′ -« -△ -▲ -# - -' -Ι -+ -¶ -/ -▼ -⇑ -□ -· -7 -▪ -; -? -➔ -∩ -C -÷ -G -⇒ -K - -O -S -С -W -Α -[ -○ -_ -● -‡ -c -z -g - -o - -〈 -〉 -s -⩽ -w -φ -ʹ -{ -» -∣ -̆ -e -ˆ -∈ -τ -◆ -ι -∅ -∆ -∙ -∘ -Ø -ß -✔ -∞ -∑ -− -× -◊ -∗ -∖ -˃ -˂ -∫ -" -i -& -π -↔ -* -∥ -æ -∧ -. -⁄ -ø -Q -∼ -6 -⁎ -: -★ -> -a -B -≈ -F -J -̄ -N -♯ -R -V - -― -Z -♣ -^ -¤ -¥ -§ - -¢ -£ -≦ -­ -≤ -‖ -Λ -© -n -↓ -→ -↑ -r -° -± -v - -♂ -k -♀ -~ -ᅟ -̇ -@ -” -♦ -ł -® -⊕ -„ -! - -% -⇓ -) -- -1 -5 -9 -= -А -A -‰ -⋆ -Σ -E -◦ -I -※ -M -m -̨ -⩾ -† - -• -U -Y -
 -] -̸ -2 -‐ -– -‒ -̂ -— -̀ -́ -’ -‘ -⋮ -⋯ -̊ -“ -̈ -≧ -q -u -ı -y - -​ -̃ -} -ν diff --git a/backend/ppocr/utils/dict/table_master_structure_dict.txt b/backend/ppocr/utils/dict/table_master_structure_dict.txt deleted file mode 100644 index 95ab253..0000000 --- a/backend/ppocr/utils/dict/table_master_structure_dict.txt +++ /dev/null @@ -1,39 +0,0 @@ - - - - - - - - - - - colspan="2" - colspan="3" - - - rowspan="2" - colspan="4" - colspan="6" - rowspan="3" - colspan="9" - colspan="10" - colspan="7" - rowspan="4" - rowspan="5" - rowspan="9" - colspan="8" - rowspan="8" - rowspan="6" - rowspan="7" - rowspan="10" - - - - - - - - diff --git a/backend/ppocr/utils/dict/table_structure_dict.txt b/backend/ppocr/utils/dict/table_structure_dict.txt deleted file mode 100644 index 8edb10b..0000000 --- a/backend/ppocr/utils/dict/table_structure_dict.txt +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - - - colspan="2" - colspan="3" - rowspan="2" - colspan="4" - colspan="6" - rowspan="3" - colspan="9" - colspan="10" - colspan="7" - rowspan="4" - rowspan="5" - rowspan="9" - colspan="8" - rowspan="8" - rowspan="6" - rowspan="7" - rowspan="10" \ No newline at end of file diff --git a/backend/ppocr/utils/dict/table_structure_dict_ch.txt b/backend/ppocr/utils/dict/table_structure_dict_ch.txt deleted file mode 100644 index 0c59c0e..0000000 --- a/backend/ppocr/utils/dict/table_structure_dict_ch.txt +++ /dev/null @@ -1,48 +0,0 @@ - - - - - - - - - - colspan="2" - colspan="3" - colspan="4" - colspan="5" - colspan="6" - colspan="7" - colspan="8" - colspan="9" - colspan="10" - colspan="11" - colspan="12" - colspan="13" - colspan="14" - colspan="15" - colspan="16" - colspan="17" - colspan="18" - colspan="19" - colspan="20" - rowspan="2" - rowspan="3" - rowspan="4" - rowspan="5" - rowspan="6" - rowspan="7" - rowspan="8" - rowspan="9" - rowspan="10" - rowspan="11" - rowspan="12" - rowspan="13" - rowspan="14" - rowspan="15" - rowspan="16" - rowspan="17" - rowspan="18" - rowspan="19" - rowspan="20" diff --git a/backend/ppocr/utils/dict/te_dict.txt b/backend/ppocr/utils/dict/te_dict.txt deleted file mode 100644 index 83d74cc..0000000 --- a/backend/ppocr/utils/dict/te_dict.txt +++ /dev/null @@ -1,151 +0,0 @@ -t -e -_ -i -m -g -/ -5 -I -L -S -V -R -C -2 -0 -1 -v -a -l -3 -4 -8 -9 -. -j -p -త -ె -ర -క -్ -ి -ం -చ -ే -ద -ు -7 -6 -ఉ -ా -మ -ట -ో -వ -ప -ల -శ -ఆ -య -ై -భ -' -ీ -గ -ూ -డ -ధ -హ -న -జ -స -[ -‌ -ష -అ -ణ -ఫ -బ -ఎ -; -ళ -థ -ొ -ఠ -ృ -ఒ -ఇ -ః -ఊ -ఖ -- -ఐ -ఘ -ౌ -ఏ -ఈ -ఛ -, -ఓ -ఞ -| -? -: -ఢ -" -( -” -! -+ -) -* -= -& -“ -€ -] -£ -$ -s -c -n -w -k -J -G -u -d -r -E -o -h -y -b -f -B -M -O -T -N -D -P -A -F -x -W -Y -U -H -K -X -z -Z -Q -q -É -% -# -@ -é diff --git a/backend/ppocr/utils/dict/ug_dict.txt b/backend/ppocr/utils/dict/ug_dict.txt deleted file mode 100644 index 77602f2..0000000 --- a/backend/ppocr/utils/dict/ug_dict.txt +++ /dev/null @@ -1,114 +0,0 @@ -u -g -_ -i -m -/ -1 -I -L -S -V -R -C -2 -0 -v -a -l -8 -5 -3 -6 -9 -. -j -p - -ق -ا -پ -ل -4 -7 -ئ -ى -ش -ت -ي -ك -د -ف -ر -و -ن -ب -ە -خ -ې -چ -ۇ -ز -س -م -ۋ -گ -ڭ -ۆ -ۈ -ج -غ -ھ -ژ -s -c -e -n -w -P -E -D -U -d -r -b -y -B -o -O -Y -N -T -k -t -h -A -H -F -z -W -K -G -M -f -Z -X -Q -J -x -q -- -! -% -# -? -: -$ -, -& -' -É -@ -é -( -+ diff --git a/backend/ppocr/utils/dict/uk_dict.txt b/backend/ppocr/utils/dict/uk_dict.txt deleted file mode 100644 index c5ffc0a..0000000 --- a/backend/ppocr/utils/dict/uk_dict.txt +++ /dev/null @@ -1,142 +0,0 @@ -u -k -_ -i -m -g -/ -1 -6 -I -L -S -V -R -C -2 -0 -v -a -l -7 -9 -. -j -p -в -і -д -п -о -н -с -т -ю -4 -5 -3 -а -и -м -е -р -ч -у -Б -з -л -к -8 -А -В -г -є -б -ь -х -ґ -ш -ц -ф -я -щ -ж -Г -Х -У -Т -Е -І -Н -П -З -Л -Ю -С -Д -М -К -Р -Ф -О -Ц -И -Я -Ч -Ш -Ж -Є -Ґ -Ь -s -c -e -n -w -A -P -r -E -t -o -h -d -y -M -G -N -F -B -T -D -U -O -W -Z -f -H -Y -b -K -z -x -Q -X -q -J -$ -- -' -# -& -% -? -: -! -, -+ -@ -( -é -É - diff --git a/backend/ppocr/utils/dict/ur_dict.txt b/backend/ppocr/utils/dict/ur_dict.txt deleted file mode 100644 index c06786a..0000000 --- a/backend/ppocr/utils/dict/ur_dict.txt +++ /dev/null @@ -1,137 +0,0 @@ -u -r -_ -i -m -g -/ -3 -I -L -S -V -R -C -2 -0 -1 -v -a -l -9 -7 -8 -. -j -p - -چ -ٹ -پ -ا -ئ -ی -ے -4 -6 -و -ل -ن -ڈ -ھ -ک -ت -ش -ف -ق -ر -د -5 -ب -ج -خ -ہ -س -ز -غ -ڑ -ں -آ -م -ؤ -ط -ص -ح -ع -گ -ث -ض -ذ -ۓ -ِ -ء -ظ -ً -ي -ُ -ۃ -أ -ٰ -ە -ژ -ۂ -ة -ّ -ك -ه -s -c -e -n -w -o -d -t -D -M -T -U -E -b -P -h -y -W -H -A -x -B -O -N -G -Y -Q -F -k -K -q -J -Z -f -z -X -' -@ -& -! -, -: -$ -- -# -? -% -é -+ -( -É diff --git a/backend/ppocr/utils/dict/xi_dict.txt b/backend/ppocr/utils/dict/xi_dict.txt deleted file mode 100644 index f195f1e..0000000 --- a/backend/ppocr/utils/dict/xi_dict.txt +++ /dev/null @@ -1,110 +0,0 @@ -x -i -_ -m -g -/ -1 -0 -I -L -S -V -R -C -2 -v -a -l -3 -6 -4 -5 -. -j -p - -Q -u -e -r -o -8 -7 -n -c -9 -t -b -é -q -d -ó -y -F -s -, -O -í -T -f -" -U -M -h -: -P -H -A -E -D -z -N -á -ñ -ú -% -; -è -+ -Y -- -B -G -( -) -¿ -? -w -¡ -! -X -É -K -k -Á -ü -Ú -« -» -J -' -ö -W -Z -º -Ö -­ -[ -] -Ç -ç -à -ä -û -ò -Í -ê -ô -ø -ª diff --git a/backend/ppocr/utils/e2e_metric/Deteval.py b/backend/ppocr/utils/e2e_metric/Deteval.py deleted file mode 100755 index 45567a7..0000000 --- a/backend/ppocr/utils/e2e_metric/Deteval.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import scipy.io as io -from ppocr.utils.e2e_metric.polygon_fast import iod, area_of_intersection, area - - -def get_socre_A(gt_dir, pred_dict): - allInputs = 1 - - def input_reading_mod(pred_dict): - """This helper reads input from txt files""" - det = [] - n = len(pred_dict) - for i in range(n): - points = pred_dict[i]['points'] - text = pred_dict[i]['texts'] - point = ",".join(map(str, points.reshape(-1, ))) - det.append([point, text]) - return det - - def gt_reading_mod(gt_dict): - """This helper reads groundtruths from mat files""" - gt = [] - n = len(gt_dict) - for i in range(n): - points = gt_dict[i]['points'].tolist() - h = len(points) - text = gt_dict[i]['text'] - xx = [ - np.array( - ['x:'], dtype=' 1): - gt_x = list(map(int, np.squeeze(gt[1]))) - gt_y = list(map(int, np.squeeze(gt[3]))) - for det_id, detection in enumerate(detections): - detection_orig = detection - detection = [float(x) for x in detection[0].split(',')] - detection = list(map(int, detection)) - det_x = detection[0::2] - det_y = detection[1::2] - det_gt_iou = iod(det_x, det_y, gt_x, gt_y) - if det_gt_iou > threshold: - detections[det_id] = [] - - detections[:] = [item for item in detections if item != []] - return detections - - def sigma_calculation(det_x, det_y, gt_x, gt_y): - """ - sigma = inter_area / gt_area - """ - return np.round((area_of_intersection(det_x, det_y, gt_x, gt_y) / - area(gt_x, gt_y)), 2) - - def tau_calculation(det_x, det_y, gt_x, gt_y): - if area(det_x, det_y) == 0.0: - return 0 - return np.round((area_of_intersection(det_x, det_y, gt_x, gt_y) / - area(det_x, det_y)), 2) - - ##############################Initialization################################### - # global_sigma = [] - # global_tau = [] - # global_pred_str = [] - # global_gt_str = [] - ############################################################################### - - for input_id in range(allInputs): - if (input_id != '.DS_Store') and (input_id != 'Pascal_result.txt') and ( - input_id != 'Pascal_result_curved.txt') and (input_id != 'Pascal_result_non_curved.txt') and ( - input_id != 'Deteval_result.txt') and (input_id != 'Deteval_result_curved.txt') \ - and (input_id != 'Deteval_result_non_curved.txt'): - detections = input_reading_mod(pred_dict) - groundtruths = gt_reading_mod(gt_dir) - detections = detection_filtering( - detections, - groundtruths) # filters detections overlapping with DC area - dc_id = [] - for i in range(len(groundtruths)): - if groundtruths[i][5] == '#': - dc_id.append(i) - cnt = 0 - for a in dc_id: - num = a - cnt - del groundtruths[num] - cnt += 1 - - local_sigma_table = np.zeros((len(groundtruths), len(detections))) - local_tau_table = np.zeros((len(groundtruths), len(detections))) - local_pred_str = {} - local_gt_str = {} - - for gt_id, gt in enumerate(groundtruths): - if len(detections) > 0: - for det_id, detection in enumerate(detections): - detection_orig = detection - detection = [float(x) for x in detection[0].split(',')] - detection = list(map(int, detection)) - pred_seq_str = detection_orig[1].strip() - det_x = detection[0::2] - det_y = detection[1::2] - gt_x = list(map(int, np.squeeze(gt[1]))) - gt_y = list(map(int, np.squeeze(gt[3]))) - gt_seq_str = str(gt[4].tolist()[0]) - - local_sigma_table[gt_id, det_id] = sigma_calculation( - det_x, det_y, gt_x, gt_y) - local_tau_table[gt_id, det_id] = tau_calculation( - det_x, det_y, gt_x, gt_y) - local_pred_str[det_id] = pred_seq_str - local_gt_str[gt_id] = gt_seq_str - - global_sigma = local_sigma_table - global_tau = local_tau_table - global_pred_str = local_pred_str - global_gt_str = local_gt_str - - single_data = {} - single_data['sigma'] = global_sigma - single_data['global_tau'] = global_tau - single_data['global_pred_str'] = global_pred_str - single_data['global_gt_str'] = global_gt_str - return single_data - - -def get_socre_B(gt_dir, img_id, pred_dict): - allInputs = 1 - - def input_reading_mod(pred_dict): - """This helper reads input from txt files""" - det = [] - n = len(pred_dict) - for i in range(n): - points = pred_dict[i]['points'] - text = pred_dict[i]['texts'] - point = ",".join(map(str, points.reshape(-1, ))) - det.append([point, text]) - return det - - def gt_reading_mod(gt_dir, gt_id): - gt = io.loadmat('%s/poly_gt_img%s.mat' % (gt_dir, gt_id)) - gt = gt['polygt'] - return gt - - def detection_filtering(detections, groundtruths, threshold=0.5): - for gt_id, gt in enumerate(groundtruths): - if (gt[5] == '#') and (gt[1].shape[1] > 1): - gt_x = list(map(int, np.squeeze(gt[1]))) - gt_y = list(map(int, np.squeeze(gt[3]))) - for det_id, detection in enumerate(detections): - detection_orig = detection - detection = [float(x) for x in detection[0].split(',')] - detection = list(map(int, detection)) - det_x = detection[0::2] - det_y = detection[1::2] - det_gt_iou = iod(det_x, det_y, gt_x, gt_y) - if det_gt_iou > threshold: - detections[det_id] = [] - - detections[:] = [item for item in detections if item != []] - return detections - - def sigma_calculation(det_x, det_y, gt_x, gt_y): - """ - sigma = inter_area / gt_area - """ - return np.round((area_of_intersection(det_x, det_y, gt_x, gt_y) / - area(gt_x, gt_y)), 2) - - def tau_calculation(det_x, det_y, gt_x, gt_y): - if area(det_x, det_y) == 0.0: - return 0 - return np.round((area_of_intersection(det_x, det_y, gt_x, gt_y) / - area(det_x, det_y)), 2) - - ##############################Initialization################################### - # global_sigma = [] - # global_tau = [] - # global_pred_str = [] - # global_gt_str = [] - ############################################################################### - - for input_id in range(allInputs): - if (input_id != '.DS_Store') and (input_id != 'Pascal_result.txt') and ( - input_id != 'Pascal_result_curved.txt') and (input_id != 'Pascal_result_non_curved.txt') and ( - input_id != 'Deteval_result.txt') and (input_id != 'Deteval_result_curved.txt') \ - and (input_id != 'Deteval_result_non_curved.txt'): - detections = input_reading_mod(pred_dict) - groundtruths = gt_reading_mod(gt_dir, img_id).tolist() - detections = detection_filtering( - detections, - groundtruths) # filters detections overlapping with DC area - dc_id = [] - for i in range(len(groundtruths)): - if groundtruths[i][5] == '#': - dc_id.append(i) - cnt = 0 - for a in dc_id: - num = a - cnt - del groundtruths[num] - cnt += 1 - - local_sigma_table = np.zeros((len(groundtruths), len(detections))) - local_tau_table = np.zeros((len(groundtruths), len(detections))) - local_pred_str = {} - local_gt_str = {} - - for gt_id, gt in enumerate(groundtruths): - if len(detections) > 0: - for det_id, detection in enumerate(detections): - detection_orig = detection - detection = [float(x) for x in detection[0].split(',')] - detection = list(map(int, detection)) - pred_seq_str = detection_orig[1].strip() - det_x = detection[0::2] - det_y = detection[1::2] - gt_x = list(map(int, np.squeeze(gt[1]))) - gt_y = list(map(int, np.squeeze(gt[3]))) - gt_seq_str = str(gt[4].tolist()[0]) - - local_sigma_table[gt_id, det_id] = sigma_calculation( - det_x, det_y, gt_x, gt_y) - local_tau_table[gt_id, det_id] = tau_calculation( - det_x, det_y, gt_x, gt_y) - local_pred_str[det_id] = pred_seq_str - local_gt_str[gt_id] = gt_seq_str - - global_sigma = local_sigma_table - global_tau = local_tau_table - global_pred_str = local_pred_str - global_gt_str = local_gt_str - - single_data = {} - single_data['sigma'] = global_sigma - single_data['global_tau'] = global_tau - single_data['global_pred_str'] = global_pred_str - single_data['global_gt_str'] = global_gt_str - return single_data - - -def combine_results(all_data): - tr = 0.7 - tp = 0.6 - fsc_k = 0.8 - k = 2 - global_sigma = [] - global_tau = [] - global_pred_str = [] - global_gt_str = [] - for data in all_data: - global_sigma.append(data['sigma']) - global_tau.append(data['global_tau']) - global_pred_str.append(data['global_pred_str']) - global_gt_str.append(data['global_gt_str']) - - global_accumulative_recall = 0 - global_accumulative_precision = 0 - total_num_gt = 0 - total_num_det = 0 - hit_str_count = 0 - hit_count = 0 - - def one_to_one(local_sigma_table, local_tau_table, - local_accumulative_recall, local_accumulative_precision, - global_accumulative_recall, global_accumulative_precision, - gt_flag, det_flag, idy): - hit_str_num = 0 - for gt_id in range(num_gt): - gt_matching_qualified_sigma_candidates = np.where( - local_sigma_table[gt_id, :] > tr) - gt_matching_num_qualified_sigma_candidates = gt_matching_qualified_sigma_candidates[ - 0].shape[0] - gt_matching_qualified_tau_candidates = np.where( - local_tau_table[gt_id, :] > tp) - gt_matching_num_qualified_tau_candidates = gt_matching_qualified_tau_candidates[ - 0].shape[0] - - det_matching_qualified_sigma_candidates = np.where( - local_sigma_table[:, gt_matching_qualified_sigma_candidates[0]] - > tr) - det_matching_num_qualified_sigma_candidates = det_matching_qualified_sigma_candidates[ - 0].shape[0] - det_matching_qualified_tau_candidates = np.where( - local_tau_table[:, gt_matching_qualified_tau_candidates[0]] > - tp) - det_matching_num_qualified_tau_candidates = det_matching_qualified_tau_candidates[ - 0].shape[0] - - if (gt_matching_num_qualified_sigma_candidates == 1) and (gt_matching_num_qualified_tau_candidates == 1) and \ - (det_matching_num_qualified_sigma_candidates == 1) and ( - det_matching_num_qualified_tau_candidates == 1): - global_accumulative_recall = global_accumulative_recall + 1.0 - global_accumulative_precision = global_accumulative_precision + 1.0 - local_accumulative_recall = local_accumulative_recall + 1.0 - local_accumulative_precision = local_accumulative_precision + 1.0 - - gt_flag[0, gt_id] = 1 - matched_det_id = np.where(local_sigma_table[gt_id, :] > tr) - # recg start - gt_str_cur = global_gt_str[idy][gt_id] - pred_str_cur = global_pred_str[idy][matched_det_id[0].tolist()[ - 0]] - if pred_str_cur == gt_str_cur: - hit_str_num += 1 - else: - if pred_str_cur.lower() == gt_str_cur.lower(): - hit_str_num += 1 - # recg end - det_flag[0, matched_det_id] = 1 - return local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, det_flag, hit_str_num - - def one_to_many(local_sigma_table, local_tau_table, - local_accumulative_recall, local_accumulative_precision, - global_accumulative_recall, global_accumulative_precision, - gt_flag, det_flag, idy): - hit_str_num = 0 - for gt_id in range(num_gt): - # skip the following if the groundtruth was matched - if gt_flag[0, gt_id] > 0: - continue - - non_zero_in_sigma = np.where(local_sigma_table[gt_id, :] > 0) - num_non_zero_in_sigma = non_zero_in_sigma[0].shape[0] - - if num_non_zero_in_sigma >= k: - ####search for all detections that overlaps with this groundtruth - qualified_tau_candidates = np.where((local_tau_table[ - gt_id, :] >= tp) & (det_flag[0, :] == 0)) - num_qualified_tau_candidates = qualified_tau_candidates[ - 0].shape[0] - - if num_qualified_tau_candidates == 1: - if ((local_tau_table[gt_id, qualified_tau_candidates] >= tp) - and - (local_sigma_table[gt_id, qualified_tau_candidates] >= - tr)): - # became an one-to-one case - global_accumulative_recall = global_accumulative_recall + 1.0 - global_accumulative_precision = global_accumulative_precision + 1.0 - local_accumulative_recall = local_accumulative_recall + 1.0 - local_accumulative_precision = local_accumulative_precision + 1.0 - - gt_flag[0, gt_id] = 1 - det_flag[0, qualified_tau_candidates] = 1 - # recg start - gt_str_cur = global_gt_str[idy][gt_id] - pred_str_cur = global_pred_str[idy][ - qualified_tau_candidates[0].tolist()[0]] - if pred_str_cur == gt_str_cur: - hit_str_num += 1 - else: - if pred_str_cur.lower() == gt_str_cur.lower(): - hit_str_num += 1 - # recg end - elif (np.sum(local_sigma_table[gt_id, qualified_tau_candidates]) - >= tr): - gt_flag[0, gt_id] = 1 - det_flag[0, qualified_tau_candidates] = 1 - # recg start - gt_str_cur = global_gt_str[idy][gt_id] - pred_str_cur = global_pred_str[idy][ - qualified_tau_candidates[0].tolist()[0]] - if pred_str_cur == gt_str_cur: - hit_str_num += 1 - else: - if pred_str_cur.lower() == gt_str_cur.lower(): - hit_str_num += 1 - # recg end - - global_accumulative_recall = global_accumulative_recall + fsc_k - global_accumulative_precision = global_accumulative_precision + num_qualified_tau_candidates * fsc_k - - local_accumulative_recall = local_accumulative_recall + fsc_k - local_accumulative_precision = local_accumulative_precision + num_qualified_tau_candidates * fsc_k - - return local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, det_flag, hit_str_num - - def many_to_one(local_sigma_table, local_tau_table, - local_accumulative_recall, local_accumulative_precision, - global_accumulative_recall, global_accumulative_precision, - gt_flag, det_flag, idy): - hit_str_num = 0 - for det_id in range(num_det): - # skip the following if the detection was matched - if det_flag[0, det_id] > 0: - continue - - non_zero_in_tau = np.where(local_tau_table[:, det_id] > 0) - num_non_zero_in_tau = non_zero_in_tau[0].shape[0] - - if num_non_zero_in_tau >= k: - ####search for all detections that overlaps with this groundtruth - qualified_sigma_candidates = np.where(( - local_sigma_table[:, det_id] >= tp) & (gt_flag[0, :] == 0)) - num_qualified_sigma_candidates = qualified_sigma_candidates[ - 0].shape[0] - - if num_qualified_sigma_candidates == 1: - if ((local_tau_table[qualified_sigma_candidates, det_id] >= - tp) and - (local_sigma_table[qualified_sigma_candidates, det_id] - >= tr)): - # became an one-to-one case - global_accumulative_recall = global_accumulative_recall + 1.0 - global_accumulative_precision = global_accumulative_precision + 1.0 - local_accumulative_recall = local_accumulative_recall + 1.0 - local_accumulative_precision = local_accumulative_precision + 1.0 - - gt_flag[0, qualified_sigma_candidates] = 1 - det_flag[0, det_id] = 1 - # recg start - pred_str_cur = global_pred_str[idy][det_id] - gt_len = len(qualified_sigma_candidates[0]) - for idx in range(gt_len): - ele_gt_id = qualified_sigma_candidates[0].tolist()[ - idx] - if ele_gt_id not in global_gt_str[idy]: - continue - gt_str_cur = global_gt_str[idy][ele_gt_id] - if pred_str_cur == gt_str_cur: - hit_str_num += 1 - break - else: - if pred_str_cur.lower() == gt_str_cur.lower(): - hit_str_num += 1 - break - # recg end - elif (np.sum(local_tau_table[qualified_sigma_candidates, - det_id]) >= tp): - det_flag[0, det_id] = 1 - gt_flag[0, qualified_sigma_candidates] = 1 - # recg start - pred_str_cur = global_pred_str[idy][det_id] - gt_len = len(qualified_sigma_candidates[0]) - for idx in range(gt_len): - ele_gt_id = qualified_sigma_candidates[0].tolist()[idx] - if ele_gt_id not in global_gt_str[idy]: - continue - gt_str_cur = global_gt_str[idy][ele_gt_id] - if pred_str_cur == gt_str_cur: - hit_str_num += 1 - break - else: - if pred_str_cur.lower() == gt_str_cur.lower(): - hit_str_num += 1 - break - # recg end - - global_accumulative_recall = global_accumulative_recall + num_qualified_sigma_candidates * fsc_k - global_accumulative_precision = global_accumulative_precision + fsc_k - - local_accumulative_recall = local_accumulative_recall + num_qualified_sigma_candidates * fsc_k - local_accumulative_precision = local_accumulative_precision + fsc_k - return local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, det_flag, hit_str_num - - for idx in range(len(global_sigma)): - local_sigma_table = np.array(global_sigma[idx]) - local_tau_table = global_tau[idx] - - num_gt = local_sigma_table.shape[0] - num_det = local_sigma_table.shape[1] - - total_num_gt = total_num_gt + num_gt - total_num_det = total_num_det + num_det - - local_accumulative_recall = 0 - local_accumulative_precision = 0 - gt_flag = np.zeros((1, num_gt)) - det_flag = np.zeros((1, num_det)) - - #######first check for one-to-one case########## - local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \ - gt_flag, det_flag, hit_str_num = one_to_one(local_sigma_table, local_tau_table, - local_accumulative_recall, local_accumulative_precision, - global_accumulative_recall, global_accumulative_precision, - gt_flag, det_flag, idx) - - hit_str_count += hit_str_num - #######then check for one-to-many case########## - local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \ - gt_flag, det_flag, hit_str_num = one_to_many(local_sigma_table, local_tau_table, - local_accumulative_recall, local_accumulative_precision, - global_accumulative_recall, global_accumulative_precision, - gt_flag, det_flag, idx) - hit_str_count += hit_str_num - #######then check for many-to-one case########## - local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \ - gt_flag, det_flag, hit_str_num = many_to_one(local_sigma_table, local_tau_table, - local_accumulative_recall, local_accumulative_precision, - global_accumulative_recall, global_accumulative_precision, - gt_flag, det_flag, idx) - hit_str_count += hit_str_num - - try: - recall = global_accumulative_recall / total_num_gt - except ZeroDivisionError: - recall = 0 - - try: - precision = global_accumulative_precision / total_num_det - except ZeroDivisionError: - precision = 0 - - try: - f_score = 2 * precision * recall / (precision + recall) - except ZeroDivisionError: - f_score = 0 - - try: - seqerr = 1 - float(hit_str_count) / global_accumulative_recall - except ZeroDivisionError: - seqerr = 1 - - try: - recall_e2e = float(hit_str_count) / total_num_gt - except ZeroDivisionError: - recall_e2e = 0 - - try: - precision_e2e = float(hit_str_count) / total_num_det - except ZeroDivisionError: - precision_e2e = 0 - - try: - f_score_e2e = 2 * precision_e2e * recall_e2e / ( - precision_e2e + recall_e2e) - except ZeroDivisionError: - f_score_e2e = 0 - - final = { - 'total_num_gt': total_num_gt, - 'total_num_det': total_num_det, - 'global_accumulative_recall': global_accumulative_recall, - 'hit_str_count': hit_str_count, - 'recall': recall, - 'precision': precision, - 'f_score': f_score, - 'seqerr': seqerr, - 'recall_e2e': recall_e2e, - 'precision_e2e': precision_e2e, - 'f_score_e2e': f_score_e2e - } - return final diff --git a/backend/ppocr/utils/e2e_metric/polygon_fast.py b/backend/ppocr/utils/e2e_metric/polygon_fast.py deleted file mode 100755 index 81c9ad7..0000000 --- a/backend/ppocr/utils/e2e_metric/polygon_fast.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -from shapely.geometry import Polygon -""" -:param det_x: [1, N] Xs of detection's vertices -:param det_y: [1, N] Ys of detection's vertices -:param gt_x: [1, N] Xs of groundtruth's vertices -:param gt_y: [1, N] Ys of groundtruth's vertices - -############## -All the calculation of 'AREA' in this script is handled by: -1) First generating a binary mask with the polygon area filled up with 1's -2) Summing up all the 1's -""" - - -def area(x, y): - polygon = Polygon(np.stack([x, y], axis=1)) - return float(polygon.area) - - -def approx_area_of_intersection(det_x, det_y, gt_x, gt_y): - """ - This helper determine if both polygons are intersecting with each others with an approximation method. - Area of intersection represented by the minimum bounding rectangular [xmin, ymin, xmax, ymax] - """ - det_ymax = np.max(det_y) - det_xmax = np.max(det_x) - det_ymin = np.min(det_y) - det_xmin = np.min(det_x) - - gt_ymax = np.max(gt_y) - gt_xmax = np.max(gt_x) - gt_ymin = np.min(gt_y) - gt_xmin = np.min(gt_x) - - all_min_ymax = np.minimum(det_ymax, gt_ymax) - all_max_ymin = np.maximum(det_ymin, gt_ymin) - - intersect_heights = np.maximum(0.0, (all_min_ymax - all_max_ymin)) - - all_min_xmax = np.minimum(det_xmax, gt_xmax) - all_max_xmin = np.maximum(det_xmin, gt_xmin) - intersect_widths = np.maximum(0.0, (all_min_xmax - all_max_xmin)) - - return intersect_heights * intersect_widths - - -def area_of_intersection(det_x, det_y, gt_x, gt_y): - p1 = Polygon(np.stack([det_x, det_y], axis=1)).buffer(0) - p2 = Polygon(np.stack([gt_x, gt_y], axis=1)).buffer(0) - return float(p1.intersection(p2).area) - - -def area_of_union(det_x, det_y, gt_x, gt_y): - p1 = Polygon(np.stack([det_x, det_y], axis=1)).buffer(0) - p2 = Polygon(np.stack([gt_x, gt_y], axis=1)).buffer(0) - return float(p1.union(p2).area) - - -def iou(det_x, det_y, gt_x, gt_y): - return area_of_intersection(det_x, det_y, gt_x, gt_y) / ( - area_of_union(det_x, det_y, gt_x, gt_y) + 1.0) - - -def iod(det_x, det_y, gt_x, gt_y): - """ - This helper determine the fraction of intersection area over detection area - """ - return area_of_intersection(det_x, det_y, gt_x, gt_y) / ( - area(det_x, det_y) + 1.0) diff --git a/backend/ppocr/utils/e2e_utils/extract_batchsize.py b/backend/ppocr/utils/e2e_utils/extract_batchsize.py deleted file mode 100644 index e99a833..0000000 --- a/backend/ppocr/utils/e2e_utils/extract_batchsize.py +++ /dev/null @@ -1,87 +0,0 @@ -import paddle -import numpy as np -import copy - - -def org_tcl_rois(batch_size, pos_lists, pos_masks, label_lists, tcl_bs): - """ - """ - pos_lists_, pos_masks_, label_lists_ = [], [], [] - img_bs = batch_size - ngpu = int(batch_size / img_bs) - img_ids = np.array(pos_lists, dtype=np.int32)[:, 0, 0].copy() - pos_lists_split, pos_masks_split, label_lists_split = [], [], [] - for i in range(ngpu): - pos_lists_split.append([]) - pos_masks_split.append([]) - label_lists_split.append([]) - - for i in range(img_ids.shape[0]): - img_id = img_ids[i] - gpu_id = int(img_id / img_bs) - img_id = img_id % img_bs - pos_list = pos_lists[i].copy() - pos_list[:, 0] = img_id - pos_lists_split[gpu_id].append(pos_list) - pos_masks_split[gpu_id].append(pos_masks[i].copy()) - label_lists_split[gpu_id].append(copy.deepcopy(label_lists[i])) - # repeat or delete - for i in range(ngpu): - vp_len = len(pos_lists_split[i]) - if vp_len <= tcl_bs: - for j in range(0, tcl_bs - vp_len): - pos_list = pos_lists_split[i][j].copy() - pos_lists_split[i].append(pos_list) - pos_mask = pos_masks_split[i][j].copy() - pos_masks_split[i].append(pos_mask) - label_list = copy.deepcopy(label_lists_split[i][j]) - label_lists_split[i].append(label_list) - else: - for j in range(0, vp_len - tcl_bs): - c_len = len(pos_lists_split[i]) - pop_id = np.random.permutation(c_len)[0] - pos_lists_split[i].pop(pop_id) - pos_masks_split[i].pop(pop_id) - label_lists_split[i].pop(pop_id) - # merge - for i in range(ngpu): - pos_lists_.extend(pos_lists_split[i]) - pos_masks_.extend(pos_masks_split[i]) - label_lists_.extend(label_lists_split[i]) - return pos_lists_, pos_masks_, label_lists_ - - -def pre_process(label_list, pos_list, pos_mask, max_text_length, max_text_nums, - pad_num, tcl_bs): - label_list = label_list.numpy() - batch, _, _, _ = label_list.shape - pos_list = pos_list.numpy() - pos_mask = pos_mask.numpy() - pos_list_t = [] - pos_mask_t = [] - label_list_t = [] - for i in range(batch): - for j in range(max_text_nums): - if pos_mask[i, j].any(): - pos_list_t.append(pos_list[i][j]) - pos_mask_t.append(pos_mask[i][j]) - label_list_t.append(label_list[i][j]) - pos_list, pos_mask, label_list = org_tcl_rois(batch, pos_list_t, pos_mask_t, - label_list_t, tcl_bs) - label = [] - tt = [l.tolist() for l in label_list] - for i in range(tcl_bs): - k = 0 - for j in range(max_text_length): - if tt[i][j][0] != pad_num: - k += 1 - else: - break - label.append(k) - label = paddle.to_tensor(label) - label = paddle.cast(label, dtype='int64') - pos_list = paddle.to_tensor(pos_list) - pos_mask = paddle.to_tensor(pos_mask) - label_list = paddle.squeeze(paddle.to_tensor(label_list), axis=2) - label_list = paddle.cast(label_list, dtype='int32') - return pos_list, pos_mask, label_list, label diff --git a/backend/ppocr/utils/e2e_utils/extract_textpoint_fast.py b/backend/ppocr/utils/e2e_utils/extract_textpoint_fast.py deleted file mode 100644 index 787cd30..0000000 --- a/backend/ppocr/utils/e2e_utils/extract_textpoint_fast.py +++ /dev/null @@ -1,457 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Contains various CTC decoders.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import cv2 -import math - -import numpy as np -from itertools import groupby -from skimage.morphology._skeletonize import thin - - -def get_dict(character_dict_path): - character_str = "" - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - for line in lines: - line = line.decode('utf-8').strip("\n").strip("\r\n") - character_str += line - dict_character = list(character_str) - return dict_character - - -def softmax(logits): - """ - logits: N x d - """ - max_value = np.max(logits, axis=1, keepdims=True) - exp = np.exp(logits - max_value) - exp_sum = np.sum(exp, axis=1, keepdims=True) - dist = exp / exp_sum - return dist - - -def get_keep_pos_idxs(labels, remove_blank=None): - """ - Remove duplicate and get pos idxs of keep items. - The value of keep_blank should be [None, 95]. - """ - duplicate_len_list = [] - keep_pos_idx_list = [] - keep_char_idx_list = [] - for k, v_ in groupby(labels): - current_len = len(list(v_)) - if k != remove_blank: - current_idx = int(sum(duplicate_len_list) + current_len // 2) - keep_pos_idx_list.append(current_idx) - keep_char_idx_list.append(k) - duplicate_len_list.append(current_len) - return keep_char_idx_list, keep_pos_idx_list - - -def remove_blank(labels, blank=0): - new_labels = [x for x in labels if x != blank] - return new_labels - - -def insert_blank(labels, blank=0): - new_labels = [blank] - for l in labels: - new_labels += [l, blank] - return new_labels - - -def ctc_greedy_decoder(probs_seq, blank=95, keep_blank_in_idxs=True): - """ - CTC greedy (best path) decoder. - """ - raw_str = np.argmax(np.array(probs_seq), axis=1) - remove_blank_in_pos = None if keep_blank_in_idxs else blank - dedup_str, keep_idx_list = get_keep_pos_idxs( - raw_str, remove_blank=remove_blank_in_pos) - dst_str = remove_blank(dedup_str, blank=blank) - return dst_str, keep_idx_list - - -def instance_ctc_greedy_decoder(gather_info, logits_map, pts_num=4): - _, _, C = logits_map.shape - ys, xs = zip(*gather_info) - logits_seq = logits_map[list(ys), list(xs)] - probs_seq = logits_seq - labels = np.argmax(probs_seq, axis=1) - dst_str = [k for k, v_ in groupby(labels) if k != C - 1] - detal = len(gather_info) // (pts_num - 1) - keep_idx_list = [0] + [detal * (i + 1) for i in range(pts_num - 2)] + [-1] - keep_gather_list = [gather_info[idx] for idx in keep_idx_list] - return dst_str, keep_gather_list - - -def ctc_decoder_for_image(gather_info_list, - logits_map, - Lexicon_Table, - pts_num=6): - """ - CTC decoder using multiple processes. - """ - decoder_str = [] - decoder_xys = [] - for gather_info in gather_info_list: - if len(gather_info) < pts_num: - continue - dst_str, xys_list = instance_ctc_greedy_decoder( - gather_info, logits_map, pts_num=pts_num) - dst_str_readable = ''.join([Lexicon_Table[idx] for idx in dst_str]) - if len(dst_str_readable) < 2: - continue - decoder_str.append(dst_str_readable) - decoder_xys.append(xys_list) - return decoder_str, decoder_xys - - -def sort_with_direction(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] - """ - - def sort_part_with_direction(pos_list, point_direction): - pos_list = np.array(pos_list).reshape(-1, 2) - point_direction = np.array(point_direction).reshape(-1, 2) - average_direction = np.mean(point_direction, axis=0, keepdims=True) - pos_proj_leng = np.sum(pos_list * average_direction, axis=1) - sorted_list = pos_list[np.argsort(pos_proj_leng)].tolist() - sorted_direction = point_direction[np.argsort(pos_proj_leng)].tolist() - return sorted_list, sorted_direction - - pos_list = np.array(pos_list).reshape(-1, 2) - point_direction = f_direction[pos_list[:, 0], pos_list[:, 1]] # x, y - point_direction = point_direction[:, ::-1] # x, y -> y, x - sorted_point, sorted_direction = sort_part_with_direction(pos_list, - point_direction) - - point_num = len(sorted_point) - if point_num >= 16: - middle_num = point_num // 2 - first_part_point = sorted_point[:middle_num] - first_point_direction = sorted_direction[:middle_num] - sorted_fist_part_point, sorted_fist_part_direction = sort_part_with_direction( - first_part_point, first_point_direction) - - last_part_point = sorted_point[middle_num:] - last_point_direction = sorted_direction[middle_num:] - sorted_last_part_point, sorted_last_part_direction = sort_part_with_direction( - last_part_point, last_point_direction) - sorted_point = sorted_fist_part_point + sorted_last_part_point - sorted_direction = sorted_fist_part_direction + sorted_last_part_direction - - return sorted_point, np.array(sorted_direction) - - -def add_id(pos_list, image_id=0): - """ - Add id for gather feature, for inference. - """ - new_list = [] - for item in pos_list: - new_list.append((image_id, item[0], item[1])) - return new_list - - -def sort_and_expand_with_direction(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] - """ - h, w, _ = f_direction.shape - sorted_list, point_direction = sort_with_direction(pos_list, f_direction) - - point_num = len(sorted_list) - sub_direction_len = max(point_num // 3, 2) - left_direction = point_direction[:sub_direction_len, :] - right_dirction = point_direction[point_num - sub_direction_len:, :] - - left_average_direction = -np.mean(left_direction, axis=0, keepdims=True) - left_average_len = np.linalg.norm(left_average_direction) - left_start = np.array(sorted_list[0]) - left_step = left_average_direction / (left_average_len + 1e-6) - - right_average_direction = np.mean(right_dirction, axis=0, keepdims=True) - right_average_len = np.linalg.norm(right_average_direction) - right_step = right_average_direction / (right_average_len + 1e-6) - right_start = np.array(sorted_list[-1]) - - append_num = max( - int((left_average_len + right_average_len) / 2.0 * 0.15), 1) - left_list = [] - right_list = [] - for i in range(append_num): - ly, lx = np.round(left_start + left_step * (i + 1)).flatten().astype( - 'int32').tolist() - if ly < h and lx < w and (ly, lx) not in left_list: - left_list.append((ly, lx)) - ry, rx = np.round(right_start + right_step * (i + 1)).flatten().astype( - 'int32').tolist() - if ry < h and rx < w and (ry, rx) not in right_list: - right_list.append((ry, rx)) - - all_list = left_list[::-1] + sorted_list + right_list - return all_list - - -def sort_and_expand_with_direction_v2(pos_list, f_direction, binary_tcl_map): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] - binary_tcl_map: h x w - """ - h, w, _ = f_direction.shape - sorted_list, point_direction = sort_with_direction(pos_list, f_direction) - - point_num = len(sorted_list) - sub_direction_len = max(point_num // 3, 2) - left_direction = point_direction[:sub_direction_len, :] - right_dirction = point_direction[point_num - sub_direction_len:, :] - - left_average_direction = -np.mean(left_direction, axis=0, keepdims=True) - left_average_len = np.linalg.norm(left_average_direction) - left_start = np.array(sorted_list[0]) - left_step = left_average_direction / (left_average_len + 1e-6) - - right_average_direction = np.mean(right_dirction, axis=0, keepdims=True) - right_average_len = np.linalg.norm(right_average_direction) - right_step = right_average_direction / (right_average_len + 1e-6) - right_start = np.array(sorted_list[-1]) - - append_num = max( - int((left_average_len + right_average_len) / 2.0 * 0.15), 1) - max_append_num = 2 * append_num - - left_list = [] - right_list = [] - for i in range(max_append_num): - ly, lx = np.round(left_start + left_step * (i + 1)).flatten().astype( - 'int32').tolist() - if ly < h and lx < w and (ly, lx) not in left_list: - if binary_tcl_map[ly, lx] > 0.5: - left_list.append((ly, lx)) - else: - break - - for i in range(max_append_num): - ry, rx = np.round(right_start + right_step * (i + 1)).flatten().astype( - 'int32').tolist() - if ry < h and rx < w and (ry, rx) not in right_list: - if binary_tcl_map[ry, rx] > 0.5: - right_list.append((ry, rx)) - else: - break - - all_list = left_list[::-1] + sorted_list + right_list - return all_list - - -def point_pair2poly(point_pair_list): - """ - Transfer vertical point_pairs into poly point in clockwise. - """ - point_num = len(point_pair_list) * 2 - point_list = [0] * point_num - for idx, point_pair in enumerate(point_pair_list): - point_list[idx] = point_pair[0] - point_list[point_num - 1 - idx] = point_pair[1] - return np.array(point_list).reshape(-1, 2) - - -def shrink_quad_along_width(quad, begin_width_ratio=0., end_width_ratio=1.): - ratio_pair = np.array( - [[begin_width_ratio], [end_width_ratio]], dtype=np.float32) - p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair - p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair - return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]]) - - -def expand_poly_along_width(poly, shrink_ratio_of_width=0.3): - """ - expand poly along width. - """ - point_num = poly.shape[0] - left_quad = np.array( - [poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32) - left_ratio = -shrink_ratio_of_width * np.linalg.norm(left_quad[0] - left_quad[3]) / \ - (np.linalg.norm(left_quad[0] - left_quad[1]) + 1e-6) - left_quad_expand = shrink_quad_along_width(left_quad, left_ratio, 1.0) - right_quad = np.array( - [ - poly[point_num // 2 - 2], poly[point_num // 2 - 1], - poly[point_num // 2], poly[point_num // 2 + 1] - ], - dtype=np.float32) - right_ratio = 1.0 + shrink_ratio_of_width * np.linalg.norm(right_quad[0] - right_quad[3]) / \ - (np.linalg.norm(right_quad[0] - right_quad[1]) + 1e-6) - right_quad_expand = shrink_quad_along_width(right_quad, 0.0, right_ratio) - poly[0] = left_quad_expand[0] - poly[-1] = left_quad_expand[-1] - poly[point_num // 2 - 1] = right_quad_expand[1] - poly[point_num // 2] = right_quad_expand[2] - return poly - - -def restore_poly(instance_yxs_list, seq_strs, p_border, ratio_w, ratio_h, src_w, - src_h, valid_set): - poly_list = [] - keep_str_list = [] - for yx_center_line, keep_str in zip(instance_yxs_list, seq_strs): - if len(keep_str) < 2: - print('--> too short, {}'.format(keep_str)) - continue - - offset_expand = 1.0 - if valid_set == 'totaltext': - offset_expand = 1.2 - - point_pair_list = [] - for y, x in yx_center_line: - offset = p_border[:, y, x].reshape(2, 2) * offset_expand - ori_yx = np.array([y, x], dtype=np.float32) - point_pair = (ori_yx + offset)[:, ::-1] * 4.0 / np.array( - [ratio_w, ratio_h]).reshape(-1, 2) - point_pair_list.append(point_pair) - - detected_poly = point_pair2poly(point_pair_list) - detected_poly = expand_poly_along_width( - detected_poly, shrink_ratio_of_width=0.2) - detected_poly[:, 0] = np.clip(detected_poly[:, 0], a_min=0, a_max=src_w) - detected_poly[:, 1] = np.clip(detected_poly[:, 1], a_min=0, a_max=src_h) - - keep_str_list.append(keep_str) - if valid_set == 'partvgg': - middle_point = len(detected_poly) // 2 - detected_poly = detected_poly[ - [0, middle_point - 1, middle_point, -1], :] - poly_list.append(detected_poly) - elif valid_set == 'totaltext': - poly_list.append(detected_poly) - else: - print('--> Not supported format.') - exit(-1) - return poly_list, keep_str_list - - -def generate_pivot_list_fast(p_score, - p_char_maps, - f_direction, - Lexicon_Table, - score_thresh=0.5): - """ - return center point and end point of TCL instance; filter with the char maps; - """ - p_score = p_score[0] - f_direction = f_direction.transpose(1, 2, 0) - p_tcl_map = (p_score > score_thresh) * 1.0 - skeleton_map = thin(p_tcl_map.astype(np.uint8)) - instance_count, instance_label_map = cv2.connectedComponents( - skeleton_map.astype(np.uint8), connectivity=8) - - # get TCL Instance - all_pos_yxs = [] - if instance_count > 0: - for instance_id in range(1, instance_count): - pos_list = [] - ys, xs = np.where(instance_label_map == instance_id) - pos_list = list(zip(ys, xs)) - - if len(pos_list) < 3: - continue - - pos_list_sorted = sort_and_expand_with_direction_v2( - pos_list, f_direction, p_tcl_map) - all_pos_yxs.append(pos_list_sorted) - - p_char_maps = p_char_maps.transpose([1, 2, 0]) - decoded_str, keep_yxs_list = ctc_decoder_for_image( - all_pos_yxs, logits_map=p_char_maps, Lexicon_Table=Lexicon_Table) - return keep_yxs_list, decoded_str - - -def extract_main_direction(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] - """ - pos_list = np.array(pos_list) - point_direction = f_direction[pos_list[:, 0], pos_list[:, 1]] - point_direction = point_direction[:, ::-1] # x, y -> y, x - average_direction = np.mean(point_direction, axis=0, keepdims=True) - average_direction = average_direction / ( - np.linalg.norm(average_direction) + 1e-6) - return average_direction - - -def sort_by_direction_with_image_id_deprecated(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[id, y, x], [id, y, x], [id, y, x] ...] - """ - pos_list_full = np.array(pos_list).reshape(-1, 3) - pos_list = pos_list_full[:, 1:] - point_direction = f_direction[pos_list[:, 0], pos_list[:, 1]] # x, y - point_direction = point_direction[:, ::-1] # x, y -> y, x - average_direction = np.mean(point_direction, axis=0, keepdims=True) - pos_proj_leng = np.sum(pos_list * average_direction, axis=1) - sorted_list = pos_list_full[np.argsort(pos_proj_leng)].tolist() - return sorted_list - - -def sort_by_direction_with_image_id(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] - """ - - def sort_part_with_direction(pos_list_full, point_direction): - pos_list_full = np.array(pos_list_full).reshape(-1, 3) - pos_list = pos_list_full[:, 1:] - point_direction = np.array(point_direction).reshape(-1, 2) - average_direction = np.mean(point_direction, axis=0, keepdims=True) - pos_proj_leng = np.sum(pos_list * average_direction, axis=1) - sorted_list = pos_list_full[np.argsort(pos_proj_leng)].tolist() - sorted_direction = point_direction[np.argsort(pos_proj_leng)].tolist() - return sorted_list, sorted_direction - - pos_list = np.array(pos_list).reshape(-1, 3) - point_direction = f_direction[pos_list[:, 1], pos_list[:, 2]] # x, y - point_direction = point_direction[:, ::-1] # x, y -> y, x - sorted_point, sorted_direction = sort_part_with_direction(pos_list, - point_direction) - - point_num = len(sorted_point) - if point_num >= 16: - middle_num = point_num // 2 - first_part_point = sorted_point[:middle_num] - first_point_direction = sorted_direction[:middle_num] - sorted_fist_part_point, sorted_fist_part_direction = sort_part_with_direction( - first_part_point, first_point_direction) - - last_part_point = sorted_point[middle_num:] - last_point_direction = sorted_direction[middle_num:] - sorted_last_part_point, sorted_last_part_direction = sort_part_with_direction( - last_part_point, last_point_direction) - sorted_point = sorted_fist_part_point + sorted_last_part_point - sorted_direction = sorted_fist_part_direction + sorted_last_part_direction - - return sorted_point diff --git a/backend/ppocr/utils/e2e_utils/extract_textpoint_slow.py b/backend/ppocr/utils/e2e_utils/extract_textpoint_slow.py deleted file mode 100644 index ace46fb..0000000 --- a/backend/ppocr/utils/e2e_utils/extract_textpoint_slow.py +++ /dev/null @@ -1,592 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Contains various CTC decoders.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import cv2 -import math - -import numpy as np -from itertools import groupby -from skimage.morphology._skeletonize import thin - - -def get_dict(character_dict_path): - character_str = "" - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - for line in lines: - line = line.decode('utf-8').strip("\n").strip("\r\n") - character_str += line - dict_character = list(character_str) - return dict_character - - -def point_pair2poly(point_pair_list): - """ - Transfer vertical point_pairs into poly point in clockwise. - """ - pair_length_list = [] - for point_pair in point_pair_list: - pair_length = np.linalg.norm(point_pair[0] - point_pair[1]) - pair_length_list.append(pair_length) - pair_length_list = np.array(pair_length_list) - pair_info = (pair_length_list.max(), pair_length_list.min(), - pair_length_list.mean()) - - point_num = len(point_pair_list) * 2 - point_list = [0] * point_num - for idx, point_pair in enumerate(point_pair_list): - point_list[idx] = point_pair[0] - point_list[point_num - 1 - idx] = point_pair[1] - return np.array(point_list).reshape(-1, 2), pair_info - - -def shrink_quad_along_width(quad, begin_width_ratio=0., end_width_ratio=1.): - """ - Generate shrink_quad_along_width. - """ - ratio_pair = np.array( - [[begin_width_ratio], [end_width_ratio]], dtype=np.float32) - p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair - p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair - return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]]) - - -def expand_poly_along_width(poly, shrink_ratio_of_width=0.3): - """ - expand poly along width. - """ - point_num = poly.shape[0] - left_quad = np.array( - [poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32) - left_ratio = -shrink_ratio_of_width * np.linalg.norm(left_quad[0] - left_quad[3]) / \ - (np.linalg.norm(left_quad[0] - left_quad[1]) + 1e-6) - left_quad_expand = shrink_quad_along_width(left_quad, left_ratio, 1.0) - right_quad = np.array( - [ - poly[point_num // 2 - 2], poly[point_num // 2 - 1], - poly[point_num // 2], poly[point_num // 2 + 1] - ], - dtype=np.float32) - right_ratio = 1.0 + \ - shrink_ratio_of_width * np.linalg.norm(right_quad[0] - right_quad[3]) / \ - (np.linalg.norm(right_quad[0] - right_quad[1]) + 1e-6) - right_quad_expand = shrink_quad_along_width(right_quad, 0.0, right_ratio) - poly[0] = left_quad_expand[0] - poly[-1] = left_quad_expand[-1] - poly[point_num // 2 - 1] = right_quad_expand[1] - poly[point_num // 2] = right_quad_expand[2] - return poly - - -def softmax(logits): - """ - logits: N x d - """ - max_value = np.max(logits, axis=1, keepdims=True) - exp = np.exp(logits - max_value) - exp_sum = np.sum(exp, axis=1, keepdims=True) - dist = exp / exp_sum - return dist - - -def get_keep_pos_idxs(labels, remove_blank=None): - """ - Remove duplicate and get pos idxs of keep items. - The value of keep_blank should be [None, 95]. - """ - duplicate_len_list = [] - keep_pos_idx_list = [] - keep_char_idx_list = [] - for k, v_ in groupby(labels): - current_len = len(list(v_)) - if k != remove_blank: - current_idx = int(sum(duplicate_len_list) + current_len // 2) - keep_pos_idx_list.append(current_idx) - keep_char_idx_list.append(k) - duplicate_len_list.append(current_len) - return keep_char_idx_list, keep_pos_idx_list - - -def remove_blank(labels, blank=0): - new_labels = [x for x in labels if x != blank] - return new_labels - - -def insert_blank(labels, blank=0): - new_labels = [blank] - for l in labels: - new_labels += [l, blank] - return new_labels - - -def ctc_greedy_decoder(probs_seq, blank=95, keep_blank_in_idxs=True): - """ - CTC greedy (best path) decoder. - """ - raw_str = np.argmax(np.array(probs_seq), axis=1) - remove_blank_in_pos = None if keep_blank_in_idxs else blank - dedup_str, keep_idx_list = get_keep_pos_idxs( - raw_str, remove_blank=remove_blank_in_pos) - dst_str = remove_blank(dedup_str, blank=blank) - return dst_str, keep_idx_list - - -def instance_ctc_greedy_decoder(gather_info, - logits_map, - keep_blank_in_idxs=True): - """ - gather_info: [[x, y], [x, y] ...] - logits_map: H x W X (n_chars + 1) - """ - _, _, C = logits_map.shape - ys, xs = zip(*gather_info) - logits_seq = logits_map[list(ys), list(xs)] # n x 96 - probs_seq = softmax(logits_seq) - dst_str, keep_idx_list = ctc_greedy_decoder( - probs_seq, blank=C - 1, keep_blank_in_idxs=keep_blank_in_idxs) - keep_gather_list = [gather_info[idx] for idx in keep_idx_list] - return dst_str, keep_gather_list - - -def ctc_decoder_for_image(gather_info_list, logits_map, - keep_blank_in_idxs=True): - """ - CTC decoder using multiple processes. - """ - decoder_results = [] - for gather_info in gather_info_list: - res = instance_ctc_greedy_decoder( - gather_info, logits_map, keep_blank_in_idxs=keep_blank_in_idxs) - decoder_results.append(res) - return decoder_results - - -def sort_with_direction(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] - """ - - def sort_part_with_direction(pos_list, point_direction): - pos_list = np.array(pos_list).reshape(-1, 2) - point_direction = np.array(point_direction).reshape(-1, 2) - average_direction = np.mean(point_direction, axis=0, keepdims=True) - pos_proj_leng = np.sum(pos_list * average_direction, axis=1) - sorted_list = pos_list[np.argsort(pos_proj_leng)].tolist() - sorted_direction = point_direction[np.argsort(pos_proj_leng)].tolist() - return sorted_list, sorted_direction - - pos_list = np.array(pos_list).reshape(-1, 2) - point_direction = f_direction[pos_list[:, 0], pos_list[:, 1]] # x, y - point_direction = point_direction[:, ::-1] # x, y -> y, x - sorted_point, sorted_direction = sort_part_with_direction(pos_list, - point_direction) - - point_num = len(sorted_point) - if point_num >= 16: - middle_num = point_num // 2 - first_part_point = sorted_point[:middle_num] - first_point_direction = sorted_direction[:middle_num] - sorted_fist_part_point, sorted_fist_part_direction = sort_part_with_direction( - first_part_point, first_point_direction) - - last_part_point = sorted_point[middle_num:] - last_point_direction = sorted_direction[middle_num:] - sorted_last_part_point, sorted_last_part_direction = sort_part_with_direction( - last_part_point, last_point_direction) - sorted_point = sorted_fist_part_point + sorted_last_part_point - sorted_direction = sorted_fist_part_direction + sorted_last_part_direction - - return sorted_point, np.array(sorted_direction) - - -def add_id(pos_list, image_id=0): - """ - Add id for gather feature, for inference. - """ - new_list = [] - for item in pos_list: - new_list.append((image_id, item[0], item[1])) - return new_list - - -def sort_and_expand_with_direction(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] - """ - h, w, _ = f_direction.shape - sorted_list, point_direction = sort_with_direction(pos_list, f_direction) - - # expand along - point_num = len(sorted_list) - sub_direction_len = max(point_num // 3, 2) - left_direction = point_direction[:sub_direction_len, :] - right_dirction = point_direction[point_num - sub_direction_len:, :] - - left_average_direction = -np.mean(left_direction, axis=0, keepdims=True) - left_average_len = np.linalg.norm(left_average_direction) - left_start = np.array(sorted_list[0]) - left_step = left_average_direction / (left_average_len + 1e-6) - - right_average_direction = np.mean(right_dirction, axis=0, keepdims=True) - right_average_len = np.linalg.norm(right_average_direction) - right_step = right_average_direction / (right_average_len + 1e-6) - right_start = np.array(sorted_list[-1]) - - append_num = max( - int((left_average_len + right_average_len) / 2.0 * 0.15), 1) - left_list = [] - right_list = [] - for i in range(append_num): - ly, lx = np.round(left_start + left_step * (i + 1)).flatten().astype( - 'int32').tolist() - if ly < h and lx < w and (ly, lx) not in left_list: - left_list.append((ly, lx)) - ry, rx = np.round(right_start + right_step * (i + 1)).flatten().astype( - 'int32').tolist() - if ry < h and rx < w and (ry, rx) not in right_list: - right_list.append((ry, rx)) - - all_list = left_list[::-1] + sorted_list + right_list - return all_list - - -def sort_and_expand_with_direction_v2(pos_list, f_direction, binary_tcl_map): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] - binary_tcl_map: h x w - """ - h, w, _ = f_direction.shape - sorted_list, point_direction = sort_with_direction(pos_list, f_direction) - - # expand along - point_num = len(sorted_list) - sub_direction_len = max(point_num // 3, 2) - left_direction = point_direction[:sub_direction_len, :] - right_dirction = point_direction[point_num - sub_direction_len:, :] - - left_average_direction = -np.mean(left_direction, axis=0, keepdims=True) - left_average_len = np.linalg.norm(left_average_direction) - left_start = np.array(sorted_list[0]) - left_step = left_average_direction / (left_average_len + 1e-6) - - right_average_direction = np.mean(right_dirction, axis=0, keepdims=True) - right_average_len = np.linalg.norm(right_average_direction) - right_step = right_average_direction / (right_average_len + 1e-6) - right_start = np.array(sorted_list[-1]) - - append_num = max( - int((left_average_len + right_average_len) / 2.0 * 0.15), 1) - max_append_num = 2 * append_num - - left_list = [] - right_list = [] - for i in range(max_append_num): - ly, lx = np.round(left_start + left_step * (i + 1)).flatten().astype( - 'int32').tolist() - if ly < h and lx < w and (ly, lx) not in left_list: - if binary_tcl_map[ly, lx] > 0.5: - left_list.append((ly, lx)) - else: - break - - for i in range(max_append_num): - ry, rx = np.round(right_start + right_step * (i + 1)).flatten().astype( - 'int32').tolist() - if ry < h and rx < w and (ry, rx) not in right_list: - if binary_tcl_map[ry, rx] > 0.5: - right_list.append((ry, rx)) - else: - break - - all_list = left_list[::-1] + sorted_list + right_list - return all_list - - -def generate_pivot_list_curved(p_score, - p_char_maps, - f_direction, - score_thresh=0.5, - is_expand=True, - is_backbone=False, - image_id=0): - """ - return center point and end point of TCL instance; filter with the char maps; - """ - p_score = p_score[0] - f_direction = f_direction.transpose(1, 2, 0) - p_tcl_map = (p_score > score_thresh) * 1.0 - skeleton_map = thin(p_tcl_map) - instance_count, instance_label_map = cv2.connectedComponents( - skeleton_map.astype(np.uint8), connectivity=8) - - # get TCL Instance - all_pos_yxs = [] - center_pos_yxs = [] - end_points_yxs = [] - instance_center_pos_yxs = [] - pred_strs = [] - if instance_count > 0: - for instance_id in range(1, instance_count): - pos_list = [] - ys, xs = np.where(instance_label_map == instance_id) - pos_list = list(zip(ys, xs)) - - ### FIX-ME, eliminate outlier - if len(pos_list) < 3: - continue - - if is_expand: - pos_list_sorted = sort_and_expand_with_direction_v2( - pos_list, f_direction, p_tcl_map) - else: - pos_list_sorted, _ = sort_with_direction(pos_list, f_direction) - all_pos_yxs.append(pos_list_sorted) - - # use decoder to filter backgroud points. - p_char_maps = p_char_maps.transpose([1, 2, 0]) - decode_res = ctc_decoder_for_image( - all_pos_yxs, logits_map=p_char_maps, keep_blank_in_idxs=True) - for decoded_str, keep_yxs_list in decode_res: - if is_backbone: - keep_yxs_list_with_id = add_id(keep_yxs_list, image_id=image_id) - instance_center_pos_yxs.append(keep_yxs_list_with_id) - pred_strs.append(decoded_str) - else: - end_points_yxs.extend((keep_yxs_list[0], keep_yxs_list[-1])) - center_pos_yxs.extend(keep_yxs_list) - - if is_backbone: - return pred_strs, instance_center_pos_yxs - else: - return center_pos_yxs, end_points_yxs - - -def generate_pivot_list_horizontal(p_score, - p_char_maps, - f_direction, - score_thresh=0.5, - is_backbone=False, - image_id=0): - """ - return center point and end point of TCL instance; filter with the char maps; - """ - p_score = p_score[0] - f_direction = f_direction.transpose(1, 2, 0) - p_tcl_map_bi = (p_score > score_thresh) * 1.0 - instance_count, instance_label_map = cv2.connectedComponents( - p_tcl_map_bi.astype(np.uint8), connectivity=8) - - # get TCL Instance - all_pos_yxs = [] - center_pos_yxs = [] - end_points_yxs = [] - instance_center_pos_yxs = [] - - if instance_count > 0: - for instance_id in range(1, instance_count): - pos_list = [] - ys, xs = np.where(instance_label_map == instance_id) - pos_list = list(zip(ys, xs)) - - ### FIX-ME, eliminate outlier - if len(pos_list) < 5: - continue - - # add rule here - main_direction = extract_main_direction(pos_list, - f_direction) # y x - reference_directin = np.array([0, 1]).reshape([-1, 2]) # y x - is_h_angle = abs(np.sum( - main_direction * reference_directin)) < math.cos(math.pi / 180 * - 70) - - point_yxs = np.array(pos_list) - max_y, max_x = np.max(point_yxs, axis=0) - min_y, min_x = np.min(point_yxs, axis=0) - is_h_len = (max_y - min_y) < 1.5 * (max_x - min_x) - - pos_list_final = [] - if is_h_len: - xs = np.unique(xs) - for x in xs: - ys = instance_label_map[:, x].copy().reshape((-1, )) - y = int(np.where(ys == instance_id)[0].mean()) - pos_list_final.append((y, x)) - else: - ys = np.unique(ys) - for y in ys: - xs = instance_label_map[y, :].copy().reshape((-1, )) - x = int(np.where(xs == instance_id)[0].mean()) - pos_list_final.append((y, x)) - - pos_list_sorted, _ = sort_with_direction(pos_list_final, - f_direction) - all_pos_yxs.append(pos_list_sorted) - - # use decoder to filter backgroud points. - p_char_maps = p_char_maps.transpose([1, 2, 0]) - decode_res = ctc_decoder_for_image( - all_pos_yxs, logits_map=p_char_maps, keep_blank_in_idxs=True) - for decoded_str, keep_yxs_list in decode_res: - if is_backbone: - keep_yxs_list_with_id = add_id(keep_yxs_list, image_id=image_id) - instance_center_pos_yxs.append(keep_yxs_list_with_id) - else: - end_points_yxs.extend((keep_yxs_list[0], keep_yxs_list[-1])) - center_pos_yxs.extend(keep_yxs_list) - - if is_backbone: - return instance_center_pos_yxs - else: - return center_pos_yxs, end_points_yxs - - -def generate_pivot_list_slow(p_score, - p_char_maps, - f_direction, - score_thresh=0.5, - is_backbone=False, - is_curved=True, - image_id=0): - """ - Warp all the function together. - """ - if is_curved: - return generate_pivot_list_curved( - p_score, - p_char_maps, - f_direction, - score_thresh=score_thresh, - is_expand=True, - is_backbone=is_backbone, - image_id=image_id) - else: - return generate_pivot_list_horizontal( - p_score, - p_char_maps, - f_direction, - score_thresh=score_thresh, - is_backbone=is_backbone, - image_id=image_id) - - -# for refine module -def extract_main_direction(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] - """ - pos_list = np.array(pos_list) - point_direction = f_direction[pos_list[:, 0], pos_list[:, 1]] - point_direction = point_direction[:, ::-1] # x, y -> y, x - average_direction = np.mean(point_direction, axis=0, keepdims=True) - average_direction = average_direction / ( - np.linalg.norm(average_direction) + 1e-6) - return average_direction - - -def sort_by_direction_with_image_id_deprecated(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[id, y, x], [id, y, x], [id, y, x] ...] - """ - pos_list_full = np.array(pos_list).reshape(-1, 3) - pos_list = pos_list_full[:, 1:] - point_direction = f_direction[pos_list[:, 0], pos_list[:, 1]] # x, y - point_direction = point_direction[:, ::-1] # x, y -> y, x - average_direction = np.mean(point_direction, axis=0, keepdims=True) - pos_proj_leng = np.sum(pos_list * average_direction, axis=1) - sorted_list = pos_list_full[np.argsort(pos_proj_leng)].tolist() - return sorted_list - - -def sort_by_direction_with_image_id(pos_list, f_direction): - """ - f_direction: h x w x 2 - pos_list: [[y, x], [y, x], [y, x] ...] - """ - - def sort_part_with_direction(pos_list_full, point_direction): - pos_list_full = np.array(pos_list_full).reshape(-1, 3) - pos_list = pos_list_full[:, 1:] - point_direction = np.array(point_direction).reshape(-1, 2) - average_direction = np.mean(point_direction, axis=0, keepdims=True) - pos_proj_leng = np.sum(pos_list * average_direction, axis=1) - sorted_list = pos_list_full[np.argsort(pos_proj_leng)].tolist() - sorted_direction = point_direction[np.argsort(pos_proj_leng)].tolist() - return sorted_list, sorted_direction - - pos_list = np.array(pos_list).reshape(-1, 3) - point_direction = f_direction[pos_list[:, 1], pos_list[:, 2]] # x, y - point_direction = point_direction[:, ::-1] # x, y -> y, x - sorted_point, sorted_direction = sort_part_with_direction(pos_list, - point_direction) - - point_num = len(sorted_point) - if point_num >= 16: - middle_num = point_num // 2 - first_part_point = sorted_point[:middle_num] - first_point_direction = sorted_direction[:middle_num] - sorted_fist_part_point, sorted_fist_part_direction = sort_part_with_direction( - first_part_point, first_point_direction) - - last_part_point = sorted_point[middle_num:] - last_point_direction = sorted_direction[middle_num:] - sorted_last_part_point, sorted_last_part_direction = sort_part_with_direction( - last_part_point, last_point_direction) - sorted_point = sorted_fist_part_point + sorted_last_part_point - sorted_direction = sorted_fist_part_direction + sorted_last_part_direction - - return sorted_point - - -def generate_pivot_list_tt_inference(p_score, - p_char_maps, - f_direction, - score_thresh=0.5, - is_backbone=False, - is_curved=True, - image_id=0): - """ - return center point and end point of TCL instance; filter with the char maps; - """ - p_score = p_score[0] - f_direction = f_direction.transpose(1, 2, 0) - p_tcl_map = (p_score > score_thresh) * 1.0 - skeleton_map = thin(p_tcl_map) - instance_count, instance_label_map = cv2.connectedComponents( - skeleton_map.astype(np.uint8), connectivity=8) - - # get TCL Instance - all_pos_yxs = [] - if instance_count > 0: - for instance_id in range(1, instance_count): - pos_list = [] - ys, xs = np.where(instance_label_map == instance_id) - pos_list = list(zip(ys, xs)) - ### FIX-ME, eliminate outlier - if len(pos_list) < 3: - continue - pos_list_sorted = sort_and_expand_with_direction_v2( - pos_list, f_direction, p_tcl_map) - pos_list_sorted_with_id = add_id(pos_list_sorted, image_id=image_id) - all_pos_yxs.append(pos_list_sorted_with_id) - return all_pos_yxs diff --git a/backend/ppocr/utils/e2e_utils/pgnet_pp_utils.py b/backend/ppocr/utils/e2e_utils/pgnet_pp_utils.py deleted file mode 100644 index a15503c..0000000 --- a/backend/ppocr/utils/e2e_utils/pgnet_pp_utils.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import paddle -import os -import sys - -__dir__ = os.path.dirname(__file__) -sys.path.append(__dir__) -sys.path.append(os.path.join(__dir__, '..')) -from extract_textpoint_slow import * -from extract_textpoint_fast import generate_pivot_list_fast, restore_poly - - -class PGNet_PostProcess(object): - # two different post-process - def __init__(self, character_dict_path, valid_set, score_thresh, outs_dict, - shape_list): - self.Lexicon_Table = get_dict(character_dict_path) - self.valid_set = valid_set - self.score_thresh = score_thresh - self.outs_dict = outs_dict - self.shape_list = shape_list - - def pg_postprocess_fast(self): - p_score = self.outs_dict['f_score'] - p_border = self.outs_dict['f_border'] - p_char = self.outs_dict['f_char'] - p_direction = self.outs_dict['f_direction'] - if isinstance(p_score, paddle.Tensor): - p_score = p_score[0].numpy() - p_border = p_border[0].numpy() - p_direction = p_direction[0].numpy() - p_char = p_char[0].numpy() - else: - p_score = p_score[0] - p_border = p_border[0] - p_direction = p_direction[0] - p_char = p_char[0] - - src_h, src_w, ratio_h, ratio_w = self.shape_list[0] - instance_yxs_list, seq_strs = generate_pivot_list_fast( - p_score, - p_char, - p_direction, - self.Lexicon_Table, - score_thresh=self.score_thresh) - poly_list, keep_str_list = restore_poly(instance_yxs_list, seq_strs, - p_border, ratio_w, ratio_h, - src_w, src_h, self.valid_set) - data = { - 'points': poly_list, - 'texts': keep_str_list, - } - return data - - def pg_postprocess_slow(self): - p_score = self.outs_dict['f_score'] - p_border = self.outs_dict['f_border'] - p_char = self.outs_dict['f_char'] - p_direction = self.outs_dict['f_direction'] - if isinstance(p_score, paddle.Tensor): - p_score = p_score[0].numpy() - p_border = p_border[0].numpy() - p_direction = p_direction[0].numpy() - p_char = p_char[0].numpy() - else: - p_score = p_score[0] - p_border = p_border[0] - p_direction = p_direction[0] - p_char = p_char[0] - src_h, src_w, ratio_h, ratio_w = self.shape_list[0] - is_curved = self.valid_set == "totaltext" - char_seq_idx_set, instance_yxs_list = generate_pivot_list_slow( - p_score, - p_char, - p_direction, - score_thresh=self.score_thresh, - is_backbone=True, - is_curved=is_curved) - seq_strs = [] - for char_idx_set in char_seq_idx_set: - pr_str = ''.join([self.Lexicon_Table[pos] for pos in char_idx_set]) - seq_strs.append(pr_str) - poly_list = [] - keep_str_list = [] - all_point_list = [] - all_point_pair_list = [] - for yx_center_line, keep_str in zip(instance_yxs_list, seq_strs): - if len(yx_center_line) == 1: - yx_center_line.append(yx_center_line[-1]) - - offset_expand = 1.0 - if self.valid_set == 'totaltext': - offset_expand = 1.2 - - point_pair_list = [] - for batch_id, y, x in yx_center_line: - offset = p_border[:, y, x].reshape(2, 2) - if offset_expand != 1.0: - offset_length = np.linalg.norm( - offset, axis=1, keepdims=True) - expand_length = np.clip( - offset_length * (offset_expand - 1), - a_min=0.5, - a_max=3.0) - offset_detal = offset / offset_length * expand_length - offset = offset + offset_detal - ori_yx = np.array([y, x], dtype=np.float32) - point_pair = (ori_yx + offset)[:, ::-1] * 4.0 / np.array( - [ratio_w, ratio_h]).reshape(-1, 2) - point_pair_list.append(point_pair) - - all_point_list.append([ - int(round(x * 4.0 / ratio_w)), - int(round(y * 4.0 / ratio_h)) - ]) - all_point_pair_list.append(point_pair.round().astype(np.int32) - .tolist()) - - detected_poly, pair_length_info = point_pair2poly(point_pair_list) - detected_poly = expand_poly_along_width( - detected_poly, shrink_ratio_of_width=0.2) - detected_poly[:, 0] = np.clip( - detected_poly[:, 0], a_min=0, a_max=src_w) - detected_poly[:, 1] = np.clip( - detected_poly[:, 1], a_min=0, a_max=src_h) - - if len(keep_str) < 2: - continue - - keep_str_list.append(keep_str) - detected_poly = np.round(detected_poly).astype('int32') - if self.valid_set == 'partvgg': - middle_point = len(detected_poly) // 2 - detected_poly = detected_poly[ - [0, middle_point - 1, middle_point, -1], :] - poly_list.append(detected_poly) - elif self.valid_set == 'totaltext': - poly_list.append(detected_poly) - else: - print('--> Not supported format.') - exit(-1) - data = { - 'points': poly_list, - 'texts': keep_str_list, - } - return data diff --git a/backend/ppocr/utils/e2e_utils/visual.py b/backend/ppocr/utils/e2e_utils/visual.py deleted file mode 100644 index e6e4fd0..0000000 --- a/backend/ppocr/utils/e2e_utils/visual.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -import cv2 -import time - - -def resize_image(im, max_side_len=512): - """ - resize image to a size multiple of max_stride which is required by the network - :param im: the resized image - :param max_side_len: limit of max image size to avoid out of memory in gpu - :return: the resized image and the resize ratio - """ - h, w, _ = im.shape - - resize_w = w - resize_h = h - - if resize_h > resize_w: - ratio = float(max_side_len) / resize_h - else: - ratio = float(max_side_len) / resize_w - - resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - - max_stride = 128 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - im = cv2.resize(im, (int(resize_w), int(resize_h))) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - - return im, (ratio_h, ratio_w) - - -def resize_image_min(im, max_side_len=512): - """ - """ - h, w, _ = im.shape - - resize_w = w - resize_h = h - - if resize_h < resize_w: - ratio = float(max_side_len) / resize_h - else: - ratio = float(max_side_len) / resize_w - - resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - - max_stride = 128 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - im = cv2.resize(im, (int(resize_w), int(resize_h))) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - return im, (ratio_h, ratio_w) - - -def resize_image_for_totaltext(im, max_side_len=512): - """ - """ - h, w, _ = im.shape - - resize_w = w - resize_h = h - ratio = 1.25 - if h * ratio > max_side_len: - ratio = float(max_side_len) / resize_h - - resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - - max_stride = 128 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - im = cv2.resize(im, (int(resize_w), int(resize_h))) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - return im, (ratio_h, ratio_w) - - -def point_pair2poly(point_pair_list): - """ - Transfer vertical point_pairs into poly point in clockwise. - """ - pair_length_list = [] - for point_pair in point_pair_list: - pair_length = np.linalg.norm(point_pair[0] - point_pair[1]) - pair_length_list.append(pair_length) - pair_length_list = np.array(pair_length_list) - pair_info = (pair_length_list.max(), pair_length_list.min(), - pair_length_list.mean()) - - point_num = len(point_pair_list) * 2 - point_list = [0] * point_num - for idx, point_pair in enumerate(point_pair_list): - point_list[idx] = point_pair[0] - point_list[point_num - 1 - idx] = point_pair[1] - return np.array(point_list).reshape(-1, 2), pair_info - - -def shrink_quad_along_width(quad, begin_width_ratio=0., end_width_ratio=1.): - """ - Generate shrink_quad_along_width. - """ - ratio_pair = np.array( - [[begin_width_ratio], [end_width_ratio]], dtype=np.float32) - p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair - p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair - return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]]) - - -def expand_poly_along_width(poly, shrink_ratio_of_width=0.3): - """ - expand poly along width. - """ - point_num = poly.shape[0] - left_quad = np.array( - [poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32) - left_ratio = -shrink_ratio_of_width * np.linalg.norm(left_quad[0] - left_quad[3]) / \ - (np.linalg.norm(left_quad[0] - left_quad[1]) + 1e-6) - left_quad_expand = shrink_quad_along_width(left_quad, left_ratio, 1.0) - right_quad = np.array( - [ - poly[point_num // 2 - 2], poly[point_num // 2 - 1], - poly[point_num // 2], poly[point_num // 2 + 1] - ], - dtype=np.float32) - right_ratio = 1.0 + \ - shrink_ratio_of_width * np.linalg.norm(right_quad[0] - right_quad[3]) / \ - (np.linalg.norm(right_quad[0] - right_quad[1]) + 1e-6) - right_quad_expand = shrink_quad_along_width(right_quad, 0.0, right_ratio) - poly[0] = left_quad_expand[0] - poly[-1] = left_quad_expand[-1] - poly[point_num // 2 - 1] = right_quad_expand[1] - poly[point_num // 2] = right_quad_expand[2] - return poly - - -def norm2(x, axis=None): - if axis: - return np.sqrt(np.sum(x**2, axis=axis)) - return np.sqrt(np.sum(x**2)) - - -def cos(p1, p2): - return (p1 * p2).sum() / (norm2(p1) * norm2(p2)) diff --git a/backend/ppocr/utils/iou.py b/backend/ppocr/utils/iou.py deleted file mode 100644 index 35459f5..0000000 --- a/backend/ppocr/utils/iou.py +++ /dev/null @@ -1,54 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/whai362/PSENet/blob/python3/models/loss/iou.py -""" - -import paddle - -EPS = 1e-6 - - -def iou_single(a, b, mask, n_class): - valid = mask == 1 - a = a.masked_select(valid) - b = b.masked_select(valid) - miou = [] - for i in range(n_class): - if a.shape == [0] and a.shape == b.shape: - inter = paddle.to_tensor(0.0) - union = paddle.to_tensor(0.0) - else: - inter = ((a == i).logical_and(b == i)).astype('float32') - union = ((a == i).logical_or(b == i)).astype('float32') - miou.append(paddle.sum(inter) / (paddle.sum(union) + EPS)) - miou = sum(miou) / len(miou) - return miou - - -def iou(a, b, mask, n_class=2, reduce=True): - batch_size = a.shape[0] - - a = a.reshape([batch_size, -1]) - b = b.reshape([batch_size, -1]) - mask = mask.reshape([batch_size, -1]) - - iou = paddle.zeros((batch_size, ), dtype='float32') - for i in range(batch_size): - iou[i] = iou_single(a[i], b[i], mask[i], n_class) - - if reduce: - iou = paddle.mean(iou) - return iou diff --git a/backend/ppocr/utils/loggers/__init__.py b/backend/ppocr/utils/loggers/__init__.py deleted file mode 100644 index b1e92f7..0000000 --- a/backend/ppocr/utils/loggers/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .vdl_logger import VDLLogger -from .wandb_logger import WandbLogger -from .loggers import Loggers diff --git a/backend/ppocr/utils/loggers/base_logger.py b/backend/ppocr/utils/loggers/base_logger.py deleted file mode 100644 index 3a7fc35..0000000 --- a/backend/ppocr/utils/loggers/base_logger.py +++ /dev/null @@ -1,15 +0,0 @@ -import os -from abc import ABC, abstractmethod - -class BaseLogger(ABC): - def __init__(self, save_dir): - self.save_dir = save_dir - os.makedirs(self.save_dir, exist_ok=True) - - @abstractmethod - def log_metrics(self, metrics, prefix=None): - pass - - @abstractmethod - def close(self): - pass \ No newline at end of file diff --git a/backend/ppocr/utils/loggers/loggers.py b/backend/ppocr/utils/loggers/loggers.py deleted file mode 100644 index 2601466..0000000 --- a/backend/ppocr/utils/loggers/loggers.py +++ /dev/null @@ -1,18 +0,0 @@ -from .wandb_logger import WandbLogger - -class Loggers(object): - def __init__(self, loggers): - super().__init__() - self.loggers = loggers - - def log_metrics(self, metrics, prefix=None, step=None): - for logger in self.loggers: - logger.log_metrics(metrics, prefix=prefix, step=step) - - def log_model(self, is_best, prefix, metadata=None): - for logger in self.loggers: - logger.log_model(is_best=is_best, prefix=prefix, metadata=metadata) - - def close(self): - for logger in self.loggers: - logger.close() \ No newline at end of file diff --git a/backend/ppocr/utils/loggers/vdl_logger.py b/backend/ppocr/utils/loggers/vdl_logger.py deleted file mode 100644 index c345f93..0000000 --- a/backend/ppocr/utils/loggers/vdl_logger.py +++ /dev/null @@ -1,21 +0,0 @@ -from .base_logger import BaseLogger -from visualdl import LogWriter - -class VDLLogger(BaseLogger): - def __init__(self, save_dir): - super().__init__(save_dir) - self.vdl_writer = LogWriter(logdir=save_dir) - - def log_metrics(self, metrics, prefix=None, step=None): - if not prefix: - prefix = "" - updated_metrics = {prefix + "/" + k: v for k, v in metrics.items()} - - for k, v in updated_metrics.items(): - self.vdl_writer.add_scalar(k, v, step) - - def log_model(self, is_best, prefix, metadata=None): - pass - - def close(self): - self.vdl_writer.close() \ No newline at end of file diff --git a/backend/ppocr/utils/loggers/wandb_logger.py b/backend/ppocr/utils/loggers/wandb_logger.py deleted file mode 100644 index 5c805f4..0000000 --- a/backend/ppocr/utils/loggers/wandb_logger.py +++ /dev/null @@ -1,78 +0,0 @@ -import os -from .base_logger import BaseLogger - -class WandbLogger(BaseLogger): - def __init__(self, - project=None, - name=None, - id=None, - entity=None, - save_dir=None, - config=None, - **kwargs): - try: - import wandb - self.wandb = wandb - except ModuleNotFoundError: - raise ModuleNotFoundError( - "Please install wandb using `pip install wandb`" - ) - - self.project = project - self.name = name - self.id = id - self.save_dir = save_dir - self.config = config - self.kwargs = kwargs - self.entity = entity - self._run = None - self._wandb_init = dict( - project=self.project, - name=self.name, - id=self.id, - entity=self.entity, - dir=self.save_dir, - resume="allow" - ) - self._wandb_init.update(**kwargs) - - _ = self.run - - if self.config: - self.run.settings_config.update(self.config) - - @property - def run(self): - if self._run is None: - if self.wandb.run is not None: - logger.info( - "There is a wandb run already in progress " - "and newly created instances of `WandbLogger` will reuse" - " this run. If this is not desired, call `wandb.finish()`" - "before instantiating `WandbLogger`." - ) - self._run = self.wandb.run - else: - self._run = self.wandb.init(**self._wandb_init) - return self._run - - def log_metrics(self, metrics, prefix=None, step=None): - if not prefix: - prefix = "" - updated_metrics = {prefix.lower() + "/" + k: v for k, v in metrics.items()} - - self.run.log(updated_metrics, step=step) - - def log_model(self, is_best, prefix, metadata=None): - model_path = os.path.join(self.save_dir, prefix + '.pdparams') - artifact = self.wandb.Artifact('model-{}'.format(self.run.id), type='model', metadata=metadata) - artifact.add_file(model_path, name="model_ckpt.pdparams") - - aliases = [prefix] - if is_best: - aliases.append("best") - - self.run.log_artifact(artifact, aliases=aliases) - - def close(self): - self.run.finish() \ No newline at end of file diff --git a/backend/ppocr/utils/logging.py b/backend/ppocr/utils/logging.py deleted file mode 100644 index 1eac8f3..0000000 --- a/backend/ppocr/utils/logging.py +++ /dev/null @@ -1,71 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This code is refer from: -https://github.com/WenmuZhou/PytorchOCR/blob/master/torchocr/utils/logging.py -""" - -import os -import sys -import logging -import functools -import paddle.distributed as dist - -logger_initialized = {} - - -@functools.lru_cache() -def get_logger(name='ppocr', log_file=None, log_level=logging.DEBUG): - """Initialize and get a logger by name. - If the logger has not been initialized, this method will initialize the - logger by adding one or two handlers, otherwise the initialized logger will - be directly returned. During initialization, a StreamHandler will always be - added. If `log_file` is specified a FileHandler will also be added. - Args: - name (str): Logger name. - log_file (str | None): The log filename. If specified, a FileHandler - will be added to the logger. - log_level (int): The logger level. Note that only the process of - rank 0 is affected, and other processes will set the level to - "Error" thus be silent most of the time. - Returns: - logging.Logger: The expected logger. - """ - logger = logging.getLogger(name) - if name in logger_initialized: - return logger - for logger_name in logger_initialized: - if name.startswith(logger_name): - return logger - - formatter = logging.Formatter( - '[%(asctime)s] %(name)s %(levelname)s: %(message)s', - datefmt="%Y/%m/%d %H:%M:%S") - - stream_handler = logging.StreamHandler(stream=sys.stdout) - stream_handler.setFormatter(formatter) - logger.addHandler(stream_handler) - if log_file is not None and dist.get_rank() == 0: - log_file_folder = os.path.split(log_file)[0] - os.makedirs(log_file_folder, exist_ok=True) - file_handler = logging.FileHandler(log_file, 'a') - file_handler.setFormatter(formatter) - logger.addHandler(file_handler) - if dist.get_rank() == 0: - logger.setLevel(log_level) - else: - logger.setLevel(logging.ERROR) - logger_initialized[name] = True - logger.propagate = False - return logger diff --git a/backend/ppocr/utils/network.py b/backend/ppocr/utils/network.py deleted file mode 100644 index 118d1be..0000000 --- a/backend/ppocr/utils/network.py +++ /dev/null @@ -1,84 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import tarfile -import requests -from tqdm import tqdm - -from ppocr.utils.logging import get_logger - - -def download_with_progressbar(url, save_path): - logger = get_logger() - response = requests.get(url, stream=True) - if response.status_code == 200: - total_size_in_bytes = int(response.headers.get('content-length', 1)) - block_size = 1024 # 1 Kibibyte - progress_bar = tqdm( - total=total_size_in_bytes, unit='iB', unit_scale=True) - with open(save_path, 'wb') as file: - for data in response.iter_content(block_size): - progress_bar.update(len(data)) - file.write(data) - progress_bar.close() - else: - logger.error("Something went wrong while downloading models") - sys.exit(0) - - -def maybe_download(model_storage_directory, url): - # using custom model - tar_file_name_list = [ - 'inference.pdiparams', 'inference.pdiparams.info', 'inference.pdmodel' - ] - if not os.path.exists( - os.path.join(model_storage_directory, 'inference.pdiparams') - ) or not os.path.exists( - os.path.join(model_storage_directory, 'inference.pdmodel')): - assert url.endswith('.tar'), 'Only supports tar compressed package' - tmp_path = os.path.join(model_storage_directory, url.split('/')[-1]) - print('download {} to {}'.format(url, tmp_path)) - os.makedirs(model_storage_directory, exist_ok=True) - download_with_progressbar(url, tmp_path) - with tarfile.open(tmp_path, 'r') as tarObj: - for member in tarObj.getmembers(): - filename = None - for tar_file_name in tar_file_name_list: - if tar_file_name in member.name: - filename = tar_file_name - if filename is None: - continue - file = tarObj.extractfile(member) - with open( - os.path.join(model_storage_directory, filename), - 'wb') as f: - f.write(file.read()) - os.remove(tmp_path) - - -def is_link(s): - return s is not None and s.startswith('http') - - -def confirm_model_dir_url(model_dir, default_model_dir, default_url): - url = default_url - if model_dir is None or is_link(model_dir): - if is_link(model_dir): - url = model_dir - file_name = url.split('/')[-1][:-4] - model_dir = default_model_dir - model_dir = os.path.join(model_dir, file_name) - return model_dir, url diff --git a/backend/ppocr/utils/poly_nms.py b/backend/ppocr/utils/poly_nms.py deleted file mode 100644 index 9dcb3d2..0000000 --- a/backend/ppocr/utils/poly_nms.py +++ /dev/null @@ -1,146 +0,0 @@ -# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -from shapely.geometry import Polygon - - -def points2polygon(points): - """Convert k points to 1 polygon. - - Args: - points (ndarray or list): A ndarray or a list of shape (2k) - that indicates k points. - - Returns: - polygon (Polygon): A polygon object. - """ - if isinstance(points, list): - points = np.array(points) - - assert isinstance(points, np.ndarray) - assert (points.size % 2 == 0) and (points.size >= 8) - - point_mat = points.reshape([-1, 2]) - return Polygon(point_mat) - - -def poly_intersection(poly_det, poly_gt, buffer=0.0001): - """Calculate the intersection area between two polygon. - - Args: - poly_det (Polygon): A polygon predicted by detector. - poly_gt (Polygon): A gt polygon. - - Returns: - intersection_area (float): The intersection area between two polygons. - """ - assert isinstance(poly_det, Polygon) - assert isinstance(poly_gt, Polygon) - - if buffer == 0: - poly_inter = poly_det & poly_gt - else: - poly_inter = poly_det.buffer(buffer) & poly_gt.buffer(buffer) - return poly_inter.area, poly_inter - - -def poly_union(poly_det, poly_gt): - """Calculate the union area between two polygon. - - Args: - poly_det (Polygon): A polygon predicted by detector. - poly_gt (Polygon): A gt polygon. - - Returns: - union_area (float): The union area between two polygons. - """ - assert isinstance(poly_det, Polygon) - assert isinstance(poly_gt, Polygon) - - area_det = poly_det.area - area_gt = poly_gt.area - area_inters, _ = poly_intersection(poly_det, poly_gt) - return area_det + area_gt - area_inters - - -def valid_boundary(x, with_score=True): - num = len(x) - if num < 8: - return False - if num % 2 == 0 and (not with_score): - return True - if num % 2 == 1 and with_score: - return True - - return False - - -def boundary_iou(src, target): - """Calculate the IOU between two boundaries. - - Args: - src (list): Source boundary. - target (list): Target boundary. - - Returns: - iou (float): The iou between two boundaries. - """ - assert valid_boundary(src, False) - assert valid_boundary(target, False) - src_poly = points2polygon(src) - target_poly = points2polygon(target) - - return poly_iou(src_poly, target_poly) - - -def poly_iou(poly_det, poly_gt): - """Calculate the IOU between two polygons. - - Args: - poly_det (Polygon): A polygon predicted by detector. - poly_gt (Polygon): A gt polygon. - - Returns: - iou (float): The IOU between two polygons. - """ - assert isinstance(poly_det, Polygon) - assert isinstance(poly_gt, Polygon) - area_inters, _ = poly_intersection(poly_det, poly_gt) - area_union = poly_union(poly_det, poly_gt) - if area_union == 0: - return 0.0 - return area_inters / area_union - - -def poly_nms(polygons, threshold): - assert isinstance(polygons, list) - - polygons = np.array(sorted(polygons, key=lambda x: x[-1])) - - keep_poly = [] - index = [i for i in range(polygons.shape[0])] - - while len(index) > 0: - keep_poly.append(polygons[index[-1]].tolist()) - A = polygons[index[-1]][:-1] - index = np.delete(index, -1) - iou_list = np.zeros((len(index), )) - for i in range(len(index)): - B = polygons[index[i]][:-1] - iou_list[i] = boundary_iou(A, B) - remove_index = np.where(iou_list > threshold) - index = np.delete(index, remove_index) - - return keep_poly diff --git a/backend/ppocr/utils/profiler.py b/backend/ppocr/utils/profiler.py deleted file mode 100644 index c4e28bc..0000000 --- a/backend/ppocr/utils/profiler.py +++ /dev/null @@ -1,110 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import paddle - -# A global variable to record the number of calling times for profiler -# functions. It is used to specify the tracing range of training steps. -_profiler_step_id = 0 - -# A global variable to avoid parsing from string every time. -_profiler_options = None - - -class ProfilerOptions(object): - ''' - Use a string to initialize a ProfilerOptions. - The string should be in the format: "key1=value1;key2=value;key3=value3". - For example: - "profile_path=model.profile" - "batch_range=[50, 60]; profile_path=model.profile" - "batch_range=[50, 60]; tracer_option=OpDetail; profile_path=model.profile" - ProfilerOptions supports following key-value pair: - batch_range - a integer list, e.g. [100, 110]. - state - a string, the optional values are 'CPU', 'GPU' or 'All'. - sorted_key - a string, the optional values are 'calls', 'total', - 'max', 'min' or 'ave. - tracer_option - a string, the optional values are 'Default', 'OpDetail', - 'AllOpDetail'. - profile_path - a string, the path to save the serialized profile data, - which can be used to generate a timeline. - exit_on_finished - a boolean. - ''' - - def __init__(self, options_str): - assert isinstance(options_str, str) - - self._options = { - 'batch_range': [10, 20], - 'state': 'All', - 'sorted_key': 'total', - 'tracer_option': 'Default', - 'profile_path': '/tmp/profile', - 'exit_on_finished': True - } - self._parse_from_string(options_str) - - def _parse_from_string(self, options_str): - for kv in options_str.replace(' ', '').split(';'): - key, value = kv.split('=') - if key == 'batch_range': - value_list = value.replace('[', '').replace(']', '').split(',') - value_list = list(map(int, value_list)) - if len(value_list) >= 2 and value_list[0] >= 0 and value_list[ - 1] > value_list[0]: - self._options[key] = value_list - elif key == 'exit_on_finished': - self._options[key] = value.lower() in ("yes", "true", "t", "1") - elif key in [ - 'state', 'sorted_key', 'tracer_option', 'profile_path' - ]: - self._options[key] = value - - def __getitem__(self, name): - if self._options.get(name, None) is None: - raise ValueError( - "ProfilerOptions does not have an option named %s." % name) - return self._options[name] - - -def add_profiler_step(options_str=None): - ''' - Enable the operator-level timing using PaddlePaddle's profiler. - The profiler uses a independent variable to count the profiler steps. - One call of this function is treated as a profiler step. - - Args: - profiler_options - a string to initialize the ProfilerOptions. - Default is None, and the profiler is disabled. - ''' - if options_str is None: - return - - global _profiler_step_id - global _profiler_options - - if _profiler_options is None: - _profiler_options = ProfilerOptions(options_str) - - if _profiler_step_id == _profiler_options['batch_range'][0]: - paddle.utils.profiler.start_profiler( - _profiler_options['state'], _profiler_options['tracer_option']) - elif _profiler_step_id == _profiler_options['batch_range'][1]: - paddle.utils.profiler.stop_profiler(_profiler_options['sorted_key'], - _profiler_options['profile_path']) - if _profiler_options['exit_on_finished']: - sys.exit(0) - - _profiler_step_id += 1 diff --git a/backend/ppocr/utils/save_load.py b/backend/ppocr/utils/save_load.py deleted file mode 100644 index b09f1db..0000000 --- a/backend/ppocr/utils/save_load.py +++ /dev/null @@ -1,185 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import errno -import os -import pickle -import six - -import paddle - -from ppocr.utils.logging import get_logger - -__all__ = ['load_model'] - - -def _mkdir_if_not_exist(path, logger): - """ - mkdir if not exists, ignore the exception when multiprocess mkdir together - """ - if not os.path.exists(path): - try: - os.makedirs(path) - except OSError as e: - if e.errno == errno.EEXIST and os.path.isdir(path): - logger.warning( - 'be happy if some process has already created {}'.format( - path)) - else: - raise OSError('Failed to mkdir {}'.format(path)) - - -def load_model(config, model, optimizer=None, model_type='det'): - """ - load model from checkpoint or pretrained_model - """ - logger = get_logger() - global_config = config['Global'] - checkpoints = global_config.get('checkpoints') - pretrained_model = global_config.get('pretrained_model') - best_model_dict = {} - - if model_type == 'vqa': - checkpoints = config['Architecture']['Backbone']['checkpoints'] - # load vqa method metric - if checkpoints: - if os.path.exists(os.path.join(checkpoints, 'metric.states')): - with open(os.path.join(checkpoints, 'metric.states'), - 'rb') as f: - states_dict = pickle.load(f) if six.PY2 else pickle.load( - f, encoding='latin1') - best_model_dict = states_dict.get('best_model_dict', {}) - if 'epoch' in states_dict: - best_model_dict['start_epoch'] = states_dict['epoch'] + 1 - logger.info("resume from {}".format(checkpoints)) - - if optimizer is not None: - if checkpoints[-1] in ['/', '\\']: - checkpoints = checkpoints[:-1] - if os.path.exists(checkpoints + '.pdopt'): - optim_dict = paddle.load(checkpoints + '.pdopt') - optimizer.set_state_dict(optim_dict) - else: - logger.warning( - "{}.pdopt is not exists, params of optimizer is not loaded". - format(checkpoints)) - return best_model_dict - - if checkpoints: - if checkpoints.endswith('.pdparams'): - checkpoints = checkpoints.replace('.pdparams', '') - assert os.path.exists(checkpoints + ".pdparams"), \ - "The {}.pdparams does not exists!".format(checkpoints) - - # load params from trained model - params = paddle.load(checkpoints + '.pdparams') - state_dict = model.state_dict() - new_state_dict = {} - for key, value in state_dict.items(): - if key not in params: - logger.warning("{} not in loaded params {} !".format( - key, params.keys())) - continue - pre_value = params[key] - if list(value.shape) == list(pre_value.shape): - new_state_dict[key] = pre_value - else: - logger.warning( - "The shape of model params {} {} not matched with loaded params shape {} !". - format(key, value.shape, pre_value.shape)) - model.set_state_dict(new_state_dict) - - if optimizer is not None: - if os.path.exists(checkpoints + '.pdopt'): - optim_dict = paddle.load(checkpoints + '.pdopt') - optimizer.set_state_dict(optim_dict) - else: - logger.warning( - "{}.pdopt is not exists, params of optimizer is not loaded". - format(checkpoints)) - - if os.path.exists(checkpoints + '.states'): - with open(checkpoints + '.states', 'rb') as f: - states_dict = pickle.load(f) if six.PY2 else pickle.load( - f, encoding='latin1') - best_model_dict = states_dict.get('best_model_dict', {}) - if 'epoch' in states_dict: - best_model_dict['start_epoch'] = states_dict['epoch'] + 1 - logger.info("resume from {}".format(checkpoints)) - elif pretrained_model: - load_pretrained_params(model, pretrained_model) - else: - logger.info('train from scratch') - return best_model_dict - - -def load_pretrained_params(model, path): - logger = get_logger() - if path.endswith('.pdparams'): - path = path.replace('.pdparams', '') - assert os.path.exists(path + ".pdparams"), \ - "The {}.pdparams does not exists!".format(path) - - params = paddle.load(path + '.pdparams') - state_dict = model.state_dict() - new_state_dict = {} - for k1 in params.keys(): - if k1 not in state_dict.keys(): - logger.warning("The pretrained params {} not in model".format(k1)) - else: - if list(state_dict[k1].shape) == list(params[k1].shape): - new_state_dict[k1] = params[k1] - else: - logger.warning( - "The shape of model params {} {} not matched with loaded params {} {} !". - format(k1, state_dict[k1].shape, k1, params[k1].shape)) - model.set_state_dict(new_state_dict) - logger.info("load pretrain successful from {}".format(path)) - return model - - -def save_model(model, - optimizer, - model_path, - logger, - config, - is_best=False, - prefix='ppocr', - **kwargs): - """ - save model to the target path - """ - _mkdir_if_not_exist(model_path, logger) - model_prefix = os.path.join(model_path, prefix) - paddle.save(optimizer.state_dict(), model_prefix + '.pdopt') - if config['Architecture']["model_type"] != 'vqa': - paddle.save(model.state_dict(), model_prefix + '.pdparams') - metric_prefix = model_prefix - else: - if config['Global']['distributed']: - model._layers.backbone.model.save_pretrained(model_prefix) - else: - model.backbone.model.save_pretrained(model_prefix) - metric_prefix = os.path.join(model_prefix, 'metric') - # save metric and config - if is_best: - with open(metric_prefix + '.states', 'wb') as f: - pickle.dump(kwargs, f, protocol=2) - logger.info('save best model is to {}'.format(model_prefix)) - else: - logger.info("save model in {}".format(model_prefix)) diff --git a/backend/ppocr/utils/stats.py b/backend/ppocr/utils/stats.py deleted file mode 100755 index 179b008..0000000 --- a/backend/ppocr/utils/stats.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import collections -import numpy as np -import datetime - -__all__ = ['TrainingStats', 'Time'] - - -class SmoothedValue(object): - """Track a series of values and provide access to smoothed values over a - window or the global series average. - """ - - def __init__(self, window_size): - self.deque = collections.deque(maxlen=window_size) - - def add_value(self, value): - self.deque.append(value) - - def get_median_value(self): - return np.median(self.deque) - - -def Time(): - return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f') - - -class TrainingStats(object): - def __init__(self, window_size, stats_keys): - self.window_size = window_size - self.smoothed_losses_and_metrics = { - key: SmoothedValue(window_size) - for key in stats_keys - } - - def update(self, stats): - for k, v in stats.items(): - if k not in self.smoothed_losses_and_metrics: - self.smoothed_losses_and_metrics[k] = SmoothedValue( - self.window_size) - self.smoothed_losses_and_metrics[k].add_value(v) - - def get(self, extras=None): - stats = collections.OrderedDict() - if extras: - for k, v in extras.items(): - stats[k] = v - for k, v in self.smoothed_losses_and_metrics.items(): - stats[k] = round(v.get_median_value(), 6) - - return stats - - def log(self, extras=None): - d = self.get(extras) - strs = [] - for k, v in d.items(): - strs.append('{}: {:x<6f}'.format(k, v)) - strs = ', '.join(strs) - return strs diff --git a/backend/ppocr/utils/utility.py b/backend/ppocr/utils/utility.py deleted file mode 100755 index 4a25ff8..0000000 --- a/backend/ppocr/utils/utility.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import imghdr -import cv2 -import random -import numpy as np -import paddle - - -def print_dict(d, logger, delimiter=0): - """ - Recursively visualize a dict and - indenting acrrording by the relationship of keys. - """ - for k, v in sorted(d.items()): - if isinstance(v, dict): - logger.info("{}{} : ".format(delimiter * " ", str(k))) - print_dict(v, logger, delimiter + 4) - elif isinstance(v, list) and len(v) >= 1 and isinstance(v[0], dict): - logger.info("{}{} : ".format(delimiter * " ", str(k))) - for value in v: - print_dict(value, logger, delimiter + 4) - else: - logger.info("{}{} : {}".format(delimiter * " ", k, v)) - - -def get_check_global_params(mode): - check_params = ['use_gpu', 'max_text_length', 'image_shape', \ - 'image_shape', 'character_type', 'loss_type'] - if mode == "train_eval": - check_params = check_params + [ \ - 'train_batch_size_per_card', 'test_batch_size_per_card'] - elif mode == "test": - check_params = check_params + ['test_batch_size_per_card'] - return check_params - - -def _check_image_file(path): - img_end = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif'} - return any([path.lower().endswith(e) for e in img_end]) - - -def get_image_file_list(img_file): - imgs_lists = [] - if img_file is None or not os.path.exists(img_file): - raise Exception("not found any img file in {}".format(img_file)) - - img_end = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif'} - if os.path.isfile(img_file) and _check_image_file(img_file): - imgs_lists.append(img_file) - elif os.path.isdir(img_file): - for single_file in os.listdir(img_file): - file_path = os.path.join(img_file, single_file) - if os.path.isfile(file_path) and _check_image_file(file_path): - imgs_lists.append(file_path) - if len(imgs_lists) == 0: - raise Exception("not found any img file in {}".format(img_file)) - imgs_lists = sorted(imgs_lists) - return imgs_lists - - -def check_and_read_gif(img_path): - if os.path.basename(img_path)[-3:] in ['gif', 'GIF']: - gif = cv2.VideoCapture(img_path) - ret, frame = gif.read() - if not ret: - logger = logging.getLogger('ppocr') - logger.info("Cannot read {}. This gif image maybe corrupted.") - return None, False - if len(frame.shape) == 2 or frame.shape[-1] == 1: - frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB) - imgvalue = frame[:, :, ::-1] - return imgvalue, True - return None, False - - -def load_vqa_bio_label_maps(label_map_path): - with open(label_map_path, "r", encoding='utf-8') as fin: - lines = fin.readlines() - lines = [line.strip() for line in lines] - if "O" not in lines: - lines.insert(0, "O") - labels = [] - for line in lines: - if line == "O": - labels.append("O") - else: - labels.append("B-" + line) - labels.append("I-" + line) - label2id_map = {label: idx for idx, label in enumerate(labels)} - id2label_map = {idx: label for idx, label in enumerate(labels)} - return label2id_map, id2label_map - - -def set_seed(seed=1024): - random.seed(seed) - np.random.seed(seed) - paddle.seed(seed) - - -class AverageMeter: - def __init__(self): - self.reset() - - def reset(self): - """reset""" - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - """update""" - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count diff --git a/backend/ppocr/utils/visual.py b/backend/ppocr/utils/visual.py deleted file mode 100644 index 7a8c167..0000000 --- a/backend/ppocr/utils/visual.py +++ /dev/null @@ -1,98 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import numpy as np -from PIL import Image, ImageDraw, ImageFont - - -def draw_ser_results(image, - ocr_results, - font_path="doc/fonts/simfang.ttf", - font_size=18): - np.random.seed(2021) - color = (np.random.permutation(range(255)), - np.random.permutation(range(255)), - np.random.permutation(range(255))) - color_map = { - idx: (color[0][idx], color[1][idx], color[2][idx]) - for idx in range(1, 255) - } - if isinstance(image, np.ndarray): - image = Image.fromarray(image) - elif isinstance(image, str) and os.path.isfile(image): - image = Image.open(image).convert('RGB') - img_new = image.copy() - draw = ImageDraw.Draw(img_new) - - font = ImageFont.truetype(font_path, font_size, encoding="utf-8") - for ocr_info in ocr_results: - if ocr_info["pred_id"] not in color_map: - continue - color = color_map[ocr_info["pred_id"]] - text = "{}: {}".format(ocr_info["pred"], ocr_info["text"]) - - draw_box_txt(ocr_info["bbox"], text, draw, font, font_size, color) - - img_new = Image.blend(image, img_new, 0.5) - return np.array(img_new) - - -def draw_box_txt(bbox, text, draw, font, font_size, color): - # draw ocr results outline - bbox = ((bbox[0], bbox[1]), (bbox[2], bbox[3])) - draw.rectangle(bbox, fill=color) - - # draw ocr results - start_y = max(0, bbox[0][1] - font_size) - tw = font.getsize(text)[0] - draw.rectangle( - [(bbox[0][0] + 1, start_y), (bbox[0][0] + tw + 1, start_y + font_size)], - fill=(0, 0, 255)) - draw.text((bbox[0][0] + 1, start_y), text, fill=(255, 255, 255), font=font) - - -def draw_re_results(image, - result, - font_path="doc/fonts/simfang.ttf", - font_size=18): - np.random.seed(0) - if isinstance(image, np.ndarray): - image = Image.fromarray(image) - elif isinstance(image, str) and os.path.isfile(image): - image = Image.open(image).convert('RGB') - img_new = image.copy() - draw = ImageDraw.Draw(img_new) - - font = ImageFont.truetype(font_path, font_size, encoding="utf-8") - color_head = (0, 0, 255) - color_tail = (255, 0, 0) - color_line = (0, 255, 0) - - for ocr_info_head, ocr_info_tail in result: - draw_box_txt(ocr_info_head["bbox"], ocr_info_head["text"], draw, font, - font_size, color_head) - draw_box_txt(ocr_info_tail["bbox"], ocr_info_tail["text"], draw, font, - font_size, color_tail) - - center_head = ( - (ocr_info_head['bbox'][0] + ocr_info_head['bbox'][2]) // 2, - (ocr_info_head['bbox'][1] + ocr_info_head['bbox'][3]) // 2) - center_tail = ( - (ocr_info_tail['bbox'][0] + ocr_info_tail['bbox'][2]) // 2, - (ocr_info_tail['bbox'][1] + ocr_info_tail['bbox'][3]) // 2) - - draw.line([center_head, center_tail], fill=color_line, width=5) - - img_new = Image.blend(image, img_new, 0.5) - return np.array(img_new) diff --git a/backend/tools/infer/predict_cls.py b/backend/tools/infer/predict_cls.py deleted file mode 100755 index ed2f47c..0000000 --- a/backend/tools/infer/predict_cls.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import cv2 -import copy -import numpy as np -import math -import time -import traceback - -import tools.infer.utility as utility -from ppocr.postprocess import build_post_process -from ppocr.utils.logging import get_logger -from ppocr.utils.utility import get_image_file_list, check_and_read_gif - -logger = get_logger() - - -class TextClassifier(object): - def __init__(self, args): - self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")] - self.cls_batch_num = args.cls_batch_num - self.cls_thresh = args.cls_thresh - postprocess_params = { - 'name': 'ClsPostProcess', - "label_list": args.label_list, - } - self.postprocess_op = build_post_process(postprocess_params) - self.predictor, self.input_tensor, self.output_tensors, _ = \ - utility.create_predictor(args, 'cls', logger) - self.use_onnx = args.use_onnx - - def resize_norm_img(self, img): - imgC, imgH, imgW = self.cls_image_shape - h = img.shape[0] - w = img.shape[1] - ratio = w / float(h) - if math.ceil(imgH * ratio) > imgW: - resized_w = imgW - else: - resized_w = int(math.ceil(imgH * ratio)) - resized_image = cv2.resize(img, (resized_w, imgH)) - resized_image = resized_image.astype('float32') - if self.cls_image_shape[0] == 1: - resized_image = resized_image / 255 - resized_image = resized_image[np.newaxis, :] - else: - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) - padding_im[:, :, 0:resized_w] = resized_image - return padding_im - - def __call__(self, img_list): - img_list = copy.deepcopy(img_list) - img_num = len(img_list) - # Calculate the aspect ratio of all text bars - width_list = [] - for img in img_list: - width_list.append(img.shape[1] / float(img.shape[0])) - # Sorting can speed up the cls process - indices = np.argsort(np.array(width_list)) - - cls_res = [['', 0.0]] * img_num - batch_num = self.cls_batch_num - elapse = 0 - for beg_img_no in range(0, img_num, batch_num): - - end_img_no = min(img_num, beg_img_no + batch_num) - norm_img_batch = [] - max_wh_ratio = 0 - starttime = time.time() - for ino in range(beg_img_no, end_img_no): - h, w = img_list[indices[ino]].shape[0:2] - wh_ratio = w * 1.0 / h - max_wh_ratio = max(max_wh_ratio, wh_ratio) - for ino in range(beg_img_no, end_img_no): - norm_img = self.resize_norm_img(img_list[indices[ino]]) - norm_img = norm_img[np.newaxis, :] - norm_img_batch.append(norm_img) - norm_img_batch = np.concatenate(norm_img_batch) - norm_img_batch = norm_img_batch.copy() - - if self.use_onnx: - input_dict = {} - input_dict[self.input_tensor.name] = norm_img_batch - outputs = self.predictor.run(self.output_tensors, input_dict) - prob_out = outputs[0] - else: - self.input_tensor.copy_from_cpu(norm_img_batch) - self.predictor.run() - prob_out = self.output_tensors[0].copy_to_cpu() - self.predictor.try_shrink_memory() - cls_result = self.postprocess_op(prob_out) - elapse += time.time() - starttime - for rno in range(len(cls_result)): - label, score = cls_result[rno] - cls_res[indices[beg_img_no + rno]] = [label, score] - if '180' in label and score > self.cls_thresh: - img_list[indices[beg_img_no + rno]] = cv2.rotate( - img_list[indices[beg_img_no + rno]], 1) - return img_list, cls_res, elapse - - -def main(args): - image_file_list = get_image_file_list(args.image_dir) - text_classifier = TextClassifier(args) - valid_image_file_list = [] - img_list = [] - for image_file in image_file_list: - img, flag = check_and_read_gif(image_file) - if not flag: - img = cv2.imread(image_file) - if img is None: - logger.info("error in loading image:{}".format(image_file)) - continue - valid_image_file_list.append(image_file) - img_list.append(img) - try: - img_list, cls_res, predict_time = text_classifier(img_list) - except Exception as E: - logger.info(traceback.format_exc()) - logger.info(E) - exit() - for ino in range(len(img_list)): - logger.info("Predicts of {}:{}".format(valid_image_file_list[ino], - cls_res[ino])) - - -if __name__ == "__main__": - main(utility.parse_args()) diff --git a/backend/tools/infer/predict_det.py b/backend/tools/infer/predict_det.py deleted file mode 100755 index 5f2675d..0000000 --- a/backend/tools/infer/predict_det.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import cv2 -import numpy as np -import time -import sys - -import tools.infer.utility as utility -from ppocr.utils.logging import get_logger -from ppocr.utils.utility import get_image_file_list, check_and_read_gif -from ppocr.data import create_operators, transform -from ppocr.postprocess import build_post_process -import json -logger = get_logger() - - -class TextDetector(object): - def __init__(self, args): - self.args = args - self.det_algorithm = args.det_algorithm - self.use_onnx = args.use_onnx - pre_process_list = [{ - 'DetResizeForTest': { - 'limit_side_len': args.det_limit_side_len, - 'limit_type': args.det_limit_type, - } - }, { - 'NormalizeImage': { - 'std': [0.229, 0.224, 0.225], - 'mean': [0.485, 0.456, 0.406], - 'scale': '1./255.', - 'order': 'hwc' - } - }, { - 'ToCHWImage': None - }, { - 'KeepKeys': { - 'keep_keys': ['image', 'shape'] - } - }] - postprocess_params = {} - if self.det_algorithm == "DB": - postprocess_params['name'] = 'DBPostProcess' - postprocess_params["thresh"] = args.det_db_thresh - postprocess_params["box_thresh"] = args.det_db_box_thresh - postprocess_params["max_candidates"] = 1000 - postprocess_params["unclip_ratio"] = args.det_db_unclip_ratio - postprocess_params["use_dilation"] = args.use_dilation - postprocess_params["score_mode"] = args.det_db_score_mode - elif self.det_algorithm == "EAST": - postprocess_params['name'] = 'EASTPostProcess' - postprocess_params["score_thresh"] = args.det_east_score_thresh - postprocess_params["cover_thresh"] = args.det_east_cover_thresh - postprocess_params["nms_thresh"] = args.det_east_nms_thresh - elif self.det_algorithm == "SAST": - pre_process_list[0] = { - 'DetResizeForTest': { - 'resize_long': args.det_limit_side_len - } - } - postprocess_params['name'] = 'SASTPostProcess' - postprocess_params["score_thresh"] = args.det_sast_score_thresh - postprocess_params["nms_thresh"] = args.det_sast_nms_thresh - self.det_sast_polygon = args.det_sast_polygon - if self.det_sast_polygon: - postprocess_params["sample_pts_num"] = 6 - postprocess_params["expand_scale"] = 1.2 - postprocess_params["shrink_ratio_of_width"] = 0.2 - else: - postprocess_params["sample_pts_num"] = 2 - postprocess_params["expand_scale"] = 1.0 - postprocess_params["shrink_ratio_of_width"] = 0.3 - elif self.det_algorithm == "PSE": - postprocess_params['name'] = 'PSEPostProcess' - postprocess_params["thresh"] = args.det_pse_thresh - postprocess_params["box_thresh"] = args.det_pse_box_thresh - postprocess_params["min_area"] = args.det_pse_min_area - postprocess_params["box_type"] = args.det_pse_box_type - postprocess_params["scale"] = args.det_pse_scale - self.det_pse_box_type = args.det_pse_box_type - elif self.det_algorithm == "FCE": - pre_process_list[0] = { - 'DetResizeForTest': { - 'rescale_img': [1080, 736] - } - } - postprocess_params['name'] = 'FCEPostProcess' - postprocess_params["scales"] = args.scales - postprocess_params["alpha"] = args.alpha - postprocess_params["beta"] = args.beta - postprocess_params["fourier_degree"] = args.fourier_degree - postprocess_params["box_type"] = args.det_fce_box_type - else: - logger.info("unknown det_algorithm:{}".format(self.det_algorithm)) - sys.exit(0) - - self.preprocess_op = create_operators(pre_process_list) - self.postprocess_op = build_post_process(postprocess_params) - self.predictor, self.input_tensor, self.output_tensors, self.config = utility.create_predictor( - args, 'det', logger) - - if self.use_onnx: - img_h, img_w = self.input_tensor.shape[2:] - if img_h is not None and img_w is not None and img_h > 0 and img_w > 0: - pre_process_list[0] = { - 'DetResizeForTest': { - 'image_shape': [img_h, img_w] - } - } - self.preprocess_op = create_operators(pre_process_list) - - if args.benchmark: - import auto_log - pid = os.getpid() - gpu_id = utility.get_infer_gpuid() - self.autolog = auto_log.AutoLogger( - model_name="det", - model_precision=args.precision, - batch_size=1, - data_shape="dynamic", - save_path=None, - inference_config=self.config, - pids=pid, - process_name=None, - gpu_ids=gpu_id if args.use_gpu else None, - time_keys=[ - 'preprocess_time', 'inference_time', 'postprocess_time' - ], - warmup=2, - logger=logger) - - def order_points_clockwise(self, pts): - rect = np.zeros((4, 2), dtype="float32") - s = pts.sum(axis=1) - rect[0] = pts[np.argmin(s)] - rect[2] = pts[np.argmax(s)] - diff = np.diff(pts, axis=1) - rect[1] = pts[np.argmin(diff)] - rect[3] = pts[np.argmax(diff)] - return rect - - def clip_det_res(self, points, img_height, img_width): - for pno in range(points.shape[0]): - points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1)) - points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1)) - return points - - def filter_tag_det_res(self, dt_boxes, image_shape): - img_height, img_width = image_shape[0:2] - dt_boxes_new = [] - for box in dt_boxes: - box = self.order_points_clockwise(box) - box = self.clip_det_res(box, img_height, img_width) - rect_width = int(np.linalg.norm(box[0] - box[1])) - rect_height = int(np.linalg.norm(box[0] - box[3])) - if rect_width <= 3 or rect_height <= 3: - continue - dt_boxes_new.append(box) - dt_boxes = np.array(dt_boxes_new) - return dt_boxes - - def filter_tag_det_res_only_clip(self, dt_boxes, image_shape): - img_height, img_width = image_shape[0:2] - dt_boxes_new = [] - for box in dt_boxes: - box = self.clip_det_res(box, img_height, img_width) - dt_boxes_new.append(box) - dt_boxes = np.array(dt_boxes_new) - return dt_boxes - - def __call__(self, img): - ori_im = img.copy() - data = {'image': img} - - st = time.time() - - if self.args.benchmark: - self.autolog.times.start() - - data = transform(data, self.preprocess_op) - img, shape_list = data - if img is None: - return None, 0 - img = np.expand_dims(img, axis=0) - shape_list = np.expand_dims(shape_list, axis=0) - img = img.copy() - - if self.args.benchmark: - self.autolog.times.stamp() - if self.use_onnx: - input_dict = {} - input_dict[self.input_tensor.name] = img - outputs = self.predictor.run(self.output_tensors, input_dict) - else: - self.input_tensor.copy_from_cpu(img) - self.predictor.run() - outputs = [] - for output_tensor in self.output_tensors: - output = output_tensor.copy_to_cpu() - outputs.append(output) - if self.args.benchmark: - self.autolog.times.stamp() - - preds = {} - if self.det_algorithm == "EAST": - preds['f_geo'] = outputs[0] - preds['f_score'] = outputs[1] - elif self.det_algorithm == 'SAST': - preds['f_border'] = outputs[0] - preds['f_score'] = outputs[1] - preds['f_tco'] = outputs[2] - preds['f_tvo'] = outputs[3] - elif self.det_algorithm in ['DB', 'PSE']: - preds['maps'] = outputs[0] - elif self.det_algorithm == 'FCE': - for i, output in enumerate(outputs): - preds['level_{}'.format(i)] = output - else: - raise NotImplementedError - - #self.predictor.try_shrink_memory() - post_result = self.postprocess_op(preds, shape_list) - dt_boxes = post_result[0]['points'] - if (self.det_algorithm == "SAST" and self.det_sast_polygon) or ( - self.det_algorithm in ["PSE", "FCE"] and - self.postprocess_op.box_type == 'poly'): - dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape) - else: - dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape) - - if self.args.benchmark: - self.autolog.times.end(stamp=True) - et = time.time() - return dt_boxes, et - st - - -if __name__ == "__main__": - args = utility.parse_args() - image_file_list = get_image_file_list(args.image_dir) - text_detector = TextDetector(args) - count = 0 - total_time = 0 - draw_img_save = "./inference_results" - - if args.warmup: - img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8) - for i in range(2): - res = text_detector(img) - - if not os.path.exists(draw_img_save): - os.makedirs(draw_img_save) - save_results = [] - for image_file in image_file_list: - img, flag = check_and_read_gif(image_file) - if not flag: - img = cv2.imread(image_file) - if img is None: - logger.info("error in loading image:{}".format(image_file)) - continue - st = time.time() - dt_boxes, _ = text_detector(img) - elapse = time.time() - st - if count > 0: - total_time += elapse - count += 1 - save_pred = os.path.basename(image_file) + "\t" + str( - json.dumps([x.tolist() for x in dt_boxes])) + "\n" - save_results.append(save_pred) - logger.info(save_pred) - logger.info("The predict time of {}: {}".format(image_file, elapse)) - src_im = utility.draw_text_det_res(dt_boxes, image_file) - img_name_pure = os.path.split(image_file)[-1] - img_path = os.path.join(draw_img_save, - "det_res_{}".format(img_name_pure)) - cv2.imwrite(img_path, src_im) - logger.info("The visualized image saved in {}".format(img_path)) - - with open(os.path.join(draw_img_save, "det_results.txt"), 'w') as f: - f.writelines(save_results) - f.close() - if args.benchmark: - text_detector.autolog.report() diff --git a/backend/tools/infer/predict_e2e.py b/backend/tools/infer/predict_e2e.py deleted file mode 100755 index fb2859f..0000000 --- a/backend/tools/infer/predict_e2e.py +++ /dev/null @@ -1,169 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import cv2 -import numpy as np -import time -import sys - -import tools.infer.utility as utility -from ppocr.utils.logging import get_logger -from ppocr.utils.utility import get_image_file_list, check_and_read_gif -from ppocr.data import create_operators, transform -from ppocr.postprocess import build_post_process - -logger = get_logger() - - -class TextE2E(object): - def __init__(self, args): - self.args = args - self.e2e_algorithm = args.e2e_algorithm - self.use_onnx = args.use_onnx - pre_process_list = [{ - 'E2EResizeForTest': {} - }, { - 'NormalizeImage': { - 'std': [0.229, 0.224, 0.225], - 'mean': [0.485, 0.456, 0.406], - 'scale': '1./255.', - 'order': 'hwc' - } - }, { - 'ToCHWImage': None - }, { - 'KeepKeys': { - 'keep_keys': ['image', 'shape'] - } - }] - postprocess_params = {} - if self.e2e_algorithm == "PGNet": - pre_process_list[0] = { - 'E2EResizeForTest': { - 'max_side_len': args.e2e_limit_side_len, - 'valid_set': 'totaltext' - } - } - postprocess_params['name'] = 'PGPostProcess' - postprocess_params["score_thresh"] = args.e2e_pgnet_score_thresh - postprocess_params["character_dict_path"] = args.e2e_char_dict_path - postprocess_params["valid_set"] = args.e2e_pgnet_valid_set - postprocess_params["mode"] = args.e2e_pgnet_mode - else: - logger.info("unknown e2e_algorithm:{}".format(self.e2e_algorithm)) - sys.exit(0) - - self.preprocess_op = create_operators(pre_process_list) - self.postprocess_op = build_post_process(postprocess_params) - self.predictor, self.input_tensor, self.output_tensors, _ = utility.create_predictor( - args, 'e2e', logger) # paddle.jit.load(args.det_model_dir) - # self.predictor.eval() - - def clip_det_res(self, points, img_height, img_width): - for pno in range(points.shape[0]): - points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1)) - points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1)) - return points - - def filter_tag_det_res_only_clip(self, dt_boxes, image_shape): - img_height, img_width = image_shape[0:2] - dt_boxes_new = [] - for box in dt_boxes: - box = self.clip_det_res(box, img_height, img_width) - dt_boxes_new.append(box) - dt_boxes = np.array(dt_boxes_new) - return dt_boxes - - def __call__(self, img): - - ori_im = img.copy() - data = {'image': img} - data = transform(data, self.preprocess_op) - img, shape_list = data - if img is None: - return None, 0 - img = np.expand_dims(img, axis=0) - shape_list = np.expand_dims(shape_list, axis=0) - img = img.copy() - starttime = time.time() - - if self.use_onnx: - input_dict = {} - input_dict[self.input_tensor.name] = img - outputs = self.predictor.run(self.output_tensors, input_dict) - preds = {} - preds['f_border'] = outputs[0] - preds['f_char'] = outputs[1] - preds['f_direction'] = outputs[2] - preds['f_score'] = outputs[3] - else: - self.input_tensor.copy_from_cpu(img) - self.predictor.run() - outputs = [] - for output_tensor in self.output_tensors: - output = output_tensor.copy_to_cpu() - outputs.append(output) - - preds = {} - if self.e2e_algorithm == 'PGNet': - preds['f_border'] = outputs[0] - preds['f_char'] = outputs[1] - preds['f_direction'] = outputs[2] - preds['f_score'] = outputs[3] - else: - raise NotImplementedError - post_result = self.postprocess_op(preds, shape_list) - points, strs = post_result['points'], post_result['texts'] - dt_boxes = self.filter_tag_det_res_only_clip(points, ori_im.shape) - elapse = time.time() - starttime - return dt_boxes, strs, elapse - - -if __name__ == "__main__": - args = utility.parse_args() - image_file_list = get_image_file_list(args.image_dir) - text_detector = TextE2E(args) - count = 0 - total_time = 0 - draw_img_save = "./inference_results" - if not os.path.exists(draw_img_save): - os.makedirs(draw_img_save) - for image_file in image_file_list: - img, flag = check_and_read_gif(image_file) - if not flag: - img = cv2.imread(image_file) - if img is None: - logger.info("error in loading image:{}".format(image_file)) - continue - points, strs, elapse = text_detector(img) - if count > 0: - total_time += elapse - count += 1 - logger.info("Predict time of {}: {}".format(image_file, elapse)) - src_im = utility.draw_e2e_res(points, strs, image_file) - img_name_pure = os.path.split(image_file)[-1] - img_path = os.path.join(draw_img_save, - "e2e_res_{}".format(img_name_pure)) - cv2.imwrite(img_path, src_im) - logger.info("The visualized image saved in {}".format(img_path)) - if count > 1: - logger.info("Avg Time: {}".format(total_time / (count - 1))) diff --git a/backend/tools/infer/predict_rec.py b/backend/tools/infer/predict_rec.py deleted file mode 100755 index 3664ef2..0000000 --- a/backend/tools/infer/predict_rec.py +++ /dev/null @@ -1,442 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys -from PIL import Image -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import cv2 -import numpy as np -import math -import time -import traceback -import paddle - -import tools.infer.utility as utility -from ppocr.postprocess import build_post_process -from ppocr.utils.logging import get_logger -from ppocr.utils.utility import get_image_file_list, check_and_read_gif - -logger = get_logger() - - -class TextRecognizer(object): - def __init__(self, args): - self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")] - self.rec_batch_num = args.rec_batch_num - self.rec_algorithm = args.rec_algorithm - postprocess_params = { - 'name': 'CTCLabelDecode', - "character_dict_path": args.rec_char_dict_path, - "use_space_char": args.use_space_char - } - if self.rec_algorithm == "SRN": - postprocess_params = { - 'name': 'SRNLabelDecode', - "character_dict_path": args.rec_char_dict_path, - "use_space_char": args.use_space_char - } - elif self.rec_algorithm == "RARE": - postprocess_params = { - 'name': 'AttnLabelDecode', - "character_dict_path": args.rec_char_dict_path, - "use_space_char": args.use_space_char - } - elif self.rec_algorithm == 'NRTR': - postprocess_params = { - 'name': 'NRTRLabelDecode', - "character_dict_path": args.rec_char_dict_path, - "use_space_char": args.use_space_char - } - elif self.rec_algorithm == "SAR": - postprocess_params = { - 'name': 'SARLabelDecode', - "character_dict_path": args.rec_char_dict_path, - "use_space_char": args.use_space_char - } - self.postprocess_op = build_post_process(postprocess_params) - self.predictor, self.input_tensor, self.output_tensors, self.config = \ - utility.create_predictor(args, 'rec', logger) - self.benchmark = args.benchmark - self.use_onnx = args.use_onnx - if args.benchmark: - import auto_log - pid = os.getpid() - gpu_id = utility.get_infer_gpuid() - self.autolog = auto_log.AutoLogger( - model_name="rec", - model_precision=args.precision, - batch_size=args.rec_batch_num, - data_shape="dynamic", - save_path=None, #args.save_log_path, - inference_config=self.config, - pids=pid, - process_name=None, - gpu_ids=gpu_id if args.use_gpu else None, - time_keys=[ - 'preprocess_time', 'inference_time', 'postprocess_time' - ], - warmup=0, - logger=logger) - - def resize_norm_img(self, img, max_wh_ratio): - imgC, imgH, imgW = self.rec_image_shape - if self.rec_algorithm == 'NRTR': - img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - # return padding_im - image_pil = Image.fromarray(np.uint8(img)) - img = image_pil.resize([100, 32], Image.ANTIALIAS) - img = np.array(img) - norm_img = np.expand_dims(img, -1) - norm_img = norm_img.transpose((2, 0, 1)) - return norm_img.astype(np.float32) / 128. - 1. - - assert imgC == img.shape[2] - imgW = int((imgH * max_wh_ratio)) - if self.use_onnx: - w = self.input_tensor.shape[3:][0] - if w is not None and w > 0: - imgW = w - - h, w = img.shape[:2] - ratio = w / float(h) - if math.ceil(imgH * ratio) > imgW: - resized_w = imgW - else: - resized_w = int(math.ceil(imgH * ratio)) - if self.rec_algorithm == 'RARE': - if resized_w > self.rec_image_shape[2]: - resized_w = self.rec_image_shape[2] - imgW = self.rec_image_shape[2] - resized_image = cv2.resize(img, (resized_w, imgH)) - resized_image = resized_image.astype('float32') - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) - padding_im[:, :, 0:resized_w] = resized_image - return padding_im - - def resize_norm_img_svtr(self, img, image_shape): - - imgC, imgH, imgW = image_shape - resized_image = cv2.resize( - img, (imgW, imgH), interpolation=cv2.INTER_LINEAR) - resized_image = resized_image.astype('float32') - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - return resized_image - - def resize_norm_img_srn(self, img, image_shape): - imgC, imgH, imgW = image_shape - - img_black = np.zeros((imgH, imgW)) - im_hei = img.shape[0] - im_wid = img.shape[1] - - if im_wid <= im_hei * 1: - img_new = cv2.resize(img, (imgH * 1, imgH)) - elif im_wid <= im_hei * 2: - img_new = cv2.resize(img, (imgH * 2, imgH)) - elif im_wid <= im_hei * 3: - img_new = cv2.resize(img, (imgH * 3, imgH)) - else: - img_new = cv2.resize(img, (imgW, imgH)) - - img_np = np.asarray(img_new) - img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY) - img_black[:, 0:img_np.shape[1]] = img_np - img_black = img_black[:, :, np.newaxis] - - row, col, c = img_black.shape - c = 1 - - return np.reshape(img_black, (c, row, col)).astype(np.float32) - - def srn_other_inputs(self, image_shape, num_heads, max_text_length): - - imgC, imgH, imgW = image_shape - feature_dim = int((imgH / 8) * (imgW / 8)) - - encoder_word_pos = np.array(range(0, feature_dim)).reshape( - (feature_dim, 1)).astype('int64') - gsrm_word_pos = np.array(range(0, max_text_length)).reshape( - (max_text_length, 1)).astype('int64') - - gsrm_attn_bias_data = np.ones((1, max_text_length, max_text_length)) - gsrm_slf_attn_bias1 = np.triu(gsrm_attn_bias_data, 1).reshape( - [-1, 1, max_text_length, max_text_length]) - gsrm_slf_attn_bias1 = np.tile( - gsrm_slf_attn_bias1, - [1, num_heads, 1, 1]).astype('float32') * [-1e9] - - gsrm_slf_attn_bias2 = np.tril(gsrm_attn_bias_data, -1).reshape( - [-1, 1, max_text_length, max_text_length]) - gsrm_slf_attn_bias2 = np.tile( - gsrm_slf_attn_bias2, - [1, num_heads, 1, 1]).astype('float32') * [-1e9] - - encoder_word_pos = encoder_word_pos[np.newaxis, :] - gsrm_word_pos = gsrm_word_pos[np.newaxis, :] - - return [ - encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, - gsrm_slf_attn_bias2 - ] - - def process_image_srn(self, img, image_shape, num_heads, max_text_length): - norm_img = self.resize_norm_img_srn(img, image_shape) - norm_img = norm_img[np.newaxis, :] - - [encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2] = \ - self.srn_other_inputs(image_shape, num_heads, max_text_length) - - gsrm_slf_attn_bias1 = gsrm_slf_attn_bias1.astype(np.float32) - gsrm_slf_attn_bias2 = gsrm_slf_attn_bias2.astype(np.float32) - encoder_word_pos = encoder_word_pos.astype(np.int64) - gsrm_word_pos = gsrm_word_pos.astype(np.int64) - - return (norm_img, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, - gsrm_slf_attn_bias2) - - def resize_norm_img_sar(self, img, image_shape, - width_downsample_ratio=0.25): - imgC, imgH, imgW_min, imgW_max = image_shape - h = img.shape[0] - w = img.shape[1] - valid_ratio = 1.0 - # make sure new_width is an integral multiple of width_divisor. - width_divisor = int(1 / width_downsample_ratio) - # resize - ratio = w / float(h) - resize_w = math.ceil(imgH * ratio) - if resize_w % width_divisor != 0: - resize_w = round(resize_w / width_divisor) * width_divisor - if imgW_min is not None: - resize_w = max(imgW_min, resize_w) - if imgW_max is not None: - valid_ratio = min(1.0, 1.0 * resize_w / imgW_max) - resize_w = min(imgW_max, resize_w) - resized_image = cv2.resize(img, (resize_w, imgH)) - resized_image = resized_image.astype('float32') - # norm - if image_shape[0] == 1: - resized_image = resized_image / 255 - resized_image = resized_image[np.newaxis, :] - else: - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - resize_shape = resized_image.shape - padding_im = -1.0 * np.ones((imgC, imgH, imgW_max), dtype=np.float32) - padding_im[:, :, 0:resize_w] = resized_image - pad_shape = padding_im.shape - - return padding_im, resize_shape, pad_shape, valid_ratio - - def __call__(self, img_list): - img_num = len(img_list) - # Calculate the aspect ratio of all text bars - width_list = [] - for img in img_list: - width_list.append(img.shape[1] / float(img.shape[0])) - # Sorting can speed up the recognition process - indices = np.argsort(np.array(width_list)) - rec_res = [['', 0.0]] * img_num - batch_num = self.rec_batch_num - st = time.time() - if self.benchmark: - self.autolog.times.start() - for beg_img_no in range(0, img_num, batch_num): - end_img_no = min(img_num, beg_img_no + batch_num) - norm_img_batch = [] - imgC, imgH, imgW = self.rec_image_shape - max_wh_ratio = imgW / imgH - # max_wh_ratio = 0 - for ino in range(beg_img_no, end_img_no): - h, w = img_list[indices[ino]].shape[0:2] - wh_ratio = w * 1.0 / h - max_wh_ratio = max(max_wh_ratio, wh_ratio) - for ino in range(beg_img_no, end_img_no): - - if self.rec_algorithm == "SAR": - norm_img, _, _, valid_ratio = self.resize_norm_img_sar( - img_list[indices[ino]], self.rec_image_shape) - norm_img = norm_img[np.newaxis, :] - valid_ratio = np.expand_dims(valid_ratio, axis=0) - valid_ratios = [] - valid_ratios.append(valid_ratio) - norm_img_batch.append(norm_img) - elif self.rec_algorithm == "SRN": - norm_img = self.process_image_srn( - img_list[indices[ino]], self.rec_image_shape, 8, 25) - encoder_word_pos_list = [] - gsrm_word_pos_list = [] - gsrm_slf_attn_bias1_list = [] - gsrm_slf_attn_bias2_list = [] - encoder_word_pos_list.append(norm_img[1]) - gsrm_word_pos_list.append(norm_img[2]) - gsrm_slf_attn_bias1_list.append(norm_img[3]) - gsrm_slf_attn_bias2_list.append(norm_img[4]) - norm_img_batch.append(norm_img[0]) - elif self.rec_algorithm == "SVTR": - norm_img = self.resize_norm_img_svtr(img_list[indices[ino]], - self.rec_image_shape) - norm_img = norm_img[np.newaxis, :] - norm_img_batch.append(norm_img) - else: - norm_img = self.resize_norm_img(img_list[indices[ino]], - max_wh_ratio) - norm_img = norm_img[np.newaxis, :] - norm_img_batch.append(norm_img) - norm_img_batch = np.concatenate(norm_img_batch) - norm_img_batch = norm_img_batch.copy() - if self.benchmark: - self.autolog.times.stamp() - - if self.rec_algorithm == "SRN": - encoder_word_pos_list = np.concatenate(encoder_word_pos_list) - gsrm_word_pos_list = np.concatenate(gsrm_word_pos_list) - gsrm_slf_attn_bias1_list = np.concatenate( - gsrm_slf_attn_bias1_list) - gsrm_slf_attn_bias2_list = np.concatenate( - gsrm_slf_attn_bias2_list) - - inputs = [ - norm_img_batch, - encoder_word_pos_list, - gsrm_word_pos_list, - gsrm_slf_attn_bias1_list, - gsrm_slf_attn_bias2_list, - ] - if self.use_onnx: - input_dict = {} - input_dict[self.input_tensor.name] = norm_img_batch - outputs = self.predictor.run(self.output_tensors, - input_dict) - preds = {"predict": outputs[2]} - else: - input_names = self.predictor.get_input_names() - for i in range(len(input_names)): - input_tensor = self.predictor.get_input_handle( - input_names[i]) - input_tensor.copy_from_cpu(inputs[i]) - self.predictor.run() - outputs = [] - for output_tensor in self.output_tensors: - output = output_tensor.copy_to_cpu() - outputs.append(output) - if self.benchmark: - self.autolog.times.stamp() - preds = {"predict": outputs[2]} - elif self.rec_algorithm == "SAR": - valid_ratios = np.concatenate(valid_ratios) - inputs = [ - norm_img_batch, - valid_ratios, - ] - if self.use_onnx: - input_dict = {} - input_dict[self.input_tensor.name] = norm_img_batch - outputs = self.predictor.run(self.output_tensors, - input_dict) - preds = outputs[0] - else: - input_names = self.predictor.get_input_names() - for i in range(len(input_names)): - input_tensor = self.predictor.get_input_handle( - input_names[i]) - input_tensor.copy_from_cpu(inputs[i]) - self.predictor.run() - outputs = [] - for output_tensor in self.output_tensors: - output = output_tensor.copy_to_cpu() - outputs.append(output) - if self.benchmark: - self.autolog.times.stamp() - preds = outputs[0] - else: - if self.use_onnx: - input_dict = {} - input_dict[self.input_tensor.name] = norm_img_batch - outputs = self.predictor.run(self.output_tensors, - input_dict) - preds = outputs[0] - else: - self.input_tensor.copy_from_cpu(norm_img_batch) - self.predictor.run() - outputs = [] - for output_tensor in self.output_tensors: - output = output_tensor.copy_to_cpu() - outputs.append(output) - if self.benchmark: - self.autolog.times.stamp() - if len(outputs) != 1: - preds = outputs - else: - preds = outputs[0] - rec_result = self.postprocess_op(preds) - for rno in range(len(rec_result)): - rec_res[indices[beg_img_no + rno]] = rec_result[rno] - if self.benchmark: - self.autolog.times.end(stamp=True) - return rec_res, time.time() - st - - -def main(args): - image_file_list = get_image_file_list(args.image_dir) - text_recognizer = TextRecognizer(args) - valid_image_file_list = [] - img_list = [] - - logger.info( - "In PP-OCRv3, rec_image_shape parameter defaults to '3, 48, 320', " - "if you are using recognition model with PP-OCRv2 or an older version, please set --rec_image_shape='3,32,320" - ) - # warmup 2 times - if args.warmup: - img = np.random.uniform(0, 255, [48, 320, 3]).astype(np.uint8) - for i in range(2): - res = text_recognizer([img] * int(args.rec_batch_num)) - - for image_file in image_file_list: - img, flag = check_and_read_gif(image_file) - if not flag: - img = cv2.imread(image_file) - if img is None: - logger.info("error in loading image:{}".format(image_file)) - continue - valid_image_file_list.append(image_file) - img_list.append(img) - try: - rec_res, _ = text_recognizer(img_list) - - except Exception as E: - logger.info(traceback.format_exc()) - logger.info(E) - exit() - for ino in range(len(img_list)): - logger.info("Predicts of {}:{}".format(valid_image_file_list[ino], - rec_res[ino])) - if args.benchmark: - text_recognizer.autolog.report() - - -if __name__ == "__main__": - main(utility.parse_args()) diff --git a/backend/tools/infer/predict_system.py b/backend/tools/infer/predict_system.py deleted file mode 100755 index 4af3da7..0000000 --- a/backend/tools/infer/predict_system.py +++ /dev/null @@ -1,210 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys -import subprocess - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..'))) - -os.environ["FLAGS_allocator_strategy"] = 'auto_growth' - -import cv2 -import copy -import numpy as np -import json -import time -import logging -from PIL import Image -import tools.infer.utility as utility -import tools.infer.predict_rec as predict_rec -import tools.infer.predict_det as predict_det -import tools.infer.predict_cls as predict_cls -from ppocr.utils.utility import get_image_file_list, check_and_read_gif -from ppocr.utils.logging import get_logger -from tools.infer.utility import draw_ocr_box_txt, get_rotate_crop_image -logger = get_logger() - - -class TextSystem(object): - def __init__(self, args): - if not args.show_log: - logger.setLevel(logging.INFO) - - self.text_detector = predict_det.TextDetector(args) - self.text_recognizer = predict_rec.TextRecognizer(args) - self.use_angle_cls = args.use_angle_cls - self.drop_score = args.drop_score - if self.use_angle_cls: - self.text_classifier = predict_cls.TextClassifier(args) - - self.args = args - self.crop_image_res_index = 0 - - def draw_crop_rec_res(self, output_dir, img_crop_list, rec_res): - os.makedirs(output_dir, exist_ok=True) - bbox_num = len(img_crop_list) - for bno in range(bbox_num): - cv2.imwrite( - os.path.join(output_dir, - f"mg_crop_{bno+self.crop_image_res_index}.jpg"), - img_crop_list[bno]) - logger.debug(f"{bno}, {rec_res[bno]}") - self.crop_image_res_index += bbox_num - - def __call__(self, img, cls=True): - ori_im = img.copy() - dt_boxes, elapse = self.text_detector(img) - - if dt_boxes is None: - return None, None - img_crop_list = [] - - dt_boxes = sorted_boxes(dt_boxes) - - for bno in range(len(dt_boxes)): - tmp_box = copy.deepcopy(dt_boxes[bno]) - img_crop = get_rotate_crop_image(ori_im, tmp_box) - img_crop_list.append(img_crop) - if self.use_angle_cls and cls: - img_crop_list, angle_list, elapse = self.text_classifier( - img_crop_list) - - - rec_res, elapse = self.text_recognizer(img_crop_list) - if self.args.save_crop_res: - self.draw_crop_rec_res(self.args.crop_res_save_dir, img_crop_list, - rec_res) - filter_boxes, filter_rec_res = [], [] - for box, rec_result in zip(dt_boxes, rec_res): - text, score = rec_result - if score >= self.drop_score: - filter_boxes.append(box) - filter_rec_res.append(rec_result) - return filter_boxes, filter_rec_res - - -def sorted_boxes(dt_boxes): - """ - Sort text boxes in order from top to bottom, left to right - args: - dt_boxes(array):detected text boxes with shape [4, 2] - return: - sorted boxes(array) with shape [4, 2] - """ - num_boxes = dt_boxes.shape[0] - sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])) - _boxes = list(sorted_boxes) - - for i in range(num_boxes - 1): - if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \ - (_boxes[i + 1][0][0] < _boxes[i][0][0]): - tmp = _boxes[i] - _boxes[i] = _boxes[i + 1] - _boxes[i + 1] = tmp - return _boxes - - -def main(args): - image_file_list = get_image_file_list(args.image_dir) - image_file_list = image_file_list[args.process_id::args.total_process_num] - text_sys = TextSystem(args) - is_visualize = True - font_path = args.vis_font_path - drop_score = args.drop_score - draw_img_save_dir = args.draw_img_save_dir - os.makedirs(draw_img_save_dir, exist_ok=True) - save_results = [] - - logger.info("In PP-OCRv3, rec_image_shape parameter defaults to '3, 48, 320', " - "if you are using recognition model with PP-OCRv2 or an older version, please set --rec_image_shape='3,32,320") - - # warm up 10 times - if args.warmup: - img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8) - for i in range(10): - res = text_sys(img) - - total_time = 0 - cpu_mem, gpu_mem, gpu_util = 0, 0, 0 - _st = time.time() - count = 0 - for idx, image_file in enumerate(image_file_list): - - img, flag = check_and_read_gif(image_file) - if not flag: - img = cv2.imread(image_file) - if img is None: - logger.debug("error in loading image:{}".format(image_file)) - continue - starttime = time.time() - dt_boxes, rec_res = text_sys(img) - elapse = time.time() - starttime - total_time += elapse - - - res = [{ - "transcription": rec_res[idx][0], - "points": np.array(dt_boxes[idx]).astype(np.int32).tolist(), - } for idx in range(len(dt_boxes))] - save_pred = os.path.basename(image_file) + "\t" + json.dumps( - res, ensure_ascii=False) + "\n" - save_results.append(save_pred) - - if is_visualize: - image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) - boxes = dt_boxes - txts = [rec_res[i][0] for i in range(len(rec_res))] - scores = [rec_res[i][1] for i in range(len(rec_res))] - - draw_img = draw_ocr_box_txt( - image, - boxes, - txts, - scores, - drop_score=drop_score, - font_path=font_path) - if flag: - image_file = image_file[:-3] + "png" - cv2.imwrite( - os.path.join(draw_img_save_dir, os.path.basename(image_file)), - draw_img[:, :, ::-1]) - - - logger.info("The predict total time is {}".format(time.time() - _st)) - if args.benchmark: - text_sys.text_detector.autolog.report() - text_sys.text_recognizer.autolog.report() - - with open(os.path.join(draw_img_save_dir, "system_results.txt"), 'w', encoding='utf-8') as f: - f.writelines(save_results) - - -if __name__ == "__main__": - args = utility.parse_args() - if args.use_mp: - p_list = [] - total_process_num = args.total_process_num - for process_id in range(total_process_num): - cmd = [sys.executable, "-u"] + sys.argv + [ - "--process_id={}".format(process_id), - "--use_mp={}".format(False) - ] - p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stdout) - p_list.append(p) - for p in p_list: - p.wait() - else: - main(args) diff --git a/backend/tools/infer/utility.py b/backend/tools/infer/utility.py deleted file mode 100644 index 33f0a48..0000000 --- a/backend/tools/infer/utility.py +++ /dev/null @@ -1,645 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os -import sys -import platform -import cv2 -import numpy as np -import paddle -from PIL import Image, ImageDraw, ImageFont -import math -from paddle import inference -import time -from ppocr.utils.logging import get_logger - - -def str2bool(v): - return v.lower() in ("true", "t", "1") - - -def init_args(): - parser = argparse.ArgumentParser() - # params for prediction engine - parser.add_argument("--use_gpu", type=str2bool, default=True) - parser.add_argument("--ir_optim", type=str2bool, default=True) - parser.add_argument("--use_tensorrt", type=str2bool, default=False) - parser.add_argument("--min_subgraph_size", type=int, default=15) - parser.add_argument("--precision", type=str, default="fp32") - parser.add_argument("--gpu_mem", type=int, default=500) - - # params for text detector - parser.add_argument("--image_dir", type=str) - parser.add_argument("--det_algorithm", type=str, default='DB') - parser.add_argument("--det_model_dir", type=str) - parser.add_argument("--det_limit_side_len", type=float, default=960) - parser.add_argument("--det_limit_type", type=str, default='max') - - # DB parmas - parser.add_argument("--det_db_thresh", type=float, default=0.3) - parser.add_argument("--det_db_box_thresh", type=float, default=0.6) - parser.add_argument("--det_db_unclip_ratio", type=float, default=1.5) - parser.add_argument("--max_batch_size", type=int, default=10) - parser.add_argument("--use_dilation", type=str2bool, default=False) - parser.add_argument("--det_db_score_mode", type=str, default="fast") - # EAST parmas - parser.add_argument("--det_east_score_thresh", type=float, default=0.8) - parser.add_argument("--det_east_cover_thresh", type=float, default=0.1) - parser.add_argument("--det_east_nms_thresh", type=float, default=0.2) - - # SAST parmas - parser.add_argument("--det_sast_score_thresh", type=float, default=0.5) - parser.add_argument("--det_sast_nms_thresh", type=float, default=0.2) - parser.add_argument("--det_sast_polygon", type=str2bool, default=False) - - # PSE parmas - parser.add_argument("--det_pse_thresh", type=float, default=0) - parser.add_argument("--det_pse_box_thresh", type=float, default=0.85) - parser.add_argument("--det_pse_min_area", type=float, default=16) - parser.add_argument("--det_pse_box_type", type=str, default='quad') - parser.add_argument("--det_pse_scale", type=int, default=1) - - # FCE parmas - parser.add_argument("--scales", type=list, default=[8, 16, 32]) - parser.add_argument("--alpha", type=float, default=1.0) - parser.add_argument("--beta", type=float, default=1.0) - parser.add_argument("--fourier_degree", type=int, default=5) - parser.add_argument("--det_fce_box_type", type=str, default='poly') - - # params for text recognizer - parser.add_argument("--rec_algorithm", type=str, default='CRNN') - parser.add_argument("--rec_model_dir", type=str) - parser.add_argument("--rec_image_shape", type=str, default="3, 48, 320") - parser.add_argument("--rec_batch_num", type=int, default=6) - parser.add_argument("--max_text_length", type=int, default=25) - parser.add_argument( - "--rec_char_dict_path", - type=str, - default="./ppocr/utils/ppocr_keys_v1.txt") - parser.add_argument("--use_space_char", type=str2bool, default=True) - parser.add_argument( - "--vis_font_path", type=str, default="./doc/fonts/simfang.ttf") - parser.add_argument("--drop_score", type=float, default=0.5) - - # params for e2e - parser.add_argument("--e2e_algorithm", type=str, default='PGNet') - parser.add_argument("--e2e_model_dir", type=str) - parser.add_argument("--e2e_limit_side_len", type=float, default=768) - parser.add_argument("--e2e_limit_type", type=str, default='max') - - # PGNet parmas - parser.add_argument("--e2e_pgnet_score_thresh", type=float, default=0.5) - parser.add_argument( - "--e2e_char_dict_path", type=str, default="./ppocr/utils/ic15_dict.txt") - parser.add_argument("--e2e_pgnet_valid_set", type=str, default='totaltext') - parser.add_argument("--e2e_pgnet_mode", type=str, default='fast') - - # params for text classifier - parser.add_argument("--use_angle_cls", type=str2bool, default=False) - parser.add_argument("--cls_model_dir", type=str) - parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192") - parser.add_argument("--label_list", type=list, default=['0', '180']) - parser.add_argument("--cls_batch_num", type=int, default=6) - parser.add_argument("--cls_thresh", type=float, default=0.9) - - parser.add_argument("--enable_mkldnn", type=str2bool, default=False) - parser.add_argument("--cpu_threads", type=int, default=10) - parser.add_argument("--use_pdserving", type=str2bool, default=False) - parser.add_argument("--warmup", type=str2bool, default=False) - - # - parser.add_argument( - "--draw_img_save_dir", type=str, default="./inference_results") - parser.add_argument("--save_crop_res", type=str2bool, default=False) - parser.add_argument("--crop_res_save_dir", type=str, default="./output") - - # multi-process - parser.add_argument("--use_mp", type=str2bool, default=False) - parser.add_argument("--total_process_num", type=int, default=1) - parser.add_argument("--process_id", type=int, default=0) - - parser.add_argument("--benchmark", type=str2bool, default=False) - parser.add_argument("--save_log_path", type=str, default="./log_output/") - - parser.add_argument("--show_log", type=str2bool, default=True) - parser.add_argument("--use_onnx", type=str2bool, default=False) - return parser - - -def parse_args(): - parser = init_args() - return parser.parse_args() - - -def create_predictor(args, mode, logger): - if mode == "det": - model_dir = args.det_model_dir - elif mode == 'cls': - model_dir = args.cls_model_dir - elif mode == 'rec': - model_dir = args.rec_model_dir - elif mode == 'table': - model_dir = args.table_model_dir - else: - model_dir = args.e2e_model_dir - - if model_dir is None: - logger.info("not find {} model file path {}".format(mode, model_dir)) - sys.exit(0) - if args.use_onnx: - import onnxruntime as ort - model_file_path = model_dir - if not os.path.exists(model_file_path): - raise ValueError("not find model file path {}".format( - model_file_path)) - sess = ort.InferenceSession(model_file_path) - return sess, sess.get_inputs()[0], None, None - - else: - model_file_path = model_dir + "/inference.pdmodel" - params_file_path = model_dir + "/inference.pdiparams" - if not os.path.exists(model_file_path): - raise ValueError("not find model file path {}".format( - model_file_path)) - if not os.path.exists(params_file_path): - raise ValueError("not find params file path {}".format( - params_file_path)) - - config = inference.Config(model_file_path, params_file_path) - - if hasattr(args, 'precision'): - if args.precision == "fp16" and args.use_tensorrt: - precision = inference.PrecisionType.Half - elif args.precision == "int8": - precision = inference.PrecisionType.Int8 - else: - precision = inference.PrecisionType.Float32 - else: - precision = inference.PrecisionType.Float32 - - if args.use_gpu: - gpu_id = get_infer_gpuid() - if gpu_id is None: - logger.warning( - "GPU is not found in current device by nvidia-smi. Please check your device or ignore it if run on jetson." - ) - config.enable_use_gpu(args.gpu_mem, 0) - if args.use_tensorrt: - config.enable_tensorrt_engine( - workspace_size=1 << 30, - precision_mode=precision, - max_batch_size=args.max_batch_size, - min_subgraph_size=args.min_subgraph_size) - # skip the minmum trt subgraph - use_dynamic_shape = True - if mode == "det": - min_input_shape = { - "x": [1, 3, 50, 50], - "conv2d_92.tmp_0": [1, 120, 20, 20], - "conv2d_91.tmp_0": [1, 24, 10, 10], - "conv2d_59.tmp_0": [1, 96, 20, 20], - "nearest_interp_v2_1.tmp_0": [1, 256, 10, 10], - "nearest_interp_v2_2.tmp_0": [1, 256, 20, 20], - "conv2d_124.tmp_0": [1, 256, 20, 20], - "nearest_interp_v2_3.tmp_0": [1, 64, 20, 20], - "nearest_interp_v2_4.tmp_0": [1, 64, 20, 20], - "nearest_interp_v2_5.tmp_0": [1, 64, 20, 20], - "elementwise_add_7": [1, 56, 2, 2], - "nearest_interp_v2_0.tmp_0": [1, 256, 2, 2] - } - max_input_shape = { - "x": [1, 3, 1536, 1536], - "conv2d_92.tmp_0": [1, 120, 400, 400], - "conv2d_91.tmp_0": [1, 24, 200, 200], - "conv2d_59.tmp_0": [1, 96, 400, 400], - "nearest_interp_v2_1.tmp_0": [1, 256, 200, 200], - "conv2d_124.tmp_0": [1, 256, 400, 400], - "nearest_interp_v2_2.tmp_0": [1, 256, 400, 400], - "nearest_interp_v2_3.tmp_0": [1, 64, 400, 400], - "nearest_interp_v2_4.tmp_0": [1, 64, 400, 400], - "nearest_interp_v2_5.tmp_0": [1, 64, 400, 400], - "elementwise_add_7": [1, 56, 400, 400], - "nearest_interp_v2_0.tmp_0": [1, 256, 400, 400] - } - opt_input_shape = { - "x": [1, 3, 640, 640], - "conv2d_92.tmp_0": [1, 120, 160, 160], - "conv2d_91.tmp_0": [1, 24, 80, 80], - "conv2d_59.tmp_0": [1, 96, 160, 160], - "nearest_interp_v2_1.tmp_0": [1, 256, 80, 80], - "nearest_interp_v2_2.tmp_0": [1, 256, 160, 160], - "conv2d_124.tmp_0": [1, 256, 160, 160], - "nearest_interp_v2_3.tmp_0": [1, 64, 160, 160], - "nearest_interp_v2_4.tmp_0": [1, 64, 160, 160], - "nearest_interp_v2_5.tmp_0": [1, 64, 160, 160], - "elementwise_add_7": [1, 56, 40, 40], - "nearest_interp_v2_0.tmp_0": [1, 256, 40, 40] - } - min_pact_shape = { - "nearest_interp_v2_26.tmp_0": [1, 256, 20, 20], - "nearest_interp_v2_27.tmp_0": [1, 64, 20, 20], - "nearest_interp_v2_28.tmp_0": [1, 64, 20, 20], - "nearest_interp_v2_29.tmp_0": [1, 64, 20, 20] - } - max_pact_shape = { - "nearest_interp_v2_26.tmp_0": [1, 256, 400, 400], - "nearest_interp_v2_27.tmp_0": [1, 64, 400, 400], - "nearest_interp_v2_28.tmp_0": [1, 64, 400, 400], - "nearest_interp_v2_29.tmp_0": [1, 64, 400, 400] - } - opt_pact_shape = { - "nearest_interp_v2_26.tmp_0": [1, 256, 160, 160], - "nearest_interp_v2_27.tmp_0": [1, 64, 160, 160], - "nearest_interp_v2_28.tmp_0": [1, 64, 160, 160], - "nearest_interp_v2_29.tmp_0": [1, 64, 160, 160] - } - min_input_shape.update(min_pact_shape) - max_input_shape.update(max_pact_shape) - opt_input_shape.update(opt_pact_shape) - elif mode == "rec": - if args.rec_algorithm != "CRNN": - use_dynamic_shape = False - imgH = int(args.rec_image_shape.split(',')[-2]) - min_input_shape = {"x": [1, 3, imgH, 10]} - max_input_shape = {"x": [args.rec_batch_num, 3, imgH, 1536]} - opt_input_shape = {"x": [args.rec_batch_num, 3, imgH, 320]} - elif mode == "cls": - min_input_shape = {"x": [1, 3, 48, 10]} - max_input_shape = {"x": [args.rec_batch_num, 3, 48, 1024]} - opt_input_shape = {"x": [args.rec_batch_num, 3, 48, 320]} - else: - use_dynamic_shape = False - if use_dynamic_shape: - config.set_trt_dynamic_shape_info( - min_input_shape, max_input_shape, opt_input_shape) - - else: - config.disable_gpu() - if hasattr(args, "cpu_threads"): - config.set_cpu_math_library_num_threads(args.cpu_threads) - else: - # default cpu threads as 10 - config.set_cpu_math_library_num_threads(10) - if args.enable_mkldnn: - # cache 10 different shapes for mkldnn to avoid memory leak - config.set_mkldnn_cache_capacity(10) - config.enable_mkldnn() - if args.precision == "fp16": - config.enable_mkldnn_bfloat16() - # enable memory optim - config.enable_memory_optim() - config.disable_glog_info() - config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") - config.delete_pass("matmul_transpose_reshape_fuse_pass") - if mode == 'table': - config.delete_pass("fc_fuse_pass") # not supported for table - config.switch_use_feed_fetch_ops(False) - config.switch_ir_optim(True) - - # create predictor - predictor = inference.create_predictor(config) - input_names = predictor.get_input_names() - for name in input_names: - input_tensor = predictor.get_input_handle(name) - output_tensors = get_output_tensors(args, mode, predictor) - return predictor, input_tensor, output_tensors, config - - -def get_output_tensors(args, mode, predictor): - output_names = predictor.get_output_names() - output_tensors = [] - if mode == "rec" and args.rec_algorithm == "CRNN": - output_name = 'softmax_0.tmp_0' - if output_name in output_names: - return [predictor.get_output_handle(output_name)] - else: - for output_name in output_names: - output_tensor = predictor.get_output_handle(output_name) - output_tensors.append(output_tensor) - else: - for output_name in output_names: - output_tensor = predictor.get_output_handle(output_name) - output_tensors.append(output_tensor) - return output_tensors - - -def get_infer_gpuid(): - sysstr = platform.system() - if sysstr == "Windows": - return 0 - - if not paddle.fluid.core.is_compiled_with_rocm(): - cmd = "env | grep CUDA_VISIBLE_DEVICES" - else: - cmd = "env | grep HIP_VISIBLE_DEVICES" - env_cuda = os.popen(cmd).readlines() - if len(env_cuda) == 0: - return 0 - else: - gpu_id = env_cuda[0].strip().split("=")[1] - return int(gpu_id[0]) - - -def draw_e2e_res(dt_boxes, strs, img_path): - src_im = cv2.imread(img_path) - for box, str in zip(dt_boxes, strs): - box = box.astype(np.int32).reshape((-1, 1, 2)) - cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2) - cv2.putText( - src_im, - str, - org=(int(box[0, 0, 0]), int(box[0, 0, 1])), - fontFace=cv2.FONT_HERSHEY_COMPLEX, - fontScale=0.7, - color=(0, 255, 0), - thickness=1) - return src_im - - -def draw_text_det_res(dt_boxes, img_path): - src_im = cv2.imread(img_path) - for box in dt_boxes: - box = np.array(box).astype(np.int32).reshape(-1, 2) - cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2) - return src_im - - -def resize_img(img, input_size=600): - """ - resize img and limit the longest side of the image to input_size - """ - img = np.array(img) - im_shape = img.shape - im_size_max = np.max(im_shape[0:2]) - im_scale = float(input_size) / float(im_size_max) - img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale) - return img - - -def draw_ocr(image, - boxes, - txts=None, - scores=None, - drop_score=0.5, - font_path="./doc/fonts/simfang.ttf"): - """ - Visualize the results of OCR detection and recognition - args: - image(Image|array): RGB image - boxes(list): boxes with shape(N, 4, 2) - txts(list): the texts - scores(list): txxs corresponding scores - drop_score(float): only scores greater than drop_threshold will be visualized - font_path: the path of font which is used to draw text - return(array): - the visualized img - """ - if scores is None: - scores = [1] * len(boxes) - box_num = len(boxes) - for i in range(box_num): - if scores is not None and (scores[i] < drop_score or - math.isnan(scores[i])): - continue - box = np.reshape(np.array(boxes[i]), [-1, 1, 2]).astype(np.int64) - image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2) - if txts is not None: - img = np.array(resize_img(image, input_size=600)) - txt_img = text_visual( - txts, - scores, - img_h=img.shape[0], - img_w=600, - threshold=drop_score, - font_path=font_path) - img = np.concatenate([np.array(img), np.array(txt_img)], axis=1) - return img - return image - - -def draw_ocr_box_txt(image, - boxes, - txts, - scores=None, - drop_score=0.5, - font_path="./doc/simfang.ttf"): - h, w = image.height, image.width - img_left = image.copy() - img_right = Image.new('RGB', (w, h), (255, 255, 255)) - - import random - - random.seed(0) - draw_left = ImageDraw.Draw(img_left) - draw_right = ImageDraw.Draw(img_right) - for idx, (box, txt) in enumerate(zip(boxes, txts)): - if scores is not None and scores[idx] < drop_score: - continue - color = (random.randint(0, 255), random.randint(0, 255), - random.randint(0, 255)) - draw_left.polygon(box, fill=color) - draw_right.polygon( - [ - box[0][0], box[0][1], box[1][0], box[1][1], box[2][0], - box[2][1], box[3][0], box[3][1] - ], - outline=color) - box_height = math.sqrt((box[0][0] - box[3][0])**2 + (box[0][1] - box[3][ - 1])**2) - box_width = math.sqrt((box[0][0] - box[1][0])**2 + (box[0][1] - box[1][ - 1])**2) - if box_height > 2 * box_width: - font_size = max(int(box_width * 0.9), 10) - font = ImageFont.truetype(font_path, font_size, encoding="utf-8") - cur_y = box[0][1] - for c in txt: - char_size = font.getsize(c) - draw_right.text( - (box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font) - cur_y += char_size[1] - else: - font_size = max(int(box_height * 0.8), 10) - font = ImageFont.truetype(font_path, font_size, encoding="utf-8") - draw_right.text( - [box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font) - img_left = Image.blend(image, img_left, 0.5) - img_show = Image.new('RGB', (w * 2, h), (255, 255, 255)) - img_show.paste(img_left, (0, 0, w, h)) - img_show.paste(img_right, (w, 0, w * 2, h)) - return np.array(img_show) - - -def str_count(s): - """ - Count the number of Chinese characters, - a single English character and a single number - equal to half the length of Chinese characters. - args: - s(string): the input of string - return(int): - the number of Chinese characters - """ - import string - count_zh = count_pu = 0 - s_len = len(s) - en_dg_count = 0 - for c in s: - if c in string.ascii_letters or c.isdigit() or c.isspace(): - en_dg_count += 1 - elif c.isalpha(): - count_zh += 1 - else: - count_pu += 1 - return s_len - math.ceil(en_dg_count / 2) - - -def text_visual(texts, - scores, - img_h=400, - img_w=600, - threshold=0., - font_path="./doc/simfang.ttf"): - """ - create new blank img and draw txt on it - args: - texts(list): the text will be draw - scores(list|None): corresponding score of each txt - img_h(int): the height of blank img - img_w(int): the width of blank img - font_path: the path of font which is used to draw text - return(array): - """ - if scores is not None: - assert len(texts) == len( - scores), "The number of txts and corresponding scores must match" - - def create_blank_img(): - blank_img = np.ones(shape=[img_h, img_w], dtype=np.int8) * 255 - blank_img[:, img_w - 1:] = 0 - blank_img = Image.fromarray(blank_img).convert("RGB") - draw_txt = ImageDraw.Draw(blank_img) - return blank_img, draw_txt - - blank_img, draw_txt = create_blank_img() - - font_size = 20 - txt_color = (0, 0, 0) - font = ImageFont.truetype(font_path, font_size, encoding="utf-8") - - gap = font_size + 5 - txt_img_list = [] - count, index = 1, 0 - for idx, txt in enumerate(texts): - index += 1 - if scores[idx] < threshold or math.isnan(scores[idx]): - index -= 1 - continue - first_line = True - while str_count(txt) >= img_w // font_size - 4: - tmp = txt - txt = tmp[:img_w // font_size - 4] - if first_line: - new_txt = str(index) + ': ' + txt - first_line = False - else: - new_txt = ' ' + txt - draw_txt.text((0, gap * count), new_txt, txt_color, font=font) - txt = tmp[img_w // font_size - 4:] - if count >= img_h // gap - 1: - txt_img_list.append(np.array(blank_img)) - blank_img, draw_txt = create_blank_img() - count = 0 - count += 1 - if first_line: - new_txt = str(index) + ': ' + txt + ' ' + '%.3f' % (scores[idx]) - else: - new_txt = " " + txt + " " + '%.3f' % (scores[idx]) - draw_txt.text((0, gap * count), new_txt, txt_color, font=font) - # whether add new blank img or not - if count >= img_h // gap - 1 and idx + 1 < len(texts): - txt_img_list.append(np.array(blank_img)) - blank_img, draw_txt = create_blank_img() - count = 0 - count += 1 - txt_img_list.append(np.array(blank_img)) - if len(txt_img_list) == 1: - blank_img = np.array(txt_img_list[0]) - else: - blank_img = np.concatenate(txt_img_list, axis=1) - return np.array(blank_img) - - -def base64_to_cv2(b64str): - import base64 - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def draw_boxes(image, boxes, scores=None, drop_score=0.5): - if scores is None: - scores = [1] * len(boxes) - for (box, score) in zip(boxes, scores): - if score < drop_score: - continue - box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64) - image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2) - return image - - -def get_rotate_crop_image(img, points): - ''' - img_height, img_width = img.shape[0:2] - left = int(np.min(points[:, 0])) - right = int(np.max(points[:, 0])) - top = int(np.min(points[:, 1])) - bottom = int(np.max(points[:, 1])) - img_crop = img[top:bottom, left:right, :].copy() - points[:, 0] = points[:, 0] - left - points[:, 1] = points[:, 1] - top - ''' - assert len(points) == 4, "shape of points must be 4*2" - img_crop_width = int( - max( - np.linalg.norm(points[0] - points[1]), - np.linalg.norm(points[2] - points[3]))) - img_crop_height = int( - max( - np.linalg.norm(points[0] - points[3]), - np.linalg.norm(points[1] - points[2]))) - pts_std = np.float32([[0, 0], [img_crop_width, 0], - [img_crop_width, img_crop_height], - [0, img_crop_height]]) - M = cv2.getPerspectiveTransform(points, pts_std) - dst_img = cv2.warpPerspective( - img, - M, (img_crop_width, img_crop_height), - borderMode=cv2.BORDER_REPLICATE, - flags=cv2.INTER_CUBIC) - dst_img_height, dst_img_width = dst_img.shape[0:2] - if dst_img_height * 1.0 / dst_img_width >= 1.5: - dst_img = np.rot90(dst_img) - return dst_img - - -def check_gpu(use_gpu): - if use_gpu and not paddle.is_compiled_with_cuda(): - use_gpu = False - return use_gpu - - -if __name__ == '__main__': - pass diff --git a/gui.py b/gui.py index f63aa10..fada385 100644 --- a/gui.py +++ b/gui.py @@ -21,9 +21,6 @@ from backend.tools.common_tools import is_image_file class SubtitleRemoverGUI: def __init__(self): - # 初次运行检查运行环境是否正常 - from paddle import utils - utils.run_check() self.font = 'Arial 10' self.theme = 'LightBrown12' sg.theme(self.theme) diff --git a/requirements.txt b/requirements.txt index f578821..09b04c4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,3 +19,7 @@ numpy==1.23.1 protobuf==3.20.0 av==11.0.0 einops==0.7.0 +paddleocr==2.10.0 +paddle2onnx==1.3.1 +onnxruntime-gpu==1.20.1 +onnxruntime-directml==1.20.1; sys_platform == 'win32'