Spaces:
Sleeping
Sleeping
| # Copyright (c) OpenMMLab. All rights reserved. | |
| import json | |
| import os | |
| import tempfile | |
| import unittest | |
| from mmocr.utils import (check_integrity, get_md5, is_archive, list_files, | |
| list_from_file, list_to_file) | |
| lists = [ | |
| [], | |
| [' '], | |
| ['\t'], | |
| ['a'], | |
| [1], | |
| [1.], | |
| ['a', 'b'], | |
| ['a', 1, 1.], | |
| [1, 1., 'a'], | |
| ['啊', '啊啊'], | |
| ['選択', 'noël', 'Информацией', 'ÄÆä'], | |
| ] | |
| dicts = [ | |
| [{ | |
| 'text': [] | |
| }], | |
| [{ | |
| 'text': [' '] | |
| }], | |
| [{ | |
| 'text': ['\t'] | |
| }], | |
| [{ | |
| 'text': ['a'] | |
| }], | |
| [{ | |
| 'text': [1] | |
| }], | |
| [{ | |
| 'text': [1.] | |
| }], | |
| [{ | |
| 'text': ['a', 'b'] | |
| }], | |
| [{ | |
| 'text': ['a', 1, 1.] | |
| }], | |
| [{ | |
| 'text': [1, 1., 'a'] | |
| }], | |
| [{ | |
| 'text': ['啊', '啊啊'] | |
| }], | |
| [{ | |
| 'text': ['選択', 'noël', 'Информацией', 'ÄÆä'] | |
| }], | |
| ] | |
| def test_list_to_file(): | |
| with tempfile.TemporaryDirectory() as tmpdirname: | |
| # test txt | |
| for i, lines in enumerate(lists): | |
| filename = f'{tmpdirname}/{i}.txt' | |
| list_to_file(filename, lines) | |
| lines2 = [ | |
| line.rstrip('\r\n') | |
| for line in open(filename, encoding='utf-8').readlines() | |
| ] | |
| lines = list(map(str, lines)) | |
| assert len(lines) == len(lines2) | |
| assert all(line1 == line2 for line1, line2 in zip(lines, lines2)) | |
| # test jsonl | |
| for i, lines in enumerate(dicts): | |
| filename = f'{tmpdirname}/{i}.jsonl' | |
| list_to_file(filename, [json.dumps(line) for line in lines]) | |
| lines2 = [ | |
| json.loads(line.rstrip('\r\n'))['text'] | |
| for line in open(filename, encoding='utf-8').readlines() | |
| ][0] | |
| lines = list(lines[0]['text']) | |
| assert len(lines) == len(lines2) | |
| assert all(line1 == line2 for line1, line2 in zip(lines, lines2)) | |
| def test_list_from_file(): | |
| with tempfile.TemporaryDirectory() as tmpdirname: | |
| # test txt file | |
| for i, lines in enumerate(lists): | |
| filename = f'{tmpdirname}/{i}.txt' | |
| with open(filename, 'w', encoding='utf-8') as f: | |
| f.writelines(f'{line}\n' for line in lines) | |
| lines2 = list_from_file(filename, encoding='utf-8') | |
| lines = list(map(str, lines)) | |
| assert len(lines) == len(lines2) | |
| assert all(line1 == line2 for line1, line2 in zip(lines, lines2)) | |
| # test jsonl file | |
| for i, lines in enumerate(dicts): | |
| filename = f'{tmpdirname}/{i}.jsonl' | |
| with open(filename, 'w', encoding='utf-8') as f: | |
| f.writelines(f'{line}\n' for line in lines) | |
| lines2 = list_from_file(filename, encoding='utf-8') | |
| lines = list(map(str, lines)) | |
| assert len(lines) == len(lines2) | |
| assert all(line1 == line2 for line1, line2 in zip(lines, lines2)) | |
| class TestIsArchive(unittest.TestCase): | |
| def setUp(self) -> None: | |
| self.zip = 'data/annotations_123.zip' | |
| self.tar = 'data/img.abc.tar' | |
| self.targz = 'data/img12345_.tar.gz' | |
| self.rar = '/m/abc/t.rar' | |
| self.dir = '/a/b/c/' | |
| def test_is_archive(self): | |
| # test zip | |
| self.assertTrue(is_archive(self.zip)) | |
| # test tar | |
| self.assertTrue(is_archive(self.tar)) | |
| # test tar.gz | |
| self.assertTrue(is_archive(self.targz)) | |
| # test rar | |
| self.assertFalse(is_archive(self.rar)) | |
| # test dir | |
| self.assertFalse(is_archive(self.dir)) | |
| class TestCheckIntegrity(unittest.TestCase): | |
| def setUp(self) -> None: | |
| # Do not use text files for tests, because the md5 value of text files | |
| # is different on different platforms (CR - CRLF) | |
| self.file1 = ('tests/data/det_toy_dataset/imgs/test/img_2.jpg', | |
| '52b28b5dfc92d9027e70ec3ff95d8702') | |
| self.file2 = ('tests/data/det_toy_dataset/imgs/test/img_1.jpg', | |
| 'abc123') | |
| self.file3 = ('abc/abc.jpg', 'abc123') | |
| def test_check_integrity(self): | |
| file, md5 = self.file1 | |
| self.assertTrue(check_integrity(file, md5)) | |
| file, md5 = self.file2 | |
| self.assertFalse(check_integrity(file, md5)) | |
| self.assertTrue(check_integrity(file, None)) | |
| file, md5 = self.file3 | |
| self.assertFalse(check_integrity(file, md5)) | |
| class TextGetMD5(unittest.TestCase): | |
| def setUp(self) -> None: | |
| # Do not use text files for tests, because the md5 value of text files | |
| # is different on different platforms (CR - CRLF) | |
| self.file1 = ('tests/data/det_toy_dataset/imgs/test/img_2.jpg', | |
| '52b28b5dfc92d9027e70ec3ff95d8702') | |
| self.file2 = ('tests/data/det_toy_dataset/imgs/test/img_1.jpg', | |
| 'abc123') | |
| def test_get_md5(self): | |
| file, md5 = self.file1 | |
| self.assertEqual(get_md5(file), md5) | |
| file, md5 = self.file2 | |
| self.assertNotEqual(get_md5(file), md5) | |
| class TestListFiles(unittest.TestCase): | |
| def setUp(self) -> None: | |
| self.path = 'tests/data/det_toy_dataset/imgs/test' | |
| def test_check_integrity(self): | |
| suffix = 'jpg' | |
| files = list_files(self.path, suffix) | |
| for file in os.listdir(self.path): | |
| if file.endswith(suffix): | |
| self.assertIn(os.path.join(self.path, file), files) | |